2022-01-27 18:36:45 +00:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
import sys
|
|
|
|
import json
|
|
|
|
|
2022-01-30 22:05:51 +00:00
|
|
|
class WordListItem:
    """A single record parsed from one tab-separated line of the word list.

    Attributes:
        word: Field 1 of the line when it is purely alphabetic, otherwise
            field 3.
        pos: Field 2 of the line, stored as-is.
        frequency: Field 4 of the line converted to ``int``.

    Field 0 is not used. NOTE(review): field 3 looks like an alternative
    spelling/lemma column -- confirm against the word list format.
    """

    def __init__(self, line):
        """Split *line* on tabs and populate the record.

        Raises:
            IndexError: if the line has fewer than five tab-separated fields.
            ValueError: if field 4 is not an integer literal.
        """
        columns = line.split('\t')
        primary, tag, alternate, count = (
            columns[1], columns[2], columns[3], columns[4]
        )

        # Fall back to the alternate column when the primary one contains
        # anything other than letters.
        if primary.isalpha():
            self.word = primary
        else:
            self.word = alternate
        self.pos = tag
        self.frequency = int(count)

    def __repr__(self):
        """Return a debug representation listing all three attributes."""
        return f"<WordListItem {self.word=} {self.pos=} {self.frequency=}>"
|
|
|
|
|
2022-01-27 18:36:45 +00:00
|
|
|
|
|
|
|
def get_args():
    """ Get command line arguments

    Four positional arguments are required, in this order:
    ``wordlist``, ``word_length`` (int), ``frequency_min`` (int) and
    ``allowedtypelist``.

    Returns the ``argparse.Namespace`` produced from ``sys.argv``.
    """
    import argparse

    # (name, extra add_argument keywords) in command-line order.
    positional_specs = (
        ('wordlist', {}),
        ('word_length', {'type': int}),
        ('frequency_min', {'type': int}),
        ('allowedtypelist', {}),
    )

    cli = argparse.ArgumentParser()
    for arg_name, arg_options in positional_specs:
        cli.add_argument(arg_name, **arg_options)
    return cli.parse_args()
|
|
|
|
|
|
|
|
|
|
|
|
def main(args):
    """ Entry point for script

    Reads the allowed part-of-speech tags from ``args.allowedtypelist``
    (one per line) and the tab-separated records from ``args.wordlist``,
    then keeps every word that

    * has a frequency of at least ``args.frequency_min``,
    * is purely alphabetic,
    * is exactly ``args.word_length`` characters long, and
    * has a part-of-speech tag listed in the allowed-types file.

    The surviving words are de-duplicated, sorted and printed to stdout as
    ``wordlist = <JSON array>``. Diagnostics (the arguments, the number of
    words, every tag seen, and three spot checks) go to stderr.

    Returns:
        0, used by the caller as the process exit status.
    """
    with open(args.allowedtypelist) as fp:
        # A set keeps the same members as the split list but makes the
        # per-word membership test below O(1).
        # NOTE(review): the word list is lower-cased when read, while these
        # entries are compared as written -- they need to be lower-case to
        # match.
        allowed_types = set(fp.read().split('\n'))

    with open(args.wordlist) as fp:
        # Blank lines have no fields and would make WordListItem raise
        # IndexError, so they are skipped (this also covers an empty file).
        entries = [
            WordListItem(line)
            for line in fp.read().strip().lower().split('\n')
            if line.strip()
        ]

    # Every part-of-speech tag present in the input, for the diagnostics.
    types = {entry.pos for entry in entries}

    # De-duplicate first and sort afterwards: converting a sorted list to a
    # set and back would discard the order again.
    words = sorted({
        entry.word
        for entry in entries
        if entry.frequency >= args.frequency_min
        and entry.word.isalpha()
        and len(entry.word) == args.word_length
        and entry.pos in allowed_types
    })

    print(f"wordlist = {json.dumps(words)}")
    print(f"{args=}", file=sys.stderr)
    print(f"{len(words)=}", file=sys.stderr)
    print(f"{types=}", file=sys.stderr)
    # Spot checks for a few specific words in the result.
    print(f"{'cares' in words=}", file=sys.stderr)
    print(f"{'ideas' in words=}", file=sys.stderr)
    print(f"{'prose' in words=}", file=sys.stderr)

    return 0
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Run the script and hand main()'s return value to the shell as the exit
    # status; a Ctrl-C is treated as a clean exit (status 0) rather than a
    # traceback.
    try:
        exit_status = main(get_args())
    except KeyboardInterrupt:
        exit_status = 0
    sys.exit(exit_status)
|