use a better wordlist source

This commit is contained in:
2022-01-27 19:14:29 +00:00
parent 6f4b1291b0
commit 26647ad351
7 changed files with 794789 additions and 15 deletions

View File

@@ -11,15 +11,21 @@ def get_args():
parser = argparse.ArgumentParser()
parser.add_argument('wordlist')
parser.add_argument('word_length', type=int)
parser.add_argument('frequency_min', type=int)
return parser.parse_args()
def main(args):
    """ Entry point for script

    Reads the tab-separated file at ``args.wordlist``, taking field 1 of
    each line as the word (lower-cased) and field 4 as its integer
    frequency.  Keeps the purely alphabetic words of exactly
    ``args.word_length`` characters whose frequency is strictly greater
    than ``args.frequency_min``, orders them by ascending frequency, and
    prints them to stdout as ``wordlist = <JSON array>``.  The parsed
    arguments and the number of words kept are reported on stderr.

    Blank lines are skipped.  A non-blank line with fewer than five fields
    raises IndexError; a non-integer frequency field raises ValueError.

    Returns 0.
    """
    entries = []
    with open(args.wordlist, encoding="utf-8") as fp:
        for line in fp:
            # Tolerate an empty file, a trailing newline or stray blank lines.
            if not line.strip():
                continue
            fields = line.rstrip('\n').split('\t')
            entries.append((fields[1].lower(), int(fields[4])))

    kept = [
        (word, frequency)
        for word, frequency in entries
        if frequency > args.frequency_min
        and word.isalpha()
        and len(word) == args.word_length
    ]
    # Sort while the frequency is still attached; sorting the bare strings
    # with key word[1] would order them by their second letter instead.
    kept.sort(key=lambda entry: entry[1])
    words = [word for word, _ in kept]

    print(f"wordlist = {json.dumps(words)}")
    print(f"{args}", file=sys.stderr)
    print(f"{len(words)=}", file=sys.stderr)
    return 0