#!/usr/bin/env python3
"""Build a filtered word list from a tab-separated word table.

Each non-blank row of the word table is split on tabs; column 1 is taken as
the word, column 2 as its type and column 4 as its integer frequency
(columns are 0-indexed).  Words that pass the length, frequency, alphabetic
and type filters are printed to stdout as ``wordlist = [...]`` (JSON array);
diagnostics go to stderr.
"""

import argparse
import json
import sys


def get_args():
    """
    Get command line arguments

    Returns the parsed namespace with ``wordlist``, ``word_length``,
    ``frequency_min`` and ``allowedtypelist`` attributes.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'wordlist',
        help='tab-separated word table (word, type, frequency in columns 1, 2, 4)')
    parser.add_argument(
        'word_length', type=int,
        help='exact length a word must have to be kept')
    parser.add_argument(
        'frequency_min', type=int,
        help='minimum frequency a word must have to be kept')
    parser.add_argument(
        'allowedtypelist',
        help='file listing the allowed word types, one per line')
    return parser.parse_args()


def _read_allowed_types(path):
    """Return the set of allowed word types listed one per line in *path*."""
    with open(path) as fp:
        # Rows of the word table are lower-cased before comparison, so the
        # allowed types are normalised the same way.  Blank lines (e.g. the
        # one produced by a trailing newline) are dropped so that they do not
        # silently whitelist rows with an empty type.
        return {line.strip().lower() for line in fp if line.strip()}


def _read_entries(path):
    """Return ``(word, frequency, type)`` tuples for every row of *path*.

    Rows are lower-cased.  Blank lines are skipped.  A row with too few
    columns raises ``IndexError``; a non-integer frequency raises
    ``ValueError``.
    """
    entries = []
    with open(path) as fp:
        for line in fp:
            if not line.strip():
                continue
            # Only the line terminator is removed: a leading tab (empty
            # first column) must be kept or the column indices would shift.
            fields = line.rstrip('\r\n').lower().split('\t')
            entries.append((fields[1], int(fields[4]), fields[2]))
    return entries


def _select_words(entries, word_length, frequency_min, allowed_types):
    """Return the unique words from *entries* that pass all filters.

    The result is ordered by ascending frequency; when a word occurs in
    several rows, the position of its lowest-frequency matching row wins.
    """
    matching = [
        (word, frequency)
        for word, frequency, word_type in entries
        if frequency >= frequency_min
        and word.isalpha()
        and len(word) == word_length
        and word_type in allowed_types
    ]
    # Sort on the frequency while it is still available (list.sort is
    # stable, so ties keep file order), then drop duplicates without
    # disturbing that order so the output is deterministic.
    matching.sort(key=lambda item: item[1])
    return list(dict.fromkeys(word for word, _ in matching))


def main(args):
    """
    Entry point for script

    Reads the allowed types and the word table named in *args*, prints the
    filtered word list to stdout and diagnostics (the arguments, the number
    of selected words and every type seen in the table) to stderr.
    Returns 0.
    """
    allowed_types = _read_allowed_types(args.allowedtypelist)
    entries = _read_entries(args.wordlist)

    # Every type present in the table, regardless of filtering; reported on
    # stderr below.
    types = {word_type for _, _, word_type in entries}

    words = _select_words(
        entries, args.word_length, args.frequency_min, allowed_types)

    print(f"wordlist = {json.dumps(words)}")

    print(f"{args}", file=sys.stderr)
    print(f"{len(words)=}", file=sys.stderr)
    print(f"{types=}", file=sys.stderr)

    return 0


if __name__ == '__main__':
    try:
        sys.exit(main(get_args()))
    except KeyboardInterrupt:
        sys.exit(0)