#!/usr/bin/env python3
"""Filter a tab-separated word/frequency list into answer and input word lists.

The script reads a word list and a file of allowed type tags, keeps the
alphabetic words of the requested length whose tag is allowed, and prints
``wordlist = <JSON object>`` on stdout.  Diagnostics go to stderr.
"""
import argparse
import json
import sys


class WordListItem:
    """One parsed row of the tab-separated source word list.

    Attributes:
        word: column 1 when it is purely alphabetic, otherwise column 3.
        pos: column 2; the type tag compared against the allowed-type list.
        freq: column 4 converted to ``int``.
    """

    def __init__(self, line):
        """Parse a single tab-separated *line* (columns are 0-indexed).

        Raises:
            IndexError: if the line has fewer than five columns.
            ValueError: if column 4 is not an integer.
        """
        fields = line.split('\t')
        primary = fields[1]
        # Fall back to column 3 when the primary spelling contains anything
        # other than letters (the caller filters again on isalpha()).
        self.word = primary if primary.isalpha() else fields[3]
        self.pos = fields[2]
        self.freq = int(fields[4])

    def __repr__(self):
        return (
            f"<{type(self).__name__} word={self.word!r} "
            f"pos={self.pos!r} freq={self.freq}>"
        )


def get_args():
    """Parse and return the command line arguments.

    All five arguments are positional and required.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'wordlist',
        help="tab-separated source word list")
    parser.add_argument(
        'word_length', type=int,
        help="exact length a word must have to be kept")
    parser.add_argument(
        'valid_answer_freq_min', type=int,
        help="minimum frequency for a word to be a valid answer")
    parser.add_argument(
        'valid_input_freq_min', type=int,
        help="minimum frequency for a word to be a valid input")
    parser.add_argument(
        'allowedtypelist',
        help="file listing the allowed type tags, one per line")
    return parser.parse_args()


def _read_allowed_types(path):
    """Return the set of allowed type tags listed one per line in *path*.

    Blank lines are ignored so that a trailing newline does not whitelist
    rows whose tag column is empty.  Tags are lower-cased because the word
    list itself is lower-cased before it is parsed.
    """
    with open(path) as fp:
        return {entry.strip().lower() for entry in fp if entry.strip()}


def _read_items(path):
    """Return a WordListItem for every non-blank line of *path*, lower-cased."""
    with open(path) as fp:
        # Only the newline is removed: stripping all whitespace could eat a
        # leading or trailing tab (an empty column) and shift the fields.
        return [
            WordListItem(line.rstrip('\n').lower())
            for line in fp
            if line.strip()
        ]


def _unique(values):
    """Return *values* as a list with duplicates removed, first-seen order kept."""
    return list(dict.fromkeys(values))


def main(args):
    """Entry point for the script.

    Args:
        args: namespace with ``wordlist``, ``word_length``,
            ``valid_answer_freq_min``, ``valid_input_freq_min`` and
            ``allowedtypelist`` attributes (see :func:`get_args`).

    Returns:
        0, for use as the process exit status.
    """
    allowed_types = _read_allowed_types(args.allowedtypelist)

    src_words = [
        item
        for item in _read_items(args.wordlist)
        if item.word.isalpha()
        and len(item.word) == args.word_length
        and item.pos in allowed_types
    ]
    # Type tags that actually occur among the kept words (diagnostics only).
    types = {item.pos for item in src_words}

    # The same spelling can occur on several rows (e.g. under different
    # tags), so each output list is de-duplicated.
    words = {
        'valid_answers': _unique(
            item.word for item in src_words
            if item.freq >= args.valid_answer_freq_min
        ),
        'valid_inputs': _unique(
            item.word for item in src_words
            if item.freq >= args.valid_input_freq_min
        ),
    }

    print(f"wordlist = {json.dumps(words)}")

    print(f"{args=}", file=sys.stderr)
    print(f"{len(words['valid_answers'])=}", file=sys.stderr)
    print(f"{len(words['valid_inputs'])=}", file=sys.stderr)
    print(f"{types=}", file=sys.stderr)

    return 0


if __name__ == '__main__':
    try:
        sys.exit(main(get_args()))
    except KeyboardInterrupt:
        sys.exit(0)