From 45ec3d6418d49c3f4b8029b6a2af3c0f3ca9fe20 Mon Sep 17 00:00:00 2001 From: Alvie Rahman Date: Thu, 27 Jan 2022 20:56:18 +0000 Subject: [PATCH] remove names and other weird words from wordlist --- Makefile | 5 +++-- allowed_types | 8 ++++++++ game.js | 1 + scripts/gen_wordlist.py | 15 ++++++++++++--- 4 files changed, 24 insertions(+), 5 deletions(-) create mode 100644 allowed_types diff --git a/Makefile b/Makefile index c9b8d59..7d141d3 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,10 @@ WORD_LENGTH=5 -MIN_FREQUENCY=3 +MIN_FREQUENCY=1 SOURCE_WORDLIST=1_1_all_fullalpha.txt +ALLOWED_TYPELIST=allowed_types wordlist.js: - ./scripts/gen_wordlist.py ${SOURCE_WORDLIST} ${WORD_LENGTH} ${MIN_FREQUENCY} > wordlist.js + ./scripts/gen_wordlist.py ${SOURCE_WORDLIST} ${WORD_LENGTH} ${MIN_FREQUENCY} ${ALLOWED_TYPELIST} > wordlist.js clean: rm -rf wordlist.js diff --git a/allowed_types b/allowed_types new file mode 100644 index 0000000..9d62075 --- /dev/null +++ b/allowed_types @@ -0,0 +1,8 @@ +adj +prep +adv +num +int +verb +pron +conj diff --git a/game.js b/game.js index dd7c1ad..650b432 100644 --- a/game.js +++ b/game.js @@ -158,6 +158,7 @@ function setNextLetter(key) { } } + function endGame(won) { gameCompleted = true diff --git a/scripts/gen_wordlist.py b/scripts/gen_wordlist.py index 3f8e23b..317970a 100755 --- a/scripts/gen_wordlist.py +++ b/scripts/gen_wordlist.py @@ -12,15 +12,23 @@ def get_args(): parser.add_argument('wordlist') parser.add_argument('word_length', type=int) parser.add_argument('frequency_min', type=int) + parser.add_argument('allowedtypelist') return parser.parse_args() def main(args): """ Entry point for script """ - with open(args.wordlist) as fp: - words = [ (word[1], int(word[4])) for word in [ word.lower().split('\t') for word in fp.read().strip().split('\n') ] ] - words = [ word[0] for word in words if word[1] > args.frequency_min and word[0].isalpha() and len(word[0]) == args.word_length ] + with open(args.allowedtypelist) as fp: + allowed_types = fp.read().split('\n') + + types = set() + with open(args.wordlist) as fp: + words = [ (word[1], int(word[4]), word[2]) for word in [ word.lower().split('\t') for word in fp.read().strip().split('\n') ] ] + + [ types.add(word[2]) for word in words ] + + words = [ word[0] for word in words if word[1] >= args.frequency_min and word[0].isalpha() and len(word[0]) == args.word_length and word[2] in allowed_types ] words.sort(key=lambda word: word[1]) # remove duplicates @@ -28,6 +36,7 @@ def main(args): print(f"wordlist = {json.dumps(words)}") print(f"{args}", file=sys.stderr) print(f"{len(words)=}", file=sys.stderr) + print(f"{types=}", file=sys.stderr) return 0