Remove names and other unusual words from the wordlist
This commit is contained in:
parent
e7bb0d2bc1
commit
45ec3d6418
5
Makefile
5
Makefile
@ -1,9 +1,10 @@
|
|||||||
WORD_LENGTH=5
|
WORD_LENGTH=5
|
||||||
MIN_FREQUENCY=3
|
MIN_FREQUENCY=1
|
||||||
SOURCE_WORDLIST=1_1_all_fullalpha.txt
|
SOURCE_WORDLIST=1_1_all_fullalpha.txt
|
||||||
|
ALLOWED_TYPELIST=allowed_types
|
||||||
|
|
||||||
wordlist.js:
|
wordlist.js:
|
||||||
./scripts/gen_wordlist.py ${SOURCE_WORDLIST} ${WORD_LENGTH} ${MIN_FREQUENCY} > wordlist.js
|
./scripts/gen_wordlist.py ${SOURCE_WORDLIST} ${WORD_LENGTH} ${MIN_FREQUENCY} ${ALLOWED_TYPELIST} > wordlist.js
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf wordlist.js
|
rm -rf wordlist.js
|
||||||
|
8
allowed_types
Normal file
8
allowed_types
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
adj
|
||||||
|
prep
|
||||||
|
adv
|
||||||
|
num
|
||||||
|
int
|
||||||
|
verb
|
||||||
|
pron
|
||||||
|
conj
|
1
game.js
1
game.js
@ -158,6 +158,7 @@ function setNextLetter(key) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
function endGame(won) {
|
function endGame(won) {
|
||||||
gameCompleted = true
|
gameCompleted = true
|
||||||
|
|
||||||
|
@ -12,15 +12,23 @@ def get_args():
|
|||||||
parser.add_argument('wordlist')
|
parser.add_argument('wordlist')
|
||||||
parser.add_argument('word_length', type=int)
|
parser.add_argument('word_length', type=int)
|
||||||
parser.add_argument('frequency_min', type=int)
|
parser.add_argument('frequency_min', type=int)
|
||||||
|
parser.add_argument('allowedtypelist')
|
||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
def main(args):
|
def main(args):
|
||||||
""" Entry point for script """
|
""" Entry point for script """
|
||||||
with open(args.wordlist) as fp:
|
|
||||||
words = [ (word[1], int(word[4])) for word in [ word.lower().split('\t') for word in fp.read().strip().split('\n') ] ]
|
|
||||||
|
|
||||||
words = [ word[0] for word in words if word[1] > args.frequency_min and word[0].isalpha() and len(word[0]) == args.word_length ]
|
with open(args.allowedtypelist) as fp:
|
||||||
|
allowed_types = fp.read().split('\n')
|
||||||
|
|
||||||
|
types = set()
|
||||||
|
with open(args.wordlist) as fp:
|
||||||
|
words = [ (word[1], int(word[4]), word[2]) for word in [ word.lower().split('\t') for word in fp.read().strip().split('\n') ] ]
|
||||||
|
|
||||||
|
[ types.add(word[2]) for word in words ]
|
||||||
|
|
||||||
|
words = [ word[0] for word in words if word[1] >= args.frequency_min and word[0].isalpha() and len(word[0]) == args.word_length and word[2] in allowed_types ]
|
||||||
|
|
||||||
words.sort(key=lambda word: word[1])
|
words.sort(key=lambda word: word[1])
|
||||||
# remove duplicates
|
# remove duplicates
|
||||||
@ -28,6 +36,7 @@ def main(args):
|
|||||||
print(f"wordlist = {json.dumps(words)}")
|
print(f"wordlist = {json.dumps(words)}")
|
||||||
print(f"{args}", file=sys.stderr)
|
print(f"{args}", file=sys.stderr)
|
||||||
print(f"{len(words)=}", file=sys.stderr)
|
print(f"{len(words)=}", file=sys.stderr)
|
||||||
|
print(f"{types=}", file=sys.stderr)
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user