use a better wordlist source

2022-01-27 19:14:29 +00:00 · 2022-01-27 19:14:29 +00:00 · 26647ad351
commit 26647ad351
parent 6f4b1291b0
7 changed files with 794789 additions and 15 deletions
--- a/.gitmodules
+++ b/.gitmodules
@ -1,3 +0,0 @@
-[submodule "english-words"]
-	path = english-words
-	url = https://github.com/dwyl/english-words
--- a/1_1_all_fullalpha.txt
+++ b/1_1_all_fullalpha.txt
--- a/6
+++ b/6
@ -1,5 +1,9 @@
+WORD_LENGTH=5
+MIN_FREQUENCY=9
+SOURCE_WORDLIST=1_1_all_fullalpha.txt
+
 wordlist.js: english-words
-	scripts/gen_wordlist.py english-words/words.txt 5 > wordlist.js
+	./scripts/gen_wordlist.py ${SOURCE_WORDLIST} ${WORD_LENGTH} ${MIN_FREQUENCY} > wordlist.js

 english-words: .SUBMODULES

--- a/1
+++ b/1
@ -1 +0,0 @@
-Subproject commit 22d7c41119076750a96fca2acd664ed994cc0a75
--- a/index.html
+++ b/index.html
@ -3,22 +3,16 @@
 <meta charset="utf-8">
 <meta name="viewport" content="width=device-width, initial-scale=1">
 <link rel="stylesheet" type="text/css" href="styles.css">
-<title>word guessing game: infinite</title>
+<title>words</title>
 </head>
 <body>
-    <h1>word guessing game</h1>
+    <h1>words</h1>
    
    <div id="game_container">
        you need javascript enabled to play this game
-        <div id="letter_grid">
-
-        </div>
    </div>
    
-    <p>
-      built with ❤ and adequate amounts of care by
-      <a href="https://alv.cx">alv</a>
-    </p>
+    <p> built with ❤ and adequate amounts of care by <a href="https://alv.cx">alv</a> </p>
    
    <script type="application/javascript" src="wordlist.js"></script>
    <script type="application/javascript" src="game.js"></script>
--- a/3
+++ b/3
@ -1 +1,4 @@
 you know what it is
+
+wordlist `1_1_all_fullalpha.txt` is from https://ucrel.lancs.ac.uk/bncfreq/flists.html
+
--- a/scripts/gen_wordlist.py
+++ b/scripts/gen_wordlist.py
@ -11,15 +11,21 @@ def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('wordlist')
    parser.add_argument('word_length', type=int)
+    parser.add_argument('frequency_min', type=int)
    return parser.parse_args()


 def main(args):
     """ Entry point for script

     Reads the tab-separated frequency list named by ``args.wordlist`` and
     prints a JavaScript ``wordlist = [...]`` assignment on stdout. Each row is
     lower-cased; the word is taken from column index 1 and its frequency from
     column index 4. A word is kept when its frequency is strictly greater
     than ``args.frequency_min``, it is purely alphabetic, and it is exactly
     ``args.word_length`` characters long. Kept words are emitted in ascending
     frequency order (ties keep file order).

     The parsed arguments and the number of emitted words go to stderr so they
     do not end up in the generated file.

     Raises IndexError for a non-blank row with fewer than five columns and
     ValueError when the frequency column is not an integer.

     Returns 0.
     """
     entries = []
     with open(args.wordlist) as fp:
         for raw_line in fp:
             line = raw_line.rstrip('\n')
             # Skip blank lines only. Stripping the whole file instead would
             # also eat the leading tab of the first row and shift its columns.
             if not line.strip():
                 continue
             fields = line.lower().split('\t')
             entries.append((fields[1], int(fields[4])))

     kept = [
         (word, frequency)
         for word, frequency in entries
         if frequency > args.frequency_min
         and word.isalpha()
         and len(word) == args.word_length
     ]

     # Sort while the frequency is still attached; once only the strings are
     # left, index 1 would be the second letter of the word, not the frequency.
     kept.sort(key=lambda entry: entry[1])
     words = [word for word, _ in kept]

     print(f"wordlist = {json.dumps(words)}")
     print(f"{args}", file=sys.stderr)
     print(f"{len(words)=}", file=sys.stderr)

     return 0
				`@ -1 +0,0 @@`
				`Subproject commit 22d7c41119076750a96fca2acd664ed994cc0a75`