Add an improved version of pymacro

2025-07-12 12:12:25 +00:00 · 2019-01-18 17:34:43 +00:00 · 2019-01-18 17:34:43 +00:00 · 4fd09033aa
commit 4fd09033aa
parent a13c7c7c23
2 changed files with 130 additions and 18 deletions
--- a/134
+++ b/134
@ -2,6 +2,7 @@

 import sys
 import re
+import word_utils as words

 SEPARATORS = [' ', '\n', 's']

@ -17,35 +18,132 @@ def get_args():
    return parser.parse_args()


+def pluralize(word):
+    """
+    Returns the plural form of a word.
+
+    Only regular English patterns are handled; anything that matches no
+    rule simply gets an 's' appended. Endings are matched
+    case-insensitively, but the suffix that is added is always lowercase.
+    An empty string is returned unchanged.
+    """
+    # TODO add more complex plural forms
+    if not word:
+        return word
+
+    lowered = word.lower()
+
+    # bus -> buses, box -> boxes, church -> churches, dish -> dishes
+    if lowered.endswith(('s', 'x', 'z', 'ch', 'sh')):
+        return word + 'es'
+
+    # the rules below look at the letter before the last one, so
+    # one-letter words must skip them and fall through to the plain 's'
+    has_previous = len(lowered) > 1
+
+    # city -> cities, but day -> days
+    if lowered[-1] == 'y' and has_previous:
+        if words.is_consonant(lowered[-2]):
+            return word[:-1] + 'ies'
+    # hero -> heroes, but radio -> radios
+    if lowered[-1] == 'o' and has_previous:
+        if words.is_consonant(lowered[-2]):
+            return word + 'es'
+    if lowered[-1] == 'f' and has_previous:
+        # cliff -> cliffs, but leaf -> leaves
+        if lowered.endswith('ff'):
+            return word + 's'
+        return word[:-1] + 'ves'
+    return word + 's'
+
+def upper_check(token, word):
+    """
+    Return `word` with the capitalisation pattern of `token` applied.
+
+    - If `token` contains no lowercase letter, `word` is fully upper-cased.
+    - Otherwise, if `token` starts with an uppercase letter, the first
+      letter of `word` is upper-cased.
+    - Otherwise `word` is returned unchanged.
+    """
+    # a token without a single lowercase letter (the empty token included)
+    # counts as all caps
+    if not any(letter.islower() for letter in token):
+        return word.upper()
+
+    # the token has at least one (lowercase) character here, so indexing
+    # its first character is safe; the *first* letter decides capitalisation
+    if token[0].isupper():
+        return word[:1].upper() + word[1:]
+
+    return word
+
+def process(tokens, macros):
+    """
+    Expand macros in tokenized text and return the result as one string.
+
+    `tokens` is a list of lines, each line being a list of token strings.
+    `macros` is a sequence of (name, replacement) pairs. A token that equals
+    a macro name (ignoring case) is replaced by the replacement; a token
+    that equals the name plus 's' is replaced by the pluralized replacement.
+    The first macro that matches wins. The capitalisation of the original
+    token is re-applied to the result with `upper_check`.
+
+    Tokens within a line are joined with single spaces and lines with
+    newlines. `tokens` itself is not modified.
+    """
+    # build fresh lists rather than writing into `tokens`, so the caller's
+    # data is left untouched
+    output_lines = []
+
+    for line in tokens:
+        processed = []
+        for token in line:
+            if len(token) == 0:
+                processed.append(token)
+                continue
+
+            # punctuation is ignored so it is stripped till later
+            # right now only full stops are stripped because I'm lazy
+            # TODO add better end stripping
+            full_stopped = token[-1] == '.'
+            if full_stopped:
+                token = token[:-1]
+
+            # if no macro is found (or if it is not a macro at all), the
+            # value will not be changed
+            value = token
+            lowered = token.lower()
+
+            for macro in macros:
+                name = macro[0].lower()
+                if name == lowered:
+                    value = macro[1]
+                    break
+                if name + 's' == lowered:
+                    value = pluralize(macro[1])
+                    break
+
+            value = upper_check(token, value)
+
+            # re-adding the full stop/period
+            if full_stopped:
+                value += '.'
+
+            processed.append(value)
+
+        output_lines.append(' '.join(processed))
+
+    return '\n'.join(output_lines)
+
+def tokenize(input):
+    """
+    Split a string into lines (on newlines) and each line into tokens (on
+    single spaces), returning a list of token lists, one per line.
+    """
+    rows = []
+    for row in input.split('\n'):
+        rows.append(row.split(' '))
+    return rows
+
+
+def get_macros(input):
+    """
+    Turn a string into a list of tuples of macros
+
+    Each line of `input` is split on tab characters. Lines that yield
+    exactly two fields become a (name, replacement) tuple; every other line
+    (blank lines, lines without a tab, lines with several tabs) is dropped.
+    """
+    macros = []
+
+    for line in input.split('\n'):
+        fields = line.split('\t')
+
+        # validate macros: collect the good ones into a new list instead of
+        # popping bad ones from a list that is being iterated, which would
+        # skip the entry following every removed one
+        if len(fields) == 2:
+            macros.append(tuple(fields))
+
+    return macros
+
 def main(args):
    """ Entry point for script """
+
+    # collect the macros from every macro file listed in args.macros
+
    macros = []
    for macro_file in args.macros:
        with open(macro_file) as file:
-            macros += [x.split('\t') for x in file.read().split('\n')]
+            macros += get_macros(file.read())

-    for index, macro in enumerate(macros):
-        if len(macro) != 2:
-            macros.pop(index)
-            continue
-        macros[index] = tuple(macros[index])
-
-    macros.sort(key=lambda tup: len(tup[0]), reverse=True)
-    
+    # read args.input and split its contents into lines of tokens
    with open(args.input) as file:
-        input = file.read()
+        tokens = tokenize(file.read())

-    for macro in macros:
-        pattern, repl = macro
-        print(macro)
-        for separator in SEPARATORS:
-            input = input.replace(pattern + separator, repl + separator)
-
-    output = input
+    # expand the macros in the tokens to build the output text
+    output = process(tokens, macros)

+    # write the output to args.output, then echo it to stdout
    with open(args.output, 'w+') as file:
        file.write(output)
-
    print(output)

    return 0
--- a/word_utils.py
+++ b/word_utils.py
@ -0,0 +1,14 @@
+def is_consonant(letter):
+    """
+    Return True if `letter` is an alphabetic character that `is_vowel` does
+    not report as a vowel.
+
+    Raises ValueError if `letter` is not a str or is not exactly one
+    character long.
+    """
+    if not isinstance(letter, str):
+        raise ValueError("Argument 'letter' must be type str")
+    if len(letter) != 1:
+        raise ValueError("Argument 'letter' must be 1 long")
+    # digits, whitespace and punctuation are neither vowels nor consonants
+    if not letter.isalpha():
+        return False
+    return not is_vowel(letter)
+
+def is_vowel(letter):
+    """
+    Return True if `letter` is one of a, e, i, o, u, in either case.
+
+    Raises ValueError if `letter` is not a str or is not exactly one
+    character long.
+    """
+    if not isinstance(letter, str):
+        raise ValueError("Argument 'letter' must be type str")
+    if len(letter) != 1:
+        raise ValueError("Argument 'letter' must be 1 long")
+    # compare in lowercase so that 'A' counts as a vowel just like 'a'
+    return letter.lower() in 'aeiou'
+