mirror of
https://github.com/alvierahman90/otfmacros.git
synced 2025-05-19 18:39:41 +00:00
Add more betterer version of pymacro
This commit is contained in:
parent
a13c7c7c23
commit
4fd09033aa
134
pymacro
134
pymacro
@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
import sys
|
import sys
|
||||||
import re
|
import re
|
||||||
|
import word_utils as words
|
||||||
|
|
||||||
SEPARATORS = [' ', '\n', 's']
|
SEPARATORS = [' ', '\n', 's']
|
||||||
|
|
||||||
@ -17,35 +18,132 @@ def get_args():
|
|||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def pluralize(word):
    """
    Return the plural form of a word.

    Handles the common English patterns: sibilant endings
    (-s/-x/-z/-ch/-sh) -> -es, consonant + y -> -ies,
    consonant + o -> -es, -f -> -ves, everything else -> -s.
    """
    # TODO add more complex plural forms (e.g. -fe -> -ves, irregulars)
    if not word:
        # an empty string has no plural form; return it unchanged
        # (the original crashed on word[-1] here)
        return word
    if word[-1] in 'sxz' or word[-2:] in ['ch', 'sh']:
        return word + 'es'
    # the two-letter endings below need at least two characters;
    # the original raised IndexError on word[-2] for 1-letter words
    if len(word) > 1:
        # membership in 'aeiou' is equivalent to word_utils.is_consonant
        # being False, without the cross-module call
        if word[-1] == 'y' and word[-2] not in 'aeiou':
            return word[:-1] + 'ies'
        if word[-1] == 'o' and word[-2] not in 'aeiou':
            return word + 'es'
    if word[-1] == 'f':
        return word[:-1] + 'ves'
    return word + 's'
|
||||||
|
|
||||||
|
def upper_check(token, word):
    """
    Mirror the capitalization of *token* onto *word*.

    An all-caps token (no lowercase letters) yields word.upper(); a token
    with a capitalized first letter yields word with its first letter
    capitalized; otherwise *word* is returned unchanged.
    """
    # a token containing no lowercase letter counts as all caps
    all_caps = True
    for letter in token:
        if letter.islower():
            all_caps = False
            break

    if all_caps:
        return word.upper()

    # The original tested token[1], which raised IndexError on one-letter
    # tokens and missed a capitalized first letter (e.g. "Btw"); the first
    # character is the one that carries the capitalization.
    if token and token[0].isupper():
        return word[:1].upper() + word[1:]

    return word
|
||||||
|
|
||||||
|
def process(tokens, macros):
    """
    Expand macros in a token grid and return the rebuilt text as one string.

    tokens -- list of lines, each a list of word tokens (see `tokenize`)
    macros -- list of (pattern, replacement) pairs (see `get_macros`)

    Matching is case-insensitive.  A token equal to a pattern plus a
    trailing 's' expands to the pluralized replacement.  The matched
    token's capitalization (via `upper_check`) and a trailing full stop
    are carried over to the replacement.
    """
    # Build a fresh structure instead of writing into `tokens`: the
    # original aliased `output = tokens` and mutated the caller's lists.
    output = []

    for line in tokens:
        new_line = []

        for token in line:
            if len(token) == 0:
                new_line.append(token)
                continue

            # punctuation is ignored so it is stripped till later;
            # right now only full stops are stripped
            # TODO add better end stripping
            full_stopped = token[-1] == '.'
            if full_stopped:
                token = token[:-1]

            # if no macro is found (or the token is not a macro at all)
            # the token passes through unchanged
            value = token
            for macro in macros:
                if macro[0].lower() == token.lower():
                    value = macro[1]
                    break
                if macro[0].lower() + 's' == token.lower():
                    # plural use of a macro: pluralize the replacement too
                    value = pluralize(macro[1])
                    break

            word = upper_check(token, value)

            # re-adding the full stop/period
            if full_stopped:
                word += '.'

            new_line.append(word)

        output.append(' '.join(new_line))

    return '\n'.join(output)
|
||||||
|
|
||||||
|
def tokenize(input):
    """
    Convert file contents into the token grid consumed by `process`:
    one inner list per line, holding that line's space-separated tokens.
    """
    grid = []
    for line in input.split('\n'):
        grid.append(line.split(' '))
    return grid
|
||||||
|
|
||||||
|
|
||||||
|
def get_macros(input):
    """
    Turn a string into a list of (pattern, replacement) macro tuples.

    Each non-empty input line should hold a pattern and a replacement
    separated by a single tab; malformed lines are dropped.
    """
    # turn input into an unvalidated list of candidate macros
    candidates = [line.split('\t') for line in input.split('\n')]

    # Validate: keep only exact pattern/replacement pairs.  The original
    # popped invalid entries while iterating with enumerate, which skips
    # the element following each removal and can leave malformed entries;
    # building a new list avoids that bug.
    return [tuple(macro) for macro in candidates if len(macro) == 2]
|
||||||
|
|
||||||
def main(args):
    """
    Entry point for script.

    Reads macro definitions from each file in args.macros, expands them
    in args.input, writes the result to args.output and echoes it to
    stdout.  Returns 0 on success.
    """
    # get macros: each macro file contributes validated
    # (pattern, replacement) pairs
    macros = []
    for macro_file in args.macros:
        with open(macro_file) as file:
            macros += get_macros(file.read())

    # Longest patterns first, so when a token could match both a pattern
    # and a shorter pattern + plural 's', the more specific one wins.
    macros.sort(key=lambda tup: len(tup[0]), reverse=True)

    # get tokens (file contents)
    with open(args.input) as file:
        tokens = tokenize(file.read())

    # get output
    output = process(tokens, macros)

    # show and save output
    with open(args.output, 'w+') as file:
        file.write(output)

    print(output)

    return 0
|
||||||
|
14
word_utils.py
Normal file
14
word_utils.py
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
def is_consonant(letter):
    """
    Report whether *letter* (a one-character string) is not a lowercase
    English vowel.

    Raises ValueError for non-string or non-single-character input.
    """
    # is_vowel performs the identical isinstance/length validation and
    # raises the same ValueError messages, so no need to repeat it here.
    return not is_vowel(letter)
|
||||||
|
|
||||||
|
def is_vowel(letter):
    """
    Report whether *letter* (a one-character string) is a lowercase
    English vowel.

    Raises ValueError for non-string or non-single-character input.
    """
    if isinstance(letter, str):
        if len(letter) == 1:
            return letter in ('a', 'e', 'i', 'o', 'u')
        raise ValueError("Argument 'letter' must be 1 long")
    raise ValueError("Argument 'letter' must be type str")
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user