diff --git a/pymacro b/pymacro
index 4a7e640..08f6c73 100755
--- a/pymacro
+++ b/pymacro
@@ -2,6 +2,7 @@
 import sys
 import re
+import word_utils as words
 
 SEPARATORS = [' ', '\n', 's']
 
@@ -17,35 +18,132 @@ def get_args():
     return parser.parse_args()
 
+def pluralize(word):
+    """
+    Return the plural form of a word.
+    """
+    # TODO add more complex plural forms
+    if word[-1] in 'sxz' or word[-2:] in ['ch', 'sh']:
+        return word + 'es'
+    if word[-1] == 'y':
+        if len(word) > 1 and words.is_consonant(word[-2]):
+            return word[:-1] + 'ies'
+    if word[-1] == 'o':
+        if len(word) > 1 and words.is_consonant(word[-2]):
+            return word + 'es'
+    if word[-1] == 'f':
+        return word[:-1] + 'ves'
+    return word + 's'
+
+def upper_check(token, word):
+    """
+    Match the capitalisation of `word` to that of the original `token`.
+    """
+    all_caps = True
+
+    for letter in token:
+        if letter.islower():
+            all_caps = False
+            break
+
+    if all_caps:
+        return word.upper()
+
+    if token[0].isupper():
+        return word[:1].upper() + word[1:]
+
+    return word
+
+def process(tokens, macros):
+    """
+    Expand macros in tokenized input and return the result as a string.
+    """
+    output = tokens
+
+    for line_number, line in enumerate(tokens):
+        for token_number, token in enumerate(line):
+            if len(token) == 0:
+                continue
+
+            # punctuation would break matching, so it is stripped here and
+            # re-added at the end
+            # right now only full stops are stripped because I'm lazy
+            # TODO add better end stripping
+            full_stopped = False
+            if token[-1] == '.':
+                full_stopped = True
+                token = token[:-1]
+
+            # if no macro matches (or the token is not a macro at all),
+            # the value is left unchanged
+            value = token
+
+            for macro in macros:
+                if macro[0].lower() == token.lower():
+                    value = macro[1]
+                    break
+                elif macro[0].lower() + 's' == token.lower():
+                    value = pluralize(macro[1])
+                    break
+
+            output[line_number][token_number] = upper_check(token, value)
+
+            # re-add the full stop/period
+            if full_stopped:
+                output[line_number][token_number] += '.'
+
+    for line_number, line in enumerate(output):
+        output[line_number] = ' '.join(line)
+
+    output = '\n'.join(output)
+
+    return output
+
+def tokenize(text):
+    """
+    Return a list of token lines from a string (convert file contents to
+    the format expected by `process`).
+    """
+    return [x.split(' ') for x in text.split('\n')]
+
+
+def get_macros(text):
+    """
+    Turn a string into a list of (pattern, replacement) macro tuples.
+    """
+    # turn the input into an unvalidated list of candidate macros
+    macros = [x.split('\t') for x in text.split('\n')]
+
+    # keep only well-formed macros (popping entries while iterating, as the
+    # old code did, skips the element after each removal)
+    return [tuple(macro) for macro in macros if len(macro) == 2]
+
 def main(args):
     """
     Entry point for script
     """
+
+    # get macros
+    macros = []
     for macro_file in args.macros:
         with open(macro_file) as file:
-            macros += [x.split('\t') for x in file.read().split('\n')]
+            macros += get_macros(file.read())
 
-    for index, macro in enumerate(macros):
-        if len(macro) != 2:
-            macros.pop(index)
-            continue
-        macros[index] = tuple(macros[index])
-
-    macros.sort(key=lambda tup: len(tup[0]), reverse=True)
-
+    # get tokens (file contents)
     with open(args.input) as file:
-        input = file.read()
+        tokens = tokenize(file.read())
 
-    for macro in macros:
-        pattern, repl = macro
-        print(macro)
-        for separator in SEPARATORS:
-            input = input.replace(pattern + separator, repl + separator)
-
-    output = input
+    # get output
+    output = process(tokens, macros)
 
+    # save output
     with open(args.output, 'w+') as file:
         file.write(output)
-        print(output)
 
     return 0
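For reviewers, a rough end-to-end trace of the new pipeline. This is a sketch
only: `pymacro` has no `.py` extension, so the functions are assumed to be in
scope (or the file imported under a hypothetical module name), and the macro
definitions are made up for illustration.

    # hypothetical macro definitions: one 'pattern<TAB>replacement' per line
    macros = get_macros('btw\tby the way\nfyi\tfor your information')

    # tokenize splits the input into lines of space-separated tokens
    tokens = tokenize('BTW this works. Fyis are handled too.')

    print(process(tokens, macros))
    # BY THE WAY this works. For your informations are handled too.

Note how 'BTW' keeps its all-caps form, 'Fyis' both matches the plural branch
and keeps its leading capital, and the trailing full stops survive stripping.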
diff --git a/word_utils.py b/word_utils.py
new file mode 100644
index 0000000..631d854
--- /dev/null
+++ b/word_utils.py
@@ -0,0 +1,15 @@
+def is_consonant(letter):
+    """
+    Return True if `letter` is not a vowel (validation happens in is_vowel).
+    """
+    return not is_vowel(letter)
+
+def is_vowel(letter):
+    """
+    Return True if `letter` is one of 'aeiou', case-insensitively.
+    """
+    if not isinstance(letter, str):
+        raise TypeError("Argument 'letter' must be type str")
+    if len(letter) != 1:
+        raise ValueError("Argument 'letter' must be a single character")
+    return letter.lower() in 'aeiou'
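Finally, a few spot checks of `pluralize`, one per branch (again assuming the
function is importable alongside word_utils); irregular forms such as
'fish' or 'child' are still covered by the TODO:

    for word in ['box', 'church', 'city', 'day', 'potato', 'leaf']:
        print(word, '->', pluralize(word))
    # box -> boxes
    # church -> churches
    # city -> cities
    # day -> days
    # potato -> potatoes
    # leaf -> leaves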