#!/usr/bin/env python3
"""Expand macros in a text file.

Macro files contain one macro per line with tab-separated fields:
``name<TAB>replacement`` or ``name<TAB>replacement<TAB>plural``.  Every
space-separated token of the input that equals a macro name (ignoring case,
optionally followed by ``s`` and/or a trailing full stop) is replaced by the
macro's replacement text, adjusted to the capitalisation of the token.
"""
import sys
import re

import word_utils as words

# NOTE(review): not referenced anywhere in this file; the 's' entry looks
# like it may have been meant as a whitespace escape -- confirm before use.
SEPARATORS = [' ', '\n', 's']


def get_args():
    """
    Get command line arguments

    Note that ``-m/--macros`` uses ``action="append"`` together with a
    non-empty default, so files given on the command line are loaded *in
    addition to* the default ``macros`` file, not instead of it.
    """
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--macros", default=["macros"], action="append")
    parser.add_argument("-s", "--silent", default=False, action="store_true")
    parser.add_argument("input")
    parser.add_argument("output")
    return parser.parse_args()


def pluralize(word, macro=None):
    """
    Returns the plural form of a word.

    If `macro` has a third field, that field is the explicit plural and is
    returned as is.  Otherwise a handful of English spelling rules are
    applied to `word`.  An empty `word` is returned unchanged.
    """
    if macro and len(macro) == 3:
        return macro[2]

    # nothing to pluralize; also protects the indexing below
    if not word:
        return word

    # TODO add more complex plural forms
    if word.endswith(('s', 'x', 'z', 'ch', 'sh')):
        return word + 'es'

    # one-letter words have no preceding letter to inspect
    after_consonant = len(word) > 1 and words.is_consonant(word[-2])
    last = word[-1]
    if last == 'y' and after_consonant:
        return word[:-1] + 'ies'
    if last == 'o' and after_consonant:
        return word + 'es'
    if last == 'f':
        return word[:-1] + 'ves'
    return word + 's'


def upper_check(token, word):
    """
    Return `word` with the capitalisation style of `token`.

    * `token` contains no lowercase letters -> `word` fully upper-cased
    * `token` starts with an uppercase letter -> first letter of `word`
      upper-cased
    * otherwise -> `word` unchanged
    """
    if not any(letter.islower() for letter in token):
        return word.upper()
    # the *first* letter decides whether the token is capitalised; reaching
    # this line means the token has a lowercase letter, so it is not empty
    if token[0].isupper():
        return word[:1].upper() + word[1:]
    return word


def _build_lookup(macros):
    """
    Map every lowercased macro name and its `s`-plural to (macro, is_plural).

    `setdefault` keeps the first entry for a key, so when several macros
    could match the same token the earliest macro in `macros` wins, exactly
    as a sequential scan over `macros` would decide.
    """
    lookup = {}
    for macro in macros:
        # entries without a replacement field cannot be expanded
        if len(macro) < 2:
            continue
        name = macro[0].lower()
        lookup.setdefault(name, (macro, False))
        lookup.setdefault(name + 's', (macro, True))
    return lookup


def _expand_token(token, lookup):
    """ Return `token` with its macro (if any) expanded. """
    if not token:
        return token

    # punctuation is ignored so it is stripped till later
    # right now only full stops are stripped
    # TODO add better end stripping
    full_stopped = token.endswith('.')
    core = token[:-1] if full_stopped else token
    if not core:
        # the token was just a full stop
        return token

    hit = lookup.get(core.lower())
    if hit is None:
        # not a macro: the token is passed through untouched
        return token

    macro, is_plural = hit
    value = pluralize(macro[1], macro=macro) if is_plural else macro[1]
    expanded = upper_check(core, value)

    # re-adding the full stop/period
    return expanded + '.' if full_stopped else expanded


def process(tokens, macros):
    """
    Expand all macros in `tokens` and return the resulting text.

    `tokens` is a list of lines, each a list of token strings (see
    `tokenize`); `macros` is a list of macro tuples (see `get_macros`).
    Tokens are re-joined with single spaces and lines with newlines.
    `tokens` itself is not modified.
    """
    # built once so that each token costs a single dictionary lookup
    lookup = _build_lookup(macros)
    return '\n'.join(
        ' '.join(_expand_token(token, lookup) for token in line)
        for line in tokens
    )


def tokenize(input):
    """
    Return a list of token lists from a string: one inner list per line,
    each holding that line's space-separated tokens (the format expected by
    `process`).
    """
    return [line.split(' ') for line in input.split('\n')]


def get_macros(input):
    """
    Turn a string into a list of tuples of macros

    Each line is split on tabs; only lines with exactly two or three fields
    are kept, all other lines (including blank ones) are dropped.
    """
    macros = []
    for line in input.split('\n'):
        fields = line.split('\t')
        # built as a new list: removing entries from a list while iterating
        # over it skips the element after each removed one
        if len(fields) in (2, 3):
            macros.append(tuple(fields))
    return macros


def main(args):
    """
    Entry point for script

    Reads every macro file in `args.macros`, expands the macros in
    `args.input`, writes the result to `args.output` and, unless
    `args.silent` is set, also prints it.  Returns the process exit code.
    """
    # get macros
    macros = []
    for macro_file in args.macros:
        with open(macro_file) as handle:
            macros += get_macros(handle.read())

    # get tokens (file contents)
    with open(args.input) as handle:
        tokens = tokenize(handle.read())

    # get output
    output = process(tokens, macros)

    # show and save output
    with open(args.output, 'w') as handle:
        handle.write(output)
    if not args.silent:
        print(output)
    return 0


if __name__ == '__main__':
    try:
        sys.exit(main(get_args()))
    except KeyboardInterrupt:
        sys.exit(0)