#!/usr/bin/env python3

import sys

import word_utils as words  # local helper module providing is_consonant()


SEPARATORS = [' ', '\n', 's']  # NOTE: defined but currently unused


def get_args():
    """Get command line arguments."""
    import argparse

    parser = argparse.ArgumentParser()
    # NOTE: with action="append", files passed via -m are appended to the
    # default list, so the default "macros" file is always read as well
    parser.add_argument("-m", "--macros", default=["macros"], action="append")
    parser.add_argument("input")
    parser.add_argument("output")
    return parser.parse_args()
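
# With the arguments above, a hypothetical invocation such as
#   script.py -m extra input.txt output.txt
# yields args.macros == ['macros', 'extra'], args.input == 'input.txt' and
# args.output == 'output.txt'.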


def pluralize(word):
    """Return the plural form of a word."""
    # TODO add more complex plural forms
    if word[-1] in 'sxz' or word[-2:] in ['ch', 'sh']:
        return word + 'es'
    if word[-1] == 'y' and len(word) > 1 and words.is_consonant(word[-2]):
        return word[:-1] + 'ies'
    if word[-1] == 'o' and len(word) > 1 and words.is_consonant(word[-2]):
        return word + 'es'
    if word[-1] == 'f':
        return word[:-1] + 'ves'
    return word + 's'
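
# A quick sanity check of the rules above (assuming word_utils.is_consonant
# classifies letters in the usual way):
#   pluralize('box')  -> 'boxes'
#   pluralize('city') -> 'cities'
#   pluralize('hero') -> 'heroes'
#   pluralize('leaf') -> 'leaves'
#   pluralize('cat')  -> 'cats'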


def upper_check(token, word):
    """Match the capitalisation of the replacement `word` to the `token`."""
    # an all-caps token gets an all-caps replacement
    if not any(letter.islower() for letter in token):
        return word.upper()

    # a capitalised token gets a capitalised replacement
    if token[:1].isupper():
        return word[:1].upper() + word[1:]

    return word
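
# For example, with a replacement value of 'bar':
#   upper_check('FOO', 'bar') -> 'BAR'
#   upper_check('Foo', 'bar') -> 'Bar'
#   upper_check('foo', 'bar') -> 'bar'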


def process(tokens, macros):
    """Expand every macro occurrence in `tokens` and return the text."""
    # tokens is modified in place; output is just a second name for it
    output = tokens

    for line_number, line in enumerate(tokens):
        for token_number, token in enumerate(line):
            if len(token) == 0:
                continue

            # punctuation is ignored, so it is stripped off until later;
            # right now only full stops are stripped because I'm lazy
            # TODO add better end stripping
            full_stopped = False
            if token[-1] == '.':
                full_stopped = True
                token = token[:-1]

            # if no macro is found (or the token is not a macro at all),
            # the value is left unchanged
            value = token

            for macro in macros:
                if macro[0].lower() == token.lower():
                    value = macro[1]
                    break
                elif macro[0].lower() + 's' == token.lower():
                    value = pluralize(macro[1])
                    break

            output[line_number][token_number] = upper_check(token, value)

            # re-add the full stop/period
            if full_stopped:
                output[line_number][token_number] += '.'

    # join tokens back into lines, then lines into a single string
    for line_number, line in enumerate(output):
        output[line_number] = ' '.join(line)

    return '\n'.join(output)
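
# A worked example with a hypothetical macro list:
#   macros = [('afaik', 'as far as I know')]
#   process(tokenize('AFAIK this works.'), macros)
#   -> 'AS FAR AS I KNOW this works.'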


def tokenize(text):
    """
    Return a list of token lines from a string (convert file contents to the
    format processed by `process`).
    """
    return [x.split(' ') for x in text.split('\n')]
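
# For example: tokenize('to be\nor not') -> [['to', 'be'], ['or', 'not']]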


def get_macros(text):
    """
    Turn a string into a list of (name, expansion) macro tuples.
    """
    # turn the input into an unvalidated list of candidate macros
    candidates = [x.split('\t') for x in text.split('\n')]

    # keep only well-formed macros: exactly two tab-separated fields
    return [tuple(candidate) for candidate in candidates if len(candidate) == 2]
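
# The macro files read by main() are expected to hold one macro per line, with
# name and expansion separated by a single tab, e.g. (hypothetical entries):
#   afaik<TAB>as far as I know
#   imo<TAB>in my opinion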


def main(args):
    """Entry point for the script."""
    # read the macro definitions
    macros = []
    for macro_file in args.macros:
        with open(macro_file) as file:
            macros += get_macros(file.read())

    # read and tokenize the input file
    with open(args.input) as file:
        tokens = tokenize(file.read())

    # expand the macros
    output = process(tokens, macros)

    # show and save the output
    with open(args.output, 'w') as file:
        file.write(output)
    print(output)

    return 0
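
# Typical usage, assuming this file is saved as (say) expand.py and a
# tab-separated macro file named 'macros' exists in the working directory:
#   python3 expand.py input.txt output.txt
#   python3 expand.py -m extra_macros input.txt output.txt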


if __name__ == '__main__':
    try:
        sys.exit(main(get_args()))
    except KeyboardInterrupt:
        sys.exit(0)