#!/usr/bin/env python3
import sys
import re
import word_utils as words
SEPARATORS = [' ', '\n', 's']

def get_args():
    """ Get command line arguments """
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--macros", default=["macros"], action="append")
    parser.add_argument("input")
    parser.add_argument("output")
    return parser.parse_args()

def pluralize(word, macro=None):
    """
    Return the plural form of a word. If the macro defines an explicit
    plural (a third field), that is used instead.
    """
    if macro:
        if len(macro) == 3:
            return macro[2]
    # TODO add more complex plural forms
    if word[-1] in 'sxz' or word[-2:] in ['ch', 'sh']:
        return word + 'es'
    if word[-1] == 'y':
        if words.is_consonant(word[-2]):
            return word[:-1] + 'ies'
    if word[-1] == 'o':
        if words.is_consonant(word[-2]):
            return word + 'es'
    if word[-1] == 'f':
        return word[:-1] + 'ves'
    return word + 's'
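
# Example behaviour (illustrative, assuming word_utils.is_consonant reports
# whether a letter is a consonant):
#   pluralize("box")  -> "boxes"
#   pluralize("city") -> "cities"
#   pluralize("leaf") -> "leaves"
#   pluralize("cat")  -> "cats"
#   pluralize("otf", macro=("otf", "OpenType font", "OpenType fonts"))
#       -> "OpenType fonts" (explicit plural from a three-field macro)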

def upper_check(token, word):
    """ Copy the capitalisation of the original token onto the replacement word """
    all_caps = True
    for letter in token:
        if letter.islower():
            all_caps = False
            break
    if all_caps:
        return word.upper()
    # preserve a leading capital letter
    if token[0].isupper():
        return word[:1].upper() + word[1:]
    return word
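
# Example behaviour (illustrative): the replacement copies the token's casing.
#   upper_check("OTF", "opentype") -> "OPENTYPE"
#   upper_check("Otf", "opentype") -> "Opentype"
#   upper_check("otf", "opentype") -> "opentype"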

def process(tokens, macros):
    """ Replace macro tokens with their expansions and rejoin into a single string """
    output = tokens
    for line_number, line in enumerate(tokens):
        for token_number, token in enumerate(line):
            if len(token) == 0:
                continue
            # punctuation is ignored, so it is stripped here and re-added later
            # right now only full stops are stripped because I'm lazy
            # TODO add better end stripping
            full_stopped = False
            if token[-1] == '.':
                full_stopped = True
                token = token[:-1]
            match = False
            plural = False
            # if no macro is found (or if the token is not a macro at all),
            # the value is left unchanged
            value = token
            for macro in macros:
                if macro[0].lower() == token.lower():
                    match = True
                    value = macro[1]
                    break
                elif macro[0].lower() + 's' == token.lower():
                    match = True
                    plural = True
                    value = pluralize(macro[1], macro=macro)
                    break
            output[line_number][token_number] = upper_check(token, value)
            # re-add the full stop/period
            if full_stopped:
                output[line_number][token_number] += '.'
    for line_number, line in enumerate(output):
        output[line_number] = ' '.join(line)
    output = '\n'.join(output)
    return output
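
# Example behaviour (illustrative, with a hypothetical macro tuple):
#   process(tokenize("otf is great."), [("otf", "OpenType")])
#       -> "OpenType is great."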

def tokenize(input):
    """
    Return a list of tokens from a string (convert file contents into the
    format processed by `process`)
    """
    return [x.split(' ') for x in input.split('\n')]
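
# Example behaviour (illustrative):
#   tokenize("hello world\nbye") -> [['hello', 'world'], ['bye']]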

def get_macros(input):
    """
    Turn a string into a list of tuples of macros
    """
    # turn input into an unvalidated list of macros
    macros = [x.split('\t') for x in input.split('\n')]
    # keep only valid entries: (word, expansion) or (word, expansion, plural)
    macros = [tuple(macro) for macro in macros if len(macro) in (2, 3)]
    return macros
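
# A macros file is read as tab-separated lines: the word to expand, its
# expansion, and an optional explicit plural. A hypothetical example file:
#   otf<TAB>OpenType font<TAB>OpenType fonts
#   ttf<TAB>TrueType font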

def main(args):
    """ Entry point for script """
    # get macros
    macros = []
    for macro_file in args.macros:
        with open(macro_file) as file:
            macros += get_macros(file.read())

    # get tokens (file contents)
    with open(args.input) as file:
        tokens = tokenize(file.read())

    # get output
    output = process(tokens, macros)

    # show and save output
    with open(args.output, 'w+') as file:
        file.write(output)
    print(output)
    return 0

if __name__ == '__main__':
    try:
        sys.exit(main(get_args()))
    except KeyboardInterrupt:
        sys.exit(0)
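
# Example invocation (illustrative; the script and input/output file names are
# placeholders):
#   ./pymacro input.txt output.txt
#   ./pymacro -m extra_macros input.txt output.txt
# Because -m uses action="append", files passed with -m are read in addition
# to the default "macros" file.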