mirror of
https://github.com/alvierahman90/otfmacros.git
synced 2024-12-15 12:01:59 +00:00
Add more betterer version of pymacro
This commit is contained in:
parent
a13c7c7c23
commit
4fd09033aa
134
pymacro
134
pymacro
@ -2,6 +2,7 @@
|
||||
|
||||
import sys
|
||||
import re
|
||||
import word_utils as words
|
||||
|
||||
# Strings that may directly follow a macro name in the input text: a space,
# a newline, or the letter 's' (presumably to cover simple plurals -- confirm).
SEPARATORS = [' ', '\n', 's']
|
||||
|
||||
@ -17,35 +18,132 @@ def get_args():
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def pluralize(word):
    """
    Returns the plural form of a word.

    Rules are tried in order and the first one that applies wins:

    * ends in 's', 'x', 'z', 'ch' or 'sh'  -> append 'es'
    * ends in consonant + 'y'              -> replace 'y' with 'ies'
    * ends in consonant + 'o'              -> append 'es'
    * ends in 'f'                          -> replace 'f' with 'ves'
    * anything else                        -> append 's'

    An empty string is returned unchanged. The rules only look at lower-case
    endings, so the word is expected to be lower case.
    """
    # TODO add more complex plural forms

    # nothing to pluralize; also avoids indexing into an empty string
    if not word:
        return word

    if word.endswith(('s', 'x', 'z', 'ch', 'sh')):
        return word + 'es'

    last = word[-1]
    # one-letter words have no preceding letter to inspect, so the length is
    # checked before word[-2] is touched
    has_previous = len(word) > 1

    if last == 'y':
        if has_previous and words.is_consonant(word[-2]):
            return word[:-1] + 'ies'

    if last == 'o':
        if has_previous and words.is_consonant(word[-2]):
            return word + 'es'

    if last == 'f':
        return word[:-1] + 'ves'

    return word + 's'
|
||||
|
||||
def upper_check(token, word):
    """
    Returns `word` with its capitalisation adjusted to mirror `token`.

    * If `token` contains no lower-case letters, `word` is returned fully
      upper-cased.
    * Otherwise, if the second character of `token` is upper case, `word` is
      returned with its first character upper-cased.
    * Otherwise `word` is returned unchanged.
    """
    # a token without any lower-case letter counts as "all caps"; this
    # includes tokens that contain no letters at all
    if not any(letter.islower() for letter in token):
        return word.upper()

    # NOTE(review): the *second* character is inspected, presumably because
    # macro tokens carry a one-character prefix -- confirm against callers.
    # The length guard keeps one-character tokens (e.g. "a") from raising
    # IndexError.
    if len(token) > 1 and token[1].isupper():
        return word[:1].upper() + word[1:]

    return word
|
||||
|
||||
def process(tokens, macros):
    """
    Expands macros in tokenized text and returns the result as one string.

    `tokens` is a list of lines, each line being a list of token strings.
    `macros` is a sequence of (name, replacement) pairs. A token is compared
    case-insensitively against every macro name; the first macro whose name
    equals the token, or whose name followed by 's' equals the token (in which
    case the replacement is pluralized), supplies the replacement. Tokens that
    match no macro are kept as they are. The capitalisation of the original
    token is then applied to the result by `upper_check`.

    Tokens are joined with single spaces and lines with newlines. The `tokens`
    argument itself is left unmodified.
    """
    output_lines = []

    for line in tokens:
        expanded = []

        for token in line:
            # empty tokens (from consecutive spaces or blank lines) pass
            # through unchanged so the original spacing is preserved
            if len(token) == 0:
                expanded.append(token)
                continue

            # punctuation is ignored so it is stripped till later
            # right now only full stops are stripped because I'm lazy
            # TODO add better end stripping
            full_stopped = token[-1] == '.'
            if full_stopped:
                token = token[:-1]

            # if no macro is found (or if it is not a macro at all) the value
            # will not be changed
            value = token
            lowered = token.lower()

            for macro in macros:
                name = macro[0].lower()
                if name == lowered:
                    value = macro[1]
                    break
                if name + 's' == lowered:
                    value = pluralize(macro[1])
                    break

            result = upper_check(token, value)

            # re-adding the full stop/period
            if full_stopped:
                result += '.'

            expanded.append(result)

        output_lines.append(' '.join(expanded))

    return '\n'.join(output_lines)
|
||||
|
||||
def tokenize(input):
    """
    Return a list of token lists from a string (converts file contents into
    the format consumed by `process`).

    The string is split into lines on newline characters, and each line is
    split into tokens on single spaces.
    """
    lines = []
    for line in input.split('\n'):
        lines.append(line.split(' '))
    return lines
|
||||
|
||||
|
||||
def get_macros(input):
    """
    Turn a string into a list of tuples of macros

    Each line of `input` is split on tab characters. Lines that yield exactly
    two fields become a (name, replacement) tuple; every other line (blank
    lines, lines without a tab, lines with more than one tab) is dropped.
    """
    # turn input into unvalidated list of macros
    candidates = (line.split('\t') for line in input.split('\n'))

    # validate macros; a new list is built rather than popping from the list
    # being iterated, which would skip the entry after each removed one
    return [tuple(fields) for fields in candidates if len(fields) == 2]
|
||||
|
||||
def main(args):
    """
    Entry point for script

    Reads every macro file named in `args.macros`, expands the macros found in
    the file `args.input`, writes the result to `args.output` and prints it.
    Returns 0.
    """

    # get macros
    macros = []
    for macro_file in args.macros:
        # each file is read exactly once: a second read() on the same handle
        # returns an empty string
        with open(macro_file) as file:
            macros += get_macros(file.read())

    # keep only well-formed (name, replacement) pairs; a new list is built
    # instead of popping while iterating, which skips entries
    macros = [tuple(macro) for macro in macros if len(macro) == 2]

    # longest names first, so that a macro whose name is another macro's name
    # plus 's' is tried before that shorter macro's plural form
    macros.sort(key=lambda tup: len(tup[0]), reverse=True)

    # get tokens (file contents)
    with open(args.input) as file:
        tokens = tokenize(file.read())

    # get output
    output = process(tokens, macros)

    # show and save output
    with open(args.output, 'w+') as file:
        file.write(output)

    print(output)

    return 0
|
||||
|
14
word_utils.py
Normal file
14
word_utils.py
Normal file
@ -0,0 +1,14 @@
|
||||
def is_consonant(letter):
    """
    Returns True if `letter` is an alphabetic character that is not a vowel.

    Digits, punctuation and whitespace are not letters and therefore yield
    False. The check is case-insensitive.

    Raises ValueError if `letter` is not a str or is not exactly one
    character long.
    """
    if not isinstance(letter, str):
        raise ValueError("Argument 'letter' must be type str")
    if len(letter) != 1:
        raise ValueError("Argument 'letter' must be 1 long")

    # non-alphabetic characters are neither vowels nor consonants; the letter
    # is lower-cased so upper-case vowels are not mistaken for consonants
    return letter.isalpha() and not is_vowel(letter.lower())
|
||||
|
||||
def is_vowel(letter):
    """
    Returns True if `letter` is one of the vowels a, e, i, o, u.

    The check is case-insensitive.

    Raises ValueError if `letter` is not a str or is not exactly one
    character long.
    """
    if not isinstance(letter, str):
        raise ValueError("Argument 'letter' must be type str")
    if len(letter) != 1:
        raise ValueError("Argument 'letter' must be 1 long")

    # lower-case first so 'A' is recognised just like 'a'
    return letter.lower() in 'aeiou'
|
||||
|
Loading…
Reference in New Issue
Block a user