mirror of
https://github.com/alvierahman90/otfmacros.git
synced 2025-05-19 18:39:41 +00:00
Add more betterer version of pymacro
This commit is contained in:
parent
a13c7c7c23
commit
4fd09033aa
134
pymacro
134
pymacro
@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
import sys
|
import sys
|
||||||
import re
|
import re
|
||||||
|
import word_utils as words
|
||||||
|
|
||||||
SEPARATORS = [' ', '\n', 's']
|
SEPARATORS = [' ', '\n', 's']
|
||||||
|
|
||||||
@ -17,35 +18,132 @@ def get_args():
|
|||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def pluralize(word):
    """
    Return the plural form of a word.

    Handles the common English patterns: sibilant endings
    (-s/-x/-z/-ch/-sh) -> -es, consonant + y -> -ies,
    consonant + o -> -es, -f -> -ves, everything else -> -s.
    """
    # TODO add more complex plural forms (e.g. -fe -> -ves, irregulars)
    if not word:
        # an empty string has no plural form; return it unchanged
        # (the original crashed on word[-1] here)
        return word
    if word[-1] in 'sxz' or word[-2:] in ['ch', 'sh']:
        return word + 'es'
    # the two-letter endings below need at least two characters;
    # the original raised IndexError on word[-2] for 1-letter words
    if len(word) > 1:
        # membership in 'aeiou' is equivalent to word_utils.is_consonant
        # being False, without the cross-module call
        if word[-1] == 'y' and word[-2] not in 'aeiou':
            return word[:-1] + 'ies'
        if word[-1] == 'o' and word[-2] not in 'aeiou':
            return word + 'es'
    if word[-1] == 'f':
        return word[:-1] + 'ves'
    return word + 's'
|
||||||
|
|
||||||
|
def upper_check(token, word):
    """
    Mirror the capitalization of *token* onto *word*.

    An all-caps token (no lowercase letters) yields word.upper(); a token
    with a capitalized first letter yields word with its first letter
    capitalized; otherwise *word* is returned unchanged.
    """
    # a token containing no lowercase letter counts as all caps
    all_caps = True
    for letter in token:
        if letter.islower():
            all_caps = False
            break

    if all_caps:
        return word.upper()

    # The original tested token[1], which raised IndexError on one-letter
    # tokens and missed a capitalized first letter (e.g. "Btw"); the first
    # character is the one that carries the capitalization.
    if token and token[0].isupper():
        return word[:1].upper() + word[1:]

    return word
|
||||||
|
|
||||||
|
def process(tokens, macros):
    """
    Expand macros in a token grid and return the rebuilt text as one string.

    tokens -- list of lines, each a list of word tokens (see `tokenize`)
    macros -- list of (pattern, replacement) pairs (see `get_macros`)

    Matching is case-insensitive.  A token equal to a pattern plus a
    trailing 's' expands to the pluralized replacement.  The matched
    token's capitalization (via `upper_check`) and a trailing full stop
    are carried over to the replacement.
    """
    # Build a fresh structure instead of writing into `tokens`: the
    # original aliased `output = tokens` and mutated the caller's lists.
    output = []

    for line in tokens:
        new_line = []

        for token in line:
            if len(token) == 0:
                new_line.append(token)
                continue

            # punctuation is ignored so it is stripped till later;
            # right now only full stops are stripped
            # TODO add better end stripping
            full_stopped = token[-1] == '.'
            if full_stopped:
                token = token[:-1]

            # if no macro is found (or the token is not a macro at all)
            # the token passes through unchanged
            value = token
            for macro in macros:
                if macro[0].lower() == token.lower():
                    value = macro[1]
                    break
                if macro[0].lower() + 's' == token.lower():
                    # plural use of a macro: pluralize the replacement too
                    value = pluralize(macro[1])
                    break

            word = upper_check(token, value)

            # re-adding the full stop/period
            if full_stopped:
                word += '.'

            new_line.append(word)

        output.append(' '.join(new_line))

    return '\n'.join(output)
|
||||||
|
|
||||||
|
def tokenize(input):
    """
    Convert file contents into the token grid consumed by `process`:
    one inner list per line, holding that line's space-separated tokens.
    """
    grid = []
    for line in input.split('\n'):
        grid.append(line.split(' '))
    return grid
|
||||||
|
|
||||||
|
|
||||||
|
def get_macros(input):
    """
    Turn a string into a list of (pattern, replacement) macro tuples.

    Each non-empty input line should hold a pattern and a replacement
    separated by a single tab; malformed lines are dropped.
    """
    # turn input into an unvalidated list of candidate macros
    candidates = [line.split('\t') for line in input.split('\n')]

    # Validate: keep only exact pattern/replacement pairs.  The original
    # popped invalid entries while iterating with enumerate, which skips
    # the element following each removal and can leave malformed entries;
    # building a new list avoids that bug.
    return [tuple(macro) for macro in candidates if len(macro) == 2]
|
||||||
|
|
||||||
def main(args):
    """
    Entry point for script.

    Reads macro definitions from each file in args.macros, expands them
    in args.input, writes the result to args.output and echoes it to
    stdout.  Returns 0 on success.
    """
    # get macros: each macro file contributes validated
    # (pattern, replacement) pairs
    macros = []
    for macro_file in args.macros:
        with open(macro_file) as file:
            macros += get_macros(file.read())

    # Longest patterns first, so when a token could match both a pattern
    # and a shorter pattern + plural 's', the more specific one wins.
    macros.sort(key=lambda tup: len(tup[0]), reverse=True)

    # get tokens (file contents)
    with open(args.input) as file:
        tokens = tokenize(file.read())

    # get output
    output = process(tokens, macros)

    # show and save output
    with open(args.output, 'w+') as file:
        file.write(output)

    print(output)

    return 0
|
||||||
|
14
word_utils.py
Normal file
14
word_utils.py
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
def is_consonant(letter):
    """
    Report whether *letter* (a one-character string) is not a lowercase
    English vowel.

    Raises ValueError for non-string or non-single-character input.
    """
    # is_vowel performs the identical isinstance/length validation and
    # raises the same ValueError messages, so no need to repeat it here.
    return not is_vowel(letter)
|
||||||
|
|
||||||
|
def is_vowel(letter):
    """
    Report whether *letter* (a one-character string) is a lowercase
    English vowel.

    Raises ValueError for non-string or non-single-character input.
    """
    if isinstance(letter, str):
        if len(letter) == 1:
            return letter in ('a', 'e', 'i', 'o', 'u')
        raise ValueError("Argument 'letter' must be 1 long")
    raise ValueError("Argument 'letter' must be type str")
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user