#!/usr/bin/env python3
import sys
import re
import word_utils as words
SEPARATORS = [' ', '\n', 's']

def get_args():
    """ Get command line arguments """
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--macros", default=["macros"], action="append")
    parser.add_argument("input")
    parser.add_argument("output")
    return parser.parse_args()

def pluralize(word, macro=None):
    """
    Return the plural form of a word. If the macro defines an explicit
    plural (a third field), that is used instead.
    """
    if macro:
        if len(macro) == 3:
            return macro[2]
    # TODO add more complex plural forms
    if word[-1] in 'sxz' or word[-2:] in ['ch', 'sh']:
        return word + 'es'
    if word[-1] == 'y':
        if words.is_consonant(word[-2]):
            return word[:-1] + 'ies'
    if word[-1] == 'o':
        if words.is_consonant(word[-2]):
            return word + 'es'
    if word[-1] == 'f':
        return word[:-1] + 'ves'
    return word + 's'
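
# Example behaviour (illustrative, assuming word_utils.is_consonant reports
# whether a letter is a consonant):
#   pluralize("box")  -> "boxes"
#   pluralize("city") -> "cities"
#   pluralize("leaf") -> "leaves"
#   pluralize("cat")  -> "cats"
#   pluralize("otf", macro=("otf", "OpenType font", "OpenType fonts"))
#       -> "OpenType fonts" (explicit plural from a three-field macro)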

def upper_check(token, word):
    """ Copy the capitalisation of the original token onto the replacement word """
    all_caps = True
    for letter in token:
        if letter.islower():
            all_caps = False
            break
    if all_caps:
        return word.upper()
    # preserve a leading capital letter
    if token[0].isupper():
        return word[:1].upper() + word[1:]
    return word
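
# Example behaviour (illustrative): the replacement copies the token's casing.
#   upper_check("OTF", "opentype") -> "OPENTYPE"
#   upper_check("Otf", "opentype") -> "Opentype"
#   upper_check("otf", "opentype") -> "opentype"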

def process(tokens, macros):
    """ Replace macro tokens with their expansions and rejoin into a single string """
    output = tokens
    for line_number, line in enumerate(tokens):
        for token_number, token in enumerate(line):
            if len(token) == 0:
                continue
            # punctuation is ignored, so it is stripped here and re-added later
            # right now only full stops are stripped because I'm lazy
            # TODO add better end stripping
            full_stopped = False
            if token[-1] == '.':
                full_stopped = True
                token = token[:-1]
            match = False
            plural = False
            # if no macro is found (or if the token is not a macro at all),
            # the value is left unchanged
            value = token
            for macro in macros:
                if macro[0].lower() == token.lower():
                    match = True
                    value = macro[1]
                    break
                elif macro[0].lower() + 's' == token.lower():
                    match = True
                    plural = True
                    value = pluralize(macro[1], macro=macro)
                    break
            output[line_number][token_number] = upper_check(token, value)
            # re-add the full stop/period
            if full_stopped:
                output[line_number][token_number] += '.'
    for line_number, line in enumerate(output):
        output[line_number] = ' '.join(line)
    output = '\n'.join(output)
    return output
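
# Example behaviour (illustrative, with a hypothetical macro tuple):
#   process(tokenize("otf is great."), [("otf", "OpenType")])
#       -> "OpenType is great."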

def tokenize(input):
    """
    Return a list of tokens from a string (convert file contents into the
    format processed by `process`)
    """
    return [x.split(' ') for x in input.split('\n')]
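
# Example behaviour (illustrative):
#   tokenize("hello world\nbye") -> [['hello', 'world'], ['bye']]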

def get_macros(input):
    """
    Turn a string into a list of tuples of macros
    """
    # turn input into an unvalidated list of macros
    macros = [x.split('\t') for x in input.split('\n')]
    # keep only valid entries: (word, expansion) or (word, expansion, plural)
    macros = [tuple(macro) for macro in macros if len(macro) in (2, 3)]
    return macros
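
# A macros file is read as tab-separated lines: the word to expand, its
# expansion, and an optional explicit plural. A hypothetical example file:
#   otf<TAB>OpenType font<TAB>OpenType fonts
#   ttf<TAB>TrueType font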

def main(args):
    """ Entry point for script """
    # get macros
    macros = []
    for macro_file in args.macros:
        with open(macro_file) as file:
            macros += get_macros(file.read())

    # get tokens (file contents)
    with open(args.input) as file:
        tokens = tokenize(file.read())

    # get output
    output = process(tokens, macros)

    # show and save output
    with open(args.output, 'w+') as file:
        file.write(output)
    print(output)
    return 0

if __name__ == '__main__':
    try:
        sys.exit(main(get_args()))
    except KeyboardInterrupt:
        sys.exit(0)
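
# Example invocation (illustrative; the script and input/output file names are
# placeholders):
#   ./pymacro input.txt output.txt
#   ./pymacro -m extra_macros input.txt output.txt
# Because -m uses action="append", files passed with -m are read in addition
# to the default "macros" file.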