commit 5f21b2c1dc859024b4789eaae25ec5cf6990178d
Author: Alvie Rahman <alvierahman90@gmail.com>
Date:   Sun Mar 7 14:40:58 2021 +0000

    first commit

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..911d327
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,9 @@
+include config.mk
+
+install:
+	mkdir -p ${DESTDIR}${PREFIX}/bin/
+	cp -f otfm-python ${DESTDIR}${PREFIX}/bin/
+	chmod 755  ${DESTDIR}${PREFIX}/bin/
+
+uninstall:
+	rm -f ${DESTDIR}${PREFIX}/bin/otfm-python
diff --git a/config.mk b/config.mk
new file mode 100644
index 0000000..a67a797
--- /dev/null
+++ b/config.mk
@@ -0,0 +1 @@
+PREFIX=/usr/local
diff --git a/macros b/macros
new file mode 100644
index 0000000..63cb96e
--- /dev/null
+++ b/macros
@@ -0,0 +1,8 @@
+.hc	hydrocarbon
+.hy	hydrogen
+.ca	carbon
+.ox	oxygen
+.wink	😉
+source	tests/test_macros_biology
+source	tests/test_macros_custom_plurals
+source	tests/test_macros_plural
diff --git a/otfm-python b/otfm-python
new file mode 100755
index 0000000..d5a5736
--- /dev/null
+++ b/otfm-python
@@ -0,0 +1,254 @@
+#!/usr/bin/env python3
+
+import sys
+import re
+
+
+class MultipleTokens():
+    """
+    Used by process() to tell detokenize() that a macro adds extra tokens without modifying
+    without changing the indexes of other tokens
+    """
+
+    def __init__(self, words):
+        self.words  = words
+
+
+def get_args():
+    """ Get command line arguments """
+
+    import argparse
+    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument("-m", "--macros-file", default="macros",
+                        help="File where macros are stored")
+    parser.add_argument("-i", "--input", help="File to be processed.", default="-")
+    parser.add_argument("-o", "--output", help="Path of output", default="-")
+    return parser.parse_args()
+
+
+def pluralize(input):
+    """ Returns the plural form of a word.  """
+    if isinstance(input, list):
+        # use custom plural if defined
+        if len(input) > 1:
+            return input[1]
+
+        return pluralize_word(input[0])
+
+    return pluralize_word(input)
+
+def pluralize_word(word):
+    def is_vowel(letter):
+        if not isinstance(letter, str):
+            raise ValueError("Argument 'letter' must be type str")
+        if len(letter) != 1:
+            raise ValueError("Argument 'letter' must be 1 long")
+        return letter in 'aeiou'
+
+    # TODO add more complex plural forms
+    if word[-1] in 'sxz' or word[-2:] in ['ch', 'sh']:
+        return word + 'es'
+    if word[-1] == 'y':
+        if not is_vowel(word[-2]):
+            return word[:-1] + 'ies'
+    if word[-1] == 'o':
+        if not is_vowel(word[-2]):
+            return word + 'es'
+    if word[-1] == 'f':
+        return word[:-1] + 'ves'
+    return word + 's'
+
+
+def upper_check(token, word):
+    """ Check if word needs to be capitalized and capitalise appropriately if that is the case. """
+    all_caps = True
+    
+    for letter in token:
+        if letter.islower():
+            all_caps = False
+            break
+
+    if all_caps:
+        return word.upper()
+    
+    if len(token) > 1:
+        if token[1].isupper():
+            return word[:1].upper() + word[1:]
+
+    return word
+
+
+def process(input, macros):
+    """
+    This function takes the string `input` and a dict, ` macros`.
+    It substitutes any keys in `macro` with the corresponding value.
+    It also checks for any otf macros defined in the string and appends them to `macros`,
+    replacing that otf macro and any following instances of it.
+    It returns the substituted string.
+    """
+    tokens = tokenize(input)
+    macros = macros
+
+    in_otf_macro = False
+    tmp_macro_keyword = None
+    tmp_macro_definition = []
+
+    for line_number, line in enumerate(tokens):
+        for token_number, token in enumerate(line):
+            if len(token) == 0:
+                continue
+
+            # detect on the fly macros
+            token_is_otf_macro_start = is_otf_macro_start(token, line)
+
+            # process otf macro tokens
+            if token_is_otf_macro_start:
+                tmp_macro_keyword = token
+                in_otf_macro = True
+                tmp_macro_definition = []
+                tokens[line_number][token_number] = None
+                continue
+            elif in_otf_macro and is_otf_macro_end(token):
+                split_token = re.split(r',.|.,', token)
+                tmp_macro_definition.append(split_token[0])
+                macros[tmp_macro_keyword] = ' '.join(tmp_macro_definition)
+                token = tmp_macro_keyword + split_token[1]
+                in_otf_macro = False
+                # once the end of the macro has been found and stored, continue downn the for loop
+                # so that it can be turned back to normal text
+            elif in_otf_macro:
+                tmp_macro_definition.append(token)
+                tokens[line_number][token_number] = None
+                continue
+
+            # cutting off the end and then adding it back once expanded
+            # e.g. punctuation: from the token "hello...", end would be equal to "..."
+            #      and token would be equal to "hello"
+            end = []
+            token = list(token)
+            for index, char in reversed(list(enumerate(token))):
+                if not char.isalnum():
+                    end.insert(0, token.pop(index))
+                else:
+                    break
+            end = ''.join(end)
+            token = ''.join(token)
+
+            # if no macro is found (or if it is not a macro at all, the value
+            # will not be changed
+            value = token
+
+            if token.lower() in macros.keys():
+                value = macros[token.lower()][0]
+            elif token.lower() in [f"{m}s" for m in macros.keys()]:
+                value = pluralize(macros[token.lower()[:-1]])
+
+            tokens[line_number][token_number] = upper_check(token, value)
+            tokens[line_number][token_number] += end
+
+        # filter out None tokens
+        tokens[line_number] = [token for token in tokens[line_number] if token is not None]
+
+    return detokenize(tokens)
+
+def tokenize(input):
+    """
+    Returns a 2D list of tokens and a list of otf_macros.
+    otf macro definitions are removed and just the keyword definition is kept as well as any
+    punctuation on the final word.
+    """
+    return [x.split(' ') for x in input.split('\n')]
+
+
+def detokenize(tokens):
+    """Turn a list of tokens into plaintext. """
+
+    output = []
+
+    for index, line in enumerate(tokens):
+        output.append([])
+        for token in line:
+            if isinstance(token, MultipleTokens):
+                for word in token.words:
+                    output[index].append(word)
+            elif isinstance(token, str):
+                output[index].append(token)
+            else:
+                raise ValueError(f"Unknown token type: {type(token)}")
+
+    for line_number, line in enumerate(output):
+        output[line_number] = ' '.join(line)
+
+    return  '\n'.join(output)
+
+
+def get_macros(input, child=False):
+    """ Turn a macros string into a list of tuples of macros """
+    response = {}
+
+    # turn input into list of tuples
+    macros = [re.split('[\t]', x) for x in input.split('\n')]
+
+    # check if keyword is `source`, get macros from sourced file if it is
+    for index, macro in enumerate(macros):
+        if macro[0] == "source":
+            with open(macro[1]) as file:
+                macros += get_macros(file.read(), child=True)
+                macros[index] = ()
+
+    if child:
+        return macros
+
+    # store macros as dict and return
+    for index, macro in enumerate(macros):
+        if len(macro) >= 2:
+            response[macro[0].lower()] = macro[1:]
+    return response
+
+def is_otf_macro_start(token, line):
+    """ Returns true if token is the start of an on the fly macro """
+    match = re.search(r'^\.[A-Za-z0-9]+$', token)
+    if match is None:
+        return False
+
+    # don't return true you can't find an end token in the line
+    for line_token in line:
+        if is_otf_macro_end(line_token):
+            return match is not None
+
+    return False
+
+
+def is_otf_macro_end(token):
+    """ Returns true if token is the end of an on the fly macro """
+    match = re.search(r'(\.,|,\.)', f"{token}")
+    return match is not None
+
+
+def main(args):
+    """ Entry point for script """
+
+    # get macros
+
+    with open(args.macros_file) as file:
+        macros = get_macros(file.read())
+
+    # get tokens (file contents)
+    if args.input == "-":
+        input = sys.stdin.read()
+    else:
+        with open(args.input) as file:
+            input = file.read()
+
+    if args.output == "-":
+        return print(process(input, macros))
+    else:
+        with open(args.output, 'w+') as file:
+            return file.write(process(input, macros))
+
+
+if __name__ == '__main__':
+    try:
+        sys.exit(main(get_args()))
+    except KeyboardInterrupt:
+        sys.exit(0)
diff --git a/readme.md b/readme.md
new file mode 100644
index 0000000..d599d8d
--- /dev/null
+++ b/readme.md
@@ -0,0 +1,24 @@
+# pymacro
+
+A python implementation of [otfmacros](https://github.com/alvierahman90/otfmacros).
+
+## usage
+```
+$ ./pymacro -h
+usage: pymacro [-h] [-m MACROS_FILE] [-i INPUT] [-o OUTPUT]
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -m MACROS_FILE, --macros-file MACROS_FILE
+                        File where macros are stored (default: macros)
+  -i INPUT, --input INPUT
+                        File to be processed. (default: -)
+  -o OUTPUT, --output OUTPUT
+                        Path of output (default: -)
+```
+
+## testing
+
+Run `test.sh`.
+A `diff` is run on the actual output against what should have come out according
+to the spec.
diff --git a/test.sh b/test.sh
new file mode 100755
index 0000000..0717231
--- /dev/null
+++ b/test.sh
@@ -0,0 +1,3 @@
+#/usr/bin/env sh
+cat tests/test_input | ./pymacro > tests/test_actual_output
+diff tests/test_actual_output tests/test_expected_output
diff --git a/tests/test_expected_output b/tests/test_expected_output
new file mode 100644
index 0000000..00f1abf
--- /dev/null
+++ b/tests/test_expected_output
@@ -0,0 +1,12 @@
+Hydrocarbons are composed of exclusively hydrogen and carbon.
+
+Chlorophyll is the site of photosynthesis.
+
+😉
+
+1 hydrocarbon 2 hydrocarbons
+1 dress 2 dresses
+1 story 2 stories
+1 hero 2 heroes
+1 leaf 2 leaves
+1 man 2 men
diff --git a/tests/test_input b/tests/test_input
new file mode 100644
index 0000000..babbe4d
--- /dev/null
+++ b/tests/test_input
@@ -0,0 +1,21 @@
+.Hcs are composed of exclusively .hy and .ca.
+
+.Chl is the site of .ps.
+
+.wink
+
+1 .hc 2 .hcs
+1 .dr 2 .drs
+1 .st 2 .sts
+1 .he 2 .hes
+1 .le 2 .les
+1 .ma 2 .mas
+
+This is a test of .otfm on the fly macro.,s!
+
+If this sentence makes sense, then the test of .otfms worked!
+
+.otfms can also be overwritten,
+you could make it equal .otfm on the fly monkey.,s!
+
+They're not just any monkeys, they're .otfms!
diff --git a/tests/test_macros_biology b/tests/test_macros_biology
new file mode 100644
index 0000000..3d40842
--- /dev/null
+++ b/tests/test_macros_biology
@@ -0,0 +1,2 @@
+.chl	chlorophyll
+.ps	photosynthesis
diff --git a/tests/test_macros_custom_plurals b/tests/test_macros_custom_plurals
new file mode 100644
index 0000000..01e66e6
--- /dev/null
+++ b/tests/test_macros_custom_plurals
@@ -0,0 +1 @@
+.l	louse lice
diff --git a/tests/test_macros_plural b/tests/test_macros_plural
new file mode 100644
index 0000000..80b1b34
--- /dev/null
+++ b/tests/test_macros_plural
@@ -0,0 +1,6 @@
+.hc	hydrocarbon
+.dr	dress
+.st	story
+.he	hero
+.le	leaf
+.ma	man	men