add on the fly macros

2021-03-06 15:01:48 +00:00
parent 8792f4df49
commit cc27e52bb1
5 changed files with 150 additions and 41 deletions

View File

@@ -2,7 +2,7 @@
.hy hydrogen
.ca carbon
.ox oxygen
:wink: 😉
.wink 😉
source tests/test_macros_biology
source tests/test_macros_custom_plurals
source tests/test_macros_plural

View File

@@ -4,6 +4,16 @@ import sys
import re
class MultipleTokens():
"""
Used by process() to tell detokenize() that a macro adds extra tokens without modifying
without changing the indexes of other tokens
"""
def __init__(self, words):
self.words = words
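For illustration, a macro that expands to several words gets wrapped in a single MultipleTokens placeholder, so the surrounding token indexes stay valid until detokenize() flattens it (a sketch; the word list is hypothetical):

expansion = MultipleTokens(["on", "the", "fly"])  # one token slot, three words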
def get_args():
""" Get command line arguments """
@@ -11,10 +21,8 @@ def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--macros", default=["macros"], action="append",
                        help="Extra files where macros are stored")
    parser.add_argument("-q", "--quiet", default=False, action="store_true",
                        help="Don't output to stdout")
    parser.add_argument("input", help="The file to be processed")
    parser.add_argument("output", help="The location of the output")
    parser.add_argument("-i", "--input", help="The file to be processed", default="-")
    parser.add_argument("-o", "--output", help="The location of the output", default="-")
    return parser.parse_args()
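Replacing the positional input/output arguments with -i/-o defaulting to "-" turns the script into a standard filter: it reads stdin and writes stdout unless files are named. A standalone sketch of the resulting parser behaviour:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("-m", "--macros", default=["macros"], action="append")
parser.add_argument("-i", "--input", default="-")
parser.add_argument("-o", "--output", default="-")

args = parser.parse_args([])                       # no flags at all
assert args.input == "-" and args.output == "-"    # stdin in, stdout out
args = parser.parse_args(["-m", "extra"])
assert args.macros == ["macros", "extra"]          # append keeps the built-in default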
@@ -75,16 +83,19 @@ def upper_check(token, word):
    return word
def process(tokens, macros):
def process(input, macros):
    tokens, otf_macros = tokenize(input)
    output = tokens
    macros = otf_macros + macros
    for line_number, line in enumerate(tokens):
        for token_number, token in enumerate(line):
            if len(token) == 0:
                continue
            # cutting of the end and then adding it back once expanded
            # cutting off the end and then adding it back once expanded
            # e.g. punctuation: from the token "hello...", end would be equal to "..."
            # and token would be equal to "hello"
            end = []
            token = list(token)
            for index, char in reversed(list(enumerate(token))):
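The trimming the comment describes can be sketched as a standalone helper (hypothetical, not part of this commit):

def split_end(token):
    # peel trailing non-alphanumeric characters off a token,
    # e.g. "hello..." -> ("hello", "...")
    chars = list(token)
    end = []
    while chars and not chars[-1].isalnum():
        end.insert(0, chars.pop())
    return ''.join(chars), ''.join(end)

assert split_end("hello...") == ("hello", "...")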
@@ -108,29 +119,79 @@ def process(tokens, macros):
                    break
            output[line_number][token_number] = upper_check(token, value)
            # re-adding what was trimmed off
            output[line_number][token_number] += end
    return detokenize(output)
def tokenize(input):
    """
    Returns a 2D list of tokens and a list of otf_macros.
    OTF macro definitions are removed; only the keyword is kept, along with any
    punctuation on the final word.
    """
    tokens = [x.split(' ') for x in input.split('\n')]
    otf_macros = []
    in_otf_macro = False
    tmp_macro_keyword = None
    tmp_macro_definition = []
    # find and extract on the fly macros
    for line_index, line in enumerate(tokens):
        for token_index, token in enumerate(line):
            token_is_otf_macro_start = is_otf_macro_start(token, line)
            in_otf_macro = token_is_otf_macro_start or in_otf_macro
            if token_is_otf_macro_start:
                tmp_macro_keyword = token
                tmp_macro_definition = []
                tokens[line_index][token_index] = None
                continue
            elif in_otf_macro and is_otf_macro_end(token):
                split_token = re.split(r'\.,|,\.', token)
                tmp_macro_definition.append(split_token[0])
                tokens[line_index][token_index] = tmp_macro_keyword + split_token[1]
                otf_macros.append((tmp_macro_keyword, ' '.join(tmp_macro_definition)))
                in_otf_macro = False
                continue
            elif in_otf_macro:
                tmp_macro_definition.append(token)
                tokens[line_index][token_index] = None
                continue
        # filter out None tokens
        tokens[line_index] = [token for token in tokens[line_index] if token is not None]
    return tokens, otf_macros
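Tracing tokenize() on a one-line input shows the extraction: the definition words disappear from the token stream, and whatever followed the ".," end marker is re-attached to the keyword (assuming the functions above are in scope):

tokens, otf_macros = tokenize("a .otfm on the fly macro., here")
assert tokens == [['a', '.otfm', 'here']]
assert otf_macros == [('.otfm', 'on the fly macro')]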
def detokenize(tokens):
    """Turn a list of tokens into plaintext. """
    output = []
    for index, line in enumerate(tokens):
        output.append([])
        for token in line:
            if isinstance(token, MultipleTokens):
                for word in token.words:
                    output[index].append(word)
            elif isinstance(token, str):
                output[index].append(token)
            else:
                raise ValueError(f"Unknown token type: {type(token)}")
    for line_number, line in enumerate(output):
        output[line_number] = ' '.join(line)
    output = '\n'.join(output)
    return output
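For example, a MultipleTokens placeholder is flattened in place (assuming the definitions above):

line = ["Hello", MultipleTokens(["brave", "new"]), "world"]
assert detokenize([line]) == "Hello brave new world"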
def tokenize(input):
    """
    Return of list of tokens from string (convert file contents to format to be
    processed by `process`
    """
    return [x.split(' ') for x in input.split('\n')]
    return '\n'.join(output)
def get_macros(input):
    """
    Turn a string into a list of tuples of macros
    Turn a macros string into a list of tuples of macros
    """
    response = []
@@ -142,11 +203,34 @@ def get_macros(input):
        if macro[0] == "source":
            with open(macro[1]) as file:
                response += get_macros(file.read())
        if len(macro) == 2 or len(macro) == 3:
        if len(macro) >= 2:
            response.append(tuple(macros[index]))
    return response
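A sketch of the expected result shape (assuming get_macros above; two-word lines give (keyword, expansion) pairs, and no source lines are used here):

assert get_macros(".hy hydrogen\n.wink 😉") == [('.hy', 'hydrogen'), ('.wink', '😉')]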
def is_otf_macro_start(token, line):
    """
    Returns true if token is the start of an on the fly macro
    """
    match = re.search(r'^\.[A-Za-z0-9]+$', token)
    if match is None:
        return False
    # don't return true if we can't find an end token in the line
    for line_token in line:
        if is_otf_macro_end(line_token):
            return match is not None
    return False
def is_otf_macro_end(token):
    """
    Returns true if token is the end of an on the fly macro
    """
    match = re.search(r'(\.,|,\.)', token)
    return match is not None
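Together the two predicates ensure a keyword only opens an on-the-fly macro when the same line also closes it (assuming the functions above):

line = "a .otfm on the fly macro., here".split(' ')
assert is_otf_macro_start(".otfm", line)           # an end marker exists in the line
assert not is_otf_macro_start(".otfm", ["no", "end"])
assert is_otf_macro_end("macro.,s!")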
def main(args):
""" Entry point for script """
@@ -159,17 +243,13 @@ def main(args):
            macros += get_macros(file.read())
    # get tokens (file contents)
    with open(args.input) as file:
        tokens = tokenize(file.read())
    if args.input == "-":
        input = sys.stdin.read()
    else:
        with open(args.input) as file:
            input = file.read()
    # get output
    output = process(tokens, macros)
    # show and save output
    with open(args.output, 'w+') as file:
        file.write(output)
    return 0
    return print(process(input, macros))
if __name__ == '__main__':

View File

@@ -1,3 +1,3 @@
#!/usr/bin/env sh
./pymacro -q tests/test_input tests/test_actual_output
cat tests/test_input | ./pymacro > tests/test_actual_output
git diff tests/test_actual_output tests/test_expected_output

View File

@@ -10,3 +10,7 @@
1 .he 2 .hes
1 .le 2 .les
1 .ma 2 .mas
This is a test of .otfm on the fly macro.,s!
If this sentence makes sense, then the test of .otfms worked!