add on the fly macros

This commit is contained in:
Akbar Rahman 2021-03-06 15:01:48 +00:00
parent 8792f4df49
commit cc27e52bb1
5 changed files with 150 additions and 41 deletions

View File

@@ -2,7 +2,7 @@
.hy hydrogen
.ca carbon
.ox oxygen
:wink: 😉
.wink 😉
source tests/test_macros_biology
source tests/test_macros_custom_plurals
source tests/test_macros_plural

View File

@@ -4,6 +4,16 @@ import sys
import re
class MultipleTokens():
    """
    Used by process() to tell detokenize() that a macro adds extra tokens
    without changing the indexes of other tokens
    """

    def __init__(self, words):
        self.words = words


def get_args():
    """ Get command line arguments """
@@ -11,10 +21,8 @@ def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--macros", default=["macros"], action="append",
                        help="Extra files where macros are stored")
    parser.add_argument("-q", "--quiet", default=False, action="store_true",
                        help="Don't output to stdout")
    parser.add_argument("input", help="The file to be processed")
    parser.add_argument("output", help="The location of the output")
    parser.add_argument("-i", "--input", help="The file to be processed", default="-")
    parser.add_argument("-o", "--output", help="The location of the output", default="-")
    return parser.parse_args()
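Both new flags default to `-`; in the code visible further down, main() reads stdin when the input is `-` and simply prints the processed text. A minimal sketch of what the defaults look like (illustration only, not part of the commit):

```python
# Hypothetical check of the new argument defaults (illustration only).
import sys

sys.argv = ["pymacro"]     # run with no -i/-o flags
args = get_args()
assert args.input == "-"   # main() will read sys.stdin in this case
assert args.output == "-"  # the processed text is printed to stdout
```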
@@ -75,16 +83,19 @@ def upper_check(token, word)
return word
def process(tokens, macros):
def process(input, macros):
    tokens, otf_macros = tokenize(input)
    output = tokens
    macros = otf_macros + macros

    for line_number, line in enumerate(tokens):
        for token_number, token in enumerate(line):
            if len(token) == 0:
                continue

            # cutting of the end and then adding it back once expanded
            # cutting off the end and then adding it back once expanded
            # e.g. punctuation: from the token "hello...", end would be equal to "..."
            # and token would be equal to "hello"
            end = []
            token = list(token)
            for index, char in reversed(list(enumerate(token))):
@@ -108,29 +119,79 @@ def process(tokens, macros):
break
output[line_number][token_number] = upper_check(token, value)
# re-adding what was trimmed off
output[line_number][token_number] += end
return detokenize(output)
def tokenize(input):
    """
    Returns a 2D list of tokens and a list of otf_macros.
    On the fly macro definitions are removed; only the keyword is kept, along with
    any punctuation from the final word of the definition.
    """
    tokens = [x.split(' ') for x in input.split('\n')]
    otf_macros = []
    in_otf_macro = False
    tmp_macro_keyword = None
    tmp_macro_definition = []

    # find and extract on the fly macros
    for line_index, line in enumerate(tokens):
        for token_index, token in enumerate(line):
            token_is_otf_macro_start = is_otf_macro_start(token, line)
            in_otf_macro = token_is_otf_macro_start or in_otf_macro
            if token_is_otf_macro_start:
                tmp_macro_keyword = token
                tmp_macro_definition = []
                tokens[line_index][token_index] = None
                continue
            elif in_otf_macro and is_otf_macro_end(token):
                split_token = re.split(r'\.,|,\.', token)
                tmp_macro_definition.append(split_token[0])
                tokens[line_index][token_index] = tmp_macro_keyword + split_token[1]
                otf_macros.append((tmp_macro_keyword, ' '.join(tmp_macro_definition)))
                in_otf_macro = False
                continue
            elif in_otf_macro:
                tmp_macro_definition.append(token)
                tokens[line_index][token_index] = None
                continue

        # filter out None tokens
        tokens[line_index] = [token for token in tokens[line_index] if token is not None]

    return tokens, otf_macros
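For illustration, a minimal sketch of how the new tokenize() treats a line containing an on the fly definition (the sample sentence is invented, not taken from the tests):

```python
# Hypothetical illustration of tokenize() (sample text invented).
text = "water is a .cp chemical compound., of hydrogen and oxygen"
tokens, otf_macros = tokenize(text)

# The definition words are removed and only the keyword stays in place:
#   tokens     -> [["water", "is", "a", ".cp", "of", "hydrogen", "and", "oxygen"]]
#   otf_macros -> [(".cp", "chemical compound")]
```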
def detokenize(tokens):
    """Turn a list of tokens into plaintext. """
    output = []
    for index, line in enumerate(tokens):
        output.append([])
        for token in line:
            if isinstance(token, MultipleTokens):
                for word in token.words:
                    output[index].append(word)
            elif isinstance(token, str):
                output[index].append(token)
            else:
                raise ValueError(f"Unknown token type: {type(token)}")

    for line_number, line in enumerate(output):
        output[line_number] = ' '.join(line)

    output = '\n'.join(output)
    return output
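A short sketch of how detokenize() flattens a MultipleTokens entry back into plain words (sample tokens invented):

```python
# Hypothetical illustration of detokenize() (sample tokens invented).
tokens = [["a", MultipleTokens(["chemical", "compound"]), "of", "hydrogen"],
          ["and", "oxygen"]]
print(detokenize(tokens))
# a chemical compound of hydrogen
# and oxygen
```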
def tokenize(input):
"""
Return of list of tokens from string (convert file contents to format to be
processed by `process`
"""
return [x.split(' ') for x in input.split('\n')]
return '\n'.join(output)
def get_macros(input):
"""
Turn a string into a list of tuples of macros
Turn a macros string into a list of tuples of macros
"""
response = []
@@ -142,11 +203,34 @@ def get_macros(input):
if macro[0] == "source":
with open(macro[1]) as file:
response += get_macros(file.read())
if len(macro) == 2 or len(macro) == 3:
if len(macro) >= 2:
response.append(tuple(macros[index]))
return response
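For context, get_macros() turns a macros file like the one changed above into keyword/definition tuples and recurses into `source` lines; a hedged sketch of what it presumably returns for simple two-word entries (the exact splitting logic is outside this diff):

```python
# Hypothetical sketch; the tuple shape for longer entries is not shown in this diff.
macros_text = ".hy hydrogen\n.ca carbon\n.wink 😉"
get_macros(macros_text)
# presumably -> [(".hy", "hydrogen"), (".ca", "carbon"), (".wink", "😉")]
```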
def is_otf_macro_start(token, line):
    """
    Returns true if token is the start of an on the fly macro
    """
    match = re.search(r'^\.[A-Za-z0-9]+$', token)
    if match is None:
        return False

    # don't return true if you can't find an end token in the line
    for line_token in line:
        if is_otf_macro_end(line_token):
            return match is not None

    return False


def is_otf_macro_end(token):
    """
    Returns true if token is the end of an on the fly macro
    """
    match = re.search(r'(\.,|,\.)', token)
    return match is not None
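For illustration, how the two predicates interact: a `.keyword` token only counts as a macro start when an end marker (`.,` or `,.`) also appears somewhere on the same line (examples invented):

```python
# Hypothetical illustration of the on the fly macro predicates (examples invented).
line = "a .cp chemical compound., of hydrogen".split(' ')

is_otf_macro_start(".cp", line)                     # True: the line contains "compound.,"
is_otf_macro_end("compound.,")                      # True
is_otf_macro_start(".cp", "no end marker".split())  # False: no ".," or ",." in the line
```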
def main(args):
""" Entry point for script """
@@ -159,17 +243,13 @@ def main(args):
macros += get_macros(file.read())
# get tokens (file contents)
if args.input == "-":
input = sys.stdin.read()
else:
with open(args.input) as file:
tokens = tokenize(file.read())
input = file.read()
# get output
output = process(tokens, macros)
# show and save output
with open(args.output, 'w+') as file:
file.write(output)
return 0
return print(process(input, macros))
if __name__ == '__main__':

View File

@@ -1,3 +1,3 @@
#!/usr/bin/env sh
./pymacro -q tests/test_input tests/test_actual_output
cat tests/test_input | ./pymacro > tests/test_actual_output
git diff tests/test_actual_output tests/test_expected_output

View File

@@ -10,3 +10,7 @@
1 .he 2 .hes
1 .le 2 .les
1 .ma 2 .mas
This is a test of .otfm on the fly macro.,s!
If this sentence makes sense, then the test of .otfms worked!

View File

@@ -30,6 +30,31 @@ This sources macros from its parent and grandparent directories.
This may be useful if you have several related topics in separate folders,
but only some of the macros are shared and some are not.
### on the fly definitions
You can also define macros in the middle of a sentence without having to switch to the macros file.
The last word of a multi-word definition must end with `.,` or `,.`, and the whole definition must stay on the same line.
```
# .Hc hydrocarbon
In organic chemistry, a .hc is an .oc organic compound., consisting entirely of
hydrogen and carbon. .Hcs are examples of group 14 hydrides.
.Ocs are compounds which contain .c .h bonds.
```
The output would be the following:
```markdown
# Hydrocarbon
In organic chemistry, a hydrocarbon is an organic compound consisting entirely
of hydrogen and carbon. Hydrocarbons are examples of group 14 hydrides.
Organic compounds are compounds which contain carbon hydrogen bonds.
```
## plurals
Say you only have the following macro definitions: