add on the fly macros

Akbar Rahman 2021-03-06 15:01:48 +00:00
parent 8792f4df49
commit cc27e52bb1
5 changed files with 150 additions and 41 deletions

View File

@@ -2,7 +2,7 @@
 .hy hydrogen
 .ca carbon
 .ox oxygen
-:wink: 😉
+.wink 😉
 source tests/test_macros_biology
 source tests/test_macros_custom_plurals
 source tests/test_macros_plural

View File

@@ -4,6 +4,16 @@ import sys
 import re
 
 
+class MultipleTokens():
+    """
+    Used by process() to tell detokenize() that a macro adds extra tokens
+    without changing the indexes of other tokens
+    """
+
+    def __init__(self, words):
+        self.words = words
+
+
 def get_args():
     """ Get command line arguments """
@@ -11,10 +21,8 @@ def get_args():
     parser = argparse.ArgumentParser()
     parser.add_argument("-m", "--macros", default=["macros"], action="append",
                         help="Extra files where macros are stored")
-    parser.add_argument("-q", "--quiet", default=False, action="store_true",
-                        help="Don't output to stdout")
-    parser.add_argument("input", help="The file to be processed")
-    parser.add_argument("output", help="The location of the output")
+    parser.add_argument("-i", "--input", help="The file to be processed", default="-")
+    parser.add_argument("-o", "--output", help="The location of the output", default="-")
 
     return parser.parse_args()
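The positional `input`/`output` arguments are replaced by `-i`/`-o` flags that default to `-`. A minimal editorial sketch (not part of the commit) of the reworked flags in isolation; per the `main()` hunk further down, an input of `-` means stdin, and the result is simply printed rather than written to `args.output`:

```python
import argparse

# Editorial sketch, not part of the commit: the reworked interface in isolation.
# The add_argument() calls are copied verbatim from the hunk above.
parser = argparse.ArgumentParser()
parser.add_argument("-m", "--macros", default=["macros"], action="append",
                    help="Extra files where macros are stored")
parser.add_argument("-i", "--input", help="The file to be processed", default="-")
parser.add_argument("-o", "--output", help="The location of the output", default="-")

args = parser.parse_args([])                 # no flags given -> all defaults
print(args.macros, args.input, args.output)  # ['macros'] - -
```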
@@ -75,16 +83,19 @@ def upper_check(token, word):
     return word
 
 
-def process(tokens, macros):
+def process(input, macros):
+    tokens, otf_macros = tokenize(input)
     output = tokens
+    macros = otf_macros + macros
 
     for line_number, line in enumerate(tokens):
         for token_number, token in enumerate(line):
             if len(token) == 0:
                 continue
 
-            # cutting of the end and then adding it back once expanded
+            # cutting off the end and then adding it back once expanded
+            # e.g. punctuation: from the token "hello...", end would be equal to "..."
+            # and token would be equal to "hello"
             end = []
             token = list(token)
             for index, char in reversed(list(enumerate(token))):
@@ -108,29 +119,79 @@ def process(tokens, macros):
                     break
 
             output[line_number][token_number] = upper_check(token, value)
+            # re adding what was trimmed off
             output[line_number][token_number] += end
 
+    return detokenize(output)
+
+
+def tokenize(input):
+    """
+    Returns a 2D list of tokens and a list of otf_macros.
+
+    otf macro definitions are removed and just the keyword definition is kept as well as any
+    punctuation on the final word.
+    """
+    tokens = [x.split(' ') for x in input.split('\n')]
+    otf_macros = []
+
+    in_otf_macro = False
+    tmp_macro_keyword = None
+    tmp_macro_definition = []
+
+    # find and extract on the fly macros
+    for line_index, line in enumerate(tokens):
+        for token_index, token in enumerate(line):
+            token_is_otf_macro_start = is_otf_macro_start(token, line)
+            in_otf_macro = token_is_otf_macro_start or in_otf_macro
+            if token_is_otf_macro_start:
+                tmp_macro_keyword = token
+                tmp_macro_definition = []
+                tokens[line_index][token_index] = None
+                continue
+            elif in_otf_macro and is_otf_macro_end(token):
+                split_token = re.split(r',.|.,', token)
+                tmp_macro_definition.append(split_token[0])
+                tokens[line_index][token_index] = tmp_macro_keyword + split_token[1]
+                otf_macros.append((tmp_macro_keyword, ' '.join(tmp_macro_definition)))
+                in_otf_macro = False
+                continue
+            elif in_otf_macro:
+                tmp_macro_definition.append(token)
+                tokens[line_index][token_index] = None
+                continue
+
+        # filter out None tokens
+        tokens[line_index] = [token for token in tokens[line_index] if token is not None]
+
+    return tokens, otf_macros
+
+
+def detokenize(tokens):
+    """Turn a list of tokens into plaintext. """
+    output = []
+    for index, line in enumerate(tokens):
+        output.append([])
+        for token in line:
+            if isinstance(token, MultipleTokens):
+                for word in token.words:
+                    output[index].append(word)
+            elif isinstance(token, str):
+                output[index].append(token)
+            else:
+                raise ValueError(f"Unknown token type: {type(token)}")
+
     for line_number, line in enumerate(output):
         output[line_number] = ' '.join(line)
-    output = '\n'.join(output)
 
-    return output
-
-
-def tokenize(input):
-    """
-    Return of list of tokens from string (convert file contents to format to be
-    processed by `process`
-    """
-    return [x.split(' ') for x in input.split('\n')]
+    return '\n'.join(output)
 
 
 def get_macros(input):
     """
-    Turn a string into a list of tuples of macros
+    Turn a macros string into a list of tuples of macros
     """
     response = []
@@ -142,11 +203,34 @@ def get_macros(input):
         if macro[0] == "source":
             with open(macro[1]) as file:
                 response += get_macros(file.read())
-        if len(macro) == 2 or len(macro) == 3:
+        if len(macro) >= 2:
             response.append(tuple(macros[index]))
 
     return response
 
 
+def is_otf_macro_start(token, line):
+    """
+    Returns true if token is the start of an on the fly macro
+    """
+    match = re.search(r'^\.[A-Za-z0-9]+$', token)
+    if match is None:
+        return False
+
+    # don't return true if you can't find an end token in the line
+    for line_token in line:
+        if is_otf_macro_end(line_token):
+            return match is not None
+
+    return False
+
+
+def is_otf_macro_end(token):
+    """
+    Returns true if token is the end of an on the fly macro
+    """
+    match = re.search(r'(\.,|,\.)', token)
+
+    return match is not None
+
+
 def main(args):
     """ Entry point for script """
@@ -159,17 +243,13 @@ def main(args):
         macros += get_macros(file.read())
 
     # get tokens (file contents)
-    with open(args.input) as file:
-        tokens = tokenize(file.read())
+    if args.input == "-":
+        input = sys.stdin.read()
+    else:
+        with open(args.input) as file:
+            input = file.read()
 
-    # get output
-    output = process(tokens, macros)
-
-    # show and save output
-    with open(args.output, 'w+') as file:
-        file.write(output)
-
-    return 0
+    return print(process(input, macros))
 
 
 if __name__ == '__main__':

View File

@@ -1,3 +1,3 @@
 #/usr/bin/env sh
-./pymacro -q tests/test_input tests/test_actual_output
+cat tests/test_input | ./pymacro > tests/test_actual_output
 git diff tests/test_actual_output tests/test_expected_output

View File

@@ -10,3 +10,7 @@
 1 .he 2 .hes
 1 .le 2 .les
 1 .ma 2 .mas
+
+This is a test of .otfm on the fly macro.,s!
+If this sentence makes sense, then the test of .otfms worked!
+

View File

@@ -30,6 +30,31 @@ This sources macros from its parent and grandparent directories.
 This may be useful if you have several related topics in separate folders,
 but only some of the macros are shared and some are not.
 
+### on the fly definitions
+
+You can also define macros in the middle of a sentence without having to switch to the macros file.
+The last word of multi-word macros must end with `.,` or `,.` and end on the same line.
+
+```
+# .Hc hydrocarbon.,
+In organic chemistry, a .hc is an .oc organic compound., consisting entirely of
+hydrogen and carbon. .Hcs are examples of group 14 hydrides.
+
+.Ocs are compounds which contain .c .h bonds.
+```
+
+The output would be the following:
+
+```markdown
+# Hydrocarbon
+In organic chemistry, a hydrocarbon is an organic compound consisting entirely
+of hydrogen and carbon. Hydrocarbons are examples of group 14 hydrides.
+
+Organic compounds are compounds which contain carbon hydrogen bonds.
+```
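The detection itself is simple: a keyword token such as `.hc` plus a terminator token containing `.,` or `,.` somewhere on the same line. A rough editorial sketch of that rule (a line-level simplification of what `is_otf_macro_start()` checks per token, not the script's actual code):

```python
import re

def line_has_otf_definition(line):
    """Editorial sketch only: the 'must end on the same line' rule, using the
    same two patterns as is_otf_macro_start()/is_otf_macro_end()."""
    tokens = line.split(' ')
    has_keyword = any(re.search(r'^\.[A-Za-z0-9]+$', t) for t in tokens)   # keyword like ".hc"
    has_terminator = any(re.search(r'(\.,|,\.)', t) for t in tokens)       # ".," or ",."
    return has_keyword and has_terminator

print(line_has_otf_definition("a .hc is an .oc organic compound., consisting"))  # True
print(line_has_otf_definition("a .hc with no terminator on this line"))          # False
```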
 ## plurals
 
 Say you only have the following macro definitions:
@@ -40,8 +65,8 @@ And the file `test.md`:
 ```markdown
 # .Hc
 In organic chemistry, a .hc is an organic compound consisting entirely of
 hydrogen and carbon. .Hcs are examples of group 14 hydrides.
 ```
 
 The output would be the following:
@@ -49,14 +74,14 @@ The output would be the following:
 ```markdown
 # Hydrocarbon
 In organic chemistry, a hydrocarbon is an organic compound consisting entirely
 of hydrogen and carbon. Hydrocarbons are examples of group 14 hydrides.
 ```
 
 Note how `.hcs` becomes `hydrocarbons` even though that isn't explicitly
 defined. This helps you avoid adding the plural forms of words.
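A minimal editorial sketch of that plural fallback as described here; this is not the script's actual code, just the idea in miniature with an illustrative macro table:

```python
# Editorial sketch only: if ".hcs" is not a known macro, try the singular ".hc"
# and re-attach the trailing "s".
macros = {".hc": "hydrocarbon"}

def expand(token):
    if token in macros:
        return macros[token]
    if token.endswith("s") and token[:-1] in macros:
        return macros[token[:-1]] + "s"
    return token

print(expand(".hc"))   # hydrocarbon
print(expand(".hcs"))  # hydrocarbons
```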
 
 ### other default plurals
 
 There are also other default plurals:
@@ -102,12 +127,12 @@ plural version at all and forgo defining a custom plural altogether.
 ## capitalization
 
 - Capitalizing the first character of a macro makes the first letter of the word
   also capitalized.
 - Capitalizing every letter makes the whole word capitalized also, except if the
   shortcut is only one word long.
 - For this reason, you may not want to use one letter definitions.
 - This also means that macro definitions are case insensitive and are completely
   ignored.
 
 Say you have the following macros: