mirror of
https://github.com/alvierahman90/otfmacros.git
synced 2024-12-15 12:01:59 +00:00
add on the fly macros
This commit is contained in:
parent
8792f4df49
commit
cc27e52bb1
@ -2,7 +2,7 @@
|
|||||||
.hy hydrogen
|
.hy hydrogen
|
||||||
.ca carbon
|
.ca carbon
|
||||||
.ox oxygen
|
.ox oxygen
|
||||||
:wink: 😉
|
.wink 😉
|
||||||
source tests/test_macros_biology
|
source tests/test_macros_biology
|
||||||
source tests/test_macros_custom_plurals
|
source tests/test_macros_custom_plurals
|
||||||
source tests/test_macros_plural
|
source tests/test_macros_plural
|
||||||
|
144
pymacro/pymacro
144
pymacro/pymacro
@ -4,6 +4,16 @@ import sys
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
class MultipleTokens():
|
||||||
|
"""
|
||||||
|
Used by process() to tell detokenize() that a macro adds extra tokens
|
||||||
|
without changing the indexes of other tokens
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, words):
|
||||||
|
self.words = words
|
||||||
|
|
||||||
|
|
||||||
def get_args():
|
def get_args():
|
||||||
""" Get command line arguments """
|
""" Get command line arguments """
|
||||||
|
|
||||||
@ -11,10 +21,8 @@ def get_args():
|
|||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument("-m", "--macros", default=["macros"], action="append",
|
parser.add_argument("-m", "--macros", default=["macros"], action="append",
|
||||||
help="Extra files where macros are stored")
|
help="Extra files where macros are stored")
|
||||||
parser.add_argument("-q", "--quiet", default=False, action="store_true",
|
parser.add_argument("-i", "--input", help="The file to be processed", default="-")
|
||||||
help="Don't output to stdout")
|
parser.add_argument("-o", "--output", help="The location of the output", default="-")
|
||||||
parser.add_argument("input", help="The file to be processed")
|
|
||||||
parser.add_argument("output", help="The location of the output")
|
|
||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
@ -75,16 +83,19 @@ def upper_check(token, word):
|
|||||||
return word
|
return word
|
||||||
|
|
||||||
|
|
||||||
def process(tokens, macros):
|
def process(input, macros):
|
||||||
|
tokens, otf_macros = tokenize(input)
|
||||||
output = tokens
|
output = tokens
|
||||||
|
macros = otf_macros + macros
|
||||||
|
|
||||||
for line_number, line in enumerate(tokens):
|
for line_number, line in enumerate(tokens):
|
||||||
for token_number, token in enumerate(line):
|
for token_number, token in enumerate(line):
|
||||||
if len(token) == 0:
|
if len(token) == 0:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# cutting off the end and then adding it back once expanded
|
||||||
# cutting of the end and then adding it back once expanded
|
# e.g. punctuation: from the token "hello...", end would be equal to "..."
|
||||||
|
# and token would be equal to "hello"
|
||||||
end = []
|
end = []
|
||||||
token = list(token)
|
token = list(token)
|
||||||
for index, char in reversed(list(enumerate(token))):
|
for index, char in reversed(list(enumerate(token))):
|
||||||
@ -108,29 +119,79 @@ def process(tokens, macros):
|
|||||||
break
|
break
|
||||||
|
|
||||||
output[line_number][token_number] = upper_check(token, value)
|
output[line_number][token_number] = upper_check(token, value)
|
||||||
|
|
||||||
# re adding what was trimmed off
|
|
||||||
output[line_number][token_number] += end
|
output[line_number][token_number] += end
|
||||||
|
|
||||||
|
return detokenize(output)
|
||||||
|
|
||||||
|
def tokenize(input):
|
||||||
|
"""
|
||||||
|
Returns a 2D list of tokens and a list of otf_macros.
|
||||||
|
otf macro definitions are removed and just the keyword definition is kept as well as any
|
||||||
|
punctuation on the final word.
|
||||||
|
"""
|
||||||
|
tokens = [x.split(' ') for x in input.split('\n')]
|
||||||
|
|
||||||
|
otf_macros = []
|
||||||
|
in_otf_macro = False
|
||||||
|
tmp_macro_keyword = None
|
||||||
|
tmp_macro_definition = []
|
||||||
|
|
||||||
|
# find and extract on the fly macros
|
||||||
|
for line_index, line in enumerate(tokens):
|
||||||
|
for token_index, token in enumerate(line):
|
||||||
|
token_is_otf_macro_start = is_otf_macro_start(token, line)
|
||||||
|
in_otf_macro = token_is_otf_macro_start or in_otf_macro
|
||||||
|
|
||||||
|
if token_is_otf_macro_start:
|
||||||
|
tmp_macro_keyword = token
|
||||||
|
tmp_macro_definition = []
|
||||||
|
tokens[line_index][token_index] = None
|
||||||
|
continue
|
||||||
|
elif in_otf_macro and is_otf_macro_end(token):
|
||||||
|
split_token = re.split(r',.|.,', token)
|
||||||
|
tmp_macro_definition.append(split_token[0])
|
||||||
|
tokens[line_index][token_index] = tmp_macro_keyword + split_token[1]
|
||||||
|
otf_macros.append((tmp_macro_keyword, ' '.join(tmp_macro_definition)))
|
||||||
|
in_otf_macro = False
|
||||||
|
continue
|
||||||
|
elif in_otf_macro:
|
||||||
|
tmp_macro_definition.append(token)
|
||||||
|
tokens[line_index][token_index] = None
|
||||||
|
continue
|
||||||
|
|
||||||
|
# filter out None tokens
|
||||||
|
tokens[line_index] = [token for token in tokens[line_index] if token is not None]
|
||||||
|
|
||||||
|
return tokens, otf_macros
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def detokenize(tokens):
|
||||||
|
"""Turn a list of tokens into plaintext. """
|
||||||
|
|
||||||
|
output = []
|
||||||
|
|
||||||
|
for index, line in enumerate(tokens):
|
||||||
|
output.append([])
|
||||||
|
for token in line:
|
||||||
|
if isinstance(token, MultipleTokens):
|
||||||
|
for word in token.words:
|
||||||
|
output[index].append(word)
|
||||||
|
elif isinstance(token, str):
|
||||||
|
output[index].append(token)
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Unknown token type: {type(token)}")
|
||||||
|
|
||||||
for line_number, line in enumerate(output):
|
for line_number, line in enumerate(output):
|
||||||
output[line_number] = ' '.join(line)
|
output[line_number] = ' '.join(line)
|
||||||
|
|
||||||
output = '\n'.join(output)
|
return '\n'.join(output)
|
||||||
|
|
||||||
return output
|
|
||||||
|
|
||||||
|
|
||||||
def tokenize(input):
|
|
||||||
"""
|
|
||||||
Return of list of tokens from string (convert file contents to format to be
|
|
||||||
processed by `process`
|
|
||||||
"""
|
|
||||||
return [x.split(' ') for x in input.split('\n')]
|
|
||||||
|
|
||||||
|
|
||||||
def get_macros(input):
|
def get_macros(input):
|
||||||
"""
|
"""
|
||||||
Turn a string into a list of tuples of macros
|
Turn a macros string into a list of tuples of macros
|
||||||
"""
|
"""
|
||||||
response = []
|
response = []
|
||||||
|
|
||||||
@ -142,11 +203,34 @@ def get_macros(input):
|
|||||||
if macro[0] == "source":
|
if macro[0] == "source":
|
||||||
with open(macro[1]) as file:
|
with open(macro[1]) as file:
|
||||||
response += get_macros(file.read())
|
response += get_macros(file.read())
|
||||||
if len(macro) == 2 or len(macro) == 3:
|
if len(macro) >= 2:
|
||||||
response.append(tuple(macros[index]))
|
response.append(tuple(macros[index]))
|
||||||
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
def is_otf_macro_start(token, line):
|
||||||
|
"""
|
||||||
|
Returns true if token is the start of an on the fly macro
|
||||||
|
"""
|
||||||
|
match = re.search(r'^\.[A-Za-z0-9]+$', token)
|
||||||
|
if match is None:
|
||||||
|
return False
|
||||||
|
|
||||||
|
# don't return true if you can't find an end token in the line
|
||||||
|
for line_token in line:
|
||||||
|
if is_otf_macro_end(line_token):
|
||||||
|
return match is not None
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def is_otf_macro_end(token):
|
||||||
|
"""
|
||||||
|
Returns true if token is the end of an on the fly macro
|
||||||
|
"""
|
||||||
|
match = re.search(r'(\.,|,\.)', token)
|
||||||
|
return match is not None
|
||||||
|
|
||||||
|
|
||||||
def main(args):
|
def main(args):
|
||||||
""" Entry point for script """
|
""" Entry point for script """
|
||||||
@ -159,17 +243,13 @@ def main(args):
|
|||||||
macros += get_macros(file.read())
|
macros += get_macros(file.read())
|
||||||
|
|
||||||
# get tokens (file contents)
|
# get tokens (file contents)
|
||||||
with open(args.input) as file:
|
if args.input == "-":
|
||||||
tokens = tokenize(file.read())
|
input = sys.stdin.read()
|
||||||
|
else:
|
||||||
|
with open(args.input) as file:
|
||||||
|
input = file.read()
|
||||||
|
|
||||||
# get output
|
return print(process(input, macros))
|
||||||
output = process(tokens, macros)
|
|
||||||
|
|
||||||
# show and save output
|
|
||||||
with open(args.output, 'w+') as file:
|
|
||||||
file.write(output)
|
|
||||||
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
#/usr/bin/env sh
|
#/usr/bin/env sh
|
||||||
./pymacro -q tests/test_input tests/test_actual_output
|
cat tests/test_input | ./pymacro > tests/test_actual_output
|
||||||
git diff tests/test_actual_output tests/test_expected_output
|
git diff tests/test_actual_output tests/test_expected_output
|
||||||
|
@ -10,3 +10,7 @@
|
|||||||
1 .he 2 .hes
|
1 .he 2 .hes
|
||||||
1 .le 2 .les
|
1 .le 2 .les
|
||||||
1 .ma 2 .mas
|
1 .ma 2 .mas
|
||||||
|
|
||||||
|
This is a test of .otfm on the fly macro.,s!
|
||||||
|
|
||||||
|
If this sentence makes sense, then the test of .otfms worked!
|
||||||
|
25
syntax.md
25
syntax.md
@ -30,6 +30,31 @@ This sources macros from its parent and grandparent directories.
|
|||||||
This may be useful if you have several related topics in separate folders,
|
This may be useful if you have several related topics in separate folders,
|
||||||
but only some of the macros are shared and some are not.
|
but only some of the macros are shared and some are not.
|
||||||
|
|
||||||
|
### on the fly definitions
|
||||||
|
|
||||||
|
You can also define macros in the middle of a sentence without having to switch to the macros file.
|
||||||
|
The last word of a multi-word macro must end with `.,` or `,.`, and the macro must end on the same line it starts on.
|
||||||
|
|
||||||
|
```
|
||||||
|
# .Hc hydrocarbon
|
||||||
|
In organic chemistry, a .hc is an .oc organic compound., consisting entirely of
|
||||||
|
hydrogen and carbon. .Hcs are examples of group 14 hydrides.
|
||||||
|
|
||||||
|
|
||||||
|
.Ocs are compounds which contain .c .h bonds.
|
||||||
|
```
|
||||||
|
|
||||||
|
The output would be the following:
|
||||||
|
|
||||||
|
```markdown
|
||||||
|
# Hydrocarbon
|
||||||
|
In organic chemistry, a hydrocarbon is an organic compound consisting entirely
|
||||||
|
of hydrogen and carbon. Hydrocarbons are examples of group 14 hydrides.
|
||||||
|
|
||||||
|
Organic compounds are compounds which contain carbon hydrogen bonds.
|
||||||
|
```
|
||||||
|
|
||||||
## plurals
|
## plurals
|
||||||
|
|
||||||
Say you only have the following macro definitions:
|
Say you only have the following macro definitions:
|
||||||
|
Loading…
Reference in New Issue
Block a user