mirror of
https://github.com/alvierahman90/otfmacros.git
synced 2025-01-20 23:53:30 +00:00
add on the fly macros
This commit is contained in:
parent
8792f4df49
commit
cc27e52bb1
@ -2,7 +2,7 @@
|
||||
.hy hydrogen
|
||||
.ca carbon
|
||||
.ox oxygen
|
||||
:wink: 😉
|
||||
.wink 😉
|
||||
source tests/test_macros_biology
|
||||
source tests/test_macros_custom_plurals
|
||||
source tests/test_macros_plural
|
||||
|
144
pymacro/pymacro
144
pymacro/pymacro
@ -4,6 +4,16 @@ import sys
|
||||
import re
|
||||
|
||||
|
||||
class MultipleTokens:
    """
    Marker for one token slot that expands to several words.

    Used by process() to tell detokenize() that a macro adds extra tokens
    without changing the indexes of the other tokens on the line.
    """

    def __init__(self, words):
        # words: the words detokenize() emits, in order, for this one slot
        self.words = words

    def __repr__(self):
        return f"{type(self).__name__}({self.words!r})"
|
||||
|
||||
|
||||
def get_args():
|
||||
""" Get command line arguments """
|
||||
|
||||
@ -11,10 +21,8 @@ def get_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("-m", "--macros", default=["macros"], action="append",
|
||||
help="Extra files where macros are stored")
|
||||
parser.add_argument("-q", "--quiet", default=False, action="store_true",
|
||||
help="Don't output to stdout")
|
||||
parser.add_argument("input", help="The file to be processed")
|
||||
parser.add_argument("output", help="The location of the output")
|
||||
parser.add_argument("-i", "--input", help="The file to be processed", default="-")
|
||||
parser.add_argument("-o", "--output", help="The location of the output", default="-")
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
@ -75,16 +83,19 @@ def upper_check(token, word):
|
||||
return word
|
||||
|
||||
|
||||
def process(tokens, macros):
|
||||
def process(input, macros):
|
||||
tokens, otf_macros = tokenize(input)
|
||||
output = tokens
|
||||
macros = otf_macros + macros
|
||||
|
||||
for line_number, line in enumerate(tokens):
|
||||
for token_number, token in enumerate(line):
|
||||
if len(token) == 0:
|
||||
continue
|
||||
|
||||
|
||||
# cutting of the end and then adding it back once expanded
|
||||
# cutting off the end and then adding it back once expanded
|
||||
# e.g. punctuation: from the token "hello...", end would be equal to "..."
|
||||
# and token would be equal to "hello"
|
||||
end = []
|
||||
token = list(token)
|
||||
for index, char in reversed(list(enumerate(token))):
|
||||
@ -108,29 +119,79 @@ def process(tokens, macros):
|
||||
break
|
||||
|
||||
output[line_number][token_number] = upper_check(token, value)
|
||||
|
||||
# re adding what was trimmed off
|
||||
output[line_number][token_number] += end
|
||||
|
||||
return detokenize(output)
|
||||
|
||||
def tokenize(input):
    """
    Split text into tokens and extract on the fly (otf) macro definitions.

    An otf macro starts at a keyword token (as recognised by
    is_otf_macro_start) and ends at the first following token on the same
    line that contains `.,` or `,.`. The words of the definition are removed
    from the token stream; only the keyword is kept, followed by whatever
    trailed the end marker (e.g. punctuation or a plural suffix).

    Returns a tuple (tokens, otf_macros): tokens is a 2D list (one list of
    space separated tokens per line) and otf_macros is a list of
    (keyword, definition) tuples in order of appearance.
    """
    tokens = []
    otf_macros = []

    for line in input.split('\n'):
        words = line.split(' ')
        kept = []
        keyword = None   # keyword of the macro currently being read, if any
        definition = []  # definition words collected for that keyword
        swallowed = []   # original tokens consumed since the keyword

        for index, token in enumerate(words):
            # Only tokens *after* the keyword can close its definition, and
            # passing an untouched slice keeps placeholders out of the search.
            if is_otf_macro_start(token, words[index + 1:]):
                # A later keyword supersedes a pending one: the tokens the
                # pending one consumed were ordinary text, so put them back.
                kept.extend(swallowed)
                keyword = token
                definition = []
                swallowed = [token]
            elif keyword is not None and is_otf_macro_end(token):
                # Dots are escaped so only a literal `.,` / `,.` splits;
                # maxsplit keeps everything after the first marker intact.
                head, tail = re.split(r'\.,|,\.', token, maxsplit=1)
                if head:
                    definition.append(head)
                kept.append(keyword + tail)
                otf_macros.append((keyword, ' '.join(definition)))
                keyword = None
                swallowed = []
            elif keyword is not None:
                definition.append(token)
                swallowed.append(token)
            else:
                kept.append(token)

        # A definition never spans lines; if one was left open, keep its
        # tokens as plain text instead of dropping them.
        kept.extend(swallowed)
        tokens.append(kept)

    return tokens, otf_macros
|
||||
|
||||
|
||||
|
||||
|
||||
def detokenize(tokens):
    """Join a 2D list of tokens (lines of tokens) back into plaintext."""
    expanded_lines = []

    # First pass: flatten every token slot into plain words.
    for line in tokens:
        words = []
        for token in line:
            if isinstance(token, MultipleTokens):
                # one slot that stands for several words
                words.extend(token.words)
            elif isinstance(token, str):
                words.append(token)
            else:
                raise ValueError(f"Unknown token type: {type(token)}")
        expanded_lines.append(words)

    # Second pass: words are space separated, lines newline separated.
    return '\n'.join(' '.join(words) for words in expanded_lines)
|
||||
|
||||
|
||||
def tokenize(input):
|
||||
"""
|
||||
Return of list of tokens from string (convert file contents to format to be
|
||||
processed by `process`
|
||||
"""
|
||||
return [x.split(' ') for x in input.split('\n')]
|
||||
return '\n'.join(output)
|
||||
|
||||
|
||||
def get_macros(input):
|
||||
"""
|
||||
Turn a string into a list of tuples of macros
|
||||
Turn a macros string into a list of tuples of macros
|
||||
"""
|
||||
response = []
|
||||
|
||||
@ -142,11 +203,34 @@ def get_macros(input):
|
||||
if macro[0] == "source":
|
||||
with open(macro[1]) as file:
|
||||
response += get_macros(file.read())
|
||||
if len(macro) == 2 or len(macro) == 3:
|
||||
if len(macro) >= 2:
|
||||
response.append(tuple(macros[index]))
|
||||
|
||||
return response
|
||||
|
||||
def is_otf_macro_start(token, line):
    """
    Return True if token is the start of an on the fly macro.

    token must look like a macro keyword (a dot followed only by letters
    and digits) and line must contain at least one token that ends an on
    the fly macro. Entries of line that are not strings are ignored.
    """
    if re.search(r'^\.[A-Za-z0-9]+$', token) is None:
        return False

    # A keyword only starts a macro if an end token can be found in the line.
    # Non-string entries (e.g. None placeholders left for removed tokens)
    # can never be an end token, so skip them instead of crashing in re.
    return any(
        isinstance(line_token, str) and is_otf_macro_end(line_token)
        for line_token in line
    )
|
||||
|
||||
|
||||
def is_otf_macro_end(token):
    """
    Tell whether token closes an on the fly macro, i.e. contains `.,` or `,.`
    """
    return re.search(r'(\.,|,\.)', token) is not None
|
||||
|
||||
|
||||
def main(args):
|
||||
""" Entry point for script """
|
||||
@ -159,17 +243,13 @@ def main(args):
|
||||
macros += get_macros(file.read())
|
||||
|
||||
# get tokens (file contents)
|
||||
with open(args.input) as file:
|
||||
tokens = tokenize(file.read())
|
||||
if args.input == "-":
|
||||
input = sys.stdin.read()
|
||||
else:
|
||||
with open(args.input) as file:
|
||||
input = file.read()
|
||||
|
||||
# get output
|
||||
output = process(tokens, macros)
|
||||
|
||||
# show and save output
|
||||
with open(args.output, 'w+') as file:
|
||||
file.write(output)
|
||||
|
||||
return 0
|
||||
return print(process(input, macros))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@ -1,3 +1,3 @@
|
||||
#/usr/bin/env sh
|
||||
./pymacro -q tests/test_input tests/test_actual_output
|
||||
cat tests/test_input | ./pymacro > tests/test_actual_output
|
||||
git diff tests/test_actual_output tests/test_expected_output
|
||||
|
@ -10,3 +10,7 @@
|
||||
1 .he 2 .hes
|
||||
1 .le 2 .les
|
||||
1 .ma 2 .mas
|
||||
|
||||
This is a test of .otfm on the fly macro.,s!
|
||||
|
||||
If this sentence makes sense, then the test of .otfms worked!
|
||||
|
39
syntax.md
39
syntax.md
@ -30,6 +30,31 @@ This sources macros from its parent and grandparent directories.
|
||||
This may be useful if you have several related topics in separate folders,
|
||||
but only some of the macros are shared and some are not.
|
||||
|
||||
### on the fly definitions
|
||||
|
||||
You can also define macros in the middle of a sentence without having to switch to the macros file.
|
||||
The last word of a multi-word macro must end with `.,` or `,.`, and the definition must end on the same line it starts on.
|
||||
|
||||
```
|
||||
# .Hc hydrocarbon
|
||||
In organic chemistry, a .hc is an .oc organic compound., consisting entirely of
|
||||
hydrogen and carbon. .Hcs are examples of group 14 hydrides.
|
||||
|
||||
|
||||
.Ocs are compounds which contain .c .h bonds.
|
||||
```
|
||||
|
||||
The output would be the following:
|
||||
|
||||
```markdown
|
||||
# Hydrocarbon
|
||||
In organic chemistry, a hydrocarbon is an organic compound consisting entirely
|
||||
of hydrogen and carbon. Hydrocarbons are examples of group 14 hydrides.
|
||||
|
||||
Organic compounds are compounds which contain carbon hydrogen bonds.
|
||||
```
|
||||
|
||||
## plurals
|
||||
|
||||
Say you only have the following macro definitions:
|
||||
@ -40,8 +65,8 @@ And the file `test.md`:
|
||||
|
||||
```markdown
|
||||
# .Hc
|
||||
In organic chemistry, a .hc is an organic compound consisting entirely of
|
||||
hydrogen and carbon. .Hcs are examples of group 14 hydrides.
|
||||
In organic chemistry, a .hc is an organic compound consisting entirely of
|
||||
hydrogen and carbon. .Hcs are examples of group 14 hydrides.
|
||||
```
|
||||
|
||||
The output would be the following:
|
||||
@ -49,14 +74,14 @@ The output would be the following:
|
||||
```markdown
|
||||
# Hydrocarbon
|
||||
In organic chemistry, a hydrocarbon is an organic compound consisting entirely
|
||||
of hydrogen and carbon. Hydrocarbons are examples of group 14 hydrides.
|
||||
of hydrogen and carbon. Hydrocarbons are examples of group 14 hydrides.
|
||||
```
|
||||
|
||||
Note how `.hcs` becomes `hydrocarbons` even though that isn't explicitly
|
||||
defined. This helps you avoid adding the plural forms of words.
|
||||
|
||||
|
||||
### other default plurals
|
||||
### other default plurals
|
||||
|
||||
There are also other default plurals:
|
||||
|
||||
@ -102,12 +127,12 @@ plural version at all and forgo defining a custom plural altogether.
|
||||
## capitalization
|
||||
|
||||
- Capitalizing the first character of a macro makes the first letter of the word
|
||||
also capitalized.
|
||||
also capitalized.
|
||||
- Capitalizing every letter makes the whole word capitalized also, except if the
|
||||
shortcut is only one word long.
|
||||
shortcut is only one word long.
|
||||
- For this reason, you may not want to use one letter definitions.
|
||||
- This also means that macro definitions are case insensitive: their capitalization is completely
|
||||
ignored.
|
||||
ignored.
|
||||
|
||||
Say you have the following macros:
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user