mirror of
				https://github.com/alvierahman90/otfmacros.git
				synced 2025-10-31 07:10:14 +00:00 
			
		
		
		
	add on the fly macros
This commit is contained in:
		| @@ -2,7 +2,7 @@ | |||||||
| .hy	hydrogen | .hy	hydrogen | ||||||
| .ca	carbon | .ca	carbon | ||||||
| .ox	oxygen | .ox	oxygen | ||||||
| :wink:	😉 | .wink	😉 | ||||||
| source	tests/test_macros_biology | source	tests/test_macros_biology | ||||||
| source	tests/test_macros_custom_plurals | source	tests/test_macros_custom_plurals | ||||||
| source	tests/test_macros_plural | source	tests/test_macros_plural | ||||||
|   | |||||||
							
								
								
									
										142
									
								
								pymacro/pymacro
									
									
									
									
									
								
							
							
						
						
									
										142
									
								
								pymacro/pymacro
									
									
									
									
									
								
							| @@ -4,6 +4,16 @@ import sys | |||||||
| import re | import re | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class MultipleTokens(): | ||||||
|  |     """ | ||||||
|  |     Used by process() to tell detokenize() that a macro adds extra tokens | ||||||
|  |     without changing the indexes of other tokens | ||||||
|  |     """ | ||||||
|  |  | ||||||
|  |     def __init__(self, words): | ||||||
|  |         self.words  = words | ||||||
|  |  | ||||||
|  |  | ||||||
| def get_args(): | def get_args(): | ||||||
|     """ Get command line arguments """ |     """ Get command line arguments """ | ||||||
|  |  | ||||||
| @@ -11,10 +21,8 @@ def get_args(): | |||||||
|     parser = argparse.ArgumentParser() |     parser = argparse.ArgumentParser() | ||||||
|     parser.add_argument("-m", "--macros", default=["macros"], action="append", |     parser.add_argument("-m", "--macros", default=["macros"], action="append", | ||||||
|                         help="Extra files where macros are stored") |                         help="Extra files where macros are stored") | ||||||
|     parser.add_argument("-q", "--quiet", default=False, action="store_true", |     parser.add_argument("-i", "--input", help="The file to be processed", default="-") | ||||||
|                         help="Don't output to stdout") |     parser.add_argument("-o", "--output", help="The location of the output", default="-") | ||||||
|     parser.add_argument("input", help="The file to be processed") |  | ||||||
|     parser.add_argument("output", help="The location of the output") |  | ||||||
|     return parser.parse_args() |     return parser.parse_args() | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -75,16 +83,19 @@ def upper_check(token, word): | |||||||
|     return word |     return word | ||||||
|  |  | ||||||
|  |  | ||||||
| def process(tokens, macros): | def process(input, macros): | ||||||
|  |     tokens, otf_macros = tokenize(input) | ||||||
|     output = tokens |     output = tokens | ||||||
|  |     macros = otf_macros + macros | ||||||
|  |  | ||||||
|     for line_number, line in enumerate(tokens): |     for line_number, line in enumerate(tokens): | ||||||
|         for token_number, token in enumerate(line): |         for token_number, token in enumerate(line): | ||||||
|             if len(token) == 0: |             if len(token) == 0: | ||||||
|                 continue |                 continue | ||||||
|  |  | ||||||
|              |             # cutting off the end and then adding it back once expanded | ||||||
|             # cutting of the end and then adding it back once expanded |             # e.g. punctuation: from the token "hello...", end would be equal to "..." | ||||||
|  |             #      and token would be equal to "hello" | ||||||
|             end = [] |             end = [] | ||||||
|             token = list(token) |             token = list(token) | ||||||
|             for index, char in reversed(list(enumerate(token))): |             for index, char in reversed(list(enumerate(token))): | ||||||
| @@ -108,29 +119,79 @@ def process(tokens, macros): | |||||||
|                     break |                     break | ||||||
|  |  | ||||||
|             output[line_number][token_number] = upper_check(token, value) |             output[line_number][token_number] = upper_check(token, value) | ||||||
|  |  | ||||||
|             # re adding what was trimmed off |  | ||||||
|             output[line_number][token_number] += end |             output[line_number][token_number] += end | ||||||
|  |  | ||||||
|  |     return detokenize(output) | ||||||
|  |  | ||||||
|  | def tokenize(input): | ||||||
|  |     """ | ||||||
|  |     Returns a 2D list of tokens and a list of otf_macros. | ||||||
|  |     otf macro definitions are removed and just the keyword definition is kept as well as any | ||||||
|  |     punctuation on the final word. | ||||||
|  |     """ | ||||||
|  |     tokens =  [x.split(' ') for x in input.split('\n')] | ||||||
|  |  | ||||||
|  |     otf_macros = [] | ||||||
|  |     in_otf_macro = False | ||||||
|  |     tmp_macro_keyword = None | ||||||
|  |     tmp_macro_definition = [] | ||||||
|  |  | ||||||
|  |     # find and extract on the fly macros | ||||||
|  |     for line_index, line in enumerate(tokens): | ||||||
|  |         for token_index, token in enumerate(line): | ||||||
|  |             token_is_otf_macro_start = is_otf_macro_start(token, line) | ||||||
|  |             in_otf_macro = token_is_otf_macro_start or in_otf_macro | ||||||
|  |  | ||||||
|  |             if token_is_otf_macro_start: | ||||||
|  |                 tmp_macro_keyword = token | ||||||
|  |                 tmp_macro_definition = [] | ||||||
|  |                 tokens[line_index][token_index] = None | ||||||
|  |                 continue | ||||||
|  |             elif in_otf_macro and is_otf_macro_end(token): | ||||||
|  |                 split_token = re.split(r',.|.,', token) | ||||||
|  |                 tmp_macro_definition.append(split_token[0]) | ||||||
|  |                 tokens[line_index][token_index] = tmp_macro_keyword + split_token[1] | ||||||
|  |                 otf_macros.append((tmp_macro_keyword, ' '.join(tmp_macro_definition))) | ||||||
|  |                 in_otf_macro = False | ||||||
|  |                 continue | ||||||
|  |             elif in_otf_macro: | ||||||
|  |                 tmp_macro_definition.append(token) | ||||||
|  |                 tokens[line_index][token_index] = None | ||||||
|  |                 continue | ||||||
|  |  | ||||||
|  |         # filter out None tokens | ||||||
|  |         tokens[line_index] = [token for token in tokens[line_index] if token is not None] | ||||||
|  |  | ||||||
|  |     return tokens, otf_macros | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def detokenize(tokens): | ||||||
|  |     """Turn a list of tokens into plaintext. """ | ||||||
|  |  | ||||||
|  |     output = [] | ||||||
|  |  | ||||||
|  |     for index, line in enumerate(tokens): | ||||||
|  |         output.append([]) | ||||||
|  |         for token in line: | ||||||
|  |             if isinstance(token, MultipleTokens): | ||||||
|  |                 for word in token.words: | ||||||
|  |                     output[index].append(word) | ||||||
|  |             elif isinstance(token, str): | ||||||
|  |                 output[index].append(token) | ||||||
|  |             else: | ||||||
|  |                 raise ValueError(f"Unknown token type: {type(token)}") | ||||||
|  |  | ||||||
|     for line_number, line in enumerate(output): |     for line_number, line in enumerate(output): | ||||||
|         output[line_number] = ' '.join(line) |         output[line_number] = ' '.join(line) | ||||||
|  |  | ||||||
|     output = '\n'.join(output) |     return  '\n'.join(output) | ||||||
|  |  | ||||||
|     return output |  | ||||||
|  |  | ||||||
|  |  | ||||||
| def tokenize(input): |  | ||||||
|     """ |  | ||||||
|     Return of list of tokens from string (convert file contents to format to be |  | ||||||
|     processed by `process` |  | ||||||
|     """ |  | ||||||
|     return [x.split(' ') for x in input.split('\n')] |  | ||||||
|  |  | ||||||
|  |  | ||||||
| def get_macros(input): | def get_macros(input): | ||||||
|     """ |     """ | ||||||
|     Turn a string into a list of tuples of macros |     Turn a macros string into a list of tuples of macros | ||||||
|     """ |     """ | ||||||
|     response = [] |     response = [] | ||||||
|  |  | ||||||
| @@ -142,11 +203,34 @@ def get_macros(input): | |||||||
|         if macro[0] == "source": |         if macro[0] == "source": | ||||||
|             with open(macro[1]) as file: |             with open(macro[1]) as file: | ||||||
|                 response += get_macros(file.read()) |                 response += get_macros(file.read()) | ||||||
|         if len(macro) == 2 or len(macro) == 3: |         if len(macro) >= 2: | ||||||
|             response.append(tuple(macros[index])) |             response.append(tuple(macros[index])) | ||||||
|  |  | ||||||
|     return response |     return response | ||||||
|  |  | ||||||
|  | def is_otf_macro_start(token, line): | ||||||
|  |     """ | ||||||
|  |     Returns true if token is the start of an on the fly macro | ||||||
|  |     """ | ||||||
|  |     match = re.search(r'^\.[A-Za-z0-9]+$', token) | ||||||
|  |     if match is None: | ||||||
|  |         return False | ||||||
|  |  | ||||||
|  |     # don't return true if you can't find an end token in the line | ||||||
|  |     for line_token in line: | ||||||
|  |         if is_otf_macro_end(line_token): | ||||||
|  |             return match is not None | ||||||
|  |      | ||||||
|  |     return False | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def is_otf_macro_end(token): | ||||||
|  |     """ | ||||||
|  |     Returns true if token is the end of an on the fly macro | ||||||
|  |     """ | ||||||
|  |     match = re.search(r'(\.,|,\.)', token) | ||||||
|  |     return match is not None | ||||||
|  |  | ||||||
|  |  | ||||||
| def main(args): | def main(args): | ||||||
|     """ Entry point for script """ |     """ Entry point for script """ | ||||||
| @@ -159,17 +243,13 @@ def main(args): | |||||||
|             macros += get_macros(file.read()) |             macros += get_macros(file.read()) | ||||||
|  |  | ||||||
|     # get tokens (file contents) |     # get tokens (file contents) | ||||||
|  |     if args.input == "-": | ||||||
|  |         input = sys.stdin.read() | ||||||
|  |     else: | ||||||
|         with open(args.input) as file: |         with open(args.input) as file: | ||||||
|         tokens = tokenize(file.read()) |             input = file.read() | ||||||
|  |  | ||||||
|     # get output |     return print(process(input, macros)) | ||||||
|     output = process(tokens, macros) |  | ||||||
|  |  | ||||||
|     # show and save output |  | ||||||
|     with open(args.output, 'w+') as file: |  | ||||||
|         file.write(output) |  | ||||||
|  |  | ||||||
|     return 0 |  | ||||||
|  |  | ||||||
|  |  | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|   | |||||||
| @@ -1,3 +1,3 @@ | |||||||
| #/usr/bin/env sh | #/usr/bin/env sh | ||||||
| ./pymacro -q tests/test_input tests/test_actual_output | cat tests/test_input | ./pymacro > tests/test_actual_output | ||||||
| git diff tests/test_actual_output tests/test_expected_output | git diff tests/test_actual_output tests/test_expected_output | ||||||
|   | |||||||
| @@ -10,3 +10,7 @@ | |||||||
| 1 .he 2 .hes | 1 .he 2 .hes | ||||||
| 1 .le 2 .les | 1 .le 2 .les | ||||||
| 1 .ma 2 .mas | 1 .ma 2 .mas | ||||||
|  |  | ||||||
|  | This is a test of .otfm on the fly macro.,s! | ||||||
|  |  | ||||||
|  | If this sentence makes sense, then the test of .otfms worked! | ||||||
|   | |||||||
							
								
								
									
										25
									
								
								syntax.md
									
									
									
									
									
								
							
							
						
						
									
										25
									
								
								syntax.md
									
									
									
									
									
								
							| @@ -30,6 +30,31 @@ This sources macros from its parent and grandparent directories. | |||||||
| This may be useful if you have several related topics in separate folders, | This may be useful if you have several related topics in separate folders, | ||||||
| but only some of the macros are shared and some are not. | but only some of the macros are shared and some are not. | ||||||
|  |  | ||||||
|  | ### on the fly definitions | ||||||
|  |  | ||||||
|  | You can also define macros in the middle of a sentence without having to switch to the macros file. | ||||||
|  | The last word of multi-word macros must end with `.,`  or `,.` and end on the same line. | ||||||
|  |  | ||||||
|  | ``` | ||||||
|  | # .Hc hydrocarbon | ||||||
|  | In or | ||||||
|  | In organic chemistry, a .hc is an .oc organic compound., consisting entirely of | ||||||
|  | hydrogen and carbon. .Hcs are examples of group 14 hydrides. | ||||||
|  |  | ||||||
|  |  | ||||||
|  | .Ocs are compounds which contain .c .h bonds. | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  | The output would be the following: | ||||||
|  |  | ||||||
|  | ```markdown | ||||||
|  | # Hydrocarbon | ||||||
|  | In organic chemistry, a hydrocarbon is an organic compound consisting entirely | ||||||
|  | of hydrogen and carbon. Hydrocarbons are examples of group 14 hydrides. | ||||||
|  |  | ||||||
|  | Organic compounds are compounds which contain carbon hydrogen bonds. | ||||||
|  | ``` | ||||||
|  |  | ||||||
| ## plurals | ## plurals | ||||||
|  |  | ||||||
| Say you only have the following macro definitions: | Say you only have the following macro definitions: | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user