diff --git a/pymacro/macros b/pymacro/macros deleted file mode 100644 index 63cb96e..0000000 --- a/pymacro/macros +++ /dev/null @@ -1,8 +0,0 @@ -.hc hydrocarbon -.hy hydrogen -.ca carbon -.ox oxygen -.wink 😉 -source tests/test_macros_biology -source tests/test_macros_custom_plurals -source tests/test_macros_plural diff --git a/pymacro/pymacro b/pymacro/pymacro deleted file mode 100755 index d160036..0000000 --- a/pymacro/pymacro +++ /dev/null @@ -1,250 +0,0 @@ -#!/usr/bin/env python3 - -import sys -import re - - -class MultipleTokens(): - """ - Used by process() to tell detokenize() that a macro adds extra tokens without modifying - without changing the indexes of other tokens - """ - - def __init__(self, words): - self.words = words - - -def get_args(): - """ Get command line arguments """ - - import argparse - parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument("-m", "--macros-file", default="macros", - help="File where macros are stored") - parser.add_argument("-i", "--input", help="File to be processed.", default="-") - parser.add_argument("-o", "--output", help="Path of output", default="-") - return parser.parse_args() - - -def pluralize(input): - """ Returns the plural form of a word. 
""" - if isinstance(input, list): - # use custom plural if defined - if len(input) > 1: - return input[1] - - return pluralize_word(input[0]) - - return pluralize_word(input) - -def pluralize_word(word): - def is_vowel(letter): - if not isinstance(letter, str): - raise ValueError("Argument 'letter' must be type str") - if len(letter) != 1: - raise ValueError("Argument 'letter' must be 1 long") - return letter in 'aeiou' - - # TODO add more complex plural forms - if word[-1] in 'sxz' or word[-2:] in ['ch', 'sh']: - return word + 'es' - if word[-1] == 'y': - if not is_vowel(word[-2]): - return word[:-1] + 'ies' - if word[-1] == 'o': - if not is_vowel(word[-2]): - return word + 'es' - if word[-1] == 'f': - return word[:-1] + 'ves' - return word + 's' - - -def upper_check(token, word): - """ Check if word needs to be capitalized and capitalise appropriately if that is the case. """ - all_caps = True - - for letter in token: - if letter.islower(): - all_caps = False - break - - if all_caps: - return word.upper() - - if len(token) > 1: - if token[1].isupper(): - return word[:1].upper() + word[1:] - - return word - - -def process(input, macros): - """ - This function takes the string `input` and a dict, ` macros`. - It substitutes any keys in `macro` with the corresponding value. - It also checks for any otf macros defined in the string and appends them to `macros`, - replacing that otf macro and any following instances of it. - It returns the substituted string. 
- """ - tokens = tokenize(input) - macros = macros - - in_otf_macro = False - tmp_macro_keyword = None - tmp_macro_definition = [] - - for line_number, line in enumerate(tokens): - for token_number, token in enumerate(line): - if len(token) == 0: - continue - - # detect on the fly macros - token_is_otf_macro_start = is_otf_macro_start(token, line) - - # process otf macro tokens - if token_is_otf_macro_start: - tmp_macro_keyword = token - in_otf_macro = True - tmp_macro_definition = [] - tokens[line_number][token_number] = None - continue - elif in_otf_macro and is_otf_macro_end(token): - split_token = re.split(r',.|.,', token) - tmp_macro_definition.append(split_token[0]) - macros[tmp_macro_keyword] = ' '.join(tmp_macro_definition) - token = tmp_macro_keyword + split_token[1] - in_otf_macro = False - # once the end of the macro has been found and stored, continue downn the for loop - # so that it can be turned back to normal text - elif in_otf_macro: - tmp_macro_definition.append(token) - tokens[line_number][token_number] = None - continue - - # cutting off the end and then adding it back once expanded - # e.g. punctuation: from the token "hello...", end would be equal to "..." 
- # and token would be equal to "hello" - end = [] - token = list(token) - for index, char in reversed(list(enumerate(token))): - if not char.isalnum(): - end.insert(0, token.pop(index)) - else: - break - end = ''.join(end) - token = ''.join(token) - - # if no macro is found (or if it is not a macro at all, the value - # will not be changed - value = token - - if token.lower() in macros.keys(): - value = macros[token.lower()][0] - elif token.lower() in [f"{m}s" for m in macros.keys()]: - value = pluralize(macros[token.lower()[:-1]]) - - tokens[line_number][token_number] = upper_check(token, value) - tokens[line_number][token_number] += end - - # filter out None tokens - tokens[line_number] = [token for token in tokens[line_number] if token is not None] - - return detokenize(tokens) - -def tokenize(input): - """ - Returns a 2D list of tokens and a list of otf_macros. - otf macro definitions are removed and just the keyword definition is kept as well as any - punctuation on the final word. - """ - return [x.split(' ') for x in input.split('\n')] - - -def detokenize(tokens): - """Turn a list of tokens into plaintext. 
""" - - output = [] - - for index, line in enumerate(tokens): - output.append([]) - for token in line: - if isinstance(token, MultipleTokens): - for word in token.words: - output[index].append(word) - elif isinstance(token, str): - output[index].append(token) - else: - raise ValueError(f"Unknown token type: {type(token)}") - - for line_number, line in enumerate(output): - output[line_number] = ' '.join(line) - - return '\n'.join(output) - - -def get_macros(input, child=False): - """ Turn a macros string into a list of tuples of macros """ - response = {} - - # turn input into list of tuples - macros = [re.split('[\t]', x) for x in input.split('\n')] - - # check if keyword is `source`, get macros from sourced file if it is - for index, macro in enumerate(macros): - if macro[0] == "source": - with open(macro[1]) as file: - macros += get_macros(file.read(), child=True) - macros[index] = () - - if child: - return macros - - # store macros as dict and return - for index, macro in enumerate(macros): - if len(macro) >= 2: - response[macro[0].lower()] = macro[1:] - return response - -def is_otf_macro_start(token, line): - """ Returns true if token is the start of an on the fly macro """ - match = re.search(r'^\.[A-Za-z0-9]+$', token) - if match is None: - return False - - # don't return true you can't find an end token in the line - for line_token in line: - if is_otf_macro_end(line_token): - return match is not None - - return False - - -def is_otf_macro_end(token): - """ Returns true if token is the end of an on the fly macro """ - match = re.search(r'(\.,|,\.)', f"{token}") - return match is not None - - -def main(args): - """ Entry point for script """ - - # get macros - - with open(args.macros_file) as file: - macros = get_macros(file.read()) - - # get tokens (file contents) - if args.input == "-": - input = sys.stdin.read() - else: - with open(args.input) as file: - input = file.read() - - return print(process(input, macros)) - - -if __name__ == '__main__': - try: - 
sys.exit(main(get_args())) - except KeyboardInterrupt: - sys.exit(0) diff --git a/pymacro/readme.md b/pymacro/readme.md deleted file mode 100644 index 8a1afec..0000000 --- a/pymacro/readme.md +++ /dev/null @@ -1,24 +0,0 @@ -# pymacro - -A python implementation of the macros spec - -## usage -``` -$ ./pymacro -h -usage: pymacro [-h] [-m MACROS_FILE] [-i INPUT] [-o OUTPUT] - -optional arguments: - -h, --help show this help message and exit - -m MACROS_FILE, --macros-file MACROS_FILE - File where macros are stored (default: macros) - -i INPUT, --input INPUT - File to be processed. (default: -) - -o OUTPUT, --output OUTPUT - Path of output (default: -) -``` - -## testing - -Run `test.sh`. -A `diff` is run on the actual output against what should have come out according -to the spec. diff --git a/pymacro/test.sh b/pymacro/test.sh deleted file mode 100755 index 5e7523b..0000000 --- a/pymacro/test.sh +++ /dev/null @@ -1,3 +0,0 @@ -#/usr/bin/env sh -cat tests/test_input | ./pymacro > tests/test_actual_output -git diff tests/test_actual_output tests/test_expected_output diff --git a/pymacro/tests/test_expected_output b/pymacro/tests/test_expected_output deleted file mode 100644 index 00f1abf..0000000 --- a/pymacro/tests/test_expected_output +++ /dev/null @@ -1,12 +0,0 @@ -Hydrocarbons are composed of exclusively hydrogen and carbon. - -Chlorophyll is the site of photosynthesis. - -😉 - -1 hydrocarbon 2 hydrocarbons -1 dress 2 dresses -1 story 2 stories -1 hero 2 heroes -1 leaf 2 leaves -1 man 2 men diff --git a/pymacro/tests/test_input b/pymacro/tests/test_input deleted file mode 100644 index babbe4d..0000000 --- a/pymacro/tests/test_input +++ /dev/null @@ -1,21 +0,0 @@ -.Hcs are composed of exclusively .hy and .ca. - -.Chl is the site of .ps. - -.wink - -1 .hc 2 .hcs -1 .dr 2 .drs -1 .st 2 .sts -1 .he 2 .hes -1 .le 2 .les -1 .ma 2 .mas - -This is a test of .otfm on the fly macro.,s! - -If this sentence makes sense, then the test of .otfms worked! 
- -.otfms can also be overwritten, -you could make it equal .otfm on the fly monkey.,s! - -They're not just any monkeys, they're .otfms! diff --git a/pymacro/tests/test_macros_biology b/pymacro/tests/test_macros_biology deleted file mode 100644 index 3d40842..0000000 --- a/pymacro/tests/test_macros_biology +++ /dev/null @@ -1,2 +0,0 @@ -.chl chlorophyll -.ps photosynthesis diff --git a/pymacro/tests/test_macros_custom_plurals b/pymacro/tests/test_macros_custom_plurals deleted file mode 100644 index 01e66e6..0000000 --- a/pymacro/tests/test_macros_custom_plurals +++ /dev/null @@ -1 +0,0 @@ -.l louse lice diff --git a/pymacro/tests/test_macros_plural b/pymacro/tests/test_macros_plural deleted file mode 100644 index 80b1b34..0000000 --- a/pymacro/tests/test_macros_plural +++ /dev/null @@ -1,6 +0,0 @@ -.hc hydrocarbon -.dr dress -.st story -.he hero -.le leaf -.ma man men diff --git a/readme.md b/readme.md index ccc6265..df188c4 100644 --- a/readme.md +++ b/readme.md @@ -1,4 +1,8 @@ -# macros +# otfmacros + +on the fly macros + +--- A syntax/specification for defining macros for any sort of text file. The goal of this is to be minimal but intuitive and with enough features to make @@ -16,8 +20,174 @@ overly relying on pronouns if it would make the writing less clear. ## syntax -The syntax can be found [here](syntax.md) +How to define macros and what not to define. -## python implementation +### definition of simple macros +Macros are defined in a separate file, by default called `macros`. +You can add additional macro files through the command line options. -A python implementation can be found [here](pymacro/) +Examples of macro definitions: + +``` +.hc hydrocarbon +.h hydrogen +.c carbon +``` + +#### external definitions + +You can also `source` macros from another macro file: + +``` +source ../macros +source ../../macros +.hc hydrocarbons +.h hydrogen +.c +``` + +This sources macros from its parent and grandparent directories. 
+This may be useful if you have several related topics in separate folders, +but only some of the macros are shared and some are not. + +#### on the fly definitions + +You can also define macros in the middle of a sentence without having to switch to the macros file. +The last word of a multi-word macro definition must end with `.,` or `,.`, and the definition must end on the line it starts on. + +``` +# .Hc hydrocarbon +In or +In organic chemistry, a .hc is an .oc organic compound., consisting entirely of +hydrogen and carbon. .Hcs are examples of group 14 hydrides. + + +.Ocs are compounds which contain .c .h bonds. +``` + +The output would be the following: + +```markdown +# Hydrocarbon +In organic chemistry, a hydrocarbon is an organic compound consisting entirely +of hydrogen and carbon. Hydrocarbons are examples of group 14 hydrides. + +Organic compounds are compounds which contain carbon hydrogen bonds. +``` + +Otf macros can be overwritten later in the document. +They also take precedence over any macros defined in the `macros` file. + +### plurals + +Say you only have the following macro definitions: + +`.hc hydrocarbon` + +And the file `test.md`: + +```markdown +# .Hc +In organic chemistry, a .hc is an organic compound consisting entirely of +hydrogen and carbon. .Hcs are examples of group 14 hydrides. +``` + +The output would be the following: + +```markdown +# Hydrocarbon +In organic chemistry, a hydrocarbon is an organic compound consisting entirely +of hydrogen and carbon. Hydrocarbons are examples of group 14 hydrides. +``` + +Note how `.hcs` becomes `hydrocarbons` even though that isn't explicitly +defined. This saves you from having to define the plural forms of words. 
+ + +#### default plurals + +There are also other default plurals: + +Ending | Example | Formation | Plural +--- | --- | --- | --- +`s`, `ch`, `sh`, `x`, or `z` | dress | add `es` | dresses +`[consonant]y` | story | change `y` to `ies` | stories +`[consonant]o` | hero | add `es` | heroes +`[vowel]o` | radio | add `s` | radios +`f` | leaf | change `f` to `ves` | leaves + + +#### custom plurals +You can also define your own custom plurals: + +`macros` +``` +.s salmon salmon +.m man men +.l louse lice +``` + +Input: +``` +Hello boys and girls, my name is Fat Lip +and this is my friend Sammy the .S. +(What 'do?) +Today, we're going to teach you some fun new facts about .ss +and a brand new dance. +``` + +Output: +``` +Hello boys and girls, my name is Fat Lip +and this is my friend Sammy the Salmon. +(What 'do?) +Today, we're going to teach you some fun new facts about salmon +and a brand new dance. +``` + +Note: for plurals which do not change it's probably easier to just not use the +plural version at all and forgo defining a custom plural altogether. + +### capitalization + +- Capitalizing the first character of a macro makes the first letter of the word +also capitalized. +- Capitalizing every letter makes the whole word capitalized also, except if the +shortcut is only one letter long. +- For this reason, you may not want to use one letter definitions. +- This also means that macro definitions are case insensitive: the capitalization +of the keyword in a definition is ignored. + +Say you have the following macros: + +``` +.hc hydrocarbon +.h hydrogen +.c carbon +``` + +Input: +``` +.hc +.Hc +.HC +.h +.H +.c +.C +``` + +Output: +``` +hydrocarbon +Hydrocarbon +HYDROCARBON +hydrogen +Hydrogen +carbon +Carbon +``` + +## python preprocessor + +An implementation of this that I've written is [otfm-python](https://github.com/alvierahman90/otfm-python). 
diff --git a/syntax.md b/syntax.md deleted file mode 100644 index 7fec5dd..0000000 --- a/syntax.md +++ /dev/null @@ -1,170 +0,0 @@ -# syntax - -How to define macros and what not to define. - -## definition of simple macros -Macros are defined in a separate file, by default called `macros`. -You can add additional macro files through the command line options. - -Examples of macro definitions: - -``` -.hc hydrocarbon -.h hydrogen -.c carbon -``` - -### external definitions - -You can also `source` macros from another macro file: - -``` -source ../macros -source ../../macros -.hc hydrocarbons -.h hydrogen -.c -``` - -This sources macros from its parent and grandparent directories. -This may be useful if you have several related topics in separate folders, -but only some of the macros are shared and some are not. - -### on the fly definitions - -You can also define macros in the middle of a sentence without having to switch to the macros file. -The last word of multi-word macros must end with `.,` or `,.` and end on the same line. - -``` -# .Hc hydrocarbon -In or -In organic chemistry, a .hc is an .oc organic compound., consisting entirely of -hydrogen and carbon. .Hcs are examples of group 14 hydrides. - - -.Ocs are compounds which contain .c .h bonds. -``` - -The output would be the following: - -```markdown -# Hydrocarbon -In organic chemistry, a hydrocarbon is an organic compound consisting entirely -of hydrogen and carbon. Hydrocarbons are examples of group 14 hydrides. - -Organic compounds are compounds which contain carbon hydrogen bonds. -``` - -Otf macros can be overwritten later in the document. -They also take precedence of any macros defined in the `macros` file. - -## plurals - -Say you only have the following macro definitions: - -`.hc hydrocarbon` - -And the file `test.md`: - -```markdown -# .Hc -In organic chemistry, a .hc is an organic compound consisting entirely of -hydrogen and carbon. .Hcs are examples of group 14 hydrides. 
-``` - -The output would be the following: - -```markdown -# Hydrocarbon -In organic chemistry, a hydrocarbon is an organic compound consisting entirely -of hydrogen and carbon. Hydrocarbons are examples of group 14 hydrides. -``` - -Note how `.hcs` becomes `hydrocarbons` even though that isn't explicitly -defined. This helps you avoid adding the plural forms of words. - - -### other default plurals - -There are also other default plurals: - -Ending | Example | Formation | Plural ---- | --- | --- | --- -`s`, `ch`, `sh`, `x`, or `z` | dress | add es | dresses -`[consonant]y` | story | change `y` to `ies` | stories -`[consonant]o` | macro | add `s` | macros -`[vowel]o` | hero | add `es` | heroes -`f` | leaf | change `f` to `ves` | leaves - - -### custom plurals -You can also define your own custom plurals: - -`macros` -``` -.s salmon salmon -.m man men -.l louse lice -``` - -Input: -``` -Hello boys and girls, my name is Fat Lip -and this is my friend Sammy the .S. -(What 'do?) -Today, we're going to teach you some fun new facts about .ss -and a brand new dance. -``` - -Output: -``` -Hello boys and girls, my name is Fat Lip -and this is my friend Sammy the Salmon. -(What 'do?) -Today, we're going to teach you some fun new facts about salmon -and a brand new dance. -``` - -Note: for plurals which do not change it's probably easier to just not use the -plural version at all and forgo defining a custom plural altogether. - -## capitalization - -- Capitalizing the first character of a macro makes the first letter of the word -also capitalized. -- Capitalizing every letter makes the whole word capitalized also, except if the -shortcut is only one word long. -- For this reason, you may not want to use one letter definitions. -- This also means that macro definitions are case insensitive and are completely -ignored. 
- -Say you have the following macros: - -``` -.hc hydrocarbon -.h hydrogen -.c carbon -``` - -Input: -``` -.hc -.Hc -.HC -.h -.H -.c -.C -``` - -Output: -``` -hydrocarbon -Hydrocarbon -HYDROCARBON -hydrogen -Hydrogen -carbon -Carbon -``` - diff --git a/test.sh b/test.sh deleted file mode 100755 index e0f60b4..0000000 --- a/test.sh +++ /dev/null @@ -1,4 +0,0 @@ -#/usr/bin/env sh -./pymacro -sm tests/test_macros_plural -m tests/test_macros_biology tests/test_input tests/test_actual_output -diff tests/test_actual_output tests/test_expected_output -