update readme, move python implementation to another repo, merge syntax.md with readme.md, remove test.sh

This commit is contained in:
Akbar Rahman 2021-03-07 14:33:24 +00:00
parent 10d0b76087
commit 5648638fcb
12 changed files with 174 additions and 505 deletions

View File

@ -1,8 +0,0 @@
.hc hydrocarbon
.hy hydrogen
.ca carbon
.ox oxygen
.wink 😉
source tests/test_macros_biology
source tests/test_macros_custom_plurals
source tests/test_macros_plural

View File

@ -1,250 +0,0 @@
#!/usr/bin/env python3
import sys
import re
class MultipleTokens():
    """
    A single token slot that stands for several output words.

    Intended for process() to tell detokenize() that a macro adds extra words
    without changing the indexes of the other tokens in the line; detokenize()
    emits each entry of `words` as its own word.
    """

    def __init__(self, words):
        # words: iterable of the words to emit in place of this token
        self.words = words

    def __repr__(self):
        # Readable representation for debugging token lists
        return f"{type(self).__name__}({self.words!r})"
def get_args(argv=None):
    """
    Get command line arguments.

    argv: optional list of argument strings to parse. When None (the default)
          argparse reads sys.argv[1:], which is the original behaviour.

    Returns the parsed argparse.Namespace with the attributes `macros_file`,
    `input` and `output`.
    """
    import argparse

    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-m", "--macros-file", default="macros",
                        help="File where macros are stored")
    parser.add_argument("-i", "--input", help="File to be processed.", default="-")
    parser.add_argument("-o", "--output", help="Path of output", default="-")
    return parser.parse_args(argv)
def pluralize(input):
    """
    Return the plural form for a macro value.

    `input` is either a plain word, or a list whose first item is the singular
    form and whose optional second item is a custom plural.
    """
    if not isinstance(input, list):
        return pluralize_word(input)
    # a custom plural, when defined, wins over the generated one
    has_custom_plural = len(input) > 1
    return input[1] if has_custom_plural else pluralize_word(input[0])
def pluralize_word(word):
    """
    Return the plural of a single word using simple suffix rules.

    Rules, checked in this order:
      - ends in s, x, z, ch or sh      -> add "es"            (dress -> dresses)
      - ends in y after a non-vowel    -> "y" becomes "ies"   (story -> stories)
      - ends in o after a non-vowel    -> add "es"            (hero -> heroes)
      - ends in f                      -> "f" becomes "ves"   (leaf -> leaves)
      - anything else                  -> add "s"

    An empty string is returned unchanged, and one-letter words no longer
    raise IndexError.
    """
    # TODO add more complex plural forms
    if not word:
        # nothing to pluralize; the indexing below would raise IndexError
        return word

    last = word[-1]
    # word[-2:-1] is '' for one-letter words, where word[-2] would raise IndexError
    previous = word[-2:-1]
    after_non_vowel = previous != '' and previous not in 'aeiou'

    if last in 'sxz' or word[-2:] in ['ch', 'sh']:
        return word + 'es'
    if last == 'y' and after_non_vowel:
        return word[:-1] + 'ies'
    if last == 'o' and after_non_vowel:
        return word + 'es'
    if last == 'f':
        return word[:-1] + 'ves'
    return word + 's'
def upper_check(token, word):
    """
    Capitalize `word` the same way `token` was capitalized.

    token: the text as typed, e.g. ".Hc" (the character at index 1 is treated
           as the first letter of the shortcut)
    word:  the expansion to re-capitalize

    - two or more uppercase letters and no lowercase ones -> whole word upper-cased
    - character at index 1 is uppercase                   -> first letter of word upper-cased
    - otherwise                                           -> word returned unchanged

    A shortcut with a single uppercase letter (".H") only capitalizes the first
    letter, as the spec requires for one-letter shortcuts; tokens without any
    cased letters (".1") leave the word untouched.
    """
    uppercase_count = sum(1 for letter in token if letter.isupper())
    has_lowercase = any(letter.islower() for letter in token)

    # require more than one capital so ".H" gives "Hydrogen", not "HYDROGEN"
    if uppercase_count > 1 and not has_lowercase:
        return word.upper()
    if len(token) > 1 and token[1].isupper():
        return word[:1].upper() + word[1:]
    return word
def process(input, macros):
    """
    Expand every macro in the string `input` and return the resulting text.

    macros: dict mapping lower-case keywords (e.g. ".hc") to a list whose first
            item is the expansion and whose optional second item is a custom
            plural. The dict is modified in place: on the fly (otf) macros found
            in `input` are added to it, overwriting any existing keyword.

    An otf definition (`.kw some words.,`) is replaced by its keyword plus
    whatever followed the `.,` / `,.` terminator, and that keyword is then
    expanded like any other macro. Text that is not a macro is left untouched.
    """
    tokens = tokenize(input)
    in_otf_macro = False
    otf_token = None
    otf_definition = []

    for line_number, line in enumerate(tokens):
        for token_number, token in enumerate(line):
            if len(token) == 0:
                # empty tokens come from consecutive spaces; keep them so spacing survives
                continue

            if is_otf_macro_start(token, line):
                # remember the keyword as typed so its capitalization still applies
                otf_token = token
                in_otf_macro = True
                otf_definition = []
                line[token_number] = None
                continue
            elif in_otf_macro and is_otf_macro_end(token):
                # dots are escaped so only the literal terminators ".," and ",." split
                last_word, suffix = re.split(r'\.,|,\.', token, maxsplit=1)
                if last_word:
                    otf_definition.append(last_word)
                # stored lower-case and as a list, the same shape get_macros() produces
                macros[otf_token.lower()] = [' '.join(otf_definition)]
                # fall through so the keyword (plus e.g. a plural "s") is expanded below
                token = otf_token + suffix
                in_otf_macro = False
            elif in_otf_macro:
                otf_definition.append(token)
                line[token_number] = None
                continue

            # cut trailing non-alphanumerics off and add them back once expanded,
            # e.g. "hello..." is looked up as "hello" and "..." is re-appended
            cut = len(token)
            while cut > 0 and not token[cut - 1].isalnum():
                cut -= 1
            end = token[cut:]
            token = token[:cut]

            key = token.lower()
            if key in macros:
                value = upper_check(token, macros[key][0])
            elif key.endswith('s') and key[:-1] in macros:
                value = upper_check(token, pluralize(macros[key[:-1]]))
            else:
                # not a macro: keep the text exactly as written
                value = token
            line[token_number] = value + end

        # filter out the None placeholders left behind by otf definitions
        tokens[line_number] = [token for token in line if token is not None]

    return detokenize(tokens)
def tokenize(input):
    """
    Split text into a 2D list of tokens: one inner list per line, holding the
    chunks of that line separated by single spaces.

    Consecutive spaces produce empty-string tokens.
    """
    rows = []
    for text_line in input.split('\n'):
        rows.append(text_line.split(' '))
    return rows
def detokenize(tokens):
    """
    Turn a 2D list of tokens back into plaintext.

    Tokens on a line are joined with single spaces and lines with newlines.
    A MultipleTokens entry contributes each of its words; any token that is
    neither a str nor a MultipleTokens raises ValueError.
    """
    grouped = []
    for line in tokens:
        words = []
        grouped.append(words)
        for token in line:
            if isinstance(token, MultipleTokens):
                words.extend(token.words)
            elif isinstance(token, str):
                words.append(token)
            else:
                raise ValueError(f"Unknown token type: {type(token)}")
    # join only after every line has been validated
    return '\n'.join(' '.join(words) for words in grouped)
def get_macros(input, child=False):
    """
    Parse a macros string into a dict of macros.

    Each line holds tab-separated fields: keyword, expansion and an optional
    custom plural. A line whose first field is `source` names another macros
    file in its second field; that file is read and parsed recursively, and
    its entries are placed after the local ones, so they win on duplicate
    keywords.

    input: contents of a macros file
    child: when True, return the raw list of field lists instead of the dict
           (used for the recursive calls)

    Returns a dict mapping each lower-cased keyword to the list of its
    remaining fields. Raises ValueError for a `source` line without a path.
    """
    local_entries = []
    sourced_entries = []
    for line in input.split('\n'):
        fields = re.split('[\t]', line)
        if fields[0] != "source":
            local_entries.append(fields)
            continue
        if len(fields) < 2:
            raise ValueError("'source' must be followed by a tab and a file path")
        with open(fields[1]) as file:
            # collected separately: extending the list being iterated made the
            # () placeholders of nested sources crash with IndexError
            sourced_entries += get_macros(file.read(), child=True)
        # placeholder keeps the shape of the raw list returned for child=True
        local_entries.append(())

    entries = local_entries + sourced_entries
    if child:
        return entries

    # store macros as dict and return; lines with fewer than two fields are skipped
    response = {}
    for entry in entries:
        if len(entry) >= 2:
            response[entry[0].lower()] = entry[1:]
    return response
def is_otf_macro_start(token, line):
    """
    Tell whether `token` opens an on the fly macro definition.

    The token must be a dot followed only by letters/digits, and some token in
    `line` must be an otf macro end; without an end token the answer is False.
    """
    if re.search(r'^\.[A-Za-z0-9]+$', token) is None:
        return False
    # a keyword only starts a definition when the line also contains a terminator
    return any(is_otf_macro_end(candidate) for candidate in line)
def is_otf_macro_end(token):
    """
    Tell whether `token` closes an on the fly macro, i.e. its string form
    contains `.,` or `,.` anywhere.
    """
    # f-string conversion lets non-str tokens (such as None) be tested too
    found = re.search(r'(\.,|,\.)', f"{token}")
    return found is not None
def main(args):
    """
    Entry point for script.

    args: parsed arguments providing `macros_file`, `input` and optionally
          `output`; "-" means stdin for `input` and stdout for `output`.

    Reads the macros file and the input text, expands the macros and writes the
    result to the requested output. Returns None.
    """
    # get macros
    with open(args.macros_file) as file:
        macros = get_macros(file.read())

    # get tokens (file contents)
    if args.input == "-":
        text = sys.stdin.read()
    else:
        with open(args.input) as file:
            text = file.read()

    result = process(text, macros)

    # --output used to be parsed but ignored; "-" (or a missing attribute) keeps stdout
    output_path = getattr(args, "output", "-")
    if output_path == "-":
        print(result)
    else:
        with open(output_path, "w") as file:
            print(result, file=file)
if __name__ == '__main__':
    # Ctrl-C is treated as a normal, successful exit rather than a traceback
    try:
        exit_status = main(get_args())
    except KeyboardInterrupt:
        exit_status = 0
    sys.exit(exit_status)

View File

@ -1,24 +0,0 @@
# pymacro
A python implementation of the macros spec
## usage
```
$ ./pymacro -h
usage: pymacro [-h] [-m MACROS_FILE] [-i INPUT] [-o OUTPUT]
optional arguments:
-h, --help show this help message and exit
-m MACROS_FILE, --macros-file MACROS_FILE
File where macros are stored (default: macros)
-i INPUT, --input INPUT
File to be processed. (default: -)
-o OUTPUT, --output OUTPUT
Path of output (default: -)
```
## testing
Run `test.sh`.
A `diff` is run on the actual output against what should have come out according
to the spec.

View File

@ -1,3 +0,0 @@
#!/usr/bin/env sh
# Run pymacro over the test input and show how the result differs from the expected output.
./pymacro < tests/test_input > tests/test_actual_output
git diff tests/test_actual_output tests/test_expected_output

View File

@ -1,12 +0,0 @@
Hydrocarbons are composed of exclusively hydrogen and carbon.
Chlorophyll is the site of photosynthesis.
😉
1 hydrocarbon 2 hydrocarbons
1 dress 2 dresses
1 story 2 stories
1 hero 2 heroes
1 leaf 2 leaves
1 man 2 men

View File

@ -1,21 +0,0 @@
.Hcs are composed of exclusively .hy and .ca.
.Chl is the site of .ps.
.wink
1 .hc 2 .hcs
1 .dr 2 .drs
1 .st 2 .sts
1 .he 2 .hes
1 .le 2 .les
1 .ma 2 .mas
This is a test of .otfm on the fly macro.,s!
If this sentence makes sense, then the test of .otfms worked!
.otfms can also be overwritten,
you could make it equal .otfm on the fly monkey.,s!
They're not just any monkeys, they're .otfms!

View File

@ -1,2 +0,0 @@
.chl chlorophyll
.ps photosynthesis

View File

@ -1 +0,0 @@
.l louse lice

View File

@ -1,6 +0,0 @@
.hc hydrocarbon
.dr dress
.st story
.he hero
.le leaf
.ma man men

178
readme.md
View File

@ -1,4 +1,8 @@
# macros # otfmacros
on the fly macros
---
A syntax/specification for defining macros for any sort of text file. A syntax/specification for defining macros for any sort of text file.
The goal of this is to be minimal but intuitive and with enough features to make The goal of this is to be minimal but intuitive and with enough features to make
@ -16,8 +20,174 @@ overly relying on pronouns if it would make the writing less clear.
## syntax ## syntax
The syntax can be found [here](syntax.md) How to define macros and what not to define.
## python implementation ### definition of simple macros
Macros are defined in a separate file, by default called `macros`.
You can add additional macro files through the command line options.
A python implementation can be found [here](pymacro/) Examples of macro definitions:
```
.hc hydrocarbon
.h hydrogen
.c carbon
```
#### external definitions
You can also `source` macros from another macro file:
```
source ../macros
source ../../macros
.hc hydrocarbons
.h hydrogen
.c
```
This sources macros from its parent and grandparent directories.
This may be useful if you have several related topics in separate folders,
but only some of the macros are shared and some are not.
#### on the fly definitions
You can also define macros in the middle of a sentence without having to switch to the macros file.
The last word of a multi-word macro must end with `.,` or `,.`, and the definition must end on the same line it starts on.
```
# .Hc hydrocarbon
In or
In organic chemistry, a .hc is an .oc organic compound., consisting entirely of
hydrogen and carbon. .Hcs are examples of group 14 hydrides.
.Ocs are compounds which contain .c .h bonds.
```
The output would be the following:
```markdown
# Hydrocarbon
In organic chemistry, a hydrocarbon is an organic compound consisting entirely
of hydrogen and carbon. Hydrocarbons are examples of group 14 hydrides.
Organic compounds are compounds which contain carbon hydrogen bonds.
```
Otf macros can be overwritten later in the document.
They also take precedence over any macros defined in the `macros` file.
### plurals
Say you only have the following macro definitions:
`.hc hydrocarbon`
And the file `test.md`:
```markdown
# .Hc
In organic chemistry, a .hc is an organic compound consisting entirely of
hydrogen and carbon. .Hcs are examples of group 14 hydrides.
```
The output would be the following:
```markdown
# Hydrocarbon
In organic chemistry, a hydrocarbon is an organic compound consisting entirely
of hydrogen and carbon. Hydrocarbons are examples of group 14 hydrides.
```
Note how `.hcs` becomes `hydrocarbons` even though that isn't explicitly
defined. This helps you avoid adding the plural forms of words.
#### default plurals
There are also other default plurals:
Ending | Example | Formation | Plural
--- | --- | --- | ---
`s`, `ch`, `sh`, `x`, or `z` | dress | add es | dresses
`[consonant]y` | story | change `y` to `ies` | stories
`[consonant]o` | macro | add `s` | macros
`[vowel]o` | hero | add `es` | heroes
`f` | leaf | change `f` to `ves` | leaves
#### custom plurals
You can also define your own custom plurals:
`macros`
```
.s salmon salmon
.m man men
.l louse lice
```
Input:
```
Hello boys and girls, my name is Fat Lip
and this is my friend Sammy the .S.
(What 'do?)
Today, we're going to teach you some fun new facts about .ss
and a brand new dance.
```
Output:
```
Hello boys and girls, my name is Fat Lip
and this is my friend Sammy the Salmon.
(What 'do?)
Today, we're going to teach you some fun new facts about salmon
and a brand new dance.
```
Note: for plurals which do not change it's probably easier to just not use the
plural version at all and forgo defining a custom plural altogether.
### capitalization
- Capitalizing the first character of a macro makes the first letter of the word
also capitalized.
- Capitalizing every letter makes the whole word capitalized too, except if the
shortcut is only one letter long.
- For this reason, you may not want to use one letter definitions.
- This also means that macro definitions are case insensitive: the case used in a
definition is completely ignored.
Say you have the following macros:
```
.hc hydrocarbon
.h hydrogen
.c carbon
```
Input:
```
.hc
.Hc
.HC
.h
.H
.c
.C
```
Output:
```
hydrocarbon
Hydrocarbon
HYDROCARBON
hydrogen
Hydrogen
carbon
Carbon
```
## python preprocessor
An implementation of this that I've written is [otfm-python](https://github.com/alvierahman90/otfm-python).

170
syntax.md
View File

@ -1,170 +0,0 @@
# syntax
How to define macros and what not to define.
## definition of simple macros
Macros are defined in a separate file, by default called `macros`.
You can add additional macro files through the command line options.
Examples of macro definitions:
```
.hc hydrocarbon
.h hydrogen
.c carbon
```
### external definitions
You can also `source` macros from another macro file:
```
source ../macros
source ../../macros
.hc hydrocarbons
.h hydrogen
.c
```
This sources macros from its parent and grandparent directories.
This may be useful if you have several related topics in separate folders,
but only some of the macros are shared and some are not.
### on the fly definitions
You can also define macros in the middle of a sentence without having to switch to the macros file.
The last word of a multi-word macro must end with `.,` or `,.`, and the definition must end on the same line it starts on.
```
# .Hc hydrocarbon
In or
In organic chemistry, a .hc is an .oc organic compound., consisting entirely of
hydrogen and carbon. .Hcs are examples of group 14 hydrides.
.Ocs are compounds which contain .c .h bonds.
```
The output would be the following:
```markdown
# Hydrocarbon
In organic chemistry, a hydrocarbon is an organic compound consisting entirely
of hydrogen and carbon. Hydrocarbons are examples of group 14 hydrides.
Organic compounds are compounds which contain carbon hydrogen bonds.
```
Otf macros can be overwritten later in the document.
They also take precedence over any macros defined in the `macros` file.
## plurals
Say you only have the following macro definitions:
`.hc hydrocarbon`
And the file `test.md`:
```markdown
# .Hc
In organic chemistry, a .hc is an organic compound consisting entirely of
hydrogen and carbon. .Hcs are examples of group 14 hydrides.
```
The output would be the following:
```markdown
# Hydrocarbon
In organic chemistry, a hydrocarbon is an organic compound consisting entirely
of hydrogen and carbon. Hydrocarbons are examples of group 14 hydrides.
```
Note how `.hcs` becomes `hydrocarbons` even though that isn't explicitly
defined. This helps you avoid adding the plural forms of words.
### other default plurals
There are also other default plurals:
Ending | Example | Formation | Plural
--- | --- | --- | ---
`s`, `ch`, `sh`, `x`, or `z` | dress | add es | dresses
`[consonant]y` | story | change `y` to `ies` | stories
`[consonant]o` | macro | add `s` | macros
`[vowel]o` | hero | add `es` | heroes
`f` | leaf | change `f` to `ves` | leaves
### custom plurals
You can also define your own custom plurals:
`macros`
```
.s salmon salmon
.m man men
.l louse lice
```
Input:
```
Hello boys and girls, my name is Fat Lip
and this is my friend Sammy the .S.
(What 'do?)
Today, we're going to teach you some fun new facts about .ss
and a brand new dance.
```
Output:
```
Hello boys and girls, my name is Fat Lip
and this is my friend Sammy the Salmon.
(What 'do?)
Today, we're going to teach you some fun new facts about salmon
and a brand new dance.
```
Note: for plurals which do not change it's probably easier to just not use the
plural version at all and forgo defining a custom plural altogether.
## capitalization
- Capitalizing the first character of a macro makes the first letter of the word
also capitalized.
- Capitalizing every letter makes the whole word capitalized too, except if the
shortcut is only one letter long.
- For this reason, you may not want to use one letter definitions.
- This also means that macro definitions are case insensitive: the case used in a
definition is completely ignored.
Say you have the following macros:
```
.hc hydrocarbon
.h hydrogen
.c carbon
```
Input:
```
.hc
.Hc
.HC
.h
.H
.c
.C
```
Output:
```
hydrocarbon
Hydrocarbon
HYDROCARBON
hydrogen
Hydrogen
carbon
Carbon
```

View File

@ -1,4 +0,0 @@
#!/usr/bin/env sh
# Run pymacro with the test macro files and diff the actual output against the expected output.
./pymacro -sm tests/test_macros_plural -m tests/test_macros_biology tests/test_input tests/test_actual_output
diff tests/test_actual_output tests/test_expected_output