mirror of
https://github.com/alvierahman90/otfmacros.git
synced 2024-12-15 12:01:59 +00:00
update readme, move python implementation to another repo, merge syntax.md with readme.md, remove test.sh
This commit is contained in:
parent
10d0b76087
commit
5648638fcb
@ -1,8 +0,0 @@
|
||||
.hc hydrocarbon
|
||||
.hy hydrogen
|
||||
.ca carbon
|
||||
.ox oxygen
|
||||
.wink 😉
|
||||
source tests/test_macros_biology
|
||||
source tests/test_macros_custom_plurals
|
||||
source tests/test_macros_plural
|
250
pymacro/pymacro
250
pymacro/pymacro
@ -1,250 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
import re
|
||||
|
||||
|
||||
class MultipleTokens():
    """
    Marker token that stands for several output words at a single token position.

    Per the original note, it is meant for process() to tell detokenize() that a
    macro adds extra words without changing the indexes of the other tokens.
    detokenize() emits every entry of `words` in place of the marker.
    """

    def __init__(self, words):
        # words: the sequence of words to emit in place of this token
        self.words = words

    def __repr__(self):
        return f"{type(self).__name__}({self.words!r})"
|
||||
|
||||
|
||||
def get_args(argv=None):
    """
    Get command line arguments.

    argv: optional list of argument strings to parse. The default, None, makes
    argparse read sys.argv[1:], which is what the script entry point relies on.

    Returns the argparse.Namespace with `macros_file`, `input` and `output`.
    """
    import argparse

    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-m", "--macros-file", default="macros",
                        help="File where macros are stored")
    parser.add_argument("-i", "--input", help="File to be processed.", default="-")
    parser.add_argument("-o", "--output", help="Path of output", default="-")
    return parser.parse_args(argv)
|
||||
|
||||
|
||||
def pluralize(input):
    """
    Returns the plural form of a word.

    input: either a single word (str), or a macro definition sequence whose
    first entry is the singular form and whose optional second entry is a
    custom plural, e.g. ["man", "men"].

    Raises ValueError if the definition sequence is empty.
    """
    if isinstance(input, (list, tuple)):
        # use custom plural if defined
        if len(input) > 1:
            return input[1]

        if not input:
            raise ValueError("Cannot pluralize an empty macro definition")

        return pluralize_word(input[0])

    return pluralize_word(input)
|
||||
|
||||
def pluralize_word(word):
    """
    Returns the default plural of `word`.

    Rules, checked in this order:
      - ends in s, x, z, ch or sh   -> add "es"          (dress -> dresses)
      - ends in consonant + y       -> replace y by "ies" (story -> stories)
      - ends in consonant + o       -> add "es"          (hero -> heroes)
      - ends in f                   -> replace f by "ves" (leaf -> leaves)
      - anything else               -> add "s"

    An empty string is returned unchanged.
    """
    def is_vowel(letter):
        if not isinstance(letter, str):
            raise ValueError("Argument 'letter' must be type str")
        if len(letter) != 1:
            raise ValueError("Argument 'letter' must be 1 long")
        return letter in 'aeiou'

    # nothing to pluralize; also keeps the word[-1] lookups below safe
    if not word:
        return word

    # TODO add more complex plural forms
    if word[-1] in 'sxz' or word[-2:] in ['ch', 'sh']:
        return word + 'es'

    # a one-letter word has no preceding letter to inspect
    after_consonant = len(word) > 1 and not is_vowel(word[-2])

    if word[-1] == 'y' and after_consonant:
        return word[:-1] + 'ies'
    if word[-1] == 'o' and after_consonant:
        return word + 'es'
    if word[-1] == 'f':
        return word[:-1] + 'ves'
    return word + 's'
|
||||
|
||||
|
||||
def upper_check(token, word):
    """
    Capitalise `word` the way the macro shortcut `token` was capitalised.

    token: the shortcut as typed, including its leading "." (e.g. ".Hc")
    word: the expansion to be capitalised

    - two or more letters, all upper case (".HC")  -> whole word upper-cased
    - first letter after the "." upper case (".Hc", ".H") -> first letter upper-cased
    - otherwise                                     -> word returned unchanged
    """
    cased_letters = [letter for letter in token if letter.isupper() or letter.islower()]

    # A one-letter shortcut cannot tell "capitalise" from "all caps", so it only
    # capitalises the first letter; tokens without letters never force upper case.
    if len(cased_letters) > 1 and all(letter.isupper() for letter in cased_letters):
        return word.upper()

    # token[0] is the "." prefix, so token[1] is the first letter of the shortcut
    if len(token) > 1 and token[1].isupper():
        return word[:1].upper() + word[1:]

    return word
|
||||
|
||||
|
||||
def _split_trailing_punctuation(token):
    """ Split `token` into (core, end), `end` being its trailing non-alphanumeric characters. """
    core_length = len(token)
    while core_length > 0 and not token[core_length - 1].isalnum():
        core_length -= 1
    return token[:core_length], token[core_length:]


def _expand_token(token, macros):
    """ Return `token` with its macro expanded, or `token` unchanged if it is not a macro. """
    # cutting off the end and then adding it back once expanded
    # e.g. punctuation: from the token "hello...", end would be equal to "..."
    # and core would be equal to "hello"
    core, end = _split_trailing_punctuation(token)
    key = core.lower()

    if key in macros:
        definition = macros[key]
        # definitions are normally [singular, custom plural]; tolerate a bare string
        value = definition if isinstance(definition, str) else definition[0]
    elif key.endswith('s') and key[:-1] in macros:
        value = pluralize(macros[key[:-1]])
    else:
        # not a macro: leave ordinary words exactly as typed
        return token

    return upper_check(core, value) + end


def process(input, macros):
    """
    This function takes the string `input` and a dict, `macros`, mapping a
    lower-case keyword to a list `[singular, optional custom plural]`.
    It substitutes any keys of `macros` found in `input` with the corresponding value.
    It also checks for any otf (on the fly) macros defined in the string and stores
    them in `macros` (the dict is modified in place), replacing that otf macro and
    any following instances of it.
    It returns the substituted string.
    """
    tokens = tokenize(input)

    for line_number, line in enumerate(tokens):
        # an otf definition has to end on the line it starts on, so state is per line
        in_otf_macro = False
        tmp_macro_keyword = None
        tmp_macro_definition = []
        # (position, original token) of everything blanked out for the pending definition
        pending = []

        for token_number, token in enumerate(line):
            if len(token) == 0:
                continue

            # detect on the fly macros; only an end marker *after* this token counts
            if is_otf_macro_start(token, line[token_number + 1:]):
                # a newer start token supersedes a pending one, so what was collected
                # so far was ordinary text after all: put it back, expanded
                for position, original in pending:
                    line[position] = _expand_token(original, macros)
                tmp_macro_keyword = token
                in_otf_macro = True
                tmp_macro_definition = []
                pending = [(token_number, token)]
                line[token_number] = None
                continue
            elif in_otf_macro and is_otf_macro_end(token):
                # is_otf_macro_end() guarantees the marker is present, so exactly two parts
                last_word, remainder = re.split(r'\.,|,\.', token, maxsplit=1)
                tmp_macro_definition.append(last_word)
                # same shape and key case as macros loaded from a macros file
                macros[tmp_macro_keyword.lower()] = [' '.join(tmp_macro_definition)]
                # the definition itself is replaced by a use of the new macro,
                # keeping whatever followed the end marker (plural "s", punctuation)
                token = tmp_macro_keyword + remainder
                in_otf_macro = False
                pending = []
            elif in_otf_macro:
                tmp_macro_definition.append(token)
                pending.append((token_number, token))
                line[token_number] = None
                continue

            line[token_number] = _expand_token(token, macros)

        # defensive: never drop text of a definition that was not terminated
        if in_otf_macro:
            for position, original in pending:
                line[position] = _expand_token(original, macros)

        # filter out None tokens
        tokens[line_number] = [token for token in line if token is not None]

    return detokenize(tokens)
|
||||
|
||||
def tokenize(input):
    """
    Returns a 2D list of tokens: one inner list per line of `input`, each line
    split on single spaces.

    Consecutive spaces produce empty-string tokens, so joining the tokens with
    spaces and the lines with newlines gives back the original text.
    """
    return [line.split(' ') for line in input.split('\n')]
|
||||
|
||||
|
||||
def detokenize(tokens):
    """ Turn a 2D list of tokens back into plaintext: words joined by spaces, lines by newlines. """
    word_lines = []

    for line in tokens:
        words = []
        for token in line:
            if isinstance(token, MultipleTokens):
                # one marker token contributes all of its words
                words.extend(token.words)
            elif isinstance(token, str):
                words.append(token)
            else:
                raise ValueError(f"Unknown token type: {type(token)}")
        word_lines.append(words)

    return '\n'.join(' '.join(words) for words in word_lines)
|
||||
|
||||
|
||||
def get_macros(input, child=False):
    """
    Turn a macros string into a dict of macros.

    input: contents of a macros file; one macro per line, fields separated by
    tabs: keyword, expansion and optionally a custom plural. A line whose first
    field is `source` names another macros file whose macros are loaded too.

    child: used for the recursive call on a sourced file; when true the raw list
    of field lists is returned instead of the dict.

    Returns a dict mapping the lower-cased keyword to the list of its remaining
    fields. Entries of sourced files come after the entries of the file itself,
    so on a duplicate keyword the sourced definition wins.

    Raises ValueError for a `source` line without a path; errors from opening a
    sourced file propagate unchanged.
    """
    entries = []
    sourced = []

    for line in input.split('\n'):
        # repeated or stray tabs must not create empty fields
        fields = [field for field in re.split(r'\t+', line) if field]
        if not fields:
            continue

        # check if keyword is `source`, get macros from sourced file if it is
        if fields[0] == "source":
            if len(fields) < 2:
                raise ValueError("'source' line is missing the path of a macros file")
            with open(fields[1]) as file:
                # the child call has already resolved its own `source` lines
                sourced += get_macros(file.read(), child=True)
            continue

        entries.append(fields)

    entries += sourced

    if child:
        return entries

    # store macros as dict and return; lines without an expansion are ignored
    return {fields[0].lower(): fields[1:] for fields in entries if len(fields) >= 2}
|
||||
|
||||
def is_otf_macro_start(token, line):
    """
    Returns true if token is the start of an on the fly macro.

    token: the candidate keyword; it must be a "." followed only by ASCII
    letters and digits.
    line: the tokens in which an end marker is looked for.
    """
    # fullmatch, unlike `$`, does not accept a trailing newline after the keyword
    if re.fullmatch(r'\.[A-Za-z0-9]+', token) is None:
        return False

    # don't return true if you can't find an end token in the line
    return any(is_otf_macro_end(line_token) for line_token in line)
|
||||
|
||||
|
||||
def is_otf_macro_end(token):
    """ Returns true if token contains the `.,` or `,.` marker that ends an on the fly macro """
    # formatting first lets non-string tokens (e.g. None) be checked as well
    text = f"{token}"
    return bool(re.search(r'(\.,|,\.)', text))
|
||||
|
||||
|
||||
def main(args):
    """
    Entry point for script

    args: namespace with `macros_file`, `input` and `output`; for `input` and
    `output` the value "-" means stdin / stdout.

    Returns None, which sys.exit() treats as a successful exit status.
    """

    # get macros
    with open(args.macros_file) as file:
        macros = get_macros(file.read())

    # get tokens (file contents)
    if args.input == "-":
        input = sys.stdin.read()
    else:
        with open(args.input) as file:
            input = file.read()

    output = process(input, macros)

    # honour -o/--output; namespaces without the attribute keep printing to stdout
    destination = getattr(args, "output", "-")
    if destination == "-":
        return print(output)

    with open(destination, "w") as file:
        # print() keeps the trailing newline the stdout path produces
        print(output, file=file)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Run the tool when executed as a script; Ctrl-C counts as a clean exit.
    try:
        exit_status = main(get_args())
    except KeyboardInterrupt:
        exit_status = 0
    sys.exit(exit_status)
|
@ -1,24 +0,0 @@
|
||||
# pymacro
|
||||
|
||||
A python implementation of the macros spec
|
||||
|
||||
## usage
|
||||
```
|
||||
$ ./pymacro -h
|
||||
usage: pymacro [-h] [-m MACROS_FILE] [-i INPUT] [-o OUTPUT]
|
||||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
-m MACROS_FILE, --macros-file MACROS_FILE
|
||||
File where macros are stored (default: macros)
|
||||
-i INPUT, --input INPUT
|
||||
File to be processed. (default: -)
|
||||
-o OUTPUT, --output OUTPUT
|
||||
Path of output (default: -)
|
||||
```
|
||||
|
||||
## testing
|
||||
|
||||
Run `test.sh`.
|
||||
A `diff` is run on the actual output against what should have come out according
|
||||
to the spec.
|
@ -1,3 +0,0 @@
|
||||
#!/usr/bin/env sh
# Run the sample input through pymacro, then show how the actual output
# differs from the expected output.
./pymacro < tests/test_input > tests/test_actual_output
# --no-index compares the two files with each other rather than with the index
git diff --no-index tests/test_actual_output tests/test_expected_output
|
@ -1,12 +0,0 @@
|
||||
Hydrocarbons are composed of exclusively hydrogen and carbon.
|
||||
|
||||
Chlorophyll is the site of photosynthesis.
|
||||
|
||||
😉
|
||||
|
||||
1 hydrocarbon 2 hydrocarbons
|
||||
1 dress 2 dresses
|
||||
1 story 2 stories
|
||||
1 hero 2 heroes
|
||||
1 leaf 2 leaves
|
||||
1 man 2 men
|
@ -1,21 +0,0 @@
|
||||
.Hcs are composed of exclusively .hy and .ca.
|
||||
|
||||
.Chl is the site of .ps.
|
||||
|
||||
.wink
|
||||
|
||||
1 .hc 2 .hcs
|
||||
1 .dr 2 .drs
|
||||
1 .st 2 .sts
|
||||
1 .he 2 .hes
|
||||
1 .le 2 .les
|
||||
1 .ma 2 .mas
|
||||
|
||||
This is a test of .otfm on the fly macro.,s!
|
||||
|
||||
If this sentence makes sense, then the test of .otfms worked!
|
||||
|
||||
.otfms can also be overwritten,
|
||||
you could make it equal .otfm on the fly monkey.,s!
|
||||
|
||||
They're not just any monkeys, they're .otfms!
|
@ -1,2 +0,0 @@
|
||||
.chl chlorophyll
|
||||
.ps photosynthesis
|
@ -1 +0,0 @@
|
||||
.l louse lice
|
@ -1,6 +0,0 @@
|
||||
.hc hydrocarbon
|
||||
.dr dress
|
||||
.st story
|
||||
.he hero
|
||||
.le leaf
|
||||
.ma man men
|
178
readme.md
178
readme.md
@ -1,4 +1,8 @@
|
||||
# macros
|
||||
# otfmacros
|
||||
|
||||
on the fly macros
|
||||
|
||||
---
|
||||
|
||||
A syntax/specification for defining macros for any sort of text file.
|
||||
The goal of this is to be minimal but intuitive and with enough features to make
|
||||
@ -16,8 +20,174 @@ overly relying on pronouns if it would make the writing less clear.
|
||||
|
||||
## syntax
|
||||
|
||||
The syntax can be found [here](syntax.md)
|
||||
How to define macros and what not to define.
|
||||
|
||||
## python implementation
|
||||
### definition of simple macros
|
||||
Macros are defined in a separate file, by default called `macros`.
|
||||
You can add additional macro files through the command line options.
|
||||
|
||||
A python implementation can be found [here](pymacro/)
|
||||
Examples of macro definitions:
|
||||
|
||||
```
|
||||
.hc hydrocarbon
|
||||
.h hydrogen
|
||||
.c carbon
|
||||
```
|
||||
|
||||
#### external definitions
|
||||
|
||||
You can also `source` macros from another macro file:
|
||||
|
||||
```
|
||||
source ../macros
|
||||
source ../../macros
|
||||
.hc hydrocarbons
|
||||
.h hydrogen
|
||||
.c
|
||||
```
|
||||
|
||||
This sources macros from its parent and grandparent directories.
|
||||
This may be useful if you have several related topics in separate folders,
|
||||
but only some of the macros are shared and some are not.
|
||||
|
||||
#### on the fly definitions
|
||||
|
||||
You can also define macros in the middle of a sentence without having to switch to the macros file.
|
||||
The last word of an on the fly macro definition must end with `.,` or `,.`, and the definition must end on the same line it starts on.
|
||||
|
||||
```
|
||||
# .Hc hydrocarbon
|
||||
In or
|
||||
In organic chemistry, a .hc is an .oc organic compound., consisting entirely of
|
||||
hydrogen and carbon. .Hcs are examples of group 14 hydrides.
|
||||
|
||||
|
||||
.Ocs are compounds which contain .c .h bonds.
|
||||
```
|
||||
|
||||
The output would be the following:
|
||||
|
||||
```markdown
|
||||
# Hydrocarbon
|
||||
In organic chemistry, a hydrocarbon is an organic compound consisting entirely
|
||||
of hydrogen and carbon. Hydrocarbons are examples of group 14 hydrides.
|
||||
|
||||
Organic compounds are compounds which contain carbon hydrogen bonds.
|
||||
```
|
||||
|
||||
Otf macros can be overwritten later in the document.
|
||||
They also take precedence over any macros defined in the `macros` file.
|
||||
|
||||
### plurals
|
||||
|
||||
Say you only have the following macro definitions:
|
||||
|
||||
`.hc hydrocarbon`
|
||||
|
||||
And the file `test.md`:
|
||||
|
||||
```markdown
|
||||
# .Hc
|
||||
In organic chemistry, a .hc is an organic compound consisting entirely of
|
||||
hydrogen and carbon. .Hcs are examples of group 14 hydrides.
|
||||
```
|
||||
|
||||
The output would be the following:
|
||||
|
||||
```markdown
|
||||
# Hydrocarbon
|
||||
In organic chemistry, a hydrocarbon is an organic compound consisting entirely
|
||||
of hydrogen and carbon. Hydrocarbons are examples of group 14 hydrides.
|
||||
```
|
||||
|
||||
Note how `.hcs` becomes `hydrocarbons` even though that isn't explicitly
|
||||
defined. This helps you avoid adding the plural forms of words.
|
||||
|
||||
|
||||
#### default plurals
|
||||
|
||||
There are also other default plurals:
|
||||
|
||||
Ending | Example | Formation | Plural
|
||||
--- | --- | --- | ---
|
||||
`s`, `ch`, `sh`, `x`, or `z` | dress | add es | dresses
|
||||
`[consonant]y` | story | change `y` to `ies` | stories
|
||||
`[vowel]o` | radio | add `s` | radios
|
||||
`[consonant]o` | hero | add `es` | heroes
|
||||
`f` | leaf | change `f` to `ves` | leaves
|
||||
|
||||
|
||||
#### custom plurals
|
||||
You can also define your own custom plurals:
|
||||
|
||||
`macros`
|
||||
```
|
||||
.s salmon salmon
|
||||
.m man men
|
||||
.l louse lice
|
||||
```
|
||||
|
||||
Input:
|
||||
```
|
||||
Hello boys and girls, my name is Fat Lip
|
||||
and this is my friend Sammy the .S.
|
||||
(What 'do?)
|
||||
Today, we're going to teach you some fun new facts about .ss
|
||||
and a brand new dance.
|
||||
```
|
||||
|
||||
Output:
|
||||
```
|
||||
Hello boys and girls, my name is Fat Lip
|
||||
and this is my friend Sammy the Salmon.
|
||||
(What 'do?)
|
||||
Today, we're going to teach you some fun new facts about salmon
|
||||
and a brand new dance.
|
||||
```
|
||||
|
||||
Note: for plurals which do not change it's probably easier to just not use the
|
||||
plural version at all and forgo defining a custom plural altogether.
|
||||
|
||||
### capitalization
|
||||
|
||||
- Capitalizing the first character of a macro makes the first letter of the word
|
||||
also capitalized.
|
||||
- Capitalizing every letter makes the whole word capitalized also, except if the
|
||||
shortcut is only one letter long.
|
||||
- For this reason, you may not want to use one letter definitions.
|
||||
- This also means that macro definitions are case insensitive: the case used for
|
||||
a shortcut in the macros file is ignored.
|
||||
|
||||
Say you have the following macros:
|
||||
|
||||
```
|
||||
.hc hydrocarbon
|
||||
.h hydrogen
|
||||
.c carbon
|
||||
```
|
||||
|
||||
Input:
|
||||
```
|
||||
.hc
|
||||
.Hc
|
||||
.HC
|
||||
.h
|
||||
.H
|
||||
.c
|
||||
.C
|
||||
```
|
||||
|
||||
Output:
|
||||
```
|
||||
hydrocarbon
|
||||
Hydrocarbon
|
||||
HYDROCARBON
|
||||
hydrogen
|
||||
Hydrogen
|
||||
carbon
|
||||
Carbon
|
||||
```
|
||||
|
||||
## python preprocessor
|
||||
|
||||
An implementation of this that I've written is [otfm-python](https://github.com/alvierahman90/otfm-python).
|
||||
|
170
syntax.md
170
syntax.md
@ -1,170 +0,0 @@
|
||||
# syntax
|
||||
|
||||
How to define macros and what not to define.
|
||||
|
||||
## definition of simple macros
|
||||
Macros are defined in a separate file, by default called `macros`.
|
||||
You can add additional macro files through the command line options.
|
||||
|
||||
Examples of macro definitions:
|
||||
|
||||
```
|
||||
.hc hydrocarbon
|
||||
.h hydrogen
|
||||
.c carbon
|
||||
```
|
||||
|
||||
### external definitions
|
||||
|
||||
You can also `source` macros from another macro file:
|
||||
|
||||
```
|
||||
source ../macros
|
||||
source ../../macros
|
||||
.hc hydrocarbons
|
||||
.h hydrogen
|
||||
.c
|
||||
```
|
||||
|
||||
This sources macros from its parent and grandparent directories.
|
||||
This may be useful if you have several related topics in separate folders,
|
||||
but only some of the macros are shared and some are not.
|
||||
|
||||
### on the fly definitions
|
||||
|
||||
You can also define macros in the middle of a sentence without having to switch to the macros file.
|
||||
The last word of an on the fly macro definition must end with `.,` or `,.`, and the definition must end on the same line it starts on.
|
||||
|
||||
```
|
||||
# .Hc hydrocarbon
|
||||
In or
|
||||
In organic chemistry, a .hc is an .oc organic compound., consisting entirely of
|
||||
hydrogen and carbon. .Hcs are examples of group 14 hydrides.
|
||||
|
||||
|
||||
.Ocs are compounds which contain .c .h bonds.
|
||||
```
|
||||
|
||||
The output would be the following:
|
||||
|
||||
```markdown
|
||||
# Hydrocarbon
|
||||
In organic chemistry, a hydrocarbon is an organic compound consisting entirely
|
||||
of hydrogen and carbon. Hydrocarbons are examples of group 14 hydrides.
|
||||
|
||||
Organic compounds are compounds which contain carbon hydrogen bonds.
|
||||
```
|
||||
|
||||
Otf macros can be overwritten later in the document.
|
||||
They also take precedence over any macros defined in the `macros` file.
|
||||
|
||||
## plurals
|
||||
|
||||
Say you only have the following macro definitions:
|
||||
|
||||
`.hc hydrocarbon`
|
||||
|
||||
And the file `test.md`:
|
||||
|
||||
```markdown
|
||||
# .Hc
|
||||
In organic chemistry, a .hc is an organic compound consisting entirely of
|
||||
hydrogen and carbon. .Hcs are examples of group 14 hydrides.
|
||||
```
|
||||
|
||||
The output would be the following:
|
||||
|
||||
```markdown
|
||||
# Hydrocarbon
|
||||
In organic chemistry, a hydrocarbon is an organic compound consisting entirely
|
||||
of hydrogen and carbon. Hydrocarbons are examples of group 14 hydrides.
|
||||
```
|
||||
|
||||
Note how `.hcs` becomes `hydrocarbons` even though that isn't explicitly
|
||||
defined. This helps you avoid adding the plural forms of words.
|
||||
|
||||
|
||||
### other default plurals
|
||||
|
||||
There are also other default plurals:
|
||||
|
||||
Ending | Example | Formation | Plural
|
||||
--- | --- | --- | ---
|
||||
`s`, `ch`, `sh`, `x`, or `z` | dress | add es | dresses
|
||||
`[consonant]y` | story | change `y` to `ies` | stories
|
||||
`[vowel]o` | radio | add `s` | radios
|
||||
`[consonant]o` | hero | add `es` | heroes
|
||||
`f` | leaf | change `f` to `ves` | leaves
|
||||
|
||||
|
||||
### custom plurals
|
||||
You can also define your own custom plurals:
|
||||
|
||||
`macros`
|
||||
```
|
||||
.s salmon salmon
|
||||
.m man men
|
||||
.l louse lice
|
||||
```
|
||||
|
||||
Input:
|
||||
```
|
||||
Hello boys and girls, my name is Fat Lip
|
||||
and this is my friend Sammy the .S.
|
||||
(What 'do?)
|
||||
Today, we're going to teach you some fun new facts about .ss
|
||||
and a brand new dance.
|
||||
```
|
||||
|
||||
Output:
|
||||
```
|
||||
Hello boys and girls, my name is Fat Lip
|
||||
and this is my friend Sammy the Salmon.
|
||||
(What 'do?)
|
||||
Today, we're going to teach you some fun new facts about salmon
|
||||
and a brand new dance.
|
||||
```
|
||||
|
||||
Note: for plurals which do not change it's probably easier to just not use the
|
||||
plural version at all and forgo defining a custom plural altogether.
|
||||
|
||||
## capitalization
|
||||
|
||||
- Capitalizing the first character of a macro makes the first letter of the word
|
||||
also capitalized.
|
||||
- Capitalizing every letter makes the whole word capitalized also, except if the
|
||||
shortcut is only one letter long.
|
||||
- For this reason, you may not want to use one letter definitions.
|
||||
- This also means that macro definitions are case insensitive: the case used for
|
||||
a shortcut in the macros file is ignored.
|
||||
|
||||
Say you have the following macros:
|
||||
|
||||
```
|
||||
.hc hydrocarbon
|
||||
.h hydrogen
|
||||
.c carbon
|
||||
```
|
||||
|
||||
Input:
|
||||
```
|
||||
.hc
|
||||
.Hc
|
||||
.HC
|
||||
.h
|
||||
.H
|
||||
.c
|
||||
.C
|
||||
```
|
||||
|
||||
Output:
|
||||
```
|
||||
hydrocarbon
|
||||
Hydrocarbon
|
||||
HYDROCARBON
|
||||
hydrogen
|
||||
Hydrogen
|
||||
carbon
|
||||
Carbon
|
||||
```
|
||||
|
Loading…
Reference in New Issue
Block a user