mirror of
https://github.com/alvierahman90/otfm-python.git
synced 2025-01-12 02:04:20 +00:00
first commit
This commit is contained in:
commit
5f21b2c1dc
9
Makefile
Normal file
9
Makefile
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
include config.mk

# Neither target produces a file of its own name.
.PHONY: install uninstall

# Copy the script into ${DESTDIR}${PREFIX}/bin and make it executable.
install:
	mkdir -p ${DESTDIR}${PREFIX}/bin/
	cp -f otfm-python ${DESTDIR}${PREFIX}/bin/
	chmod 755 ${DESTDIR}${PREFIX}/bin/otfm-python

# Remove the installed script.
uninstall:
	rm -f ${DESTDIR}${PREFIX}/bin/otfm-python
|
8
macros
Normal file
8
macros
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
.hc hydrocarbon
|
||||||
|
.hy hydrogen
|
||||||
|
.ca carbon
|
||||||
|
.ox oxygen
|
||||||
|
.wink 😉
|
||||||
|
source tests/test_macros_biology
|
||||||
|
source tests/test_macros_custom_plurals
|
||||||
|
source tests/test_macros_plural
|
254
otfm-python
Executable file
254
otfm-python
Executable file
@ -0,0 +1,254 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
class MultipleTokens:
    """
    Wrapper for several words that share one token slot.

    detokenize() emits every entry of `words` in place of the wrapper, so a single slot
    can expand to many words while all other tokens keep their indexes.
    """

    def __init__(self, words):
        # the words detokenize() writes out for this slot, in order
        self.words = words
|
||||||
|
|
||||||
|
|
||||||
|
def get_args():
    """ Build the command line parser and return the parsed arguments. """

    import argparse

    # (flags, keyword settings) for every supported option
    option_specs = (
        (("-m", "--macros-file"), {"default": "macros",
                                   "help": "File where macros are stored"}),
        (("-i", "--input"), {"help": "File to be processed.", "default": "-"}),
        (("-o", "--output"), {"help": "Path of output", "default": "-"}),
    )

    cli = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)

    return cli.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def pluralize(input):
    """
    Return the plural for a macro value.

    `input` is either a plain word or a list whose first entry is the singular and whose
    optional second entry is a custom plural.
    """
    if not isinstance(input, list):
        return pluralize_word(input)

    # an explicitly defined plural wins over the spelling rules
    return input[1] if len(input) > 1 else pluralize_word(input[0])
|
||||||
|
|
||||||
|
def pluralize_word(word):
    """
    Return the plural of `word` using a few simple English spelling rules.

    Rules, checked in this order:
      * ends in s, x, z, ch or sh      -> add "es"
      * ends in consonant + y          -> replace "y" with "ies"
      * ends in consonant + o          -> add "es"
      * ends in f                      -> replace "f" with "ves"
      * anything else                  -> add "s"

    An empty string is returned unchanged. Only the lower-case vowels "aeiou" count as
    vowels.
    """
    # nothing to inflect; also avoids indexing into an empty string
    if not word:
        return word

    last = word[-1]
    # a single-letter word has no preceding letter, so the consonant rules never apply
    after_consonant = len(word) > 1 and word[-2] not in 'aeiou'

    # TODO add more complex plural forms
    if last in 'sxz' or word[-2:] in ('ch', 'sh'):
        return word + 'es'
    if last == 'y' and after_consonant:
        return word[:-1] + 'ies'
    if last == 'o' and after_consonant:
        return word + 'es'
    if last == 'f':
        return word[:-1] + 'ves'
    return word + 's'
|
||||||
|
|
||||||
|
|
||||||
|
def upper_check(token, word):
    """
    Copy the capitalisation of the typed `token` onto the expansion `word`.

      * token contains cased letters and none of them is lower case (".HC")
        -> the whole word is upper-cased
      * the second character of token is upper case (".Hc")
        -> only the first letter of word is capitalised
      * otherwise word is returned unchanged

    A token without any cased letter (".1", "") is not treated as all caps, so its
    expansion is no longer upper-cased by accident.
    """
    # str.isupper() is False when the token has no cased characters at all
    if token.isupper():
        return word.upper()

    if len(token) > 1 and token[1].isupper():
        return word[:1].upper() + word[1:]

    return word
|
||||||
|
|
||||||
|
|
||||||
|
def _split_trailing_punctuation(token):
    """ Split `token` into (word, trailing run of non-alphanumeric characters). """
    cut = len(token)
    while cut > 0 and not token[cut - 1].isalnum():
        cut -= 1
    return token[:cut], token[cut:]


def process(input, macros):
    """
    Expand the macros found in the string `input` and return the resulting string.

    `macros` is a dict mapping a lower-case keyword to a list whose first entry is the
    expansion and whose optional second entry is a custom plural. A keyword followed by
    "s" expands to the plural. The capitalisation of the typed keyword is carried over
    to the expansion by upper_check(); words that are not macros are left untouched.

    On the fly (otf) macros are defined inside the text: a keyword token, the words of
    the definition, and an end marker (".," or ",.") attached to the last word, all on
    one line. The definition is stored in `macros` (the dict passed in is modified),
    replacing any earlier definition, and is used for that occurrence and later ones.
    """
    tokens = tokenize(input)

    in_otf_macro = False
    tmp_macro_keyword = None
    tmp_macro_definition = []

    for line_number, line in enumerate(tokens):
        for token_number, token in enumerate(line):
            if len(token) == 0:
                continue

            # Only tokens after the keyword can close its definition; looking at the
            # whole line would let an earlier end marker open a definition that never
            # ends on this line.
            if is_otf_macro_start(token, line[token_number + 1:]):
                tmp_macro_keyword = token
                in_otf_macro = True
                tmp_macro_definition = []
                line[token_number] = None
                continue

            if in_otf_macro:
                if not is_otf_macro_end(token):
                    tmp_macro_definition.append(token)
                    line[token_number] = None
                    continue

                # Split on the literal end marker only, once: the text before it ends
                # the definition, the text after it is a suffix of the keyword.
                last_word, suffix = re.split(r'\.,|,\.', token, maxsplit=1)
                if last_word:
                    tmp_macro_definition.append(last_word)
                # Same shape and key case as file macros, so the lookups below work.
                macros[tmp_macro_keyword.lower()] = [' '.join(tmp_macro_definition)]
                in_otf_macro = False
                # Fall through so this first use is expanded like any other.
                token = tmp_macro_keyword + suffix

            # Trailing punctuation is cut off and added back once the word is expanded,
            # e.g. "hello..." is looked up as "hello" and "..." is re-attached.
            token, end = _split_trailing_punctuation(token)

            key = token.lower()
            if key in macros:
                value = upper_check(token, macros[key][0])
            elif key.endswith('s') and key[:-1] in macros:
                value = upper_check(token, pluralize(macros[key[:-1]]))
            else:
                # not a macro: keep the word exactly as typed
                value = token

            line[token_number] = value + end

        # drop the tokens consumed by otf macro definitions
        tokens[line_number] = [token for token in line if token is not None]

    return detokenize(tokens)
|
||||||
|
|
||||||
|
def tokenize(input):
    """
    Split text into a 2D list of tokens: one inner list per line, one token per
    space-separated piece. Consecutive spaces produce empty-string tokens.
    """
    rows = []
    for text_line in input.split('\n'):
        rows.append(text_line.split(' '))
    return rows
|
||||||
|
|
||||||
|
|
||||||
|
def detokenize(tokens):
    """
    Turn a 2D list of tokens back into plaintext.

    Tokens of a line are joined by single spaces and lines by newlines. A MultipleTokens
    entry contributes each of its words. Any other non-string token raises ValueError.
    """
    rendered_lines = []

    for line in tokens:
        words = []
        for token in line:
            if isinstance(token, str):
                words.append(token)
            elif isinstance(token, MultipleTokens):
                words.extend(token.words)
            else:
                raise ValueError(f"Unknown token type: {type(token)}")
        rendered_lines.append(' '.join(words))

    return '\n'.join(rendered_lines)
|
||||||
|
|
||||||
|
|
||||||
|
def get_macros(input, child=False):
    """
    Parse the text of a macros file.

    Every line holds tab-separated fields: a keyword, its expansion and optionally a
    custom plural. A line whose first field is `source` names another macros file in its
    second field; that file is read and parsed recursively and its entries are placed
    after the entries of the current text, so they win over earlier duplicates.

    Returns a dict mapping each lower-cased keyword to the list of its remaining fields.
    Lines with fewer than two fields are ignored. When `child` is true (the recursive
    calls) the raw list of split lines is returned instead of the dict.

    Raises ValueError if a `source` line has no path field.
    """
    macros = [x.split('\t') for x in input.split('\n')]

    for index, macro in enumerate(macros):
        # Entries appended from sourced files are visited as well. Their own `source`
        # lines arrive as empty placeholders and must be skipped, not indexed.
        if not macro or macro[0] != "source":
            continue

        if len(macro) < 2:
            raise ValueError("'source' line is missing the tab-separated file path")

        with open(macro[1]) as file:
            macros += get_macros(file.read(), child=True)
        # blank out the directive so it is never stored as a macro
        macros[index] = ()

    if child:
        return macros

    # later entries overwrite earlier ones with the same keyword
    return {macro[0].lower(): macro[1:] for macro in macros if len(macro) >= 2}
|
||||||
|
|
||||||
|
def is_otf_macro_start(token, line):
    """
    Tell whether `token` opens an on the fly macro.

    True only if token is a dot followed by nothing but letters and digits AND some
    token in `line` is an on the fly macro end.
    """
    looks_like_keyword = re.search(r'^\.[A-Za-z0-9]+$', token) is not None
    if not looks_like_keyword:
        return False

    # a keyword without an end token on the line is ordinary text
    return any(is_otf_macro_end(candidate) for candidate in line)
|
||||||
|
|
||||||
|
|
||||||
|
def is_otf_macro_end(token):
    """ Tell whether `token`, formatted as text, contains the end marker ".," or ",.". """
    text = f"{token}"
    return re.search(r'(\.,|,\.)', text) is not None
|
||||||
|
|
||||||
|
|
||||||
|
def main(args):
    """
    Entry point for script.

    Reads the macros file named by `args.macros_file`, reads the text from `args.input`
    ("-" means standard input), expands the macros and writes the result to
    `args.output` ("-" means standard output, via print()).

    Returns 0 so that passing the result to sys.exit() reports success; returning the
    value of file.write() would turn the number of characters written into the exit
    status.
    """

    # get macros
    with open(args.macros_file) as file:
        macros = get_macros(file.read())

    # get tokens (file contents)
    if args.input == "-":
        text = sys.stdin.read()
    else:
        with open(args.input) as file:
            text = file.read()

    # expand before touching the output so a failure does not truncate an existing file
    result = process(text, macros)

    if args.output == "-":
        print(result)
    else:
        with open(args.output, 'w') as file:
            file.write(result)

    return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    try:
        exit_status = main(get_args())
    except KeyboardInterrupt:
        # an interrupt from the keyboard ends the run with a success status
        exit_status = 0
    sys.exit(exit_status)
|
24
readme.md
Normal file
24
readme.md
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
# pymacro
|
||||||
|
|
||||||
|
A python implementation of [otfmacros](https://github.com/alvierahman90/otfmacros).
|
||||||
|
|
||||||
|
## usage
|
||||||
|
```
|
||||||
|
$ ./otfm-python -h
usage: otfm-python [-h] [-m MACROS_FILE] [-i INPUT] [-o OUTPUT]
|
||||||
|
|
||||||
|
optional arguments:
|
||||||
|
-h, --help show this help message and exit
|
||||||
|
-m MACROS_FILE, --macros-file MACROS_FILE
|
||||||
|
File where macros are stored (default: macros)
|
||||||
|
-i INPUT, --input INPUT
|
||||||
|
File to be processed. (default: -)
|
||||||
|
-o OUTPUT, --output OUTPUT
|
||||||
|
Path of output (default: -)
|
||||||
|
```
|
||||||
|
|
||||||
|
## testing
|
||||||
|
|
||||||
|
Run `test.sh`.
It feeds `tests/test_input` to the script and runs `diff` on the actual output against `tests/test_expected_output`, which holds the output required by the spec.
|
3
test.sh
Executable file
3
test.sh
Executable file
@ -0,0 +1,3 @@
|
|||||||
|
#!/usr/bin/env sh
# Run the script on the sample input and compare the result with the expected output.
# diff prints any differences; its exit status is the status of this script.
./otfm-python < tests/test_input > tests/test_actual_output
diff tests/test_actual_output tests/test_expected_output
|
12
tests/test_expected_output
Normal file
12
tests/test_expected_output
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
Hydrocarbons are composed of exclusively hydrogen and carbon.
|
||||||
|
|
||||||
|
Chlorophyll is the site of photosynthesis.
|
||||||
|
|
||||||
|
😉
|
||||||
|
|
||||||
|
1 hydrocarbon 2 hydrocarbons
|
||||||
|
1 dress 2 dresses
|
||||||
|
1 story 2 stories
|
||||||
|
1 hero 2 heroes
|
||||||
|
1 leaf 2 leaves
|
||||||
|
1 man 2 men
|
21
tests/test_input
Normal file
21
tests/test_input
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
.Hcs are composed of exclusively .hy and .ca.
|
||||||
|
|
||||||
|
.Chl is the site of .ps.
|
||||||
|
|
||||||
|
.wink
|
||||||
|
|
||||||
|
1 .hc 2 .hcs
|
||||||
|
1 .dr 2 .drs
|
||||||
|
1 .st 2 .sts
|
||||||
|
1 .he 2 .hes
|
||||||
|
1 .le 2 .les
|
||||||
|
1 .ma 2 .mas
|
||||||
|
|
||||||
|
This is a test of .otfm on the fly macro.,s!
|
||||||
|
|
||||||
|
If this sentence makes sense, then the test of .otfms worked!
|
||||||
|
|
||||||
|
.otfms can also be overwritten,
|
||||||
|
you could make it equal .otfm on the fly monkey.,s!
|
||||||
|
|
||||||
|
They're not just any monkeys, they're .otfms!
|
2
tests/test_macros_biology
Normal file
2
tests/test_macros_biology
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
.chl chlorophyll
|
||||||
|
.ps photosynthesis
|
1
tests/test_macros_custom_plurals
Normal file
1
tests/test_macros_custom_plurals
Normal file
@ -0,0 +1 @@
|
|||||||
|
.l louse lice
|
6
tests/test_macros_plural
Normal file
6
tests/test_macros_plural
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
.hc hydrocarbon
|
||||||
|
.dr dress
|
||||||
|
.st story
|
||||||
|
.he hero
|
||||||
|
.le leaf
|
||||||
|
.ma man men
|
Loading…
Reference in New Issue
Block a user