mirror of
https://github.com/alvierahman90/otfm-python.git
synced 2025-01-12 02:04:20 +00:00
first commit
This commit is contained in:
commit
5f21b2c1dc
9
Makefile
Normal file
9
Makefile
Normal file
@ -0,0 +1,9 @@
|
||||
include config.mk

install:
	mkdir -p ${DESTDIR}${PREFIX}/bin/
	cp -f otfm-python ${DESTDIR}${PREFIX}/bin/
	# BUGFIX: chmod the installed binary, not the bin/ directory
	chmod 755 ${DESTDIR}${PREFIX}/bin/otfm-python

uninstall:
	rm -f ${DESTDIR}${PREFIX}/bin/otfm-python
|
8
macros
Normal file
8
macros
Normal file
@ -0,0 +1,8 @@
|
||||
.hc hydrocarbon
|
||||
.hy hydrogen
|
||||
.ca carbon
|
||||
.ox oxygen
|
||||
.wink 😉
|
||||
source tests/test_macros_biology
|
||||
source tests/test_macros_custom_plurals
|
||||
source tests/test_macros_plural
|
254
otfm-python
Executable file
254
otfm-python
Executable file
@ -0,0 +1,254 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
import re
|
||||
|
||||
|
||||
class MultipleTokens():
    """
    Marker type consumed by detokenize(): wraps a list of words so that a
    macro expansion can insert several tokens in place of a single one
    without shifting the indexes of the remaining tokens.
    """

    def __init__(self, words):
        # words: list of strings to be emitted as separate tokens
        self.words = words
|
||||
|
||||
|
||||
def get_args():
    """ Parse and return the command line arguments. """

    import argparse

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-m", "--macros-file", default="macros",
                        help="File where macros are stored")
    parser.add_argument("-i", "--input", default="-",
                        help="File to be processed.")
    parser.add_argument("-o", "--output", default="-",
                        help="Path of output")

    return parser.parse_args()
|
||||
|
||||
|
||||
def pluralize(input):
    """ Return the plural form of a word or of a macro definition list. """
    if not isinstance(input, list):
        return pluralize_word(input)

    # a definition list with a second entry carries a custom plural
    if len(input) > 1:
        return input[1]

    return pluralize_word(input[0])
|
||||
|
||||
def pluralize_word(word):
    """
    Return the regular English plural of a single word.

    Applies the common suffix rules (-s/-x/-z/-ch/-sh -> +es,
    consonant+y -> -ies, consonant+o -> +es, -f -> -ves) and falls back to
    appending 's'.  Irregular plurals (man/men, ...) are expected to be
    supplied as custom plurals in the macros file, not computed here.
    """
    def is_vowel(letter):
        # strict single-character check so suffix tests below stay honest
        if not isinstance(letter, str):
            raise ValueError("Argument 'letter' must be type str")
        if len(letter) != 1:
            raise ValueError("Argument 'letter' must be 1 long")
        return letter in 'aeiou'

    # BUGFIX: an empty word used to raise IndexError on word[-1]
    if not word:
        return word + 's'

    # TODO add more complex plural forms
    if word[-1] in 'sxz' or word[-2:] in ['ch', 'sh']:
        return word + 'es'
    # BUGFIX: the len(word) > 1 guards prevent the IndexError that
    # one-letter words such as "y" or "o" used to raise on word[-2]
    if word[-1] == 'y' and len(word) > 1 and not is_vowel(word[-2]):
        return word[:-1] + 'ies'
    if word[-1] == 'o' and len(word) > 1 and not is_vowel(word[-2]):
        return word + 'es'
    if word[-1] == 'f':
        return word[:-1] + 'ves'
    return word + 's'
|
||||
|
||||
|
||||
def upper_check(token, word):
    """ Capitalise `word` to match the capitalisation of `token`: fully
    upper-case when `token` contains no lower-case letter, initial capital
    when the second character of `token` is upper-case, else unchanged. """

    # no lower-case letter anywhere in the token -> shout the whole word
    if not any(letter.islower() for letter in token):
        return word.upper()

    # second character upper-case (e.g. ".Hcs") -> capitalise first letter
    if len(token) > 1 and token[1].isupper():
        return word[:1].upper() + word[1:]

    return word
|
||||
|
||||
|
||||
def process(input, macros):
    """
    Substitute macros in the string `input` and return the resulting text.

    `macros` maps a lower-cased keyword to a list whose first element is the
    expansion (a second element, when present, is a custom plural — see
    pluralize()).  On-the-fly macros defined inline (".kw definition.,") are
    detected, added to `macros` (mutating the caller's dict), and replace the
    keyword there and in any following occurrence.
    """
    tokens = tokenize(input)

    in_otf_macro = False
    tmp_macro_keyword = None
    tmp_macro_definition = []

    for line_number, line in enumerate(tokens):
        for token_number, token in enumerate(line):
            if len(token) == 0:
                continue

            # detect on the fly macros
            token_is_otf_macro_start = is_otf_macro_start(token, line)

            # process otf macro tokens
            if token_is_otf_macro_start:
                tmp_macro_keyword = token
                in_otf_macro = True
                tmp_macro_definition = []
                tokens[line_number][token_number] = None
                continue
            elif in_otf_macro and is_otf_macro_end(token):
                # BUGFIX: the split pattern was r',.|.,' with unescaped dots
                # (wildcards), so any comma inside a token ended the macro;
                # escape them to agree with is_otf_macro_end()
                split_token = re.split(r'\.,|,\.', token)
                tmp_macro_definition.append(split_token[0])
                # BUGFIX: store the definition as a one-element list under a
                # lower-cased key, matching get_macros(); it was stored as a
                # plain string, so the `[0]` lookup below expanded the macro
                # to just the first character of its definition
                macros[tmp_macro_keyword.lower()] = [' '.join(tmp_macro_definition)]
                token = tmp_macro_keyword + split_token[1]
                in_otf_macro = False
                # once the end of the macro has been found and stored, continue down
                # the for loop so that it can be turned back to normal text
            elif in_otf_macro:
                tmp_macro_definition.append(token)
                tokens[line_number][token_number] = None
                continue

            # cutting off the end and then adding it back once expanded
            # e.g. punctuation: from the token "hello...", end would be equal to "..."
            # and token would be equal to "hello"
            end = []
            token = list(token)
            for index, char in reversed(list(enumerate(token))):
                if not char.isalnum():
                    end.insert(0, token.pop(index))
                else:
                    break
            end = ''.join(end)
            token = ''.join(token)

            # if no macro is found (or if it is not a macro at all), the value
            # will not be changed
            value = token

            if token.lower() in macros.keys():
                value = macros[token.lower()][0]
            elif token.lower() in [f"{m}s" for m in macros.keys()]:
                value = pluralize(macros[token.lower()[:-1]])

            tokens[line_number][token_number] = upper_check(token, value)
            tokens[line_number][token_number] += end

        # filter out None tokens left behind by consumed otf macro definitions
        tokens[line_number] = [token for token in tokens[line_number] if token is not None]

    return detokenize(tokens)
|
||||
|
||||
def tokenize(input):
    """
    Split plaintext into a 2D list: one list of space-separated tokens per
    line of `input`.
    """
    lines = input.split('\n')
    return [line.split(' ') for line in lines]
|
||||
|
||||
|
||||
def detokenize(tokens):
    """Turn a 2D list of tokens back into plaintext. """

    lines = []

    for line in tokens:
        words = []
        for token in line:
            if isinstance(token, MultipleTokens):
                # expand a multi-word token into its component words
                words.extend(token.words)
            elif isinstance(token, str):
                words.append(token)
            else:
                raise ValueError(f"Unknown token type: {type(token)}")
        lines.append(' '.join(words))

    return '\n'.join(lines)
|
||||
|
||||
|
||||
def get_macros(input, child=False):
    """
    Parse a macros file's contents into a dict mapping each lower-cased
    keyword to a list of [expansion, optional custom plural].

    Lines whose keyword is `source` recursively pull in macros from the
    named file.  When `child` is true the intermediate list of entries is
    returned instead of a dict so a parent call can merge it.
    """
    response = {}

    # turn input into a list of [keyword, expansion, ...] entries (tab-separated)
    macros = [re.split('[\t]', x) for x in input.split('\n')]

    # check if keyword is `source`, get macros from sourced file if it is
    for index, macro in enumerate(macros):
        # BUGFIX: guard against the () placeholders left below — a sourced
        # file that itself sources another file appends them to the list we
        # are iterating, and macro[0] on an empty tuple raised IndexError
        if len(macro) >= 1 and macro[0] == "source":
            with open(macro[1]) as file:
                macros += get_macros(file.read(), child=True)
            macros[index] = ()

    if child:
        return macros

    # store macros as dict and return; entries shorter than 2 fields
    # (blank lines, () placeholders) carry no macro and are skipped
    for index, macro in enumerate(macros):
        if len(macro) >= 2:
            response[macro[0].lower()] = macro[1:]
    return response
|
||||
|
||||
def is_otf_macro_start(token, line):
    """ Return True if `token` begins an on the fly macro definition
    (".keyword") and the same line also contains an end marker. """
    if re.search(r'^\.[A-Za-z0-9]+$', token) is None:
        return False

    # only a real start if some token on the line terminates the macro
    return any(is_otf_macro_end(line_token) for line_token in line)
|
||||
|
||||
|
||||
def is_otf_macro_end(token):
    """ Return True if `token` marks the end of an on the fly macro,
    i.e. contains ".," or ",.". """
    # the f-string coerces non-str tokens to str before matching
    return bool(re.search(r'\.,|,\.', f"{token}"))
|
||||
|
||||
|
||||
def main(args):
    """ Entry point for script """

    # load the macro definitions
    with open(args.macros_file) as file:
        macros = get_macros(file.read())

    # read the text to process ("-" means stdin)
    if args.input == "-":
        text = sys.stdin.read()
    else:
        with open(args.input) as file:
            text = file.read()

    # write the result ("-" means stdout)
    if args.output == "-":
        return print(process(text, macros))

    with open(args.output, 'w+') as file:
        return file.write(process(text, macros))
|
||||
|
||||
|
||||
# Script entry point: exit with main()'s return value, treating Ctrl-C
# (KeyboardInterrupt) as a clean exit instead of printing a traceback.
if __name__ == '__main__':
    try:
        sys.exit(main(get_args()))
    except KeyboardInterrupt:
        sys.exit(0)
|
24
readme.md
Normal file
24
readme.md
Normal file
@ -0,0 +1,24 @@
|
||||
# otfm-python
|
||||
|
||||
A python implementation of [otfmacros](https://github.com/alvierahman90/otfmacros).
|
||||
|
||||
## usage
|
||||
```
|
||||
$ ./otfm-python -h
|
||||
usage: otfm-python [-h] [-m MACROS_FILE] [-i INPUT] [-o OUTPUT]
|
||||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
-m MACROS_FILE, --macros-file MACROS_FILE
|
||||
File where macros are stored (default: macros)
|
||||
-i INPUT, --input INPUT
|
||||
File to be processed. (default: -)
|
||||
-o OUTPUT, --output OUTPUT
|
||||
Path of output (default: -)
|
||||
```
|
||||
|
||||
## testing
|
||||
|
||||
Run `test.sh`.
|
||||
A `diff` is run on the actual output against what should have come out according
|
||||
to the spec.
|
3
test.sh
Executable file
3
test.sh
Executable file
@ -0,0 +1,3 @@
|
||||
#!/usr/bin/env sh
# BUGFIX: the shebang was missing '!' ("#/usr/bin/env sh"), making it a
# plain comment, and the script invoked ./pymacro, which does not exist in
# this repository — the executable is ./otfm-python.
./otfm-python < tests/test_input > tests/test_actual_output
diff tests/test_actual_output tests/test_expected_output
|
12
tests/test_expected_output
Normal file
12
tests/test_expected_output
Normal file
@ -0,0 +1,12 @@
|
||||
Hydrocarbons are composed of exclusively hydrogen and carbon.
|
||||
|
||||
Chlorophyll is the site of photosynthesis.
|
||||
|
||||
😉
|
||||
|
||||
1 hydrocarbon 2 hydrocarbons
|
||||
1 dress 2 dresses
|
||||
1 story 2 stories
|
||||
1 hero 2 heroes
|
||||
1 leaf 2 leaves
|
||||
1 man 2 men
|
21
tests/test_input
Normal file
21
tests/test_input
Normal file
@ -0,0 +1,21 @@
|
||||
.Hcs are composed of exclusively .hy and .ca.
|
||||
|
||||
.Chl is the site of .ps.
|
||||
|
||||
.wink
|
||||
|
||||
1 .hc 2 .hcs
|
||||
1 .dr 2 .drs
|
||||
1 .st 2 .sts
|
||||
1 .he 2 .hes
|
||||
1 .le 2 .les
|
||||
1 .ma 2 .mas
|
||||
|
||||
This is a test of .otfm on the fly macro.,s!
|
||||
|
||||
If this sentence makes sense, then the test of .otfms worked!
|
||||
|
||||
.otfms can also be overwritten,
|
||||
you could make it equal .otfm on the fly monkey.,s!
|
||||
|
||||
They're not just any monkeys, they're .otfms!
|
2
tests/test_macros_biology
Normal file
2
tests/test_macros_biology
Normal file
@ -0,0 +1,2 @@
|
||||
.chl chlorophyll
|
||||
.ps photosynthesis
|
1
tests/test_macros_custom_plurals
Normal file
1
tests/test_macros_custom_plurals
Normal file
@ -0,0 +1 @@
|
||||
.l louse lice
|
6
tests/test_macros_plural
Normal file
6
tests/test_macros_plural
Normal file
@ -0,0 +1,6 @@
|
||||
.hc hydrocarbon
|
||||
.dr dress
|
||||
.st story
|
||||
.he hero
|
||||
.le leaf
|
||||
.ma man men
|
Loading…
Reference in New Issue
Block a user