gronk/notes2web.py

277 lines
7.8 KiB
Python
Raw Normal View History

2021-06-29 13:35:21 +00:00
#!/usr/bin/env python3
2024-01-02 18:22:15 +00:00
"""
notes2web --- view your notes as a static html site
"""
2021-06-29 13:35:21 +00:00
2024-01-02 18:22:15 +00:00
import argparse
2021-06-29 13:35:21 +00:00
import os
2024-01-02 18:22:15 +00:00
from pathlib import Path
import shutil
import sys
import pprint
2021-08-19 13:43:42 +00:00
import json
2021-06-29 13:35:21 +00:00
2024-01-02 18:22:15 +00:00
import frontmatter
import git
import jinja2
import requests
from fileproperties import FileMap
2021-06-29 13:35:21 +00:00
# Commit identifier of notes2web itself; process_home_index() hands it to the
# home index template as `n2w_commit`. Empty here -- presumably substituted at
# install/build time (TODO confirm).
N2W_COMMIT = ""
2024-01-02 18:22:15 +00:00
# Runtime configuration. Every value can be overridden through the
# environment variable of the same name; the literals are the fallbacks.

# URL that render_markdown() POSTs markdown to for conversion.
PANDOC_SERVER_URL = os.environ.get("PANDOC_SERVER_URL", "http://localhost:3030/")
# Timeout passed to requests.post() when talking to the pandoc server.
PANDOC_TIMEOUT = int(os.environ.get("PANDOC_TIMEOUT", "120"))
# Directories copied into the output site as `css` and `js` by main().
CSS_DIR = Path(os.environ.get("CSS_DIR", "/opt/notes2web/css"))
JS_DIR = Path(os.environ.get("JS_DIR", "/opt/notes2web/js"))
# Location handed to the Jinja loader to find the page templates.
TEMPLATES_DIR = Path(os.environ.get("TEMPLATES_DIR", "/opt/notes2web/templates"))
2022-11-11 12:06:46 +00:00
2024-01-02 18:22:15 +00:00
# Shared Jinja environment from which every page template is loaded.
# NOTE(review): TEMPLATES_DIR (a filesystem path) is passed as PackageLoader's
# package_path; confirm whether jinja2.FileSystemLoader was intended.
JINJA_ENV = jinja2.Environment(
    loader=jinja2.PackageLoader("notes2web", str(TEMPLATES_DIR)),
    # select_autoescape is a factory and has to be *called*: its return value
    # is the per-template-name predicate that Environment expects. Passing the
    # factory itself makes jinja call it with the template name as its
    # `enabled_extensions` argument.
    autoescape=jinja2.select_autoescape(),
)
2022-11-11 12:06:46 +00:00
2024-01-02 18:22:15 +00:00
# Templates are loaded once, at import time, from JINJA_ENV.
# JINJA_TEMPLATES is not referenced anywhere else in the visible code.
JINJA_TEMPLATES = {}
# HTML preview page for plaintext files (see render_plaintext_file).
JINJA_TEMPLATE_TEXTARTICLE = JINJA_ENV.get_template("textarticle.html")
# Landing page of the whole site (see process_home_index).
JINJA_TEMPLATE_HOME_INDEX = JINJA_ENV.get_template("home_index.html")
# Per-directory listing page written by main() for every walked directory.
JINJA_TEMPLATE_DIRECTORY_INDEX = JINJA_ENV.get_template("index.html")
# Not used in the visible code -- presumably meant for rendered markdown
# articles (TODO confirm).
JINJA_TEMPLATE_ARTICLE = JINJA_ENV.get_template("article.html")
2022-11-11 12:06:46 +00:00
2024-01-02 18:22:15 +00:00
# Module-level state that main() fills in through `global` statements.
# Text of the LICENSE file in the notes directory, if one exists.
LICENSE = None
# git.Repo for the notes directory when it is a git repository.
GIT_REPO = None
# FileMap mapping source files to their properties and output paths.
FILEMAP = None
2021-09-01 13:08:24 +00:00
2024-01-02 18:22:15 +00:00
def update_required(src_filepath, output_filepath):
    """
    check if file requires an update,
    return boolean

    An update is required when the output file does not exist yet, or when
    the source file was modified more recently than the output file.
    Both arguments are pathlib.Path objects.
    """
    if not output_filepath.exists():
        return True
    # st_mtime is a plain attribute of the stat result, not a method
    return src_filepath.stat().st_mtime > output_filepath.stat().st_mtime
2021-08-21 01:14:12 +00:00
2024-01-02 18:22:15 +00:00
def get_args():
    """
    Parse the command line (sys.argv) and return the resulting namespace.

    Options: positional `notes` directory, `-o/--output-dir` (defaults to
    'web') and the `-F/--force` flag.
    """
    arg_parser = argparse.ArgumentParser()

    arg_parser.add_argument('notes', type=Path)
    arg_parser.add_argument(
        '-o', '--output-dir',
        type=Path,
        default='web',
    )
    arg_parser.add_argument(
        '-F', '--force',
        action="store_true",
        help="Generate new output html even if source file was modified before output html",
    )

    parsed = arg_parser.parse_args()
    return parsed
2024-01-02 18:22:15 +00:00
def render_markdown_file(input_filepath):
    """
    render markdown file to file

    Strip the frontmatter from the markdown source, convert the remaining
    body to html with render_markdown() and write it to the html
    destination recorded for this file in FILEMAP.
    Nothing is returned.
    """
    print(f"render_markdown_file({input_filepath})")

    with open(input_filepath, encoding='utf-8') as source_file:
        post = frontmatter.load(source_file)
    markdown_body = post.content

    file_properties = FILEMAP.get(input_filepath)

    # TODO pandoc no longer handles template due to metadata passing issues, use jinja to fill in the metadata
    rendered_html = render_markdown(markdown_body)

    html_destination = file_properties['dst_path']['html']
    with open(html_destination, 'w+', encoding='utf-8') as html_file:
        html_file.write(rendered_html)
2021-08-21 01:14:12 +00:00
2024-01-02 18:22:15 +00:00
def render_plaintext_file(input_filepath):
    """
    render plaintext file to file

    Write two outputs for a plaintext source: a verbatim copy at its 'raw'
    destination and a html preview (text article template) at its 'html'
    destination. Both destinations come from FILEMAP.
    Nothing is returned.
    """
    with open(input_filepath, encoding='utf-8') as source_file:
        source_text = source_file.read()

    file_properties = FILEMAP.get(input_filepath)

    # the template receives the file's properties plus the site license
    preview_html = JINJA_TEMPLATE_TEXTARTICLE.render(license=LICENSE, **file_properties)

    raw_destination = file_properties['dst_path']['raw']
    with open(raw_destination, "w+", encoding='utf-8') as raw_file:
        raw_file.write(source_text)

    html_destination = file_properties['dst_path']['html']
    with open(html_destination, "w+", encoding='utf-8') as html_file:
        html_file.write(preview_html)
2021-06-29 13:35:21 +00:00
2024-01-02 18:22:15 +00:00
def render_generic_file(input_filepath):
    """
    render generic file to file

    Copy the file unchanged to the 'raw' destination that FILEMAP records
    for it. Nothing is returned.
    """
    raw_destination = FILEMAP.get(input_filepath)['dst_path']['raw']
    shutil.copyfile(input_filepath, raw_destination)
2021-06-29 13:35:21 +00:00
2024-01-02 18:22:15 +00:00
def render_file(input_filepath):
    """
    render any file by detecting its type and dispatching to the matching
    renderer

    '.md' files go to render_markdown_file, files FileMap considers
    plaintext go to render_plaintext_file, everything else goes to
    render_generic_file. The chosen renderer's return value is passed
    through.
    """
    if input_filepath.suffix == '.md':
        renderer = render_markdown_file
    elif FileMap.is_plaintext(input_filepath):
        renderer = render_plaintext_file
    else:
        renderer = render_generic_file
    return renderer(input_filepath)
2021-06-29 13:35:21 +00:00
2024-01-02 18:22:15 +00:00
def render_markdown(content):
    """
    render markdown to html

    POST `content` to the pandoc server at PANDOC_SERVER_URL and return the
    html it produces (a fragment, since 'standalone' is False).

    Raises requests.HTTPError when the server answers with an error status
    and RuntimeError when the JSON reply reports a conversion error.
    """
    post_body = {
        'text': content,
        'toc-depth': 6,
        'highlight-style': 'pygments',
        'html-math-method': 'mathml',
        'to': 'html',
        'files': {
            # NOTE(review): supplies an empty file at this path -- presumably
            # to blank out pandoc's built-in abbreviations list; confirm
            'data/data/abbreviations': '',
        },
        'standalone': False,
    }

    headers = {
        'Accept': 'application/json'
    }

    response = requests.post(
        PANDOC_SERVER_URL,
        headers=headers,
        json=post_body,
        timeout=PANDOC_TIMEOUT
    )
    # fail with a clear HTTP error instead of trying to parse an error page
    response.raise_for_status()

    result = response.json()
    # an 'error' field replaces 'output' when the conversion failed
    if 'error' in result:
        raise RuntimeError(f"pandoc server failed to render markdown: {result['error']}")

    # TODO look at result['messages'] and log them maybe?
    # https://github.com/jgm/pandoc/blob/main/doc/pandoc-server.md#response
    return result['output']
def process_home_index(output_dir, search_data, notes_git_head_sha1=None):
    """
    write the site's landing page

    Render the home index template with the notes2web commit, the search
    data and the optional head sha1 of the notes repository, then store the
    result as index.html directly inside output_dir.
    """
    template_context = {
        'n2w_commit': N2W_COMMIT,
        'search_data': search_data,
        'notes_git_head_sha1': notes_git_head_sha1,
    }
    rendered_page = JINJA_TEMPLATE_HOME_INDEX.render(**template_context)

    index_path = output_dir.joinpath('index.html')
    with open(index_path, 'w+', encoding='utf-8') as index_file:
        index_file.write(rendered_page)
def generate_variable_browser(output_dir, posts, variable_name):
    """
    generate a directory that lets you group by and browse by any given tag. e.g. tags, authors

    `posts` is iterated as (key, post) pairs: a mapping is read through
    .items(), any other object through its own .iter() method. Each post
    must offer .get(); posts without `variable_name` are skipped.

    The function is unfinished: pages are assembled but not yet written to
    `output_dir`, and nothing is returned.
    """
    # dicts have no .iter(); fall back to it only for non-mapping containers
    iter_posts = posts.items if hasattr(posts, 'items') else posts.iter

    groups = {}
    for _key, post in iter_posts():
        group_val = post.get(variable_name, None)
        if group_val is None:
            continue
        # NOTE(review): an unhashable value (e.g. a list of tags) raises
        # TypeError here -- confirm which value types are expected
        groups.setdefault(group_val, []).append(post)

    for group_val, index_entries in groups.items():
        post = {
            'index_entries': index_entries,
            'title': group_val,
        }
        # TODO finish writing function, write page to disk
def main(args):
    """
    Entry point for script

    Build the static site for the notes directory `args.notes` inside
    `args.output_dir`: every directory gets an index.html, every file is
    rendered through render_file(), then the home index and the css/js
    assets are written.

    Returns the process exit code: 0 on success, 1 when `args.output_dir`
    is an existing file.
    """
    global LICENSE
    global GIT_REPO
    global FILEMAP

    FILEMAP = FileMap(args.notes, args.output_dir.joinpath('notes'))

    if args.output_dir.is_file():
        print(f"Output directory ({args.output_dir}) cannot be a file.")
        # stop here: the mkdir below would otherwise die with FileExistsError
        return 1

    args.output_dir.mkdir(parents=True, exist_ok=True)

    # attempt to get licensing information
    license_path = args.notes.joinpath("LICENSE")
    if license_path.exists():
        with open(license_path, encoding='utf-8') as file_pointer:
            LICENSE = file_pointer.read()

    # create git.Repo object if notes dir is a git repo
    # (iterdir() yields Path objects, so testing `'.git' in iterdir()` can
    # never match; look for the entry directly instead)
    # TODO git commit log integration
    if args.notes.joinpath('.git').exists():
        GIT_REPO = git.Repo(args.notes)

    for root_str, subdirectories, files in os.walk(args.notes):
        # prune in place so os.walk does not descend into git internals
        subdirectories[:] = [name for name in subdirectories if name != '.git']

        root = Path(root_str)
        if '.git' in root.parts:
            continue

        root_properties = FILEMAP.get(root)
        root_properties['dst_path']['raw'].mkdir(parents=True, exist_ok=True)

        pprint.pprint(root_properties)
        print(JINJA_TEMPLATE_DIRECTORY_INDEX)
        html = JINJA_TEMPLATE_DIRECTORY_INDEX.render(**root_properties)
        with open(root_properties['dst_path']['raw'].joinpath('index.html'), 'w+', encoding='utf-8') as file_pointer:
            file_pointer.write(html)

        # render each file
        for file in files:
            render_file(root.joinpath(file))

    process_home_index(args.output_dir, search_data=FILEMAP.to_list())

    # copy styling and js scripts necessary for function
    shutil.copytree(CSS_DIR, args.output_dir.joinpath('css'), dirs_exist_ok=True)
    shutil.copytree(JS_DIR, args.output_dir.joinpath('js'), dirs_exist_ok=True)

    return 0
2024-01-02 18:22:15 +00:00
# TODO implement useful logging and debug printing
# TODO build tag/metadata pages
2021-06-29 13:35:21 +00:00
if __name__ == '__main__':
    # Run the generator; Ctrl-C is treated as a clean exit (status 0).
    try:
        exit_code = main(get_args())
    except KeyboardInterrupt:
        exit_code = 0
    sys.exit(exit_code)