This repository has been archived on 2024-01-02. You can view files and clone it, but cannot push or open issues or pull requests.
notes2web/notes2web.py

302 lines
8.6 KiB
Python
Raw Normal View History

2021-06-29 13:35:21 +00:00
#!/usr/bin/env python3
2024-01-02 18:22:15 +00:00
"""
notes2web --- view your notes as a static html site
"""
2021-06-29 13:35:21 +00:00
2024-01-02 18:22:15 +00:00
import argparse
2021-06-29 13:35:21 +00:00
import os
2024-01-02 18:22:15 +00:00
from pathlib import Path
import shutil
import sys
import pprint
2021-08-19 13:43:42 +00:00
import json
2021-06-29 13:35:21 +00:00
2024-01-02 18:22:15 +00:00
import frontmatter
import git
import jinja2
import requests
from fileproperties import FileMap
2021-06-29 13:35:21 +00:00
2023-09-17 19:18:24 +00:00
# Version identifier that is passed to the home index template.
N2W_COMMIT = "dev"

# Runtime configuration. Every value below can be overridden by setting the
# environment variable of the same name; the second argument is the fallback.
PANDOC_SERVER_URL = os.environ.get("PANDOC_SERVER_URL", r"http://localhost:3030/")
PANDOC_TIMEOUT = int(os.environ.get("PANDOC_TIMEOUT", "120"))
CSS_DIR = Path(os.environ.get("CSS_DIR", "/opt/notes2web/css"))
JS_DIR = Path(os.environ.get("JS_DIR", "/opt/notes2web/js"))
TEMPLATES_DIR = Path(os.environ.get("TEMPLATES_DIR", "/opt/notes2web/templates"))
2022-11-11 12:06:46 +00:00
2024-01-02 18:22:15 +00:00
# Shared Jinja environment from which every page template is loaded.
# NOTE(review): PackageLoader needs an importable "notes2web" package to
# resolve TEMPLATES_DIR; since TEMPLATES_DIR defaults to an absolute path a
# FileSystemLoader looks like the more direct fit -- confirm before switching.
JINJA_ENV = jinja2.Environment(
    loader=jinja2.PackageLoader("notes2web", str(TEMPLATES_DIR)),
    # select_autoescape is a factory: it has to be *called* to obtain the
    # per-template-name predicate that Environment expects. Passing the
    # factory itself only enabled escaping by accident (its return value,
    # a function, is always truthy).
    autoescape=jinja2.select_autoescape(),
)
2022-11-11 12:06:46 +00:00
2024-01-02 18:22:15 +00:00
JINJA_TEMPLATES = {}

# Load every page template once, at import time, so the render functions can
# reuse the compiled templates. Targets and file names are paired by position.
(
    JINJA_TEMPLATE_TEXTARTICLE,
    JINJA_TEMPLATE_HOME_INDEX,
    JINJA_TEMPLATE_INDEX,
    JINJA_TEMPLATE_ARTICLE,
) = (
    JINJA_ENV.get_template(template_name)
    for template_name in (
        "textarticle.html",
        "home_index.html",
        "index.html",
        "article.html",
    )
)
2022-11-11 12:06:46 +00:00
2024-01-02 18:22:15 +00:00
# Module-wide state. All three start out unset and are assigned by main():
#   LICENSE  -- contents of the LICENSE file found in the notes directory
#   GIT_REPO -- git.Repo object for the notes directory
#   FILEMAP  -- FileMap built from the notes directory and the output directory
LICENSE = GIT_REPO = FILEMAP = None
2021-09-01 13:08:24 +00:00
2024-01-02 18:22:15 +00:00
def update_required(src_filepath, output_filepath):
    """
    check if file requires an update,
    return boolean

    src_filepath and output_filepath are pathlib.Path objects. An update is
    required when the output does not exist yet, or when the source was
    modified more recently than the output.
    """
    if not output_filepath.exists():
        return True

    # st_mtime is a plain attribute of the stat result, not a method
    return src_filepath.stat().st_mtime > output_filepath.stat().st_mtime
2021-08-21 01:14:12 +00:00
2024-01-02 18:22:15 +00:00
def get_args():
    """
    Parse the command line and return the resulting argparse namespace.

    The namespace carries ``notes`` (positional, Path), ``output_dir``
    (``-o``/``--output-dir``, Path, defaults to ``web``) and ``force``
    (``-F``/``--force``, boolean flag).
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('notes', type=Path)
    cli.add_argument('-o', '--output-dir', default='web', type=Path)
    cli.add_argument(
        '-F', '--force',
        action="store_true",
        help="Generate new output html even if source file was modified before output html",
    )
    namespace = cli.parse_args()
    return namespace
2024-01-02 18:22:15 +00:00
def render_markdown_file(input_filepath):
    """
    render one markdown note to an html file

    The frontmatter is parsed off, the remaining markdown body is converted
    with render_markdown() and the html is written to the 'html' destination
    that FILEMAP reports for input_filepath. Nothing is returned.
    """
    print(f"render_markdown_file({input_filepath})")

    with open(input_filepath, encoding='utf-8') as source:
        markdown_body = frontmatter.load(source).content

    html_destination = FILEMAP.get(input_filepath)['dst_path']['html']

    # TODO pandoc no longer handles template due to metadata passing issues, use jinja to fill in the metadata
    rendered = render_markdown(markdown_body)

    with open(html_destination, 'w+', encoding='utf-8') as sink:
        sink.write(rendered)
2021-08-21 01:14:12 +00:00
2024-01-02 18:22:15 +00:00
def render_plaintext_file(input_filepath):
    """
    publish a plaintext file as a raw copy plus an html preview page

    The file's text is written unchanged to the 'raw' destination, and the
    text-article template (rendered with the license and the file's FILEMAP
    properties) is written to the 'html' destination. Nothing is returned.
    """
    with open(input_filepath, encoding='utf-8') as source:
        text = source.read()

    file_properties = FILEMAP.get(input_filepath)
    preview = JINJA_TEMPLATE_TEXTARTICLE.render(license=LICENSE, **file_properties)

    destinations = file_properties['dst_path']
    outputs = (
        (destinations['raw'], text),
        (destinations['html'], preview),
    )
    for destination, payload in outputs:
        with open(destination, "w+", encoding='utf-8') as sink:
            sink.write(payload)
2021-06-29 13:35:21 +00:00
2024-01-02 18:22:15 +00:00
def render_generic_file(input_filepath):
    """
    publish a file that gets no html rendering

    The file is copied as-is to the 'raw' destination that FILEMAP reports
    for input_filepath. Nothing is returned.
    """
    raw_destination = FILEMAP.get(input_filepath)['dst_path']['raw']
    shutil.copyfile(input_filepath, raw_destination)
2021-06-29 13:35:21 +00:00
2024-01-02 18:22:15 +00:00
def render_file(input_filepath):
    """
    render any file by picking the renderer that matches its type

    Files with a '.md' suffix go to render_markdown_file, files that
    FileMap.is_plaintext accepts go to render_plaintext_file, and everything
    else goes to render_generic_file. The chosen renderer's return value is
    passed through.
    """
    if input_filepath.suffix == '.md':
        renderer = render_markdown_file
    elif FileMap.is_plaintext(input_filepath):
        renderer = render_plaintext_file
    else:
        renderer = render_generic_file

    return renderer(input_filepath)
2021-06-29 13:35:21 +00:00
2024-01-02 18:22:15 +00:00
def render_markdown(content):
    """
    render markdown to html

    Posts ``content`` to the pandoc server at PANDOC_SERVER_URL (waiting at
    most PANDOC_TIMEOUT seconds) and returns the ``output`` field of the JSON
    reply.

    Raises requests.HTTPError when the server answers with an error status and
    RuntimeError when the reply carries an ``error`` field instead of output.
    """
    post_body = {
        'text': content,
        'toc-depth': 6,
        'highlight-style': 'pygments',
        'html-math-method': 'mathml',
        'to': 'html',
        # NOTE(review): presumably supplies an empty abbreviations data file
        # to the server -- confirm why this is needed
        'files': {
            'data/data/abbreviations': '',
        },
        'standalone': False,
    }

    headers = {
        'Accept': 'application/json'
    }

    response = requests.post(
        PANDOC_SERVER_URL,
        headers=headers,
        json=post_body,
        timeout=PANDOC_TIMEOUT
    )
    # surface HTTP failures directly instead of failing later while decoding
    # a body that is not the expected JSON
    response.raise_for_status()

    result = response.json()
    if 'error' in result:
        # a failed conversion has no 'output' key; report the server's message
        # rather than an opaque KeyError
        raise RuntimeError(f"pandoc server failed to render markdown: {result['error']}")

    # TODO look at result['messages'] and log them maybe?
    # https://github.com/jgm/pandoc/blob/main/doc/pandoc-server.md#response
    return result['output']
2023-09-17 19:18:24 +00:00
def process_home_index(args, notes_git_head_sha1=None):
    """
    create home index.html in output_dir

    When the notes directory contains an index.md, its frontmatter keys and
    markdown body override the default title and (empty) content of the page.
    notes_git_head_sha1 is passed through to the template unchanged.
    """
    post = {
        'title': 'gronk',
        'content': ''
    }

    custom_content_file = args.notes.joinpath('index.md')
    print(f'{custom_content_file=}')
    if custom_content_file.is_file():
        # read as utf-8 explicitly, like every other file read in this script,
        # instead of depending on the platform's default encoding
        custom_text = custom_content_file.read_text(encoding='utf-8')
        post.update(frontmatter.loads(custom_text).to_dict())

    # without any body text there is nothing to convert, so the round trip to
    # the pandoc server is skipped
    if post['content']:
        post['content'] = render_markdown(post['content'])

    html = JINJA_TEMPLATE_HOME_INDEX.render(
        n2w_commit=N2W_COMMIT,
        search_data=FILEMAP.to_search_data(),
        notes_git_head_sha1=notes_git_head_sha1,
        post=post,
    )

    args.output_dir.joinpath('index.html').write_text(html, encoding='utf-8')
2024-01-02 18:22:15 +00:00
2023-09-17 19:18:24 +00:00
def generate_tag_browser(output_dir) :
    """
    generate a directory that lets you group by and browse by any given tag. e.g. tags, authors

    Writes ``<output_dir>/<tag>/index.html`` for every tag, listing the posts
    that carry it, and ``<output_dir>/index.html`` listing all tags.
    Every post from FILEMAP gets its 'path' key set to its web destination.
    """
    tags = {}

    for post in FILEMAP.to_list():
        post['path'] = post['dst_path']['web']

        if 'tags' not in post:
            continue

        for tag in post['tags']:
            tags.setdefault(tag, []).append(post)

    # one index page per tag
    for tag, index_entries in tags.items():
        output_file = output_dir.joinpath(tag, 'index.html')
        output_file.parent.mkdir(exist_ok=True, parents=True)
        # utf-8 explicitly, consistent with the other html writers in this script
        output_file.write_text(
            JINJA_TEMPLATE_INDEX.render(
                automatic_index=True,
                search_bar=True,
                title=tag,
                index_entries=index_entries,
            ),
            encoding='utf-8',
        )

    # top-level page linking to every tag
    output_file = output_dir.joinpath('index.html')
    output_file.parent.mkdir(exist_ok=True, parents=True)
    output_file.write_text(
        JINJA_TEMPLATE_INDEX.render(
            automatic_index=True,
            search_bar=True,
            title='tags',
            index_entries=[{'path': tag, 'title': tag, 'is_dir': False} for tag in tags],
        ),
        encoding='utf-8',
    )
2024-01-02 18:22:15 +00:00
def main(args):
    """
    Entry point for script

    args is the namespace produced by get_args(). Builds the whole site into
    args.output_dir: a directory index and rendered files for everything under
    args.notes, the home index, the css/js assets and the tag browser.

    Returns the process exit code: 0 on success, 1 when args.output_dir is an
    existing file.
    """
    global LICENSE
    global GIT_REPO
    global FILEMAP

    # stop here with a clear message; carrying on would only make the mkdir
    # below fail with a FileExistsError
    if args.output_dir.is_file():
        print(f"Output directory ({args.output_dir}) cannot be a file.", file=sys.stderr)
        return 1

    FILEMAP = FileMap(args.notes, args.output_dir.joinpath('notes'))

    args.output_dir.mkdir(parents=True, exist_ok=True)

    # attempt to get licensing information
    license_path = args.notes.joinpath("LICENSE")
    if license_path.is_file():
        with open(license_path, encoding='utf-8') as file_pointer:
            LICENSE = file_pointer.read()

    # create git.Repo object if notes dir is a git repo
    # (iterdir() yields Path objects, so a membership test against the string
    # '.git' can never match; check for the entry itself instead)
    # TODO git commit log integration
    if args.notes.joinpath('.git').exists():
        try:
            GIT_REPO = git.Repo(args.notes)
        except git.InvalidGitRepositoryError:
            # a stray or broken .git entry must not abort site generation
            GIT_REPO = None

    for root_str, _subdirectories, files in os.walk(args.notes):
        root = Path(root_str)
        if '.git' in root.parts:
            continue

        # every directory gets its own index page
        root_properties = FILEMAP.get(root)
        root_properties['dst_path']['raw'].mkdir(parents=True, exist_ok=True)

        html = JINJA_TEMPLATE_INDEX.render(**root_properties)
        index_path = root_properties['dst_path']['raw'].joinpath('index.html')
        with open(index_path, 'w+', encoding='utf-8') as file_pointer:
            file_pointer.write(html)

        # render each file
        for file in files:
            render_file(root.joinpath(file))

    process_home_index(args)

    # copy styling and js scripts necessary for function
    shutil.copytree(CSS_DIR, args.output_dir.joinpath('css'), dirs_exist_ok=True)
    shutil.copytree(JS_DIR, args.output_dir.joinpath('js'), dirs_exist_ok=True)

    generate_tag_browser(args.output_dir.joinpath('tags'))

    return 0
2024-01-02 18:22:15 +00:00
# TODO implement useful logging and debug printing
2021-06-29 13:35:21 +00:00
if __name__ == '__main__':
    # Ctrl-C counts as a clean exit rather than a traceback.
    try:
        exit_code = main(get_args())
    except KeyboardInterrupt:
        exit_code = 0
    sys.exit(exit_code)