mirror of
https://github.com/alvierahman90/gronk.git
synced 2024-11-21 23:09:52 +00:00
641 lines
20 KiB
Python
Executable File
641 lines
20 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
gronk --- view your notes as a static html site
|
|
"""
|
|
|
|
import argparse
|
|
import os
|
|
from pathlib import Path
|
|
import shutil
|
|
import sys
|
|
import subprocess
|
|
import copy
|
|
import time
|
|
import magic
|
|
import regex as re
|
|
import pprint
|
|
from datetime import datetime as dt
|
|
|
|
import frontmatter
|
|
import jinja2
|
|
import requests
|
|
|
|
# version identifier handed to the templates as `gronk_commit`
GRONK_COMMIT = "dev"

# runtime configuration; every value can be overridden through the
# environment variable of the same name
PANDOC_SERVER_URL = os.getenv("PANDOC_SERVER_URL", r"http://localhost:3030/")
PANDOC_TIMEOUT = int(os.getenv("PANDOC_TIMEOUT", "120"))
GRONK_CSS_DIR = Path(os.getenv("GRONK_CSS_DIR", "/opt/gronk/css"))
GRONK_JS_DIR = Path(os.getenv("GRONK_JS_DIR", "/opt/gronk/js"))
GRONK_TEMPLATES_DIR = Path(
    os.getenv("GRONK_TEMPLATES_DIR", "/opt/gronk/templates/"))

# `select_autoescape` is a factory and has to be *called*: handing the bare
# function to Environment makes jinja call it with the template name and
# treat the returned function object as an (always truthy) flag. The default
# extensions (html, htm, xml) cover every template loaded below.
JINJA_ENV = jinja2.Environment(
    loader=jinja2.FileSystemLoader(searchpath=GRONK_TEMPLATES_DIR),
    autoescape=jinja2.select_autoescape())

# templates are loaded once at import time, so a missing template fails
# immediately instead of half-way through a build
JINJA_TEMPLATE_TEXTARTICLE = JINJA_ENV.get_template("article-text.html")
JINJA_TEMPLATE_HOME_INDEX = JINJA_ENV.get_template("home.html")
JINJA_TEMPLATE_INDEX = JINJA_ENV.get_template("index.html")
JINJA_TEMPLATE_ARTICLE = JINJA_ENV.get_template("article.html")
JINJA_TEMPLATE_PERMALINK = JINJA_ENV.get_template("permalink.html")

JINJA_TEMPLATE_BLOGINDEX = JINJA_ENV.get_template("blog_index.html")
JINJA_TEMPLATE_BLOG_INLINE_POST = JINJA_ENV.get_template("blog_inline_post.html")
JINJA_TEMPLATE_BLOG_FEED = JINJA_ENV.get_template("rss.xml")

# module-wide state, filled in by main()
LICENSE = None  # text of <notes>/LICENSE when that file exists
FILEMAP = None  # FileMap instance for the current build
|
|
|
|
|
|
class FileMap:
    """
    this class is used to read file properties, inherit properties,
    and have a centralised place to access them

    properties are plain dicts stored under the string form of the source
    path; entries are created lazily the first time a path is requested
    """

    def __init__(self, input_dir, output_dir):
        """
        input_dir is the root of the notes, output_dir is the directory the
        rendered notes are written to
        """
        self._map = {}
        self.input_dir = Path(input_dir)
        self.output_dir = Path(output_dir)

    def get_base_url(self):
        """
        return the `base_url` property of the readme.md at the notes root

        raises KeyError when the readme does not define `base_url`
        """
        props = self.get(self.input_dir.joinpath('readme.md'))
        return props['base_url']

    @staticmethod
    def _path_to_key(path):
        """ turn a path into the key used by the internal map """
        return str(path)

    @staticmethod
    def is_plaintext(filename):
        """ return True when libmagic reports a text/* mime type """
        mime_type = magic.from_file(str(filename), mime=True)
        return re.match(r'^text/', mime_type) is not None

    def add(self, filepath):
        """
        read the properties of filepath (file or directory) and store them,
        together with its source path and output paths, in the map
        """
        filepath = Path(filepath)
        if filepath.is_dir():
            properties = self._get_directory_properties(filepath)
        else:
            properties = self._get_file_properties(filepath)

        properties['src_path'] = filepath
        properties['dst_path'] = self._get_output_filepath(filepath)

        self._map[self._path_to_key(filepath)] = properties

    def get(self, filepath, default=None, raw=False):
        """
        get the properties of a file at a filepath
        raw=True to not inherit properties

        returns a deep copy, so callers may modify the result freely.
        without raw, the tags of every parent directory up to and including
        the input directory are appended to the file's own tags
        """
        # TODO maybe store properties of a file once it's in built and mark it
        # as built? might save time but also cba
        filepath = Path(filepath)
        key = self._path_to_key(filepath)
        if key not in self._map:
            self.add(filepath)

        properties = copy.deepcopy(self._map.get(key, default))

        if raw:
            return properties

        # walk upwards, but never past the input directory: its parents are
        # not part of the notes and cannot be mapped to an output path
        parent = filepath
        while parent != self.input_dir:
            parent = parent.parent
            # Path('.') ends a relative walk; parent == parent.parent is
            # the filesystem root, where the walk could never terminate
            if parent == Path('.') or parent == parent.parent:
                break

            parent_properties = self.get(parent, raw=True)
            # TODO inherit any property that isn't defined, append any lists
            # that exist
            # `or []` copes with front matter that declares `tags:` empty
            properties['tags'] = (properties.get('tags') or []) + (
                parent_properties.get('tags') or [])

        return properties

    def _get_directory_properties(self,
                                  filepath: Path,
                                  include_index_entries=True):
        """
        build the properties of a directory

        starts from defaults, overrides them with the front matter of the
        directory's readme.md (if any), renders the readme content to html
        and, unless include_index_entries is False, lists the directory's
        children under `index_entries`
        """
        post = {
            'title': filepath.name,
            'blog': False,
            'content_after_search': None,
            'automatic_index': True,
            'search_bar': True,
            'tags': [],
        }

        if any(child.name == 'readme.md' for child in filepath.iterdir()):
            with open(filepath.joinpath('readme.md'),
                      encoding='utf-8') as file_pointer:
                post.update(frontmatter.load(file_pointer).to_dict())

        # unless set explicitly, blogs show their content after the search
        # bar and ordinary directories show it before
        if post['content_after_search'] is None:
            post['content_after_search'] = post['blog']

        if 'content' in post:
            post['content'] = render_markdown(post['content'])

        post['is_dir'] = True

        if include_index_entries:
            post['index_entries'] = self._get_index_entries(filepath)

        return post

    def _get_index_entries(self, filepath):
        """
        return the properties of every child of a directory, directories
        first, each group ordered by lower-cased title

        readme.md and anything inside .git is left out; every entry gets a
        `path` holding its web path
        """
        entries = []

        for path in filepath.iterdir():
            if '.git' in path.parts or path.name == 'readme.md':
                continue

            if path.is_dir():
                entry = self._get_directory_properties(
                    path, include_index_entries=False)
            else:
                entry = self._get_file_properties(path)

            entry['path'] = self._get_output_filepath(path)['web']
            entries.append(entry)

        # False sorts before True, so `not is_dir` puts directories first
        entries.sort(key=lambda entry: (not entry['is_dir'],
                                        str(entry.get('title', '')).lower()))

        return entries

    def _get_file_properties(self, filepath):
        """
        build the properties of a file

        markdown files use their front matter as-is, every other file gets
        its file name as title and no publication date
        """
        post = {'title': filepath.name, 'pub_date': False}

        if filepath.suffix == '.md':
            with open(filepath, encoding='utf-8') as file_pointer:
                post = frontmatter.load(file_pointer).to_dict()

        # don't store file contents in memory
        post.pop('content', None)
        post['is_dir'] = False

        return post

    def _get_output_filepath(self, input_filepath):
        """
        work out where a source path ends up in the output

        returns a dict with
          raw     - path of the verbatim copy (or of the directory)
          web     - url path of the page to link to
          html    - path of the rendered page (markdown and plaintext only)
          web_raw - url path of the verbatim copy (plaintext only)
        """

        def webpath(filepath):
            return Path('/notes').joinpath(
                filepath.relative_to(self.output_dir))

        raw = self.output_dir.joinpath(
            input_filepath.relative_to(self.input_dir))
        r = {'raw': raw, 'web': webpath(raw)}

        if input_filepath.is_dir():
            return r

        if input_filepath.suffix == '.md':
            r['html'] = raw.with_suffix('.html')
            r['web'] = webpath(r['html'])

        elif self.is_plaintext(input_filepath):
            # keep the original suffix (notes.txt -> notes.txt.html) so the
            # preview cannot collide with a markdown file of the same stem
            r['html'] = raw.with_suffix(input_filepath.suffix + '.html')
            r['web'] = webpath(r['html'])
            r['web_raw'] = webpath(raw)

        return r

    def to_list(self):
        """
        return the stored properties of every path in the map

        the dicts are the stored objects themselves, not copies
        """
        return list(self._map.values())

    def to_search_data(self):
        """
        returns list of every file in map, reduced to the fields the search
        needs (title, tags, path, is_dir)
        """
        return [{
            'title': val.get('title', ''),
            'tags': val.get('tags', []),
            'path': str(val['dst_path']['web']),
            'is_dir': val['is_dir'],
        } for val in self._map.values()]

    def get_uuid_map(self):
        """ return a dict mapping uuid -> web path of every entry with a uuid """
        return {
            val['uuid']: str(val['dst_path']['web'])
            for val in self._map.values() if 'uuid' in val
        }
|
|
|
|
|
|
def rfc822_date_sorter_key(date):
    """
    turn an RFC 822 date string into an integer sort key (unix timestamp)

    undated entries sort as 0. this covers None as well as the False that
    non-markdown files carry as `pub_date`
    """
    if not date:
        return 0

    return int(dt.strptime(date, '%a, %d %b %Y %H:%M:%S %z').timestamp())
|
|
|
|
|
|
def update_required(src_filepath, output_filepath):
    """
    check if file requires an update,
    return boolean

    an update is required when the output does not exist yet or the source
    was modified after the output
    """
    if not output_filepath.exists():
        return True

    return src_filepath.stat().st_mtime > output_filepath.stat().st_mtime
|
|
|
|
|
|
def get_args():
    """ Parse the command line and return the resulting namespace """

    force_help = (
        "Generate new output html even if source file was modified before output html"
    )

    cli = argparse.ArgumentParser()
    # positional: directory holding the notes
    cli.add_argument('notes', type=Path)
    # string defaults are run through `type`, so this also yields a Path
    cli.add_argument('-o', '--output-dir', type=Path, default='web')
    cli.add_argument('-F', '--force', action="store_true", help=force_help)

    parsed = cli.parse_args()
    return parsed
|
|
|
|
def render_inline_blog_post(input_filepath):
    """
    render markdown file as blog post for inlining into blog index

    the rendered post is also written to the file's html output path
    returns html
    """
    with open(input_filepath, encoding='utf-8') as file_pointer:
        content = frontmatter.load(file_pointer).content

    properties = FILEMAP.get(input_filepath)

    html = render_markdown(content)
    html = JINJA_TEMPLATE_BLOG_INLINE_POST.render(
        license=LICENSE,
        content=html,
        lecture_slides=properties.get("lecture_slides"),
        lecture_notes=properties.get("lecture_notes"),
        uuid=properties.get("uuid"),
        tags=properties.get("tags"),
        author=properties.get("author"),
        title=properties.get("title"),
        published=properties.get("pub_date"),
        base_url=FILEMAP.get_base_url(),
    )

    # the source is read as utf-8, so write utf-8 too instead of relying on
    # the locale's default encoding
    properties['dst_path']['html'].write_text(html, encoding='utf-8')

    return html
|
|
|
|
|
|
def render_markdown_file(input_filepath):
    """
    render markdown file to file

    converts the markdown body to html, wraps it in the article template
    and writes the result to the file's html output path
    returns nothing
    """
    with open(input_filepath, encoding='utf-8') as file_pointer:
        content = frontmatter.load(file_pointer).content

    properties = FILEMAP.get(input_filepath)

    html = render_markdown(content)
    html = JINJA_TEMPLATE_ARTICLE.render(
        license=LICENSE,
        content=html,
        lecture_slides=properties.get("lecture_slides"),
        lecture_notes=properties.get("lecture_notes"),
        uuid=properties.get("uuid"),
        tags=properties.get("tags"),
        author=properties.get("author"),
        title=properties.get("title"),
        published=properties.get("pub_date"),
    )

    # the source is read as utf-8, so write utf-8 too instead of relying on
    # the locale's default encoding
    properties['dst_path']['html'].write_text(html, encoding='utf-8')
|
|
|
|
|
|
def render_plaintext_file(input_filepath):
    """
    render plaintext file to file

    copy plaintext file into a html preview, copy raw to output dir
    returns nothing
    """

    # explicit utf-8, matching how every other source file is read
    raw_content = input_filepath.read_text(encoding='utf-8')
    properties = FILEMAP.get(input_filepath)
    html = JINJA_TEMPLATE_TEXTARTICLE.render(
        license=LICENSE,
        **properties,
        raw_link=properties['dst_path']['web_raw'],
        raw_content=raw_content)

    # byte-for-byte copy: writing the decoded text back would translate
    # newlines and re-encode the file
    shutil.copyfile(input_filepath, properties['dst_path']['raw'])
    properties['dst_path']['html'].write_text(html, encoding='utf-8')
|
|
|
|
|
|
def render_generic_file(input_filepath):
    """
    handle a file that is neither markdown nor plaintext

    the file is copied unchanged to its raw output path
    returns nothing
    """
    destination = FILEMAP.get(input_filepath)['dst_path']['raw']
    shutil.copyfile(input_filepath, destination)
|
|
|
|
|
|
def render_file(input_filepath):
    """
    pick the renderer matching the file type and run it

    markdown files (by suffix) go to render_markdown_file, files detected
    as plaintext to render_plaintext_file, everything else to
    render_generic_file
    returns whatever the chosen renderer returns
    """

    if input_filepath.suffix == '.md':
        renderer = render_markdown_file
    elif FileMap.is_plaintext(input_filepath):
        renderer = render_plaintext_file
    else:
        renderer = render_generic_file

    return renderer(input_filepath)
|
|
|
|
|
|
def render_markdown(content):
    """
    render markdown to html

    posts the markdown to the pandoc server at PANDOC_SERVER_URL and
    returns the html fragment it produces

    raises requests.HTTPError on a non-success http status and RuntimeError
    when the server reports a conversion error
    """

    post_body = {
        'text': content,
        'toc-depth': 6,
        'highlight-style': 'pygments',
        'html-math-method': 'mathml',
        'to': 'html',
        'files': {
            'data/data/abbreviations': '',
        },
        'standalone': False,
    }

    # asking for json makes the server answer with an object rather than
    # the bare document
    headers = {'Accept': 'application/json'}

    response = requests.post(PANDOC_SERVER_URL,
                             headers=headers,
                             json=post_body,
                             timeout=PANDOC_TIMEOUT)
    response.raise_for_status()

    result = response.json()

    # a failed conversion is reported through an `error` field; surface it
    # instead of failing later with KeyError: 'output'
    if 'error' in result:
        raise RuntimeError(
            f"pandoc-server failed to render markdown: {result['error']}")

    # TODO look at response['messages'] and log them maybe?
    # https://github.com/jgm/pandoc/blob/main/doc/pandoc-server.md#response

    return result['output']
|
|
|
|
|
|
def process_home_index(args, notes_git_head_sha1=None):
    """
    create home index.html in output_dir

    title and content come from <notes>/readme.md when it exists, otherwise
    the page is titled 'gronk' and has no content
    """

    post = {'title': 'gronk', 'content': ''}
    custom_content_file = args.notes.joinpath('readme.md')
    if custom_content_file.is_file():
        # explicit utf-8 on both ends instead of the locale's default
        readme_text = custom_content_file.read_text(encoding='utf-8')
        post.update(frontmatter.loads(readme_text).to_dict())

    post['content'] = render_markdown(post['content'])

    html = JINJA_TEMPLATE_HOME_INDEX.render(
        gronk_commit=GRONK_COMMIT,
        search_data=FILEMAP.to_search_data(),
        notes_git_head_sha1=notes_git_head_sha1,
        post=post)

    args.output_dir.joinpath('index.html').write_text(html, encoding='utf-8')
|
|
|
|
|
|
def generate_permalink_page(output_dir):
    """
    create the directory and index.html for redirecting permalinks

    the page is handed the uuid -> web path map of every known file
    """

    # named permalink_dir rather than `dir` to avoid shadowing the builtin
    permalink_dir = output_dir.joinpath('permalink')
    permalink_dir.mkdir(parents=True, exist_ok=True)

    html = JINJA_TEMPLATE_PERMALINK.render(title="redirecting... | gronk",
                                           gronk_commit=GRONK_COMMIT,
                                           data=FILEMAP.get_uuid_map())
    permalink_dir.joinpath('index.html').write_text(html, encoding='utf-8')
|
|
|
|
|
|
def generate_tag_browser(output_dir):
    """
    generate a directory that lets you group by and browse by any given tag

    writes <output_dir>/<tag>/index.html for every tag, listing the entries
    carrying it, and <output_dir>/index.html listing all tags
    """
    tags = {}

    for post in FILEMAP.to_list():
        # build a separate entry instead of adding keys to the dict owned
        # by FILEMAP
        entry = {
            'title': post.get('title', ''),
            'is_dir': post.get('is_dir', False),
            'path': str(post['dst_path']['web']),
        }

        # `or []` copes with entries that have no tags or `tags:` left empty
        for tag in post.get('tags') or []:
            tags.setdefault(tag, []).append(entry)

    for tag, index_entries in tags.items():
        # yaml may parse a tag such as 2024 as a number; paths need strings
        output_file = output_dir.joinpath(str(tag), 'index.html')
        output_file.parent.mkdir(exist_ok=True, parents=True)
        output_file.write_text(
            JINJA_TEMPLATE_INDEX.render(
                gronk_commit=GRONK_COMMIT,
                automatic_index=True,
                search_bar=True,
                title=tag,
                index_entries=index_entries,
            ),
            encoding='utf-8')

    output_file = output_dir.joinpath('index.html')
    output_file.parent.mkdir(exist_ok=True, parents=True)
    output_file.write_text(
        JINJA_TEMPLATE_INDEX.render(automatic_index=True,
                                    gronk_commit=GRONK_COMMIT,
                                    search_bar=True,
                                    title='tags',
                                    index_entries=[{
                                        'path': tag,
                                        'title': tag,
                                        'is_dir': False,
                                    } for tag in tags]),
        encoding='utf-8')
|
|
|
|
|
|
def main(args):
    """
    Entry point for script

    walks the notes directory, renders an index (plus rss feed for blogs)
    for every directory and every file in it, then writes the home page,
    static assets, tag browser and permalink page

    returns the process exit code: 0 on success, 1 when the output
    directory cannot be used
    """

    start_time = time.time()

    global LICENSE
    global FILEMAP

    FILEMAP = FileMap(args.notes, args.output_dir.joinpath('notes'))

    # TODO have some sort of 'site rebuild in progress - come back in a minute
    # or two!' or auto checking/refreshing page for when site is being built

    if args.output_dir.is_file():
        print(f"Output directory ({args.output_dir}) cannot be a file.",
              file=sys.stderr)
        # stop here: the mkdir below would only fail with FileExistsError
        return 1

    args.output_dir.mkdir(parents=True, exist_ok=True)

    # attempt to get licensing information
    license_path = args.notes.joinpath("LICENSE")
    if license_path.exists():
        LICENSE = license_path.read_text(encoding='utf-8')

    # TODO git commit log integration

    for root_str, _, files in os.walk(args.notes):
        root = Path(root_str)
        if '.git' in root.parts:
            continue

        root_properties = FILEMAP.get(root)
        output_root = root_properties['dst_path']['raw']
        output_root.mkdir(parents=True, exist_ok=True)

        posts = []
        if root_properties['blog']:
            base_url = FILEMAP.get_base_url()

            for file in files:
                post_path = root.joinpath(file)
                props = FILEMAP.get(post_path)
                posts.append({
                    # front matter without a title falls back to the file
                    # name instead of raising KeyError
                    'title': props.get('title', file),
                    'link': props['dst_path']['web'],
                    'pub_date': props.get('pub_date'),
                    'description': render_inline_blog_post(post_path),
                })

            # newest first
            posts.sort(
                key=lambda p: rfc822_date_sorter_key(p.get('pub_date')),
                reverse=True
            )

            # render rss feed
            rss = JINJA_TEMPLATE_BLOG_FEED.render(
                title=root_properties.get('title', ''),
                description=root_properties.get('content', ''),
                base_url=base_url,
                link=f"{base_url}{root_properties['dst_path']['web']}",
                language='en-GB',
                posts=posts,
            )
            # the same feed is published under both names
            output_root.joinpath('feed.xml').write_text(rss, encoding='utf-8')
            output_root.joinpath('rss.xml').write_text(rss, encoding='utf-8')

        # render index
        index_template = (JINJA_TEMPLATE_BLOGINDEX
                          if root_properties['blog'] else JINJA_TEMPLATE_INDEX)
        html = index_template.render(
            gronk_commit=GRONK_COMMIT,
            title=root_properties.get('title', ''),
            content=root_properties.get('content', ''),
            content_after_search=root_properties['content_after_search'],
            automatic_index=root_properties['automatic_index'],
            search_bar=root_properties['search_bar'],
            posts=posts,
            index_entries=[{
                'title': entry.get('title', ''),
                'is_dir': entry.get('is_dir', False),
                'path': str(entry.get('path', Path(''))),
            } for entry in root_properties.get('index_entries', '')],
        )
        output_root.joinpath('index.html').write_text(html, encoding='utf-8')

        # render each file
        for file in files:
            render_file(root.joinpath(file))

    process_home_index(args)

    # copy styling and js scripts necessary for function
    shutil.copytree(GRONK_CSS_DIR,
                    args.output_dir.joinpath('css'),
                    dirs_exist_ok=True)
    shutil.copytree(GRONK_JS_DIR,
                    args.output_dir.joinpath('js'),
                    dirs_exist_ok=True)

    generate_tag_browser(args.output_dir.joinpath('tags'))
    generate_permalink_page(args.output_dir)

    elapsed_time = time.time() - start_time
    print(f"generated notes {elapsed_time=}")

    return 0
|
|
|
|
|
|
def start_pandoc_server():
    """
    start pandoc-server and wait until it answers

    polls the server's version endpoint in a loop until a response arrives,
    prints the version and start-up time and returns the Popen object of
    the server process

    raises RuntimeError when the server process exits before answering
    """
    start_time = time.time()
    process = subprocess.Popen(["/usr/bin/pandoc-server"],
                               stdout=subprocess.PIPE)
    # the configured url may or may not end in a slash; avoid `//version`
    version_url = f"{PANDOC_SERVER_URL.rstrip('/')}/version"
    version = None

    while True:
        try:
            # timeout so a server that accepts but never answers cannot
            # hang the start-up forever
            resp = requests.get(version_url, timeout=PANDOC_TIMEOUT)
            version = resp.content.decode('utf-8')
            break
        except (requests.ConnectionError, requests.Timeout):
            time.sleep(0.1)
            # poll() is None while the process is still running
            rc = process.poll()
            if rc is not None:
                print(f"PANDOC SERVER FAILED TO START: {rc=}")
                print(process.stdout.read().decode("utf-8"))
                raise RuntimeError("Pandoc server failed to start")

    elapsed_time = time.time() - start_time
    print(f"pandoc-server started {version=} {elapsed_time=}")
    return process
|
|
|
|
|
|
# TODO implement useful logging and debug printing
|
|
|
|
if __name__ == '__main__':
    # parse the command line first so that --help and usage errors exit
    # without spawning (and waiting for) pandoc-server
    cli_args = get_args()

    pandoc_process = start_pandoc_server()

    try:
        sys.exit(main(cli_args))
    except KeyboardInterrupt:
        sys.exit(0)
    finally:
        # always take the server down, whatever the outcome of the build
        pandoc_process.kill()
|