From 29529cfd6a669adb4b93b7ca40158463891828b7 Mon Sep 17 00:00:00 2001 From: Akbar Rahman Date: Tue, 2 Jan 2024 18:22:15 +0000 Subject: [PATCH] begin rewrite --- Makefile | 11 +- styles.css => css/styles.css | 1 + fileproperties.py | 184 +++++++++ fuse.js => js/fuse.js | 0 indexsearch.js => js/indexsearch.js | 0 permalink.js => js/permalink.js | 0 search.js => js/search.js | 18 +- toc_search.js => js/toc_search.js | 0 notes2web.py | 586 ++++++++++------------------ readme.md | 92 ++--- requirements.txt | 1 + templates/article.html | 63 ++- templates/extra_index_content.html | 4 - templates/home_index.html | 16 +- templates/index.html | 38 +- templates/indexfoot.html | 7 - templates/indexhead.html | 21 - templates/permalink_index.html | 4 +- templates/textarticle.html | 9 + templates/textarticlefoot.html | 3 - templates/textarticlehead.html | 34 -- 21 files changed, 539 insertions(+), 553 deletions(-) rename styles.css => css/styles.css (99%) create mode 100644 fileproperties.py rename fuse.js => js/fuse.js (100%) rename indexsearch.js => js/indexsearch.js (100%) rename permalink.js => js/permalink.js (100%) rename search.js => js/search.js (91%) rename toc_search.js => js/toc_search.js (100%) delete mode 100644 templates/extra_index_content.html delete mode 100644 templates/indexfoot.html delete mode 100644 templates/indexhead.html create mode 100644 templates/textarticle.html delete mode 100644 templates/textarticlefoot.html delete mode 100644 templates/textarticlehead.html diff --git a/Makefile b/Makefile index f35bc3f..6e37981 100644 --- a/Makefile +++ b/Makefile @@ -1,16 +1,9 @@ install: cp n2w_add_uuid.py /usr/local/bin sed "s/N2W_COMMIT = \"\"/N2W_COMMIT = \"$$(git rev-parse --short HEAD)\"/" notes2web.py > /usr/local/bin/notes2web.py - pip3 install -r requirements.txt --break-system-packages mkdir -p /opt/notes2web - cp -r templates /opt/notes2web - cp styles.css /opt/notes2web - cp fuse.js /opt/notes2web - cp search.js /opt/notes2web - cp indexsearch.js 
/opt/notes2web - cp toc_search.js /opt/notes2web - cp permalink.js /opt/notes2web - chmod +x /usr/local/bin/notes2web.py + cp -r templates js css /opt/notes2web + pip3 install -r requirements.txt uninstall: rm -rf /usr/local/bin/notes2web.py /usr/local/bin/n2w_add_uuid.py /opt/notes2web diff --git a/styles.css b/css/styles.css similarity index 99% rename from styles.css rename to css/styles.css index b854a97..5f629c6 100644 --- a/styles.css +++ b/css/styles.css @@ -3,6 +3,7 @@ @import url("https://styles.alv.cx/modules/search.css"); @import url("https://styles.alv.cx/modules/buttonlist.css"); @import url("https://styles.alv.cx/modules/darkmode.css"); +@import url("/notes/styles.css"); html { scroll-behavior: smooth; diff --git a/fileproperties.py b/fileproperties.py new file mode 100644 index 0000000..effa137 --- /dev/null +++ b/fileproperties.py @@ -0,0 +1,184 @@ +from pathlib import Path +import frontmatter +import copy +import magic +import regex as re + + +class FileMap: + """ + this class is used to read file properties, inherit properties, and have a centralised place to access them + """ + def __init__(self, input_dir, output_dir): + self._map = {} + self.input_dir = Path(input_dir) + self.output_dir = Path(output_dir) + + @staticmethod + def _path_to_key(path): + return str(Path(path)) + + def get(self, filepath, default=None, raw=False): + """ + get the properties of a file at a filepath + raw=True to not inherit properties + """ + #print(f"FileMap.get({filepath=}, {default=}, {raw=})") + # TODO maybe store properties of a file once it's in built and mark it as built? 
might save time but also cba + if self._path_to_key(filepath) not in self._map.keys(): + self.add(filepath) + + properties = copy.deepcopy(self._map.get(self._path_to_key(filepath), default)) + #print(f"FileMap.get({filepath=}, {default=}, {raw=}): {properties=}") + + if raw: + return properties + + parent = filepath + while True: + parent = parent.parent + if parent == Path('.'): + break + + parent_properties = self.get(parent, raw=True) + # TODO inherit any property that isn't defined, append any lists that exist + properties['tags'] = properties.get('tags', []) + parent_properties.get('tags', []) + + if parent == self.input_dir: + break + + return properties + + def add(self, filepath): + filepath = Path(filepath) + #print(f"FileMap.add({filepath=}") + if filepath.is_dir(): + properties = self._get_directory_properties(filepath) + else: + properties = self._get_file_properties(filepath) + + properties['src_path'] = filepath + properties['dst_path'] = self._get_output_filepath(filepath) + + self._map[self._path_to_key(filepath)] = properties + + + def _get_directory_properties(self, filepath: Path, include_index_entries=True): + """ + return dict of directory properties to be used in pandoc template + """ + + post = { + 'title': filepath.name, + 'content_after_search': False, + 'automatic_index': True, + 'search_bar': True, + 'tags': [], + } + + if 'index.md' in filepath.iterdir(): + with open(filepath.joinpath('index.md'), encoding='utf-8') as file_pointer: + for key, val in frontmatter.load(file_pointer).to_dict(): + post[key] = val + + post['is_dir'] = True + + if include_index_entries: + post['index_entries'] = self._get_index_entries(filepath) + + return post + + + def _get_index_entries(self, filepath): + """ + return sorted list of index entries. 
alphabetically sorted, folders first + """ + entries = [] + + for path in filepath.iterdir(): + print(f'{path=}') + if path.is_dir(): + entry = self._get_directory_properties(path, include_index_entries=False) + else: + entry = self._get_file_properties(path) + + entry['path'] = self._get_output_filepath(path)['web'] + entries.append(entry) + #print(f"FileMap._get_index_entries({filepath=}): {entry=}") + + + entries.sort(key=lambda entry: str(entry['title']).lower()) + entries.sort(key=lambda entry: entry['is_dir'], reverse=True) + + return entries + + def _get_file_properties(self, filepath): + #print(f"FileMap._get_file_properties({filepath=}") + post = { 'title': filepath.name } + + if filepath.suffix == '.md': + with open(filepath, encoding='utf-8') as file_pointer: + post = frontmatter.load(file_pointer).to_dict() + + # don't store file contents in memory + if 'content' in post.keys(): + del post['content'] + post['is_dir'] = False + + return post + + + def _get_output_filepath(self, input_filepath): + + def webpath(filepath): + return Path('/notes').joinpath(filepath.relative_to(self.output_dir)) + + + r = {} + r['raw'] = self.output_dir.joinpath(input_filepath.relative_to(self.input_dir)) + r['web'] = webpath(r['raw']) + + if input_filepath.is_dir(): + return r + + if input_filepath.suffix == '.md': + r['html'] = self.output_dir.joinpath( + input_filepath.relative_to(self.input_dir) + ).with_suffix('.html') + r['web'] = webpath(r['html']) + + elif self.is_plaintext(input_filepath): + r['html'] = self.output_dir.joinpath( + input_filepath.relative_to(self.input_dir) + ).with_suffix(input_filepath.suffix + '.html') + r['raw'] = self.output_dir.joinpath(input_filepath.relative_to(self.input_dir)) + r['web'] = webpath(r['html']) + + #print(f"{r=}") + + return r + + + def to_list(self): + """ + returns list of every file in map + """ + r = [] + for _, val in self._map.items(): + r.append({ + 'title': val.get('title', ''), + 'tags': val.get('tags', []), + 'path': 
str(val['dst_path']['web']), + 'is_dir': val['is_dir'] + }) + + return r + + + @staticmethod + def is_plaintext(filename): + """ + check if file is a plaintext format, such as html, css, etc, + return boolean + """ + return re.match(r'^text/', magic.from_file(str(filename), mime=True)) is not None diff --git a/fuse.js b/js/fuse.js similarity index 100% rename from fuse.js rename to js/fuse.js diff --git a/indexsearch.js b/js/indexsearch.js similarity index 100% rename from indexsearch.js rename to js/indexsearch.js diff --git a/permalink.js b/js/permalink.js similarity index 100% rename from permalink.js rename to js/permalink.js diff --git a/search.js b/js/search.js similarity index 91% rename from search.js rename to js/search.js index 618b7dc..236ff51 100644 --- a/search.js +++ b/js/search.js @@ -1,3 +1,13 @@ +/* + * search.js expects an array `data` containing objects with the following keys: + * headers: [string] + * path: string + * tags: [string] + * title: string + * uuid: string + */ + + const HEADERS = "headers" const PATH = "path" const TAGS = "tags" @@ -6,11 +16,11 @@ const TITLE = "title" const SEARCH_TIMEOUT_MS = 100 var SEARCH_TIMEOUT_ID = -1 -const fuse = new Fuse(data, { +const fuse = new Fuse(search_data, { keys: [ { name: HEADERS, - weight: 0.2 + weight: 1 }, { name: PATH, @@ -18,11 +28,11 @@ const fuse = new Fuse(data, { }, { name: TAGS, - weight: 0.1 + weight: 0.5 }, { name: TITLE, - weight: 4 + weight: 2 } ], includeMatches: true, diff --git a/toc_search.js b/js/toc_search.js similarity index 100% rename from toc_search.js rename to js/toc_search.js diff --git a/notes2web.py b/notes2web.py index a983fe4..e1784bd 100755 --- a/notes2web.py +++ b/notes2web.py @@ -1,426 +1,274 @@ #!/usr/bin/env python3 +""" +notes2web --- view your notes as a static html site +""" -from bs4 import BeautifulSoup as bs -import subprocess -import frontmatter -import magic -import sys -import pathlib -import pypandoc -import shutil +import argparse import os -import 
regex as re +from pathlib import Path +import shutil +import sys +import pprint + import json -import yaml +import frontmatter +import git +import jinja2 +import requests + +from fileproperties import FileMap -TEXT_ARTICLE_TEMPLATE_FOOT = None -TEXT_ARTICLE_TEMPLATE_HEAD = None -INDEX_TEMPLATE_FOOT = None -INDEX_TEMPLATE_HEAD = None -EXTRA_INDEX_CONTENT = None N2W_COMMIT = "" - -def is_plaintext(filename): - return re.match(r'^text/', magic.from_file(str(filename), mime=True)) is not None - -def get_files(folder): - markdown = [] - plaintext = [] - other = [] - - for root, folders, files in os.walk(folder): - for filename in files: - if '/.git' in root: - continue - name = os.path.join(root, filename) - if pathlib.Path(name).suffix == '.md': - markdown.append(name) - elif is_plaintext(name): - plaintext.append(name) - other.append(name) - else: - other.append(name) - - return markdown, plaintext, other - -def get_inherited_tags(file, base_folder): - tags = [] - folder = pathlib.Path(file) - - while folder != base_folder.parent: - print(f"get_inherited_tags {folder=}") - folder = pathlib.Path(folder).parent - folder_metadata = folder.joinpath('.n2w.yml') - if not folder_metadata.exists(): - continue - - with open(folder.joinpath('.n2w.yml')) as fp: - folder_properties = yaml.safe_load(fp) - - tags += folder_properties.get('itags') - - print(f"get_inherited_tags {tags=}") - return list(set(tags)) +PANDOC_SERVER_URL = os.getenv("PANDOC_SERVER_URL", r"http://localhost:3030/") +PANDOC_TIMEOUT = int(os.getenv("PANDOC_TIMEOUT", "120")) +CSS_DIR = Path(os.getenv("CSS_DIR", "/opt/notes2web/css")) +JS_DIR = Path(os.getenv("JS_DIR", "/opt/notes2web/js")) +TEMPLATES_DIR = Path(os.getenv("TEMPLATES_DIR", "/opt/notes2web/templates")) -def git_head_sha1(working_dir): - git_response = subprocess.run( - [ 'git', f"--git-dir={working_dir.joinpath('.git')}", 'rev-parse', '--short', 'HEAD' ], - stdout=subprocess.PIPE - ).stdout.decode('utf-8') - - return git_response.strip() - - -def 
git_filehistory(working_dir, filename): - print(f"{pathlib.Path(filename).relative_to(working_dir)=}") - git_response = subprocess.run( - [ - 'git', - f"--git-dir={working_dir.joinpath('.git')}", - "log", - "-p", - "--", - pathlib.Path(filename).relative_to(working_dir) - ], - stdout=subprocess.PIPE - ) - - filehistory = [f"File history not available: git log returned code {git_response.returncode}." - "\nIf this is not a git repository, this is not a problem."] - - if git_response.returncode == 0: - filehistory = git_response.stdout.decode('utf-8') - temp = re.split( - r'(commit [a-f0-9]{40})', - filehistory, - flags=re.IGNORECASE +JINJA_ENV = jinja2.Environment( + loader=jinja2.PackageLoader("notes2web", str(TEMPLATES_DIR)), + autoescape=jinja2.select_autoescape ) - for t in temp: - if t == '': - temp.remove(t) - filehistory = [] - for i in range(0, len(temp)-1, 2): - filehistory.append(f"{temp[i]}{temp[i+1]}") - - if filehistory == "": - filehistory = ["This file has no history (it may not be part of the git repository)."] - - filehistory = [ x.replace("<", "<").replace(">", ">") for x in filehistory] - - filehistory = "
\n" + "
\n".join(filehistory) + "
" - - return filehistory +JINJA_TEMPLATES = {} +JINJA_TEMPLATE_TEXTARTICLE = JINJA_ENV.get_template("textarticle.html") +JINJA_TEMPLATE_HOME_INDEX = JINJA_ENV.get_template("home_index.html") +JINJA_TEMPLATE_DIRECTORY_INDEX = JINJA_ENV.get_template("index.html") +JINJA_TEMPLATE_ARTICLE = JINJA_ENV.get_template("article.html") -def get_dirs_to_index(folder): - r = [] - - for root, folders, files in os.walk(folder): - if pathlib.Path(os.path.join(root, folder)).is_relative_to(folder.joinpath('permalink')): - continue - [r.append(os.path.join(root, folder)) for folder in folders] - - return r +LICENSE = None +GIT_REPO = None +FILEMAP = None -def update_required(src_filename, output_filename): - return not os.path.exists(output_filename) or os.path.getmtime(src_filename) > os.path.getmtime(output_filename) +def update_required(src_filepath, output_filepath): + """ + check if file requires an update, + return boolean + """ + return not output_filepath.exists() or src_filepath.stat().st_mtime > output_filepath.stat().st_mtimeme() + def get_args(): """ Get command line arguments """ - import argparse parser = argparse.ArgumentParser() - parser.add_argument('notes', type=pathlib.Path) - parser.add_argument('-o', '--output-dir', type=pathlib.Path, default='web') - parser.add_argument('-t', '--template', type=pathlib.Path, default=pathlib.Path('/opt/notes2web/templates/article.html')) - parser.add_argument('-H', '--template-text-head', type=pathlib.Path, default=pathlib.Path('/opt/notes2web/templates/textarticlehead.html')) - parser.add_argument('-f', '--template-text-foot', type=pathlib.Path, default=pathlib.Path('/opt/notes2web/templates/textarticlefoot.html')) - parser.add_argument('-i', '--template-index-head', type=pathlib.Path, default=pathlib.Path('/opt/notes2web/templates/indexhead.html')) - parser.add_argument('-I', '--template-index-foot', type=pathlib.Path, default=pathlib.Path('/opt/notes2web/templates/indexfoot.html')) - parser.add_argument('-s', '--stylesheet', 
type=pathlib.Path, default=pathlib.Path('/opt/notes2web/styles.css')) - parser.add_argument('--home_index', type=pathlib.Path, default=pathlib.Path('/opt/notes2web/templates/home_index.html')) - parser.add_argument('--permalink_index', type=pathlib.Path, default=pathlib.Path('/opt/notes2web/templates/permalink_index.html')) - parser.add_argument('-e', '--extra-index-content', type=pathlib.Path, default=pathlib.Path('/opt/notes2web/templates/extra_index_content.html')) - parser.add_argument('-n', '--index-article-names', action='append', default=['index.md']) - parser.add_argument('-F', '--force', action="store_true", help="Generate new output html even if source file was modified before output html") - parser.add_argument('--fuse', type=pathlib.Path, default=pathlib.Path('/opt/notes2web/fuse.js')) - parser.add_argument('--searchjs', type=pathlib.Path, default=pathlib.Path('/opt/notes2web/search.js')) - parser.add_argument('--indexsearchjs', type=pathlib.Path, default=pathlib.Path('/opt/notes2web/indexsearch.js')) - parser.add_argument('--permalinkjs', type=pathlib.Path, default=pathlib.Path('/opt/notes2web/permalink.js')) - parser.add_argument('--tocsearchjs', type=pathlib.Path, default=pathlib.Path('/opt/notes2web/toc_search.js')) - parser.add_argument('--toc-depth', type=int, default=6, dest='toc_depth') + parser.add_argument('notes', type=Path) + parser.add_argument('-o', '--output-dir', type=Path, default='web') + parser.add_argument('-F', '--force', action="store_true", + help="Generate new output html even if source file was modified before output html") return parser.parse_args() +def render_markdown_file(input_filepath): + """ + render markdown file to file + write markdown file to args.output_dir in html, + return list of tuple of output filepath, frontmatter post + """ + print(f"render_markdown_file({input_filepath})") + with open(input_filepath, encoding='utf-8') as file_pointer: + content = frontmatter.load(file_pointer).content + + properties = 
FILEMAP.get(input_filepath) + + # TODO pandoc no longer handles template due to metadata passing issues, use jinja to fill in the metadata + html = render_markdown(content) + + with open(properties['dst_path']['html'], 'w+', encoding='utf-8') as file_pointer: + file_pointer.write(html) + + +def render_plaintext_file(input_filepath): + """ + render plaintext file to file + copy plaintext file into a html preview, copy raw to output dir + return list of tuple of output filepath, empty dict + """ + + with open(input_filepath, encoding='utf-8') as file_pointer: + raw_content = file_pointer.read() + + properties = FILEMAP.get(input_filepath) + + html = JINJA_TEMPLATE_TEXTARTICLE.render(license = LICENSE, **properties) + + with open(properties['dst_path']['raw'], "w+", encoding='utf-8') as file_pointer: + file_pointer.write(raw_content) + + with open(properties['dst_path']['html'], "w+", encoding='utf-8') as file_pointer: + file_pointer.write(html) + + +def render_generic_file(input_filepath): + """ + render generic file to file + copy generic file into to output_dir + return list of tuple of output filepath, empty dict + """ + properties = FILEMAP.get(input_filepath) + output_filepath = properties['dst_path']['raw'] + shutil.copyfile(input_filepath, output_filepath) + + +def render_file(input_filepath): + """ + render any file by detecting type and applying appropriate type + write input_filepath to correct file in args.output_dir in appropriate formats, + return list of tuples of output filepath, frontmatter post + """ + + if input_filepath.suffix == '.md': + return render_markdown_file(input_filepath) + + if FileMap.is_plaintext(input_filepath): + return render_plaintext_file(input_filepath) + + return render_generic_file(input_filepath) + + +def render_markdown(content): + """ + render markdown to html + """ + + post_body = { + 'text': content, + 'toc-depth': 6, + 'highlight-style': 'pygments', + 'html-math-method': 'mathml', + 'to': 'html', + 'files': { + 
'data/data/abbreviations': '', + }, + 'standalone': False, + } + + headers = { + 'Accept': 'application/json' + } + + response = requests.post( + PANDOC_SERVER_URL, + headers=headers, + json=post_body, + timeout=PANDOC_TIMEOUT + ) + + response = response.json() + + + # TODO look at response['messages'] and log them maybe? + # https://github.com/jgm/pandoc/blob/main/doc/pandoc-server.md#response + + return response['output'] + + + +def process_home_index(output_dir, search_data, notes_git_head_sha1=None): + """ + create home index.html in output_dir + """ + html = JINJA_TEMPLATE_HOME_INDEX.render( + n2w_commit = N2W_COMMIT, + search_data=search_data, + notes_git_head_sha1=notes_git_head_sha1, + ) + with open(output_dir.joinpath('index.html'), 'w+', encoding='utf-8') as file_pointer: + file_pointer.write(html) + + +def generate_variable_browser(output_dir, posts, variable_name) : + """ + generate a directory that lets you groub by and browse by any given tag. e.g. tags, authors + """ + groups = {} + + for key, post in posts.iter(): + group_val = post.get(variable_name, None) + if group_val is None: + continue + + if group_val not in groups.keys(): + groups[group_val] = [] + + groups[group_val].append(post) + + for group_val, index_entries in groups.iter(): + post = { + 'index_entries': index_entries, + 'title': group_val, + } + + # TODO finish writing function, write page to disk + + + def main(args): """ Entry point for script """ - with open(args.template_text_foot) as fp: - TEXT_ARTICLE_TEMPLATE_FOOT = fp.read() + global LICENSE + global GIT_REPO + global FILEMAP - with open(args.template_text_head) as fp: - TEXT_ARTICLE_TEMPLATE_HEAD = fp.read() - - with open(args.template_index_foot) as fp: - INDEX_TEMPLATE_FOOT = fp.read() - - with open(args.template_index_head) as fp: - INDEX_TEMPLATE_HEAD = fp.read() - - with open(args.extra_index_content) as fp: - EXTRA_INDEX_CONTENT = fp.read() + FILEMAP = FileMap(args.notes, args.output_dir.joinpath('notes')) if 
args.output_dir.is_file(): print(f"Output directory ({args.output_dir}) cannot be a file.") args.output_dir.mkdir(parents=True, exist_ok=True) - notes_license = "This note has no copyright license.", - print(f"{notes_license=}") + # attempt to get licensing information license_path = args.notes.joinpath("LICENSE") if license_path.exists(): - with open(license_path) as fp: - notes_license = fp.read() + with open(license_path, encoding='utf-8') as file_pointer: + LICENSE = file_pointer.read() - markdown_files, plaintext_files, other_files = get_files(args.notes) + # create git.Repo object if notes dir is a git repo + # TODO git commit log integration + if '.git' in args.notes.iterdir(): + GIT_REPO = git.Repo(args.notes) - all_entries=[] - dirs_with_index_article = [] - tag_dict = {} - permalink_to_filepath = {} - - print(f"{markdown_files=}") - for filename in markdown_files: - print(f"{filename=}") - - # calculate output filename - output_filename = args.output_dir.joinpath('notes').joinpath( - pathlib.Path(filename).relative_to(args.notes) - ).with_suffix('.html') - if os.path.basename(filename) in args.index_article_names: - output_filename = output_filename.parent.joinpath('index.html') - dirs_with_index_article.append(str(output_filename.parent)) - print(f"{output_filename=}") - - # extract tags from frontmatter, save to tag_dict - fm = frontmatter.load(filename) - if isinstance(fm.get('tags'), list): - for tag in list(set(fm.get('tags') + get_inherited_tags(filename, args.notes))): - t = { - 'path': str(pathlib.Path(output_filename).relative_to(args.output_dir)), - 'title': fm.get('title') or pathlib.Path(filename).name - } - if tag in tag_dict.keys(): - tag_dict[tag].append(t) - else: - tag_dict[tag] = [t] - - # find headers in markdown - with open(filename) as fp: - lines = fp.read().split('\n') - header_lines = [] - for line in lines: - if re.match('^#{1,6} \S', line): - header_lines.append(" ".join(line.split(" ")[1:])) - - all_entries.append({ - 'path': 
'/' + str(pathlib.Path(*pathlib.Path(output_filename).parts[1:])), - 'title': fm.get('title') or pathlib.Path(filename).name, - 'tags': list(set(fm.get('tags'))), - 'headers': header_lines, - 'uuid': fm.get('uuid') - }) - - if 'uuid' in fm.keys(): - permalink_to_filepath[fm['uuid']] = all_entries[-1]['path'] - - # update file if required - if update_required(filename, output_filename) or args.force: - filehistory = git_filehistory(args.notes, filename) - with open(filename) as fp: - article = frontmatter.load(fp) - - article['tags'] += get_inherited_tags(filename, args.notes) - article['tags'] = sorted(list(set(article['tags']))) - article['filehistory'] = filehistory - article['licenseFull'] = notes_license - html = pypandoc.convert_text(frontmatter.dumps(article), 'html', format='md', extra_args=[ - f'--template={args.template}', - '--mathjax', - '--toc', f'--toc-depth={args.toc_depth}' - ]) - pathlib.Path(output_filename).parent.mkdir(parents=True, exist_ok=True) - - with open(output_filename, 'w+') as fp: - fp.write(html) - - print(f"{plaintext_files=}") - for filename in plaintext_files: - filehistory = git_filehistory(args.notes, filename) - title = os.path.basename(filename) - output_filename = str( - args.output_dir.joinpath('notes').joinpath( - pathlib.Path(filename).relative_to(args.notes) - ) - ) + '.html' - print(f"{output_filename=}") - - pathlib.Path(output_filename).parent.mkdir(parents=True, exist_ok=True) - html = re.sub(r'\$title\$', title, TEXT_ARTICLE_TEMPLATE_HEAD) - html = re.sub(r'\$h1title\$', title, html) - html = re.sub(r'\$raw\$', os.path.basename(filename), html) - html = re.sub(r'\$licenseFull\$', notes_license, html) - html = html.replace('$filehistory$', filehistory) - with open(filename) as fp: - html += fp.read().replace("<", "<").replace(">", ">") - html += TEXT_ARTICLE_TEMPLATE_FOOT - - with open(output_filename, 'w+') as fp: - fp.write(html) - all_entries.append({ - 'path': 
str(pathlib.Path(*pathlib.Path(output_filename).parts[1:])), - 'title': title, - 'tags': [get_inherited_tags(filename, args.notes)], - 'headers': [] - }) - - print(f"{other_files=}") - for filename in other_files: - output_filename = str( - args.output_dir.joinpath('notes').joinpath( - pathlib.Path(filename).relative_to(args.notes) - ) - ) - title = os.path.basename(filename) - pathlib.Path(output_filename).parent.mkdir(parents=True, exist_ok=True) - all_entries.append({ - 'path': str(pathlib.Path(*pathlib.Path(output_filename).parts[1:])), - 'title': title, - 'tags': [get_inherited_tags(filename, args.notes)], - 'headers': [] - }) - shutil.copyfile(filename, output_filename) - - tagdir = args.output_dir.joinpath('.tags') - tagdir.mkdir(parents=True, exist_ok=True) - - for tag in tag_dict.keys(): - html = re.sub(r'\$title\$', f'{tag}', INDEX_TEMPLATE_HEAD) - html = re.sub(r'\$h1title\$', f'tag: {tag}', html) - html = re.sub(r'\$extra_content\$', '', html) - - for entry in tag_dict[tag]: - entry['path'] = '/' + entry['path'] - html += f"
{entry['title']}
" - html += re.sub('\$data\$', json.dumps(tag_dict[tag]), INDEX_TEMPLATE_FOOT) - - with open(tagdir.joinpath(f'{tag}.html'), 'w+') as fp: - fp.write(html) - - dirs_to_index = [args.output_dir.name] + get_dirs_to_index(args.output_dir) - print(f"{dirs_to_index=}") - print(f"{dirs_with_index_article=}") - - for d in dirs_to_index: - print(f"{d in dirs_with_index_article=} {d=}") - if d in dirs_with_index_article: + for root_str, subdirectories, files in os.walk(args.notes): + root = Path(root_str) + if '.git' in root.parts: continue - directory = pathlib.Path(d) - paths = os.listdir(directory) - #print(f"{paths=}") + root_properties = FILEMAP.get(root) + root_properties['dst_path']['raw'].mkdir(parents=True, exist_ok=True) - indexentries = [] - - for p in paths: - path = pathlib.Path(p) - #print(f"{path=}") - if p in [ 'index.html', '.git' ]: - continue + pprint.pprint(root_properties) + print(JINJA_TEMPLATE_DIRECTORY_INDEX) + html = JINJA_TEMPLATE_DIRECTORY_INDEX.render(**root_properties) + with open(root_properties['dst_path']['raw'].joinpath('index.html'), 'w+', encoding='utf-8') as file_pointer: + file_pointer.write(html) - fullpath = directory.joinpath(path) - title = path.name - if path.suffix == '.html': - with open(fullpath) as fp: - soup = bs(fp.read(), 'html.parser') + # render each file + for file in files: + render_file(root.joinpath(file)) - try: - title = soup.find('title').get_text() or pathlib.Path(path).name - except AttributeError: - title = pathlib.Path(path).stem - elif fullpath.is_dir(): - title = path - elif is_plaintext(fullpath): - # don't add plaintext files to index, since they have a html wrapper - continue - if str(title).strip() == '': - title = path + process_home_index(args.output_dir, search_data=FILEMAP.to_list()) - indexentries.append({ - 'title': str(title), - 'path': './' + str(path), - 'isdirectory': fullpath.is_dir() - }) + # copy styling and js scripts necessary for function + shutil.copytree(CSS_DIR, 
args.output_dir.joinpath('css'), dirs_exist_ok=True) + shutil.copytree(JS_DIR, args.output_dir.joinpath('js'), dirs_exist_ok=True) - indexentries.sort(key=lambda entry: str(entry['title']).lower()) - indexentries.sort(key=lambda entry: entry['isdirectory'], reverse=True) - - html = re.sub(r'\$title\$', str(directory), INDEX_TEMPLATE_HEAD) - html = re.sub(r'\$h1title\$', str(directory), html) - html = re.sub(r'\$extra_content\$', - EXTRA_INDEX_CONTENT if directory == args.notes else '', - html - ) - for entry in indexentries: - html += ( - '
  • ' - f'

    ' - f'{entry["title"]}{"/" if entry["isdirectory"] else ""}' - '

    ' - '
  • ' - ) - html += re.sub(r'\$data\$', json.dumps(indexentries), INDEX_TEMPLATE_FOOT) - - with open(directory.joinpath('index.html'), 'w+') as fp: - fp.write(html) - - shutil.copyfile(args.stylesheet, args.output_dir.joinpath('styles.css')) - shutil.copyfile(args.fuse, args.output_dir.joinpath('fuse.js')) - shutil.copyfile(args.searchjs, args.output_dir.joinpath('search.js')) - shutil.copyfile(args.indexsearchjs, args.output_dir.joinpath('indexsearch.js')) - shutil.copyfile(args.tocsearchjs, args.output_dir.joinpath('toc_search.js')) - shutil.copyfile(args.permalinkjs, args.output_dir.joinpath('permalink.js')) - with open(args.output_dir.joinpath('index.html'), 'w+') as fp: - with open(args.home_index) as fp2: - html = re.sub(r'\$title\$', args.output_dir.parts[0], fp2.read()) - html = re.sub(r'\$h1title\$', args.output_dir.parts[0], html) - html = re.sub(r'\$n2w_commit\$', N2W_COMMIT, html) - html = re.sub(r'\$notes_git_head_sha1\$', git_head_sha1(args.notes), html) - - html = re.sub(r'\$data\$', json.dumps(all_entries), html) - - fp.write(html) - permalink_dir = args.output_dir.joinpath('permalink') - permalink_dir.mkdir(exist_ok=True) - with open(args.permalink_index) as fp: - html = re.sub(r'\$data\$', json.dumps(permalink_to_filepath), fp.read()) - with open(permalink_dir.joinpath('index.html'), 'w+') as fp: - fp.write(html) - print(tag_dict) return 0 +# TODO implement useful logging and debug printing +# TODO build tag/metadata pages + if __name__ == '__main__': try: sys.exit(main(get_args())) diff --git a/readme.md b/readme.md index f831100..2426864 100644 --- a/readme.md +++ b/readme.md @@ -25,27 +25,43 @@ doing it for me: 0. Install [Pandoc](https://pandoc.org/index.html) and [Pip](https://github.com/pypa/pip), python3-dev, and a C compiler 1. 
Run `make install` as root -## Things to Remember Whilst Writing Notes +## Other Things to Know -- notes2web reads the following YAML [frontmatter](https://jekyllrb.com/docs/front-matter/) variable: - - - `author` --- The person(s) who wrote the article - - `tags` --- A YAML list of tags which the article relates to - this is used for browsing and also - searching - - `title` --- The title of the article - - `uuid` --- A unique identifier used for permalinks. More below. - - `lecture_slides` --- a list of paths pointing to lecture slides used while taking notes - - `lecture_notes` --- a list of paths pointing to other notes used while taking notes - - notes2web indexes [ATX-style headings](https://pandoc.org/MANUAL.html#atx-style-headings) for searching - notes2web attempts to display file history through the `git log` command - notes2web looks for the plaintext file `LICENSE` in the root directory of your notes - This is optional but if you would like to add a license you can find one - [here](https://choosealicense.com). -### Permalinks +## Custom Directory Index + +To add custom content to a directory index, put it in a file called `index.md` under the directory. + +You can set the following frontmatter variables to customise the directory index of a directory: + +| variable | default value | description | +|------------------------|-------------------|--------------------------------------------------------------------------------------------| +| `tags` | `[]` | list of tags, used by search and inherited by any notes and subdirectories | +| `uuid` | none | unique id to reference directory, used for permalinking | +| `content_after_search` | `false` | show custom content in `index.md` after search bar and directory index | +| `automatic_index` | `true` | show the automatically generated directory index. required for search bar to function. | +| `search_bar` | `true` | show search bar to search directory items. 
requires `automatic_index` (enabled by default) | + + +## Notes Metadata + +notes2web reads the following YAML [frontmatter](https://jekyllrb.com/docs/front-matter/) variables for metadata: + +| variable | description | +|------------------|---------------------------------------------------------------------------------------| +| `author` | The person(s) who wrote the article | +| `tags` | A YAML list of tags which the article relates to - this is used for browsing and also | +| `title` | The title of the article | +| `uuid` | A unique identifier used for permalinks. | +| `lecture_slides` | a list of paths pointing to lecture slides used while taking notes | +| `lecture_notes` | a list of paths pointing to other notes used while taking notes | + +## Permalinks Permalinks are currently rather basic and requires JavaScript to be enabled on the local computer. In order to identify documents between file changes, a unique identifier is used to identify a file. @@ -57,21 +73,13 @@ The included `n2w_add_uuid.py` will add a UUID to a markdown file which does not already. Combine it with `find` to UUIDify all your markdown files (but make a backup first). -### Inherited Properties +## Custom Styling -Notes can inherit a some properties from their parent folder(s) by creating a `.n2w.yml` file in a -folder. +To completely replace the existing styling, set the environment variable `CSS_DIR` to another directory with +a file called `styles.css`. -#### Tags - -If you have a folder `uni` with all you university notes, you might want all the files in there to -be tagged `uni`: - -`NOTES_PATH/uni/.n2w.yaml`: - -```yaml -itags: [ university ] -``` +To add additional styling, the default styling will attempt to import `styles.css` from the root of the notes +directory. 
## CLI Usage @@ -81,35 +89,7 @@ $ notes2web.py notes_directory Output of `notes2web.py --help`: -``` -usage: notes2web.py [-h] [-o OUTPUT_DIR] [-t TEMPLATE] [-H TEMPLATE_TEXT_HEAD] - [-f TEMPLATE_TEXT_FOOT] [-i TEMPLATE_INDEX_HEAD] - [-I TEMPLATE_INDEX_FOOT] [-s STYLESHEET] - [--home_index HOME_INDEX] [-e EXTRA_INDEX_CONTENT] - [-n INDEX_ARTICLE_NAMES] [-F] [--fuse FUSE] - [--searchjs SEARCHJS] - notes - -positional arguments: - notes - -optional arguments: - -h, --help show this help message and exit - -o OUTPUT_DIR, --output-dir OUTPUT_DIR - -t TEMPLATE, --template TEMPLATE - -H TEMPLATE_TEXT_HEAD, --template-text-head TEMPLATE_TEXT_HEAD - -f TEMPLATE_TEXT_FOOT, --template-text-foot TEMPLATE_TEXT_FOOT - -i TEMPLATE_INDEX_HEAD, --template-index-head TEMPLATE_INDEX_HEAD - -I TEMPLATE_INDEX_FOOT, --template-index-foot TEMPLATE_INDEX_FOOT - -s STYLESHEET, --stylesheet STYLESHEET - --home_index HOME_INDEX - -e EXTRA_INDEX_CONTENT, --extra-index-content EXTRA_INDEX_CONTENT - -n INDEX_ARTICLE_NAMES, --index-article-names INDEX_ARTICLE_NAMES - -F, --force Generate new output html even if source file was - modified before output html - --fuse FUSE - --searchjs SEARCHJS -``` +TODO add cli output The command will generate a website in the `output-dir` directory (`./web` by default). It will then generate a list of all note files and put it in `index.html`. diff --git a/requirements.txt b/requirements.txt index e78423c..378db00 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,3 +9,4 @@ python-magic==0.4.24 PyYAML==5.3.1 regex==2021.11.10 soupsieve==2.2.1 +PyYAML==6.0.1 diff --git a/templates/article.html b/templates/article.html index 0d55afe..dd49903 100644 --- a/templates/article.html +++ b/templates/article.html @@ -2,7 +2,7 @@ - + -$title$ +{{ title }}
    - - $if(lecture_slides)$ + + {% if lecture_slides %} - $endif$ - $if(lecture_notes)$ + {% endif %} + {% if lecture_notes %} - $endif$ + {% endif %} + + {% if uuid %} + + {% endif %} - -
    - - Commit log (file history) - - $filehistory$ -
    + {% if license %}
    License -
    $licenseFull$
    +
    {{ license }}
    - $body$ + {% endif %} + {% block body %} + {{ content }} + {% endblock %}
    - - diff --git a/templates/extra_index_content.html b/templates/extra_index_content.html deleted file mode 100644 index 26e211a..0000000 --- a/templates/extra_index_content.html +++ /dev/null @@ -1,4 +0,0 @@ -

    -These are my personal notes. Correctness is not guaranteed. -Browse by tag here. -

    diff --git a/templates/home_index.html b/templates/home_index.html index 9c50c33..46d75ce 100644 --- a/templates/home_index.html +++ b/templates/home_index.html @@ -2,15 +2,15 @@ - -$title$ + +{{ title }}
    -

    $h1title$

    +

    {{ h1title }}

    These are my personal notes. Correctness is not guaranteed. -Browse here or by tag here. +Browse here or by tag here.

    @@ -20,9 +20,9 @@ Browse here or by tag here.
    -

    page generated by notes2web (commit $n2w_commit$) notes commit $notes_git_head_sha1$

    +

    page generated by notes2web (commit {{ n2w_commit }}) {% if notes_git_head_sha1 %}notes commit {{ notes_git_head_sha1 }}{% endif %}

    - - - + + + diff --git a/templates/index.html b/templates/index.html index 157e6a6..59cc5ea 100644 --- a/templates/index.html +++ b/templates/index.html @@ -2,13 +2,43 @@ - -$title$ + +{{ title }} + -

    $h1title$

    -$body$ +

    {{ title }}

    + + {% if not content_after_search %} + {{ content }} + {% endif %} + + {% if automatic_index %} + {% if search_bar %} +
    + +
    +

    + Press (Shift+)Enter to open first result (in new tab) +

    + {% endif %} + + + {% endif %} + + {% if content_after_search %} + {{ content }} + {% endif %}

    page generated by notes2web

    + + + + diff --git a/templates/indexfoot.html b/templates/indexfoot.html deleted file mode 100644 index c76c924..0000000 --- a/templates/indexfoot.html +++ /dev/null @@ -1,7 +0,0 @@ - -

    page generated by notes2web

    -
    - - - - diff --git a/templates/indexhead.html b/templates/indexhead.html deleted file mode 100644 index 95275f1..0000000 --- a/templates/indexhead.html +++ /dev/null @@ -1,21 +0,0 @@ - - - - - -$title$ - - - -
    -

    $title$

    - $extra_content$ -
    - -
    -

    - Press (Shift+)Enter to open first result (in new tab) -

    - -
    - + diff --git a/templates/textarticle.html b/templates/textarticle.html new file mode 100644 index 0000000..58bb3e4 --- /dev/null +++ b/templates/textarticle.html @@ -0,0 +1,9 @@ +{% extends "article.html" %} +{% block body %} +

    This file was not rendered by notes2web because it is a plaintext file, not a markdown +file. +You can access the raw file here. +Below is an unformatted representation of the file: +

    +
    {{ raw_content }}
    +{% endblock %} diff --git a/templates/textarticlefoot.html b/templates/textarticlefoot.html deleted file mode 100644 index 8ef0abe..0000000 --- a/templates/textarticlefoot.html +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/templates/textarticlehead.html b/templates/textarticlehead.html deleted file mode 100644 index d11580b..0000000 --- a/templates/textarticlehead.html +++ /dev/null @@ -1,34 +0,0 @@ - - - - - -$title$ - - - -
    - -

    $title$

    -

    This file was not rendered by notes2web because it is a plaintext file, not a markdown - file. - You access the raw file here. - Below is an unformatted representation of the file: -

    -