32 Commits

Author SHA1 Message Date
014d72f39c Fix markdown with no title not displaying in search 2021-08-24 15:37:18 +01:00
0c5f367ceb Add p to tell user about enter to open 2021-08-24 15:36:42 +01:00
8871faaef0 Open link article in new tab if user presses shift+enter 2021-08-24 15:36:03 +01:00
7654745cf0 Allow use to press enter to open top search result 2021-08-24 14:53:25 +01:00
1c72f127d3 Fix syntax error 2021-08-24 14:39:30 +01:00
94a047e51b Replace os.path with pathlib 2021-08-24 13:52:47 +01:00
78def54645 escape html < and > chars 2021-08-22 21:47:54 +01:00
c8d70a3661 add extra callbacks 2021-08-21 10:34:14 +01:00
7bda685927 Add a max no. results input 2021-08-21 10:33:31 +01:00
1386679c49 Clean up code, limit search results to 15 2021-08-21 10:11:56 +01:00
d6c9a8adab add new switches to cli usage 2021-08-21 03:22:51 +01:00
1d9245fb8f Update readme.md 2021-08-21 03:21:04 +01:00
56688b26f5 Split file history into one pre tag per commit 2021-08-21 03:04:59 +01:00
7e85e2a1fa styling 2021-08-21 02:29:00 +01:00
65e36de48c Add file commit histories 2021-08-21 02:14:12 +01:00
b6978bad9e Allow for searching in headers 2021-08-20 14:31:34 +01:00
f9ffd01d7c update search bar styling 2021-08-19 15:34:20 +01:00
c51b8a302a Add tags, path to search results 2021-08-19 15:31:28 +01:00
3f50d9da28 explicitly use pip3 2021-08-19 14:44:41 +01:00
51ba98f045 searching! 2021-08-19 14:43:42 +01:00
1e0135a8e5 move notes into notes subdirectory for cleaner look 2021-08-19 13:41:19 +01:00
d4de923f1e fix article template for some versions of pandoc 2021-08-15 20:50:57 +01:00
d77077f13d tags! 2021-08-15 20:40:13 +01:00
08b0037ee6 Only generate html for updated files 2021-08-15 19:36:06 +01:00
388c75b351 Add overflow-x scrolling to pre tag 2021-08-04 21:12:57 +01:00
17fae8c60e Don't show .git directories in indexes. 2021-07-29 14:19:32 +01:00
89e5a1061a Don't add plaintext files to indexes 2021-07-29 14:06:03 +01:00
e7db61e551 update requirements 2021-07-29 13:25:23 +01:00
42d0775fe6 update readme 2021-06-29 20:33:16 +01:00
700709f171 add ability to manually set/override index.html contents with a markdown file 2021-06-29 20:23:20 +01:00
ca3baf0768 remove yq from readme.md 2021-06-29 19:28:21 +01:00
70d92e6d04 add why? section to readme.md 2021-06-29 19:27:31 +01:00
13 changed files with 2681 additions and 46 deletions

View File

@@ -1,9 +1,11 @@
install:
cp notes2web.py /usr/local/bin
pip install -r requirements.txt
pip3 install -r requirements.txt
mkdir -p /opt/notes2web
cp -r templates /opt/notes2web
cp styles.css /opt/notes2web
cp fuse.js /opt/notes2web
cp search.js /opt/notes2web
uninstall:
rm -rf /usr/local/bin/notes2web.py /opt/notes2web

2255
fuse.js Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -2,6 +2,8 @@
from bs4 import BeautifulSoup as bs
import subprocess
import frontmatter
import magic
import sys
import pathlib
@@ -9,6 +11,8 @@ import pypandoc
import shutil
import os
import re
import json
TEXT_ARTICLE_TEMPLATE_FOOT = None
TEXT_ARTICLE_TEMPLATE_HEAD = None
@@ -16,6 +20,7 @@ INDEX_TEMPLATE_FOOT = None
INDEX_TEMPLATE_HEAD = None
EXTRA_INDEX_CONTENT = None
def get_files(folder):
markdown = []
plaintext = []
@@ -23,8 +28,10 @@ def get_files(folder):
for root, folders, files in os.walk(folder):
for filename in files:
if '/.git' in root:
continue
name = os.path.join(root, filename)
if os.path.splitext(name)[1] == '.md':
if pathlib.Path(name).suffix == '.md':
markdown.append(name)
elif re.match(r'^text/', magic.from_file(name, mime=True)):
plaintext.append(name)
@@ -34,6 +41,46 @@ def get_files(folder):
return markdown, plaintext, other
def git_filehistory(working_dir, filename):
    """Return the git history of ``filename`` as a run of HTML ``<pre>`` blocks.

    Runs ``git log -p`` using the ``.git`` directory inside ``working_dir``
    and wraps every commit (its header together with its patch) in its own
    ``<pre>`` element.

    Args:
        working_dir: ``pathlib.Path`` of the notes directory; its ``.git``
            subdirectory is handed to git through ``--git-dir``.
        filename: Path of the file whose history is wanted. It must be
            located inside ``working_dir``.

    Returns:
        A string made of one or more ``<pre>...</pre>`` elements. If git
        exits with a non-zero code, or exits successfully without reporting
        any commit, the string contains a single explanatory message instead.

    Raises:
        ValueError: If ``filename`` is not inside ``working_dir`` (raised by
            ``pathlib.PurePath.relative_to``).
    """
    relative_path = pathlib.Path(filename).relative_to(working_dir)
    print(f"{pathlib.Path(filename).relative_to(working_dir)=}")

    git_response = subprocess.run(
        [
            'git',
            f"--git-dir={working_dir.joinpath('.git')}",
            "log",
            "-p",
            "--",
            str(relative_path),
        ],
        stdout=subprocess.PIPE
    )

    if git_response.returncode != 0:
        filehistory = [
            f"File history not available: git log returned code {git_response.returncode}."
            "\nIf this is not a git repository, this is not a problem."
        ]
    else:
        # Patches may contain bytes that are not valid UTF-8; replace them
        # rather than aborting the whole site build.
        log_text = git_response.stdout.decode('utf-8', errors='replace')

        # Splitting on a captured group yields
        # [preamble, header1, body1, header2, body2, ...]. The pattern is
        # anchored to the start of a line so that "commit <sha>" text inside
        # a commit message or patch (which git indents or prefixes) is not
        # mistaken for a commit boundary.
        parts = re.split(
            r'^(commit [a-f0-9]{40})',
            log_text,
            flags=re.IGNORECASE | re.MULTILINE
        )
        filehistory = [
            header + body
            for header, body in zip(parts[1::2], parts[2::2])
        ]

        # An empty list (not an empty string) is what "no commits" looks like.
        if not filehistory:
            filehistory = ["This file has no history (it may not be part of the git repository)."]

    return "<pre>\n" + "</pre><pre>\n".join(filehistory) + "</pre>"
def get_dirs(folder):
r = []
@@ -43,6 +90,10 @@ def get_dirs(folder):
return r
def update_required(src_filename, output_filename):
    """Tell whether the output file has to be (re)generated from its source.

    Args:
        src_filename: Path of the source file.
        output_filename: Path of the generated file.

    Returns:
        True when the output does not exist yet, or when the source's
        modification time is strictly newer than the output's; False
        otherwise.
    """
    # Nothing has been generated yet, so a build is always needed.
    if not os.path.exists(output_filename):
        return True

    source_mtime = os.path.getmtime(src_filename)
    output_mtime = os.path.getmtime(output_filename)
    return source_mtime > output_mtime
def get_args():
""" Get command line arguments """
@@ -56,7 +107,12 @@ def get_args():
parser.add_argument('-i', '--template-index-head', type=pathlib.Path, default=pathlib.Path('/opt/notes2web/templates/indexhead.html'))
parser.add_argument('-I', '--template-index-foot', type=pathlib.Path, default=pathlib.Path('/opt/notes2web/templates/indexfoot.html'))
parser.add_argument('-s', '--stylesheet', type=pathlib.Path, default=pathlib.Path('/opt/notes2web/styles.css'))
parser.add_argument('--home_index', type=pathlib.Path, default=pathlib.Path('/opt/notes2web/templates/home_index.html'))
parser.add_argument('-e', '--extra-index-content', type=pathlib.Path, default=pathlib.Path('/opt/notes2web/templates/extra_index_content.html'))
parser.add_argument('-n', '--index-article-names', action='append', default=['index.md'])
parser.add_argument('-F', '--force', action="store_true", help="Generate new output html even if source file was modified before output html")
parser.add_argument('--fuse', type=pathlib.Path, default=pathlib.Path('/opt/notes2web/fuse.js'))
parser.add_argument('--searchjs', type=pathlib.Path, default=pathlib.Path('/opt/notes2web/search.js'))
return parser.parse_args()
@@ -78,87 +134,181 @@ def main(args):
with open(args.extra_index_content) as fp:
EXTRA_INDEX_CONTENT = fp.read()
if not os.path.exists(args.output_dir):
os.makedirs(args.output_dir, exist_ok=True)
if os.path.isfile(args.output_dir):
print("Output directory ({output_dir}) cannot be a file.")
if args.output_dir.is_file():
print(f"Output directory ({args.output_dir}) cannot be a file.")
args.output_dir.mkdir(parents=True, exist_ok=True)
markdown_files, plaintext_files, other_files = get_files(args.notes)
all_entries=[]
dirs_with_index_article = []
tag_dict = {}
print(f"{markdown_files=}")
for filename in markdown_files:
print(f"{filename=}")
html = pypandoc.convert_file(filename, 'html', extra_args=[f'--template={args.template}'])
output_filename = os.path.splitext(re.sub(f"^{args.notes.name}", args.output_dir.name, filename))[0] + '.html'
os.makedirs(os.path.dirname(output_filename), exist_ok=True)
with open(output_filename, 'w+') as fp:
fp.write(html)
# calculate output filename
output_filename = args.output_dir.joinpath('notes').joinpath(
pathlib.Path(filename).relative_to(args.notes)
).with_suffix('.html')
if os.path.basename(filename) in args.index_article_names:
output_filename = output_filename.parent.joinpath('index.html')
dirs_with_index_article.append(str(output_filename.parent))
print(f"{output_filename=}")
# extract tags from frontmatter, save to tag_dict
fm = frontmatter.load(filename)
if isinstance(fm.get('tags'), list):
for tag in fm.get('tags'):
t = {
'path': str(pathlib.Path(output_filename).relative_to(args.output_dir)),
'title': fm.get('title') or pathlib.Path(filename).name
}
if tag in tag_dict.keys():
tag_dict[tag].append(t)
else:
tag_dict[tag] = [t]
# find headers in markdown
with open(filename) as fp:
lines = fp.read().split('\n')
header_lines = []
for line in lines:
if re.match('^#{1,6} \S', line):
header_lines.append(" ".join(line.split(" ")[1:]))
all_entries.append({
'path': str(pathlib.Path(*pathlib.Path(output_filename).parts[1:])),
'title': fm.get('title') or pathlib.Path(filename).name,
'tags': fm.get('tags'),
'headers': header_lines
})
# update file if required
if update_required(filename, output_filename) or args.force:
filehistory = git_filehistory(args.notes, filename)
html = pypandoc.convert_file(filename, 'html', extra_args=[f'--template={args.template}', '-V', f'filehistory={filehistory}'])
pathlib.Path(output_filename).parent.mkdir(parents=True, exist_ok=True)
with open(output_filename, 'w+') as fp:
fp.write(html)
print(f"{plaintext_files=}")
for filename in plaintext_files:
output_filename = re.sub(f"^{args.notes.name}", args.output_dir.name, filename) + '.html'
os.makedirs(os.path.dirname(output_filename), exist_ok=True)
title = os.path.basename(output_filename)
filehistory = git_filehistory(args.notes, filename)
title = os.path.basename(filename)
output_filename = str(
args.output_dir.joinpath('notes').joinpath(
pathlib.Path(filename).relative_to(args.notes)
)
) + '.html'
print(f"{output_filename=}")
pathlib.Path(output_filename).parent.mkdir(parents=True, exist_ok=True)
html = re.sub(r'\$title\$', title, TEXT_ARTICLE_TEMPLATE_HEAD)
html = re.sub(r'\$h1title\$', title, html)
html = re.sub(r'\$raw\$', os.path.basename(filename), html)
html = html.replace('$filehistory$', filehistory)
with open(filename) as fp:
html += fp.read()
html += fp.read().replace("<", "&lt;").replace(">", "&gt;")
html += TEXT_ARTICLE_TEMPLATE_FOOT
with open(output_filename, 'w+') as fp:
fp.write(html)
all_entries.append({
'path': str(pathlib.Path(*pathlib.Path(output_filename).parts[1:])),
'title': title,
'tags': [],
'headers': []
})
print(f"{other_files=}")
for filename in other_files:
output_filename = re.sub(f"^{args.notes.name}", args.output_dir.name, filename)
os.makedirs(os.path.dirname(output_filename), exist_ok=True)
output_filename = str(
args.output_dir.joinpath('notes').joinpath(
pathlib.Path(filename).relative_to(args.notes)
)
)
pathlib.Path(output_filename).parent.mkdir(parents=True, exist_ok=True)
all_entries.append({
'path': str(pathlib.Path(*pathlib.Path(output_filename).parts[1:])),
'title': str(pathlib.Path(*pathlib.Path(output_filename).parts[1:])),
'tags': [],
'headers': []
})
shutil.copyfile(filename, output_filename)
tagdir = args.output_dir.joinpath('.tags')
tagdir.mkdir(parents=True, exist_ok=True)
for tag in tag_dict.keys():
html = re.sub(r'\$title\$', f'{tag}', INDEX_TEMPLATE_HEAD)
html = re.sub(r'\$h1title\$', f'tag: {tag}', html)
html = re.sub(r'\$extra_content\$', '', html)
for entry in tag_dict[tag]:
html += f"<div class=\"article\"><a href=\"/{entry['path']}\">{entry['title']}</a></div>"
html += INDEX_TEMPLATE_FOOT
with open(tagdir.joinpath(f'{tag}.html'), 'w+') as fp:
fp.write(html)
dirs_to_index = [args.output_dir.name] + get_dirs(args.output_dir)
print(f"{dirs_to_index=}")
print(f"{os.path.commonpath(dirs_to_index)=}")
print(f"{dirs_with_index_article=}")
for directory in dirs_to_index:
for d in dirs_to_index:
print(f"{d in dirs_with_index_article=} {d=}")
if d in dirs_with_index_article:
continue
directory = pathlib.Path(d)
paths = os.listdir(directory)
print(f"{paths=}")
#print(f"{paths=}")
indexentries = []
for path in paths:
if path == 'index.html':
for p in paths:
path = pathlib.Path(p)
#print(f"{path=}")
if p in [ 'index.html', '.git' ]:
continue
fullpath = os.path.join(directory, path)
if os.path.splitext(path)[1] == '.html':
fullpath = directory.joinpath(path)
if path.suffix == '.html':
with open(fullpath) as fp:
soup = bs(fp.read(), 'html.parser')
try:
title = soup.find('title').get_text()
title = soup.find('title').get_text() or pathlib.Path(path).name
except AttributeError:
title = path
else:
title = pathlib.Path(path).stem
elif fullpath.is_dir():
title = path
else:
# don't add plaintext files to index, since they have a html wrapper
continue
if title.strip() == '':
if str(title).strip() == '':
title = path
indexentries.append({
'title': title,
'path': path,
'isdirectory': os.path.isdir(fullpath)
'title': str(title),
'path': str(path),
'isdirectory': fullpath.is_dir()
})
indexentries.sort(key=lambda entry: entry['title'])
indexentries.sort(key=lambda entry: str(entry['title']))
indexentries.sort(key=lambda entry: entry['isdirectory'], reverse=True)
html = re.sub(r'\$title\$', directory, INDEX_TEMPLATE_HEAD)
html = re.sub(r'\$title\$', str(directory), INDEX_TEMPLATE_HEAD)
html = re.sub(r'\$h1title\$', str(directory), html)
html = re.sub(r'\$extra_content\$',
EXTRA_INDEX_CONTENT if directory == os.path.commonpath(dirs_to_index) else '',
EXTRA_INDEX_CONTENT if directory == args.notes else '',
html
)
@@ -166,10 +316,21 @@ def main(args):
html += f"<div class=\"article\"><a href=\"{entry['path']}\">{entry['title']}{'/' if entry['isdirectory'] else ''}</a></div>"
html += INDEX_TEMPLATE_FOOT
with open(os.path.join(directory, 'index.html'), 'w+') as fp:
with open(directory.joinpath('index.html'), 'w+') as fp:
fp.write(html)
shutil.copyfile(args.stylesheet, os.path.join(args.output_dir.name, 'styles.css'))
shutil.copyfile(args.stylesheet, args.output_dir.joinpath('styles.css'))
shutil.copyfile(args.fuse, args.output_dir.joinpath('fuse.js'))
shutil.copyfile(args.searchjs, args.output_dir.joinpath('search.js'))
with open(args.output_dir.joinpath('index.html'), 'w+') as fp:
with open(args.home_index) as fp2:
html = re.sub(r'\$title\$', args.output_dir.parts[0], fp2.read())
html = re.sub(r'\$h1title\$', args.output_dir.parts[0], html)
html = re.sub(r'\$data\$', json.dumps(all_entries), html)
fp.write(html)
print(tag_dict)
return 0

View File

@@ -4,21 +4,45 @@ View your notes as a static html site.
![](./screenshot.png)
## Why?
I want to be able to view my notes in a more convenient way.
I was already writing them in Pandoc markdown and could view them as PDFs but that wasn't quite
doing it for me:
- It was inconvenient to flick through multiple files of notes to find the right PDF
- It was annoying to sync to my phone
- PDFs do not scale so they were hard to read on smaller screens
- Probably more reasons I can't think of right now
## Install
0. Install [Pandoc](https://pandoc.org/index.html) and [yq](https://github.com/mikefarah/yq)
0. Install [Pandoc](https://pandoc.org/index.html) and [Pip](https://github.com/pypa/pip)
On arch:
```
# pacman -S pandoc yq
# pacman -S pandoc python-pip
```
1. Run `make install` as root
## Usage
## Things to Remember Whilst Writing Notes
- notes2web reads the following YAML [frontmatter](https://jekyllrb.com/docs/front-matter/) variables:
- `author` --- The person(s) who wrote the article
- `tags` --- A YAML list of tags which the article relates to - this is used for browsing and also searching
- `title` --- The title of the article
- notes2web indexes [ATX-style headings](https://pandoc.org/MANUAL.html#atx-style-headings) for searching
- notes2web attempts to display file history through the `git log` command
## CLI Usage
```
$ notes2web.py NOTES_DIRECTORY_1
$ notes2web.py notes_directory
```
Output of `notes2web.py --help`:
@@ -27,7 +51,9 @@ Output of `notes2web.py --help`:
usage: notes2web.py [-h] [-o OUTPUT_DIR] [-t TEMPLATE] [-H TEMPLATE_TEXT_HEAD]
[-f TEMPLATE_TEXT_FOOT] [-i TEMPLATE_INDEX_HEAD]
[-I TEMPLATE_INDEX_FOOT] [-s STYLESHEET]
[-e EXTRA_INDEX_CONTENT]
[--home_index HOME_INDEX] [-e EXTRA_INDEX_CONTENT]
[-n INDEX_ARTICLE_NAMES] [-F] [--fuse FUSE]
[--searchjs SEARCHJS]
notes
positional arguments:
@@ -42,7 +68,13 @@ optional arguments:
-i TEMPLATE_INDEX_HEAD, --template-index-head TEMPLATE_INDEX_HEAD
-I TEMPLATE_INDEX_FOOT, --template-index-foot TEMPLATE_INDEX_FOOT
-s STYLESHEET, --stylesheet STYLESHEET
--home_index HOME_INDEX
-e EXTRA_INDEX_CONTENT, --extra-index-content EXTRA_INDEX_CONTENT
-n INDEX_ARTICLE_NAMES, --index-article-names INDEX_ARTICLE_NAMES
-F, --force Generate new output html even if source file was
modified before output html
--fuse FUSE
--searchjs SEARCHJS
```
The command will generate a website in the `output-dir` directory (`./web` by default).

View File

@@ -1,3 +1,6 @@
beautifulsoup4==4.9.3
pypandoc==1.5
python-frontmatter==1.0.0
python-magic==0.4.24
PyYAML==5.4.1
soupsieve==2.2.1

86
search.js Normal file
View File

@@ -0,0 +1,86 @@
// Names of the entry fields that are searched; they are also used below as
// the keys under which matches are grouped for display.
const HEADERS = "headers"
const PATH = "path"
const TAGS = "tags"
const TITLE = "title"
// Fuse index over `data`, which is not defined in this script and must
// already exist as a global when it runs.
// includeMatches makes every result carry the list of matched fields and
// the character index ranges that matched.
const fuse = new Fuse(data, {
keys: [ HEADERS, PATH, TAGS, TITLE ],
includeMatches: true
})
// Elements looked up by id: the query input, the "maximum number of
// results" input, and the container the results are rendered into.
const searchBar = document.getElementById('search')
const resultsMax = document.getElementById('resultsMax')
const resultsDiv = document.getElementById('results')
// Results of the most recent search; shared with the Enter-key handler,
// which opens the first entry.
var results = []
/**
 * Re-run the search for the current query and re-render the result list.
 *
 * Reads the query from `searchBar` and the result limit from `resultsMax`,
 * stores the truncated result list in the shared `results` variable, and
 * fills `resultsDiv` with one ".article" block per result: a link to the
 * entry followed by one line per field that matched, with the longest
 * matched range of each value highlighted.
 */
function updateResults() {
    // Values come from note content; escape them before they are placed in
    // innerHTML so that a title or header containing "<" is shown literally.
    const escapeHtml = text => String(text)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')

    resultsDiv.innerHTML = ''
    // An empty or non-numeric limit parses to NaN, which slice() treats as 0.
    results = fuse.search(searchBar.value).slice(0, parseInt(resultsMax.value, 10))

    results.forEach(r => {
        const wrapper = document.createElement('div')
        wrapper.className = "article"

        const display_matches = {}
        display_matches[HEADERS] = []
        display_matches[PATH] = []
        display_matches[TAGS] = []
        display_matches[TITLE] = []

        // every() stops at the first `false`: once one field already lists
        // more than three matches, an ellipsis is appended and the remaining
        // matches of this result are not shown.
        r.matches.every(match => {
            if (display_matches[match.key].length > 3) {
                display_matches[match.key].push('...')
                return false
            }

            let display_match = escapeHtml(match.value)
            if (match.indices.length >= 1) {
                // Longest matched range first.
                match.indices.sort((a, b) => (b[1]-b[0])-(a[1]-a[0]))
                const [start, end] = match.indices[0]
                // Build the highlight from the index range itself; replacing
                // the slice text would mark its first textual occurrence,
                // which is not necessarily the range that matched.
                display_match =
                    escapeHtml(match.value.slice(0, start)) +
                    '<span class="matchHighlight">' +
                    escapeHtml(match.value.slice(start, end + 1)) +
                    '</span>' +
                    escapeHtml(match.value.slice(end + 1))
            }
            display_matches[match.key].push(display_match)

            return true
        })

        const content = document.createElement('a')
        content.textContent = r.item.title
        content.href = r.item.path
        wrapper.appendChild(content)

        Object.keys(display_matches).forEach(key => {
            if (display_matches[key].length < 1) return

            const p = document.createElement('p')
            p.className = "smallText"
            p.innerHTML += key + ": [" + display_matches[key].join(', ') + ']'
            wrapper.appendChild(p)
        })

        resultsDiv.appendChild(wrapper)
    })
}
// Enter opens the first search result (Shift+Enter opens it in a new tab);
// any other key re-runs the search.
searchBar.addEventListener('keyup', e => {
    // if user pressed enter (keyCode is kept as a fallback for browsers
    // that do not report `key`)
    if (e.key === 'Enter' || e.keyCode === 13) {
        // Nothing to open when the current query has no results.
        if (results.length === 0) return

        if (e.shiftKey) {
            window.open(results[0].item.path, '_blank')
        } else {
            window.location.href = results[0].item.path
        }
        return
    }

    updateResults()
})
// Also refresh the results on 'change' of the query input, and on both
// 'keyup' and 'change' of the result-limit input.
searchBar.addEventListener('change', updateResults)
resultsMax.addEventListener('keyup', updateResults)
resultsMax.addEventListener('change', updateResults)
// Render once on load for the initial contents of the inputs.
updateResults()

View File

@@ -35,6 +35,7 @@ pre {
background-color: #d9d9d9 ;
color: #000;
padding: 1em;
overflow-x: scroll;
}
details {
@@ -71,3 +72,46 @@ blockquote {
blockquote * {
margin: 0;
}
/* Inputs inside the search row; min-width: 0 lets them shrink below their
   intrinsic width inside the flex container. */
#searchWrapper > input {
padding: 1em;
margin: 1em 0.5em 1em 0.5em;
font-size: 1em;
min-width: 0;
}
/* Lay the search inputs out in a single row. */
#searchWrapper {
display: flex
}
/* The query input takes up the spare width of the row. */
#search { flex-grow: 9 }
/* Long result lines scroll horizontally instead of widening the page. */
#results {
overflow-x: scroll;
}
/* Secondary text such as footers, summaries and match details. */
.smallText {
font-size: 0.7em;
}
/* Match-detail lines sit directly under the result link. */
.article .smallText {
margin: 0
}
/* Highlight for the matched part of a search result value. */
.matchHighlight {
background-color: #86c1b9;
}
/* Remove spacing from the direct children of the page header. */
#header > * {
margin: 0;
padding: 0
}
/* Keyboard-key look for <kbd> elements. */
kbd {
background-color: #d9d9d9;
border-radius: 0.25em;
padding: 0.2em;
box-shadow: 0.15em 0.15em 0 #c9c9c9;
margin-left: 0.2em;
margin-right: 0.2em;
}

View File

@@ -9,6 +9,19 @@
</head>
<body>
<div id="header">
<p style="font-size: 0.7em"> tags:
$for(tags)$
<a href="/.tags/$tags$.html">$tags$</a>$sep$,
$endfor$
</p>
<p class="smallText" id="footer"> written by $author$, generated by <a href="https://git.alv.cx/alvierahman90/notes2web">notes2web</a></p>
<details id="commitLog">
<summary class="smallText">
Commit log (file history)
</summary>
$filehistory$
</details>
<div>
$body$
<p style="font-size: 0.7em;"> page generated by <a href="https://github.com/alvierahman90/notes2web">notes2web</a></p>
</body>

View File

@@ -1 +1,4 @@
<p>These are my personal notes. Correctness is not guaranteed.</p>
<p>
These are my personal notes. Correctness is not guaranteed.
Browse by tag <a href="/.tags">here</a>.
</p>

28
templates/home_index.html Normal file
View File

@@ -0,0 +1,28 @@
<head>
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta charset="utf-8">
<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Open+Sans&display=swap" />
<link rel="stylesheet" type="text/css" href="/styles.css" />
<title>$title$</title>
</head>
<body>
<h1>$h1title$</h1>
<p>
These are my personal notes. Correctness is not guaranteed.
Browse <a href="/notes">here</a> or by tag <a href="/.tags">here</a>.
</p>
<div id="searchWrapper">
<input placeholder="Search" id="search">
<input type="number" id="resultsMax" min="0" value="5">
</div>
<p class="smallText" style="margin-top: 0; text-align: center;"> Press <kbd>Enter</kbd> to open first result or <kbd>Shift</kbd>+<kbd>Enter</kbd> to open in new tab</p>
<div id="results">
</div>
<p class="smallText"> page generated by <a href="https://github.com/alvierahman90/notes2web">notes2web</a></p>
<script src="/fuse.js"> </script>
<script> const data = $data$ </script>
<script src="/search.js"> </script>
</body>

View File

@@ -6,7 +6,7 @@
<title>$title$</title>
</head>
<body>
<h1>$title$</h1>
<h1>$h1title$</h1>
$body$
<p style="font-size: 0.7em;"> page generated by <a href="https://github.com/alvierahman90/notes2web">notes2web</a></p>

View File

@@ -1,3 +1,2 @@
</pre>
<p style="font-size: 0.7em;"> page generated by <a href="https://github.com/alvierahman90/notes2web">notes2web</a></p>
</body>

View File

@@ -7,6 +7,15 @@
</head>
<body>
<div id="header">
<p class="smallText" id="footer"> page generated by <a href="https://git.alv.cx/alvierahman90/notes2web">notes2web</a></p>
<details>
<summary class="smallText">
Commit log (file history)
</summary>
$filehistory$
</details>
</div>
<h1>$title$</h1>
<p> This file was not rendered by notes2web because it is a plaintext file, not a markdown
file.