convert script to python

This commit is contained in:
Akbar Rahman 2021-06-29 14:35:21 +01:00
parent 08ed9d67ca
commit 7c5b082ee3
8 changed files with 192 additions and 140 deletions

View File

@ -1,8 +1,9 @@
install: install:
cp notes2web /usr/local/bin cp notes2web.py /usr/local/bin
pip install -r requirements.txt
mkdir -p /opt/notes2web mkdir -p /opt/notes2web
cp -r templates /opt/notes2web cp -r templates /opt/notes2web
cp styles.css /opt/notes2web cp styles.css /opt/notes2web
uninstall: uninstall:
rm -rf /usr/local/bin/notes2web/opt/notes2web rm -rf /usr/local/bin/notes2web.py /opt/notes2web

137
notes2web
View File

@ -1,137 +0,0 @@
#!/usr/bin/env bash
# set default config values, load user config, export config variables
# Defaults; each one may be overridden by a config file sourced below.
name=""
output="web"
article_template="/opt/notes2web/templates/article.html"
textarticlehead_template="/opt/notes2web/templates/textarticlehead.html"
textarticlefoot_template="/opt/notes2web/templates/textarticlefoot.html"
listitem_template="/opt/notes2web/templates/listitem.html"
index_template="/opt/notes2web/templates/index.html"
stylesheet="/opt/notes2web/styles.css"
# Source every config file that exists, in this order; a later file can
# therefore overwrite values set by an earlier one.
for configpath in "$HOME/.notes2web.conf" "$HOME/.config/notes2web/config" ".notes2web.conf"
do
[[ -f "$configpath" ]] && source "$configpath"
done
# Export the settings so that the child shells started with `bash -c`
# further down in this script can read them.
export name
export output
export article_template
export textarticlehead_template
export textarticlefoot_template
export listitem_template
export index_template
export stylesheet
# --help: print the usage line and stop.
[[ "$1" == "--help" ]] && echo "USAGE: $0 [NOTES_DIRECTORY_1 [NOTES_DIRECTORY_2 [...]]]" && exit 0
# --clean: remove every ".2web" marker file found by find, remove the output
# directory, and stop.
[[ "$1" == "--clean" ]] && {
find -name ".2web" -exec rm {} \;
rm -rf "$output"
exit 0
}
# Make sure the output directory exists before anything is written into it.
mkdir -p "$output"
function _renderarticle {
    # Render the markdown file given as $1 with pandoc and write the result to
    # "$output/<$1>.html", creating the destination directory first.
    local target="$output/${1}.html"
    local -a pandoc_args=(
        --toc
        --standalone
        -t html
        --template "$article_template"
        -o "$target"
        "$1"
        --mathjax
    )
    echo "rendering $1"
    mkdir -p "$(dirname "$target")"
    pandoc "${pandoc_args[@]}"
}
function _rendertextarticle {
    # Wrap the plain-text file given as $1 in the text-article head and foot
    # templates and write the result to "$output/<$1>.html".
    # Files that `file -b` does not describe as exactly "ASCII text" are skipped.
    # `return` (not `exit`) so a direct caller's shell is not terminated.
    [[ "$(file -b "$1")" == "ASCII text" ]] || return
    local target="$output/${1}.html"
    echo "rendering text file $1"
    mkdir -p "$(dirname "$target")"
    # Replace the first $title$ on each line of the head template with the file
    # path. The path is handed to awk through the environment and inserted with
    # substr(), so characters that would be special in a sed replacement
    # (#, & and \) end up in the page literally.
    NOTES2WEB_TITLE="$1" awk '{
        at = index($0, "$title$")
        if (at > 0)
            $0 = substr($0, 1, at - 1) ENVIRON["NOTES2WEB_TITLE"] substr($0, at + 7)
        print
    }' "$textarticlehead_template" > "$target"
    cat "$1" >> "$target"
    cat "$textarticlefoot_template" >> "$target"
}
function _adddirtoindex {
    # $1 is the path of a ".2web" marker file. Append a heading for the
    # directory that contains it to "$output/index.md", followed by one index
    # entry per file found below that directory.
    local dir
    dir="$(dirname "$1")"
    echo "<h2>$(basename "$dir") notes</h2>" >> "$output/index.md"
    # Hand each path to the child shell as a positional argument instead of
    # splicing it into the command string, so a file name containing a quote
    # cannot break (or inject into) the command.
    find "$dir" -name '*.md' -exec bash -c '_addarticletoindex "$1"' _ {} \;
    find "$dir" -not -path '**/.git/**' -not -name '*.md' -type f -exec bash -c '_addtextarticletoindex "$1"' _ {} \;
}
function _addtextarticletoindex {
    # Append an index entry for the plain-text file given as $1 to
    # "$output/index.md", rendered by pandoc through the list-item template.
    # The file path is used as the entry title.
    # Files that `file -b` does not describe as exactly "ASCII text" are skipped.
    # `return` (not `exit`) so a direct caller's shell is not terminated.
    [[ "$(file -b "$1")" == "ASCII text" ]] || return
    # The redirection target is quoted so an $output containing spaces works.
    pandoc \
        -t html \
        -V "filepath=${1}.html" \
        -V "title=$1" \
        --template "$listitem_template" \
        "$1" \
        >> "$output/index.md"
}
function _addarticletoindex {
    # Append an index entry for the markdown file given as $1 to
    # "$output/index.md", rendered by pandoc through the list-item template.
    echo "adding $1 to list of notes"
    # The redirection target is quoted so an $output containing spaces works.
    pandoc \
        -t html \
        -V "filepath=${1}.html" \
        --template "$listitem_template" \
        "$1" \
        >> "$output/index.md"
}
# Export the functions so that the child shells started with `bash -c`
# by the find commands in this script can call them.
export -f _renderarticle
export -f _rendertextarticle
export -f _adddirtoindex
export -f _addarticletoindex
export -f _addtextarticletoindex
# render each markdown file in every folder passed in args
# Each path is passed to the child shell as a positional argument ("$1")
# rather than spliced into the command string, so file names that contain
# quotes are handled correctly.
for dir in "$@"
do
    find "$dir" -name '*.md' -exec bash -c '_renderarticle "$1"' _ {} \;
    find "$dir" -not -path '**/.git/**' -not -name '*.md' -type f -exec bash -c '_rendertextarticle "$1"' _ {} \;
done

# create an intermediate markdown file of links to each article
if [[ -z "$name" ]]
then
    index_title="notes"
else
    index_title="${name}'s notes"
fi
{
    echo "---"
    echo "title: $index_title"
    echo "---"
} > "$output/index.md"

# mark folders to be included in notes2web's index
for file in "$@"
do
    if [[ ! -f "$file" ]]
    then
        echo "the presence of this files tells notes2web that it should be added to the notes2web index" > "$file/.2web"
    fi
done

# add articles to index and render
# An explicit "." start path keeps this working with find implementations
# that do not default to the current directory.
find . -name '.2web' -exec bash -c '_adddirtoindex "$1"' _ {} \;

echo "copying styles.css to current directory"
cp "$stylesheet" "$output/styles.css"

echo "rendering index.md"
pandoc \
    -t html \
    --template "$index_template" \
    -o "$output/index.html" \
    "$output/index.md"

169
notes2web.py Executable file
View File

@ -0,0 +1,169 @@
#!/usr/bin/env python3
from bs4 import BeautifulSoup as bs
import magic
import sys
import pathlib
import pypandoc
import shutil
import os
import re
# Module-level placeholders for the template texts. Nothing in this file
# assigns them at module scope: main() has no `global` statement and keeps the
# templates it loads in local variables, so these names stay None.
TEXT_ARTICLE_TEMPLATE_FOOT = None
TEXT_ARTICLE_TEMPLATE_HEAD = None
INDEX_TEMPLATE_FOOT = None
INDEX_TEMPLATE_HEAD = None
def get_files(folder):
    """Walk *folder* and sort every file path into three lists.

    Returns a ``(markdown, plaintext, other)`` tuple of path lists:

    * ``markdown``  - files whose extension is exactly ``.md``;
    * ``plaintext`` - non-markdown files whose MIME type, as reported by
      ``magic.from_file(path, mime=True)``, starts with ``text/``;
    * ``other``     - every non-markdown file, including the ones that are
      also listed in ``plaintext``.
    """
    markdown, plaintext, other = [], [], []
    for root, _dirnames, filenames in os.walk(folder):
        for path in (os.path.join(root, entry) for entry in filenames):
            _stem, extension = os.path.splitext(path)
            if extension == '.md':
                markdown.append(path)
                continue
            # Text files are recorded twice: once to be wrapped in HTML and
            # once in `other` alongside the remaining non-markdown files.
            if re.match(r'^text/', magic.from_file(path, mime=True)):
                plaintext.append(path)
            other.append(path)
    return markdown, plaintext, other
def get_dirs(folder):
    """Return the path of every directory found beneath *folder*, recursively.

    Each path is the directory name joined onto the ``root`` that
    ``os.walk(folder)`` reported for it. *folder* itself is not included, and
    the order is the order in which ``os.walk`` yields the directories.
    """
    found = []
    for root, dirnames, _filenames in os.walk(folder):
        # extend() with a generator replaces the original list comprehension
        # that was executed only for its append() side effects.
        found.extend(os.path.join(root, name) for name in dirnames)
    return found
def get_args():
    """Parse the command line and return the resulting argparse namespace.

    One positional argument, ``notes``, names the notes directory. Every
    option takes a path (converted to ``pathlib.Path``) and defaults to ``web``
    for the output directory or to a file under ``/opt/notes2web`` otherwise.
    """
    import argparse

    # (short flag, long flag, default) for the template and stylesheet options,
    # listed in the order they are registered with the parser.
    installed_files = (
        ('-t', '--template', '/opt/notes2web/templates/article.html'),
        ('-H', '--template-text-head', '/opt/notes2web/templates/textarticlehead.html'),
        ('-f', '--template-text-foot', '/opt/notes2web/templates/textarticlefoot.html'),
        ('-i', '--template-index-head', '/opt/notes2web/templates/indexhead.html'),
        ('-I', '--template-index-foot', '/opt/notes2web/templates/indexfoot.html'),
        ('-s', '--stylesheet', '/opt/notes2web/styles.css'),
    )

    parser = argparse.ArgumentParser()
    parser.add_argument('notes', type=pathlib.Path)
    parser.add_argument('-o', '--output-dir', type=pathlib.Path, default='web')
    for short_flag, long_flag, default in installed_files:
        parser.add_argument(
            short_flag,
            long_flag,
            type=pathlib.Path,
            default=pathlib.Path(default),
        )
    return parser.parse_args()
def _mirror_path(filename, notes_dir, output_dir):
    """Return where *filename* (a path under *notes_dir*) belongs under *output_dir*."""
    return os.path.join(output_dir, os.path.relpath(filename, notes_dir))


def _read_text(path, errors='strict'):
    """Return the contents of *path* decoded as UTF-8."""
    with open(path, encoding='utf-8', errors=errors) as fp:
        return fp.read()


def _write_text(path, text):
    """Write *text* to *path* as UTF-8, creating the parent directory if needed."""
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, 'w', encoding='utf-8') as fp:
        fp.write(text)


def _index_entry(directory, name):
    """Build the index record (title, path, isdirectory) for *name* in *directory*."""
    fullpath = os.path.join(directory, name)
    title = name
    if os.path.splitext(name)[1] == '.html':
        # Prefer the page's own <title>; fall back to the file name when the
        # tag is missing or blank.
        tag = bs(_read_text(fullpath, errors='replace'), 'html.parser').find('title')
        if tag is not None and tag.get_text().strip() != '':
            title = tag.get_text()
    return {
        'title': title,
        'path': name,
        'isdirectory': os.path.isdir(fullpath),
    }


def _write_index(directory, head, foot):
    """Write ``index.html`` for *directory*, linking to every other entry in it."""
    from html import escape
    from urllib.parse import quote

    paths = os.listdir(directory)
    print(f"{paths=}")

    entries = []
    for name in paths:
        if name == 'index.html':
            continue
        print(os.path.join(directory, name))
        entries.append(_index_entry(directory, name))

    # Directories first, then alphabetical by title within each group.
    entries.sort(key=lambda entry: (not entry['isdirectory'], entry['title']))

    parts = [head.replace('$title$', escape(directory))]
    for entry in entries:
        suffix = '/' if entry['isdirectory'] else ''
        parts.append(
            f'<div class="article"><a href="{quote(entry["path"])}">'
            f'{escape(entry["title"])}{suffix}</a></div>'
        )
    parts.append(foot)
    _write_text(os.path.join(directory, 'index.html'), ''.join(parts))


def main(args):
    """Build the static site described by *args* and return the exit status.

    *args* is the namespace produced by ``get_args()``. The function

    1. renders every markdown note to HTML with pandoc,
    2. wraps every text note in the text-article head/foot templates,
    3. copies every non-markdown file unchanged,
    4. writes an ``index.html`` into the output directory and each directory
       below it, and
    5. copies the stylesheet to ``styles.css`` in the output directory.

    Files keep their position relative to ``args.notes`` when written beneath
    ``args.output_dir``.

    Returns 0 on success, or 1 when ``args.output_dir`` is an existing file.
    """
    from html import escape
    from urllib.parse import quote

    text_foot = _read_text(args.template_text_foot)
    text_head = _read_text(args.template_text_head)
    index_foot = _read_text(args.template_index_foot)
    index_head = _read_text(args.template_index_head)

    if os.path.isfile(args.output_dir):
        print(f"Output directory ({args.output_dir}) cannot be a file.", file=sys.stderr)
        return 1
    os.makedirs(args.output_dir, exist_ok=True)

    markdown_files, plaintext_files, other_files = get_files(args.notes)

    print(f"{markdown_files=}")
    for filename in markdown_files:
        rendered = pypandoc.convert_file(filename, 'html', extra_args=[f'--template={args.template}'])
        mirrored = _mirror_path(filename, args.notes, args.output_dir)
        _write_text(os.path.splitext(mirrored)[0] + '.html', rendered)

    print(f"{plaintext_files=}")
    for filename in plaintext_files:
        target = _mirror_path(filename, args.notes, args.output_dir) + '.html'
        # str.replace inserts the values literally; the file name and the file
        # content are escaped because they are arbitrary text placed in HTML.
        page = text_head.replace('$title$', escape(os.path.basename(target)))
        page = page.replace('$raw$', quote(os.path.basename(filename)))
        page += escape(_read_text(filename, errors='replace'))
        page += text_foot
        _write_text(target, page)

    print(f"{other_files=}")
    for filename in other_files:
        target = _mirror_path(filename, args.notes, args.output_dir)
        os.makedirs(os.path.dirname(target), exist_ok=True)
        shutil.copyfile(filename, target)

    dirs_to_index = [str(args.output_dir)] + get_dirs(args.output_dir)
    print(f"{dirs_to_index=}")
    for directory in dirs_to_index:
        _write_index(directory, index_head, index_foot)

    shutil.copyfile(args.stylesheet, os.path.join(args.output_dir, 'styles.css'))

    return 0
if __name__ == '__main__':
    # Run the script; Ctrl-C is treated as a normal exit with status 0.
    try:
        exit_status = main(get_args())
    except KeyboardInterrupt:
        exit_status = 0
    sys.exit(exit_status)

View File

@ -18,7 +18,7 @@ View your notes as a static html site.
## Usage ## Usage
``` ```
$ notes2web NOTES_DIRECTORY_1 [NOTES_DIRECTORY_2 [...]] $ notes2web.py NOTES_DIRECTORY_1 [NOTES_DIRECTORY_2 [...]]
``` ```
The command will generate a website in the `$output` directory (`./web` by default). The command will generate a website in the `$output` directory (`./web` by default).
@ -28,6 +28,8 @@ Then you just have to point a webserver at `$output`.
## Config ## Config
NOT CURRENTLY IMPLEMENTED
`notes2web` looks for a config file called `.notes2web.conf` in your current directory and your home `notes2web` looks for a config file called `.notes2web.conf` in your current directory and your home
directory. directory.
Default config values: Default config values:

3
requirements.txt Normal file
View File

@ -0,0 +1,3 @@
beautifulsoup4==4.9.3
pypandoc==1.5
# provides the "magic" module that notes2web.py imports
python-magic
soupsieve==2.2.1

2
templates/indexfoot.html Normal file
View File

@ -0,0 +1,2 @@
<p style="font-size: 0.7em;"> page generated by <a href="https://github.com/alvierahman90/notes2web">notes2web</a></p>
</body>

11
templates/indexhead.html Normal file
View File

@ -0,0 +1,11 @@
<head>
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta charset="utf-8">
<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Open+Sans&display=swap" />
<link rel="stylesheet" type="text/css" href="/styles.css" />
<title>$title$</title>
</head>
<body>
<h1>$title$</h1>
<div class="article"><a href="..">../</a></div>

View File

@ -10,6 +10,7 @@
<h1>$title$</h1> <h1>$title$</h1>
<p> This file was not rendered by notes2web because it is a plaintext file, not a markdown <p> This file was not rendered by notes2web because it is a plaintext file, not a markdown
file. file.
You can access the raw file <a href="$raw$">here</a>.
Below is an unformatted representation of the file: Below is an unformatted representation of the file:
</p> </p>
<pre> <pre>