This repository has been archived on 2024-01-02. You can view files and clone it, but cannot push or open issues or pull requests.
notes2web/fileproperties.py

189 lines
5.8 KiB
Python
Raw Normal View History

2024-01-02 18:22:15 +00:00
from pathlib import Path
import frontmatter
import copy
import magic
import regex as re
class FileMap:
"""
this class is used to read file properties, inherit properties, and have a centralised place to access them
"""
def __init__(self, input_dir, output_dir):
self._map = {}
self.input_dir = Path(input_dir)
self.output_dir = Path(output_dir)
@staticmethod
def _path_to_key(path):
return str(Path(path))
def get(self, filepath, default=None, raw=False):
"""
get the properties of a file at a filepath
raw=True to not inherit properties
"""
#print(f"FileMap.get({filepath=}, {default=}, {raw=})")
# TODO maybe store properties of a file once it's in built and mark it as built? might save time but also cba
if self._path_to_key(filepath) not in self._map.keys():
self.add(filepath)
properties = copy.deepcopy(self._map.get(self._path_to_key(filepath), default))
#print(f"FileMap.get({filepath=}, {default=}, {raw=}): {properties=}")
if raw:
return properties
parent = filepath
while True:
parent = parent.parent
if parent == Path('.'):
break
parent_properties = self.get(parent, raw=True)
# TODO inherit any property that isn't defined, append any lists that exist
properties['tags'] = properties.get('tags', []) + parent_properties.get('tags', [])
if parent == self.input_dir:
break
return properties
def add(self, filepath):
filepath = Path(filepath)
#print(f"FileMap.add({filepath=}")
if filepath.is_dir():
properties = self._get_directory_properties(filepath)
else:
properties = self._get_file_properties(filepath)
properties['src_path'] = filepath
properties['dst_path'] = self._get_output_filepath(filepath)
self._map[self._path_to_key(filepath)] = properties
def _get_directory_properties(self, filepath: Path, include_index_entries=True):
"""
return dict of directory properties to be used in pandoc template
"""
post = {
'title': filepath.name,
'content_after_search': False,
'automatic_index': True,
'search_bar': True,
'tags': [],
}
if 'index.md' in filepath.iterdir():
with open(filepath.joinpath('index.md'), encoding='utf-8') as file_pointer:
for key, val in frontmatter.load(file_pointer).to_dict():
post[key] = val
post['is_dir'] = True
if include_index_entries:
post['index_entries'] = self._get_index_entries(filepath)
return post
def _get_index_entries(self, filepath):
"""
return sorted list of index entries. alphabetically sorted, folders first
"""
entries = []
for path in filepath.iterdir():
print(f'{path=}')
if path.is_dir():
entry = self._get_directory_properties(path, include_index_entries=False)
else:
entry = self._get_file_properties(path)
entry['path'] = self._get_output_filepath(path)['web']
entries.append(entry)
#print(f"FileMap._get_index_entries({filepath=}): {entry=}")
2023-09-17 19:18:24 +00:00
entries.sort(key=lambda entry: str(entry.get('title', '')).lower())
2024-01-02 18:22:15 +00:00
entries.sort(key=lambda entry: entry['is_dir'], reverse=True)
return entries
def _get_file_properties(self, filepath):
#print(f"FileMap._get_file_properties({filepath=}")
post = { 'title': filepath.name }
if filepath.suffix == '.md':
with open(filepath, encoding='utf-8') as file_pointer:
post = frontmatter.load(file_pointer).to_dict()
# don't store file contents in memory
if 'content' in post.keys():
del post['content']
post['is_dir'] = False
return post
def _get_output_filepath(self, input_filepath):
def webpath(filepath):
return Path('/notes').joinpath(filepath.relative_to(self.output_dir))
r = {}
r['raw'] = self.output_dir.joinpath(input_filepath.relative_to(self.input_dir))
r['web'] = webpath(r['raw'])
if input_filepath.is_dir():
return r
if input_filepath.suffix == '.md':
r['html'] = self.output_dir.joinpath(
input_filepath.relative_to(self.input_dir)
).with_suffix('.html')
r['web'] = webpath(r['html'])
elif self.is_plaintext(input_filepath):
r['html'] = self.output_dir.joinpath(
input_filepath.relative_to(self.input_dir)
).with_suffix(input_filepath.suffix + '.html')
r['raw'] = self.output_dir.joinpath(input_filepath.relative_to(self.input_dir))
r['web'] = webpath(r['html'])
#print(f"{r=}")
return r
def to_list(self):
2023-09-17 19:18:24 +00:00
return [ val for _, val in self._map.items() ]
def to_search_data(self):
2024-01-02 18:22:15 +00:00
"""
returns list of every file in map
"""
r = []
for _, val in self._map.items():
r.append({
'title': val.get('title', ''),
'tags': val.get('tags', []),
'path': str(val['dst_path']['web']),
'is_dir': val['is_dir']
})
return r
@staticmethod
def is_plaintext(filename):
"""
check if file is a plaintext format, such as html, css, etc,
return boolean
"""
return re.match(r'^text/', magic.from_file(str(filename), mime=True)) is not None