189 lines
5.8 KiB
Python
189 lines
5.8 KiB
Python
from pathlib import Path
|
|
import frontmatter
|
|
import copy
|
|
import magic
|
|
import regex as re
|
|
|
|
|
|
class FileMap:
    """
    Central registry of per-path properties for the notes build.

    Properties are read lazily from disk the first time a path is requested
    (front matter for Markdown files and for a directory's ``index.md``,
    plain defaults for everything else), cached in an internal map keyed by
    the stringified path, and returned as deep copies so callers can mutate
    them freely. Unless ``raw=True`` is requested, the ``tags`` of every
    ancestor directory up to ``input_dir`` are appended to a path's own tags.
    """

    # URL prefix under which the contents of ``output_dir`` are served.
    WEB_ROOT = Path('/notes')

    def __init__(self, input_dir, output_dir):
        """
        input_dir: root of the source tree (str or Path)
        output_dir: root of the generated tree (str or Path)
        """
        self._map = {}
        self.input_dir = Path(input_dir)
        self.output_dir = Path(output_dir)

    @staticmethod
    def _path_to_key(path):
        """Return the normalised string used as the key of the internal map."""
        return str(Path(path))

    def get(self, filepath, default=None, raw=False):
        """
        Get the properties of the file or directory at ``filepath``.

        The path is added to the map first if it is not already known.
        Pass ``raw=True`` to skip inheriting tags from parent directories.
        ``default`` is only returned if the path is still missing from the
        map after it has been added. Returns a deep copy of the stored dict.
        """
        # TODO maybe store properties of a file once it's in built and mark it as built? might save time but also cba
        # Accept str as well as Path, like add() and _path_to_key() do.
        filepath = Path(filepath)
        key = self._path_to_key(filepath)

        if key not in self._map:
            self.add(filepath)

        properties = copy.deepcopy(self._map.get(key, default))

        # The input root has no ancestors inside the tree to inherit from;
        # walking further up would leave input_dir and fail in relative_to().
        if raw or filepath == self.input_dir:
            return properties

        # Path.parents is finite, so this cannot spin forever on an absolute
        # path the way a hand-rolled ``parent = parent.parent`` loop can.
        for parent in filepath.parents:
            if parent == Path('.'):
                break

            parent_properties = self.get(parent, raw=True)
            # TODO inherit any property that isn't defined, append any lists that exist
            # ``or []`` tolerates an explicit ``tags: null`` in front matter.
            own_tags = properties.get('tags') or []
            inherited_tags = parent_properties.get('tags') or []
            properties['tags'] = own_tags + inherited_tags

            if parent == self.input_dir:
                break

        return properties

    def add(self, filepath):
        """
        Read the properties of ``filepath`` from disk and store them in the map.

        Besides the properties read from the file or directory, the stored
        dict gets ``src_path`` (the input path) and ``dst_path`` (the dict
        returned by ``_get_output_filepath``). An existing entry for the same
        path is overwritten.
        """
        filepath = Path(filepath)

        if filepath.is_dir():
            properties = self._get_directory_properties(filepath)
        else:
            properties = self._get_file_properties(filepath)

        properties['src_path'] = filepath
        properties['dst_path'] = self._get_output_filepath(filepath)

        self._map[self._path_to_key(filepath)] = properties

    def _get_directory_properties(self, filepath: Path, include_index_entries=True):
        """
        Return dict of directory properties to be used in pandoc template.

        Starts from a set of defaults and overrides them with whatever the
        directory's ``index.md`` front matter defines, if that file exists.
        With ``include_index_entries=True`` the dict also gets an
        ``index_entries`` list describing the directory's direct children.
        """
        post = {
            'title': filepath.name,
            'content_after_search': False,
            'automatic_index': True,
            'search_bar': True,
            'tags': [],
        }

        # iterdir() yields Path objects, so testing a plain string against it
        # never matches; check for the file itself instead.
        index_path = filepath.joinpath('index.md')
        if index_path.is_file():
            with open(index_path, encoding='utf-8') as file_pointer:
                # to_dict() returns a mapping; update() copies key/value
                # pairs rather than iterating over the keys alone.
                post.update(frontmatter.load(file_pointer).to_dict())

        post['is_dir'] = True

        if include_index_entries:
            post['index_entries'] = self._get_index_entries(filepath)

        return post

    def _get_index_entries(self, filepath):
        """
        Return sorted list of index entries for the children of ``filepath``.

        Folders come first; within folders and within files the entries are
        ordered case-insensitively by title. Each entry is the child's
        property dict plus a ``path`` key holding its web path.
        """
        entries = []

        for path in filepath.iterdir():
            if path.is_dir():
                # Children are listed one level deep only.
                entry = self._get_directory_properties(path, include_index_entries=False)
            else:
                entry = self._get_file_properties(path)

            entry['path'] = self._get_output_filepath(path)['web']
            entries.append(entry)

        # ``not is_dir`` is False for directories, which sorts them first.
        entries.sort(
            key=lambda entry: (not entry['is_dir'], str(entry.get('title', '')).lower())
        )

        return entries

    def _get_file_properties(self, filepath):
        """
        Return dict of properties for a single file.

        Markdown files yield their front matter (without the body);
        any other file yields just a title equal to its file name.
        ``is_dir`` is always set to False.
        """
        post = {'title': filepath.name}

        if filepath.suffix == '.md':
            with open(filepath, encoding='utf-8') as file_pointer:
                # The front matter replaces the default dict entirely.
                post = frontmatter.load(file_pointer).to_dict()

        # don't store file contents in memory
        post.pop('content', None)
        post['is_dir'] = False

        return post

    def _web_path(self, filepath):
        """Map a path inside ``output_dir`` to its path under ``WEB_ROOT``."""
        return self.WEB_ROOT.joinpath(filepath.relative_to(self.output_dir))

    def _get_output_filepath(self, input_filepath):
        """
        Return a dict of output locations for ``input_filepath``.

        Keys:
          raw  -- the mirrored location of the input inside ``output_dir``
          html -- the generated HTML page (Markdown and plaintext files only)
          web  -- the web path of the HTML page if there is one, else of raw

        Raises ValueError if ``input_filepath`` is not inside ``input_dir``.
        """
        raw_path = self.output_dir.joinpath(input_filepath.relative_to(self.input_dir))
        paths = {'raw': raw_path, 'web': self._web_path(raw_path)}

        if input_filepath.is_dir():
            return paths

        if input_filepath.suffix == '.md':
            # notes.md -> notes.html
            paths['html'] = raw_path.with_suffix('.html')
            paths['web'] = self._web_path(paths['html'])
        elif self.is_plaintext(input_filepath):
            # Keep the original extension so the page does not collide with
            # the raw copy: style.css -> style.css.html
            paths['html'] = raw_path.with_suffix(input_filepath.suffix + '.html')
            paths['web'] = self._web_path(paths['html'])

        return paths

    def to_list(self):
        """Return the stored property dicts of every path in the map."""
        return list(self._map.values())

    def to_search_data(self):
        """
        Return a list with one search record per path in the map.

        Each record holds the title, tags, web path (as a string) and
        ``is_dir`` flag of the entry.
        """
        return [
            {
                'title': val.get('title', ''),
                'tags': val.get('tags', []),
                'path': str(val['dst_path']['web']),
                'is_dir': val['is_dir'],
            }
            for val in self._map.values()
        ]

    @staticmethod
    def is_plaintext(filename):
        """
        Check if file is a plaintext format, such as html, css, etc.

        Returns True when libmagic reports a ``text/*`` MIME type for the file.
        """
        return magic.from_file(str(filename), mime=True).startswith('text/')
|