2020-11-20 13:22:00 -05:00
|
|
|
#!/usr/bin/env python3
|
2021-01-30 19:02:40 -05:00
|
|
|
import dataclasses
|
|
|
|
import mimetypes
|
2020-11-20 13:22:00 -05:00
|
|
|
import re
|
|
|
|
import sqlite3
|
|
|
|
from collections import defaultdict
|
2020-11-30 07:49:56 -05:00
|
|
|
from datetime import datetime
|
2020-11-20 13:22:00 -05:00
|
|
|
from pathlib import Path
|
2020-11-20 14:29:02 -05:00
|
|
|
from shutil import copy
|
2020-11-20 13:22:00 -05:00
|
|
|
from shutil import rmtree
|
2021-01-30 19:02:40 -05:00
|
|
|
from typing import Dict
|
|
|
|
from typing import List
|
2020-11-20 13:22:00 -05:00
|
|
|
from typing import Optional
|
2021-01-30 19:02:40 -05:00
|
|
|
from typing import Set
|
2021-11-19 21:13:37 -05:00
|
|
|
from typing import Union
|
2020-11-20 13:22:00 -05:00
|
|
|
|
|
|
|
|
2021-01-07 12:31:18 -05:00
|
|
|
def contains_word(word: str, text: str) -> bool:
    """
    Check whether `text` contains `word`, as a whole word.

    Case insensitive. `word` is matched literally: it is escaped before being
    placed in the pattern, so regular-expression metacharacters in it (".",
    "(", "+", ...) can neither change what is matched nor raise `re.error`.
    """
    # Both sides are lowercased, which is what makes the search case
    # insensitive; the `\b` anchors restrict it to whole words.
    pattern = rf"\b{re.escape(word.lower())}\b"
    return re.search(pattern, text.lower()) is not None
|
|
|
|
|
|
|
|
|
2020-11-20 13:22:00 -05:00
|
|
|
def slugify(text):
    """Turn `text` into a lowercase, dash-separated slug."""
    # Every run of non-word characters and/or underscores becomes one dash...
    dashed = re.sub(r"[\W_]+", "-", text.lower())
    # ...and dashes left dangling at either end are dropped.
    return dashed.strip("-")
|
|
|
|
|
|
|
|
|
2021-11-19 21:13:37 -05:00
|
|
|
@dataclasses.dataclass
class Folder:
    """A folder record: its own ID, its parent's ID and its title."""

    id: str
    parent_id: str
    title: str

    def is_private(self) -> bool:
        """Tell whether the folder's title contains the word "private"."""
        return contains_word("private", self.title)

    def get_url(self) -> str:
        """Return the folder's relative URL, i.e. its slugified title."""
        return slugify(self.title)

    def get_summary_line(self, level: int) -> str:
        """Build the summary file line for this folder at depth `level`."""
        indent = " " * (level - 1)
        return f"{indent}- [{self.title}]({self.get_url()}/index.md)"

    def __lt__(self, other: Union["Folder", "Note"]) -> bool:
        """Order folders by lowercased title, and ahead of any note."""
        if not isinstance(other, Note):
            return self.title.lower() < other.title.lower()
        # Folders always come before notes.
        return True

    def __repr__(self) -> str:
        """Return a short, readable representation."""
        return f"Folder: <{self.title}>"
|
|
|
|
|
|
|
|
|
2021-01-30 19:02:40 -05:00
|
|
|
@dataclasses.dataclass
class Note:
    """A helper type for a note."""

    id: str
    # The folder that directly contains this note.
    folder: Folder
    title: str
    body: str
    updated_time: datetime
    tags: List[str] = dataclasses.field(default_factory=list)

    def is_private(self) -> bool:
        """
        Check whether a note is private.

        A note is private when one of the hidden keywords appears in its title
        as a whole word (case insensitive), or is one of its tags (exact
        match). Private notes should not be published.
        """
        hidden_keywords = ("private", "wip", "draft")
        return any(
            contains_word(keyword, self.title) or keyword in self.tags
            for keyword in hidden_keywords
        )

    def get_url(self) -> str:
        """Return the note's relative URL: `<folder slug>/<note slug>`."""
        return slugify(self.folder.title) + "/" + slugify(self.title)

    def get_summary_line(self, level: int) -> str:
        """
        Get the appropriate summary file line for this note.

        The introduction is level 0, and is treated differently here: it gets
        neither indentation nor a list bullet.
        """
        indent = " " * (level - 1)
        bullet = "- " if level > 0 else ""
        return f"{indent}{bullet}[{self.title}]({self.get_url()}.md)"

    def __lt__(self, other: Union["Folder", "Note"]) -> bool:
        """
        Support comparison, for sorting.

        Notes are ordered among themselves by lowercased title. A note is
        never "less than" a folder, which keeps this consistent with
        `Folder.__lt__` (a folder is always less than a note).
        """
        if isinstance(other, Note):
            return self.title.lower() < other.title.lower()
        # Anything that is not a note is a folder, and folders always come
        # before notes. Comparing titles here would let a note sort ahead of a
        # folder while the folder also claims to sort ahead of the note.
        return False

    def __repr__(self) -> str:
        """Pretty-print this class."""
        return f"Note: <{self.title}>"
|
2020-11-20 13:22:00 -05:00
|
|
|
|
|
|
|
|
2021-01-30 19:02:40 -05:00
|
|
|
@dataclasses.dataclass
class Resource:
    """A resource record: its title, stored extension and mime type."""

    title: str
    # The actual extension that the file stored in Joplin has.
    extension: str
    mimetype: str

    @property
    def derived_ext(self):
        """Return the extension guessed from the mime type, or "" if none."""
        guessed = mimetypes.guess_extension(self.mimetype, strict=False)
        if guessed is None:
            return ""
        return guessed
|
|
|
|
|
|
|
|
|
2020-11-20 13:22:00 -05:00
|
|
|
class JoplinExporter:
    """
    The main exporter class.

    `export` reads folders, tags, resources and notes from the Joplin
    database, leaves out everything that is private, writes the remaining
    notes as Markdown files (plus per-folder indexes and a SUMMARY.md) under
    `content_dir`, and copies the resources those notes link to into
    `static_dir`.
    """

    content_dir = Path("content")
    static_dir = Path("static/resources")
    joplin_dir = Path.home() / ".config/joplin-desktop"

    def __init__(self):
        """Create the empty lookup tables that `read_data` fills in."""
        # A mapping of {"resource_id": Resource()}.
        self.resources: Dict[str, Resource] = {}
        # The IDs of the resources that an exported note actually links to.
        self.used_resources: Set[str] = set()

        # A mapping of {"note_id": Note()}.
        self.note_lookup_dict: Dict[str, Note] = {}

        # A mapping of {"folder_id": Folder()}.
        self.folders: Dict[str, Folder] = {}

        # A mapping of {"folder_id": [Note(), Note()]}.
        self.notes: Dict[str, List[Note]] = defaultdict(list)

    def clean_content_dir(self):
        """Reset the content directory to a known state to begin."""
        rmtree(self.content_dir, ignore_errors=True)
        rmtree(self.static_dir, ignore_errors=True)
        self.content_dir.mkdir(parents=True)
        self.static_dir.mkdir(parents=True)

    def resolve_note_links(self, note: Note) -> str:
        """
        Resolve the links between notes and replace them in the body.

        Every `](:/<32 hex chars>)` link target (optionally followed by a
        `#fragment`) is rewritten to an absolute site path: the note's page if
        the ID is a known note, else the resource's file if it is a known
        resource, else the bare ID. Returns the rewritten body.
        """

        def replacement(match):
            item_id = match.group(1)
            new_url = self.get_note_url_by_id(item_id)
            if new_url:
                new_url += ".html"
            else:
                # Not a (published) note; try a resource, and fall back to the
                # raw ID when it is neither.
                new_url = self.get_resource_url_by_id(item_id)
                if not new_url:
                    new_url = item_id
            if match.group(2):
                # Keep the "#fragment" part of the original link.
                new_url += match.group(2)
            return f"](/{new_url})"

        return re.sub(r"\]\(:/([a-f0-9]{32})(#.*?)?\)", replacement, note.body)

    def get_note_url_by_id(self, note_id: str) -> Optional[str]:
        """Return a note's relative URL by its ID, or None if it is unknown."""
        note = self.note_lookup_dict.get(note_id)
        if not note:
            return None
        return note.get_url()

    def get_resource_url_by_id(self, resource_id: str) -> Optional[str]:
        """
        Return a resource's relative URL by its ID, or None if it is unknown.

        As a side effect, a known resource is recorded as used.
        """
        resource = self.resources.get(resource_id)
        if not resource:
            return None
        # Add the resource to the set of used resources, so we can only copy
        # the resources that are used.
        self.used_resources.add(resource_id)
        return "resources/" + resource_id + resource.derived_ext

    def copy_resources(self):
        """Copy all the used resources to the output directory."""
        for resource_id in self.used_resources:
            resource = self.resources[resource_id]
            # Only add the dot when there is an extension; a resource without
            # one would otherwise be looked up under a bogus "<id>." name.
            if resource.extension:
                source_name = f"{resource_id}.{resource.extension}"
            else:
                source_name = resource_id
            copy(
                self.joplin_dir / "resources" / source_name,
                self.static_dir / f"{resource_id}{resource.derived_ext}",
            )

    def read_data(self):
        """
        Read the data from the Joplin database.

        Fills `folders`, `resources`, `notes` and `note_lookup_dict`. Private
        folders, notes whose parent folder was left out, and private notes are
        skipped.
        """
        conn = sqlite3.connect(self.joplin_dir / "database.sqlite")
        # Close the connection even if one of the queries below raises.
        try:
            c = conn.cursor()

            c.execute("""SELECT id, title, parent_id FROM folders;""")
            all_folders = [
                Folder(folder_id, parent_id, title)
                for folder_id, title, parent_id in c.fetchall()
            ]
            # NOTE(review): only the folder's own title is checked, so a
            # sub-folder of a private folder is still exported unless it is
            # itself marked private - confirm that this is intended.
            self.folders = {
                folder.id: folder for folder in all_folders if not folder.is_private()
            }

            # Get the tags by ID.
            c.execute("""SELECT id, title FROM tags;""")
            tags = dict(c.fetchall())
            # Get the tag titles for each note ID.
            c.execute("""SELECT note_id, tag_id FROM note_tags;""")
            note_tags = defaultdict(list)
            for note_id, tag_id in c.fetchall():
                tag_title = tags.get(tag_id)
                if tag_title is None:
                    # The link points at a tag that has no (titled) row; there
                    # is nothing to attach to the note, so skip it instead of
                    # failing the whole export with a KeyError.
                    continue
                note_tags[note_id].append(tag_title)

            c.execute("""SELECT id, title, mime, file_extension FROM resources;""")
            self.resources = {
                resource_id: Resource(
                    title=title,
                    extension=ext,
                    mimetype=mime,
                )
                for resource_id, title, mime, ext in c.fetchall()
            }

            c.execute("""SELECT id, parent_id, title, body, updated_time FROM notes;""")
            for note_id, parent_id, title, body, updated_time in c.fetchall():
                if parent_id not in self.folders:
                    # This note is in a private folder, continue.
                    continue

                note = Note(
                    note_id,
                    self.folders[parent_id],
                    title,
                    body,
                    # The stored value is divided by 1000 before being handed
                    # to `fromtimestamp` (presumably milliseconds - confirm).
                    datetime.fromtimestamp(updated_time / 1000),
                    tags=note_tags[note_id],
                )
                if note.is_private():
                    continue

                self.notes[note.folder.id].append(note)
                self.note_lookup_dict[note.id] = note
        finally:
            conn.close()

    def write_summary(self):
        """Write the SUMMARY.md that mdBook needs."""
        # The note tree is a list of notes with their parents:
        # [
        #   [parent1, parent2, note1]
        #   [parent1, parent3, note2]
        # ]
        # Then, we sort these by alphabetical order, and we're done.
        note_tree: List[List[Union[Note, Folder]]] = []
        introduction: Optional[Note] = None  # The "introduction" note.
        # The folder chain of the previously handled note. None never equals
        # a list, so the first note always gets its headline emitted.
        folders: Optional[List[Folder]] = None
        for note_list in self.notes.values():
            for note in note_list:
                if note.folder.title == "Welcome":
                    introduction = note
                    continue

                # Walk up from the note's folder through every known parent,
                # building [outermost folder, ..., folder, note].
                note_item: List[Union[Note, Folder]] = [note]
                folder = note.folder
                while folder:
                    note_item.insert(0, folder)
                    folder = self.folders.get(folder.parent_id)

                # Append the folders to the list if they weren't there before, as that's
                # the only way this algorithm can generate headlines.
                if folders != note_item[:-1]:
                    folders = note_item[:-1]
                    note_tree.append(folders)

                note_tree.append(note_item)
        note_tree.sort()

        # Generate the summary file.
        items = []
        for note_list in note_tree:
            # The nesting level is the length of the chain.
            level = len(note_list)
            entry = note_list[-1]
            if not isinstance(entry, Folder):
                # This is a regular note. (When the last item is a folder
                # instead, the line is a header.)
                print(f"Exporting {entry.title}...")
            items.append(entry.get_summary_line(level))

        # Written as UTF-8, like the note files, so that titles with non-ASCII
        # characters do not depend on the platform's default encoding.
        with (self.content_dir / "SUMMARY.md").open("w", encoding="utf-8") as outfile:
            outfile.write("# Summary\n\n")
            # Special-case the introduction.
            if introduction:
                outfile.write(introduction.get_summary_line(0) + "\n")
            outfile.write("\n".join(items))

    def export(self):
        """Export all the notes to a static site."""
        self.read_data()

        folder_list = sorted(self.folders.values())

        self.clean_content_dir()

        for folder in folder_list:
            contents = []
            folder_dir = self.content_dir / folder.get_url()
            folder_dir.mkdir(parents=True, exist_ok=True)
            # `.get` keeps the defaultdict from growing an empty entry for
            # every folder that has no notes.
            folder_notes = self.notes.get(folder.id, [])
            for note in sorted(folder_notes, key=lambda n: n.title):
                print(f"Exporting {folder.title} - {note.title}...")
                contents.append((note.title, f"{note.get_url()}.html"))
                with (self.content_dir / (note.get_url() + ".md")).open(
                    mode="w", encoding="utf-8"
                ) as outfile:
                    outfile.write(
                        f"""# {note.title}

{self.resolve_note_links(note)}

* * *

<p style="font-size:80%; font-style: italic">
Last updated on {note.updated_time:%B %d, %Y}. For any questions/feedback,
email me at <a href="mailto:hi@stavros.io">hi@stavros.io</a>.
</p>
"""
                    )

            # The folder's index page: a numbered list of its notes, written
            # as UTF-8 for the same reason as the note files.
            with (folder_dir / "index.md").open(mode="w", encoding="utf-8") as outfile:
                contents_list = "\n1. ".join(
                    f"[{title}](../../{url})" for title, url in contents
                )
                outfile.write(
                    f"""# Contents

Click on a link in the list below to go to that page:

1. {contents_list}
"""
                )

        self.write_summary()
        self.copy_resources()
|
|
|
|
|
2020-11-20 13:22:00 -05:00
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: announce the run, then export everything.
    print("Exporting Joplin database...")
    exporter = JoplinExporter()
    exporter.export()
|