2020-11-20 13:22:00 -05:00
|
|
|
#!/usr/bin/env python3
|
2021-01-30 19:02:40 -05:00
|
|
|
import dataclasses
|
|
|
|
import mimetypes
|
2020-11-20 13:22:00 -05:00
|
|
|
import re
|
|
|
|
import sqlite3
|
|
|
|
from collections import defaultdict
|
2020-11-30 07:49:56 -05:00
|
|
|
from datetime import datetime
|
2020-11-20 13:22:00 -05:00
|
|
|
from pathlib import Path
|
2020-11-20 14:29:02 -05:00
|
|
|
from shutil import copy
|
2020-11-20 13:22:00 -05:00
|
|
|
from shutil import rmtree
|
2021-01-30 19:02:40 -05:00
|
|
|
from typing import Dict
|
|
|
|
from typing import List
|
2020-11-20 13:22:00 -05:00
|
|
|
from typing import Optional
|
2021-01-30 19:02:40 -05:00
|
|
|
from typing import Set
|
2020-11-20 13:22:00 -05:00
|
|
|
|
|
|
|
|
2021-01-07 12:31:18 -05:00
|
|
|
def contains_word(word: str, text: str) -> bool:
    """
    Check whether `text` contains `word`, as a whole word.

    Case insensitive. `word` is matched literally: any regular expression
    metacharacters it contains are escaped rather than interpreted.
    """
    # Escape the word so that characters such as "." or "(" cannot change the
    # meaning of the pattern (or make it fail to compile).
    pattern = rf"\b{re.escape(word.lower())}\b"
    return re.search(pattern, text.lower()) is not None
|
|
|
|
|
|
|
|
|
2020-11-20 13:22:00 -05:00
|
|
|
def slugify(text):
    """
    Turn `text` into a lowercase, dash-separated slug.

    Every run of non-word characters and underscores collapses into a single
    dash, and dashes at either end of the result are trimmed.
    """
    lowered = text.lower()
    dashed = re.sub(r"[\W_]+", "-", lowered)
    return dashed.strip("-")
|
|
|
|
|
|
|
|
|
2021-01-30 19:02:40 -05:00
|
|
|
@dataclasses.dataclass
class Note:
    """A single note, together with the title of the folder it lives in."""

    # Identifier of the note itself.
    id: str
    # Identifier and title of the folder that contains the note.
    parent_id: str
    parent_title: str
    # The note's own title and body text.
    title: str
    body: str
    # When the note was last updated.
    updated_time: datetime
    # Tag titles attached to the note; every instance gets its own list.
    tags: List[str] = dataclasses.field(default_factory=list)

    def get_url(self):
        """Build the note's relative URL from its folder and title slugs."""
        url_parts = (slugify(self.parent_title), slugify(self.title))
        return "/".join(url_parts)
|
|
|
|
|
|
|
|
|
2021-01-30 19:02:40 -05:00
|
|
|
@dataclasses.dataclass
class Resource:
    """An attachment (resource) referenced from notes."""

    title: str
    # The actual extension that the file stored in Joplin has.
    extension: str
    mimetype: str

    @property
    def derived_ext(self):
        """
        Guess a file extension from the resource's mime type.

        Returns the guessed extension, or an empty string when no extension
        is known for the mime type.
        """
        guessed = mimetypes.guess_extension(self.mimetype, strict=False)
        return guessed or ""
|
|
|
|
|
|
|
|
|
2020-11-20 13:22:00 -05:00
|
|
|
class JoplinExporter:
    """
    The main exporter class.

    Reads folders, tags, resources and notes from Joplin's SQLite database,
    writes every publishable note as a Markdown file with TOML front matter
    under `content_dir`, and copies the resources that notes link to into
    `static_dir`.
    """

    content_dir = Path("content")
    static_dir = Path("static/resources")
    joplin_dir = Path.home() / ".config/joplin-desktop"

    # A note is left unpublished when one of these appears as a whole word in
    # its title, or exactly as one of its tags.
    hidden_keywords = ("private", "wip", "draft")

    def __init__(self):
        # All state is created up front so the lookup helpers can be called
        # safely even before `read_data()` has run.
        self.folders: Dict[str, str] = {}
        self.notes: Dict[str, List["Note"]] = defaultdict(list)
        self.note_lookup_dict: Dict[str, "Note"] = {}
        self.resources: Dict[str, "Resource"] = {}
        self.used_resources: Set[str] = set()

    @staticmethod
    def _toml_escape(value: str) -> str:
        """Escape `value` for use inside a double-quoted TOML basic string."""
        # Backslashes must be handled first so the escapes added afterwards
        # are not escaped a second time.
        return (
            value.replace("\\", "\\\\")
            .replace('"', '\\"')
            .replace("\n", "\\n")
            .replace("\r", "\\r")
        )

    def clean_content_dir(self):
        """
        Reset the content directory to a known state to begin.

        Both output directories are removed if they exist and then recreated
        (including missing parents), so this also works on a first run. A
        root `_index.md` that redirects to the welcome page is written last.
        """
        directories = (self.content_dir, self.static_dir)
        for directory in directories:
            if directory.exists():
                rmtree(directory)
        for directory in directories:
            directory.mkdir(parents=True, exist_ok=True)

        index_path = self.content_dir / "_index.md"
        with open(index_path, mode="w", encoding="utf-8") as outfile:
            outfile.write('+++\nredirect_to = "welcome/stavros-notes/"\n+++')

    def resolve_note_links(self, note: "Note") -> str:
        """
        Resolve the links between notes and replace them in the body.

        Every `](:/<32 hex chars>)` link target, optionally followed by a
        `#fragment`, is looked up first as a note and then as a resource. IDs
        that match neither are kept as they are. Returns the rewritten body.
        """

        def replacement(match):
            item_id = match.group(1)
            new_url = self.get_note_url_by_id(item_id)
            if not new_url:
                new_url = self.get_resource_url_by_id(item_id)
            if not new_url:
                new_url = item_id
            if match.group(2):
                # Keep the "#fragment" part of the original link.
                new_url += match.group(2)
            return f"](../../{new_url})"

        return re.sub(r"\]\(:/([a-f0-9]{32})(#.*?)?\)", replacement, note.body)

    def get_note_url_by_id(self, note_id: str) -> Optional[str]:
        """Return a note's relative URL by its ID, or None if it is unknown."""
        note = self.note_lookup_dict.get(note_id)
        if not note:
            return None
        return note.get_url()

    def get_resource_url_by_id(self, resource_id: str) -> Optional[str]:
        """
        Return a resource's relative URL by its ID, or None if it is unknown.

        As a side effect, a known resource is recorded as used.
        """
        resource = self.resources.get(resource_id)
        if not resource:
            return None
        # Add the resource to the set of used resources, so we can only copy
        # the resources that are used.
        self.used_resources.add(resource_id)
        return "resources/" + resource_id + resource.derived_ext

    def copy_resources(self):
        """Copy all the used resources to the output directory."""
        for resource_id in self.used_resources:
            resource = self.resources[resource_id]
            # The source file carries the extension stored in Joplin, while
            # the copy is named after the extension derived from the mime
            # type, which is what the generated links point at.
            copy(
                self.joplin_dir / "resources" / (f"{resource_id}.{resource.extension}"),
                self.static_dir / f"{resource_id}{resource.derived_ext}",
            )

    def read_data(self):
        """
        Read the data from the Joplin database.

        Populates `folders`, `resources`, `notes` (grouped by folder ID) and
        `note_lookup_dict` (keyed by note ID). The connection is closed even
        if one of the queries fails.
        """
        conn = sqlite3.connect(self.joplin_dir / "database.sqlite")
        try:
            c = conn.cursor()

            c.execute("""SELECT id, title FROM folders;""")
            self.folders = dict(c.fetchall())

            # Get the tags by ID.
            c.execute("""SELECT id, title FROM tags;""")
            tags = dict(c.fetchall())
            # Get the tag IDs for each note ID.
            c.execute("""SELECT note_id, tag_id FROM note_tags;""")
            note_tags = defaultdict(list)
            for note_id, tag_id in c.fetchall():
                # Ignore associations that point at a tag that is not in the
                # tags table instead of aborting the whole export.
                if tag_id in tags:
                    note_tags[note_id].append(tags[tag_id])

            c.execute("""SELECT id, title, mime, file_extension FROM resources;""")
            self.resources = {
                resource_id: Resource(
                    title=title,
                    extension=ext,
                    mimetype=mime,
                )
                for resource_id, title, mime, ext in c.fetchall()
            }

            c.execute("""SELECT id, parent_id, title, body, updated_time FROM notes;""")
            self.notes = defaultdict(list)
            self.note_lookup_dict = {}
            for note_id, parent_id, title, body, updated_time in c.fetchall():
                parent_title = self.folders.get(parent_id)
                if parent_title is None:
                    # A note without a known folder has no place in the
                    # output tree, so it is skipped rather than crashing.
                    print(f"Note has no known folder, skipping: {title}.")
                    continue
                note = Note(
                    note_id,
                    parent_id,
                    parent_title,
                    title,
                    body,
                    # The stored value is divided by 1000 to get the seconds
                    # that `fromtimestamp` expects.
                    datetime.fromtimestamp(updated_time / 1000),
                    tags=note_tags[note_id],
                )
                self.notes[note.parent_id].append(note)
                self.note_lookup_dict[note.id] = note
        finally:
            conn.close()

    def _is_private(self, note: "Note") -> bool:
        """
        Check whether a note is private.

        This checks the note's title and tags against `hidden_keywords` and
        returns whether it should be considered private (True) or whether it
        should be published (False). It prints nothing; reporting is left to
        the caller so each skipped note is reported exactly once.
        """
        return any(
            contains_word(keyword, note.title) or keyword in note.tags
            for keyword in self.hidden_keywords
        )

    def _write_note(self, note: "Note", weight: int) -> None:
        """Write one note as a Markdown file with TOML front matter."""
        title = self._toml_escape(note.title)
        body = self.resolve_note_links(note)
        note_path = self.content_dir / (note.get_url() + ".md")
        with note_path.open(mode="w", encoding="utf-8") as outfile:
            outfile.write(
                f"""+++
title = "{title}"
weight = {weight}
sort_by = "weight"
insert_anchor_links = "right"
+++
{body}

* * *

<p style="font-size:80%; font-style: italic">
Last updated on {note.updated_time:%B %d, %Y}. For any questions/feedback,
email me at <a href="mailto:hi@stavros.io">hi@stavros.io</a>.
</p>
"""
            )

    def _write_folder_index(self, folder_dir, folder_title, weight, contents) -> None:
        """Write a folder's `_index.md`, listing its exported notes."""
        title = self._toml_escape(folder_title)
        # The template supplies the first "1. ", the separator the rest.
        contents_list = "\n1. ".join(
            f"[{note_title}](../../{url})" for note_title, url in contents
        )
        with (folder_dir / "_index.md").open(mode="w", encoding="utf-8") as outfile:
            outfile.write(
                f"""+++
title = "{title}"
weight = {weight}
sort_by = "weight"
insert_anchor_links = "right"
+++
## Contents

Click on a link in the list below to go to that page:

1. {contents_list}
"""
            )

    def export(self):
        """Export all the notes to a static site."""
        self.read_data()

        # Private notes shouldn't be published.
        folder_list = [
            folder
            for folder in self.folders.items()
            if not contains_word("private", folder[1])
        ]

        # "Welcome" gets the sort key "0", which places it ahead of every
        # title that starts with a letter, so it receives the lowest weight.
        folder_list.sort(
            key=lambda x: x[1].lower().strip() if x[1] != "Welcome" else "0"
        )

        self.clean_content_dir()

        for folder_counter, (folder_id, folder_title) in enumerate(
            folder_list, start=1
        ):
            folder_dir = self.content_dir / slugify(folder_title)
            folder_dir.mkdir(parents=True)
            contents = []
            note_counter = 0
            for note in sorted(self.notes[folder_id], key=lambda n: n.title):
                if self._is_private(note):
                    print(
                        f"Note is unpublished, skipping: {folder_title} - {note.title}."
                    )
                    continue

                print(f"Exporting {folder_title} - {note.title}...")
                note_counter += 1
                contents.append((note.title, note.get_url()))
                self._write_note(note, note_counter)

            self._write_folder_index(
                folder_dir, folder_title, folder_counter, contents
            )

        self.copy_resources()
|
|
|
|
|
2020-11-20 13:22:00 -05:00
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Run a full export when the file is executed as a script.
    print("Exporting Joplin database...")
    exporter = JoplinExporter()
    exporter.export()
|