joplin-mdbooks-website/joplinexport.py

214 lines
6.8 KiB
Python
Raw Normal View History

2020-11-20 13:22:00 -05:00
#!/usr/bin/env python3
import re
import sqlite3
from collections import defaultdict
from contextlib import closing
from datetime import datetime
from pathlib import Path
from shutil import copy
from shutil import rmtree
from typing import Optional
2021-01-07 12:31:18 -05:00
def contains_word(word: str, text: str) -> bool:
    """
    Check whether `text` contains `word`, as a whole word.

    Case insensitive. `word` is matched literally: any regular-expression
    metacharacters it contains are escaped rather than interpreted.
    """
    # Escape before interpolating, so that a word such as "a.b" cannot match
    # "axb" and an unbalanced "(" cannot raise re.error. Only the word itself
    # is lowercased, never the surrounding pattern.
    pattern = rf"\b{re.escape(word.lower())}\b"
    return re.search(pattern, text.lower()) is not None
2020-11-20 13:22:00 -05:00
def slugify(text):
    """Turn `text` into a lowercase, dash-separated slug."""
    lowered = text.lower()
    # Each run of non-word characters and/or underscores collapses into a
    # single dash; dashes left at either end are then trimmed off.
    dashed = re.sub(r"[\W_]+", "-", lowered)
    return dashed.strip("-")
class Note:
    """A helper type for a note."""

    def __init__(self, id, parent_id, parent_title, title, body, updated_time, tags=None):
        """
        Store the note's fields.

        `updated_time` is passed to `datetime.fromtimestamp`, so it must be a
        POSIX timestamp in seconds; it is stored as a naive local datetime.
        `tags` is the list of tag titles for the note; when omitted, the note
        gets its own fresh empty list.
        """
        self.id = id
        self.parent_id = parent_id
        self.parent_title = parent_title
        self.title = title
        self.body = body
        self.updated_time = datetime.fromtimestamp(updated_time)
        # A `tags=[]` default would be created once and shared by every note
        # constructed without tags, so mutating one note's tags would leak
        # into the others. Build a new list per instance instead.
        self.tags = [] if tags is None else tags

    def get_url(self):
        """Return the note's relative URL."""
        return slugify(self.parent_title) + "/" + slugify(self.title)
class JoplinExporter:
    """The main exporter class."""

    # Directory the generated Markdown pages are written to.
    content_dir = Path("content")
    # Directory the resource files are copied into.
    static_dir = Path("static/resources")
    # Joplin data directory; `database.sqlite` and `resources/` are read from it.
    joplin_dir = Path.home() / ".config/joplin-desktop"

    @staticmethod
    def _toml_escape(text: str) -> str:
        """Escape backslashes and double quotes for a quoted TOML string."""
        # Backslashes first, so the ones added for quotes are not doubled.
        return text.replace("\\", "\\\\").replace('"', '\\"')

    @classmethod
    def _front_matter(cls, title: str, weight: int) -> str:
        """Return the `+++`-delimited front matter shared by all pages."""
        return (
            "+++\n"
            f'title = "{cls._toml_escape(title)}"\n'
            f"weight = {weight}\n"
            'sort_by = "weight"\n'
            'insert_anchor_links = "right"\n'
            "+++\n"
        )

    @classmethod
    def _render_folder_index(cls, folder_title: str, weight: int, contents) -> str:
        """
        Return the `_index.md` text for a folder.

        `contents` is a sequence of `(title, url)` pairs, one per exported note.
        """
        contents_list = "\n1. ".join(
            f"[{title}](../../{url})" for title, url in contents
        )
        return (
            cls._front_matter(folder_title, weight)
            + "## Contents\n"
            + "Click on a link in the list below to go to that page:\n"
            + f"1. {contents_list}\n"
        )

    @staticmethod
    def _is_unpublished(note) -> bool:
        """Return True if the note's title or tags mark it private or WIP."""
        return (
            contains_word("private", note.title)
            or contains_word("wip", note.title)
            or "wip" in note.tags
            or "private" in note.tags
        )

    def _render_note(self, note: "Note", weight: int) -> str:
        """Return the full page text for `note`, with links resolved."""
        return (
            self._front_matter(note.title, weight)
            + f"{self.resolve_note_links(note)}\n"
            + "* * *\n"
            + '<p style="font-size:80%; font-style: italic">\n'
            + f"Last updated on {note.updated_time:%B %d, %Y}. For any questions/feedback,\n"
            + 'email me at <a href="mailto:hi@stavros.io">hi@stavros.io</a>.\n'
            + "</p>\n"
        )

    def clean_content_dir(self):
        """Reset the content directory to a known state to begin."""
        output_dirs = (self.content_dir, self.static_dir)
        for directory in output_dirs:
            # On a first run the output directories do not exist yet, and
            # rmtree() would raise FileNotFoundError on them.
            if directory.exists():
                rmtree(directory)
        for directory in output_dirs:
            # parents=True because `static/resources` is nested and its
            # parent may be missing as well.
            directory.mkdir(parents=True)

        (self.content_dir / "_index.md").write_text(
            '+++\nredirect_to = "welcome/stavros-notes/"\n+++', encoding="utf-8"
        )

    def resolve_note_links(self, note: "Note") -> str:
        """Resolve the links between notes and replace them in the body."""

        def replacement(match):
            item_id = match.group(1)
            # Try the ID as a note, then as a resource; if it is neither,
            # keep the raw ID as the link target.
            new_url = (
                self.get_note_url_by_id(item_id)
                or self.get_resource_url_by_id(item_id)
                or item_id
            )
            return f"](../../{new_url})"

        # Matches Markdown link targets of the form `](:/<32 hex chars>)`.
        return re.sub(r"\]\(:/([a-f0-9]{32})\)", replacement, note.body)

    def get_note_url_by_id(self, note_id: str) -> Optional[str]:
        """Return a note's relative URL by its ID, or None if unknown."""
        note = self.note_lookup_dict.get(note_id)
        if not note:
            return None
        return note.get_url()

    def get_resource_url_by_id(self, resource_id: str) -> Optional[str]:
        """Return a resource's relative URL by its ID, or None if unknown."""
        resource = self.resources.get(resource_id)
        if not resource:
            return None
        # `resource` is a (title, file_extension) pair.
        return "resources/" + resource_id + "." + resource[1]

    def copy_resources(self):
        """Copy all the resources to the output directory."""
        source_dir = self.joplin_dir / "resources"
        for resource_id, (_title, extension) in self.resources.items():
            copy(source_dir / f"{resource_id}.{extension}", self.static_dir)

    def read_data(self):
        """
        Read the data from the Joplin database.

        Populates `self.folders` (id -> title), `self.resources`
        (id -> (title, file_extension)), `self.notes` (folder id -> list of
        notes) and `self.note_lookup_dict` (note id -> note).

        Raises KeyError if a note refers to a folder, or a note/tag link
        refers to a tag, that is missing from the database.
        """
        database_path = self.joplin_dir / "database.sqlite"
        # closing() releases the connection even if a query or lookup below
        # raises; a sqlite3 connection used directly as a context manager
        # only manages the transaction and does not close itself.
        with closing(sqlite3.connect(database_path)) as conn:
            c = conn.cursor()

            c.execute("""SELECT id, title FROM folders;""")
            self.folders = dict(c.fetchall())

            # Get the tags by ID.
            c.execute("""SELECT id, title FROM tags;""")
            tags = dict(c.fetchall())

            # Get the tag titles for each note ID.
            c.execute("""SELECT note_id, tag_id FROM note_tags;""")
            note_tags = defaultdict(list)
            for note_id, tag_id in c.fetchall():
                note_tags[note_id].append(tags[tag_id])

            c.execute("""SELECT id, title, file_extension FROM resources;""")
            self.resources = {
                resource_id: (title, extension)
                for resource_id, title, extension in c.fetchall()
            }

            c.execute("""SELECT id, parent_id, title, body, updated_time FROM notes;""")
            self.notes = defaultdict(list)
            self.note_lookup_dict = {}
            for note_id, parent_id, title, body, updated_time in c.fetchall():
                note = Note(
                    note_id,
                    parent_id,
                    self.folders[parent_id],
                    title,
                    body,
                    # Note expects seconds; presumably the database stores
                    # milliseconds -- TODO confirm against the Joplin schema.
                    updated_time / 1000,
                    tags=note_tags[note_id],
                )
                self.notes[note.parent_id].append(note)
                self.note_lookup_dict[note.id] = note

    def export(self):
        """Export all the notes to a static site."""
        self.read_data()

        # Private notes shouldn't be published.
        folder_list = [
            folder
            for folder in self.folders.items()
            if not contains_word("private", folder[1])
        ]
        # Sort "Welcome" first: its key "0" sorts ahead of any title that
        # starts with a letter, so it receives the lowest weight.
        folder_list.sort(key=lambda x: x[1].lower().strip() if x[1] != "Welcome" else "0")

        self.clean_content_dir()
        self.copy_resources()

        for folder_counter, (folder_id, folder_title) in enumerate(folder_list, start=1):
            folder_dir = self.content_dir / slugify(folder_title)
            folder_dir.mkdir(parents=True)

            contents = []
            # Counted separately from the loop index so that skipped notes
            # leave no gaps in the published weights.
            note_counter = 0
            for note in sorted(self.notes[folder_id], key=lambda n: n.title):
                if self._is_unpublished(note):
                    print(
                        f"Note is unpublished, skipping: {folder_title} - {note.title}."
                    )
                    continue

                print(f"Exporting {folder_title} - {note.title}...")
                note_counter += 1
                contents.append((note.title, note.get_url()))
                note_path = self.content_dir / (note.get_url() + ".md")
                # Write UTF-8 explicitly so non-ASCII notes do not depend on
                # the platform's locale encoding.
                note_path.write_text(
                    self._render_note(note, note_counter), encoding="utf-8"
                )

            (folder_dir / "_index.md").write_text(
                self._render_folder_index(folder_title, folder_counter, contents),
                encoding="utf-8",
            )
2020-11-20 13:22:00 -05:00
if __name__ == "__main__":
    # Script entry point: announce the run, then perform the export.
    print("Exporting Joplin database...")
    exporter = JoplinExporter()
    exporter.export()