joplin-mdbooks-website/joplinexport.py

#!/usr/bin/env python3
import re
import sqlite3
from collections import defaultdict
from datetime import datetime
from pathlib import Path
from shutil import copy
from shutil import rmtree
from typing import Optional


def contains_word(word: str, text: str) -> bool:
    """
    Check whether `text` contains `word`, as a whole word.

    Case insensitive. `word` is matched literally: any regex metacharacters
    in it (".", "+", "(", ...) are escaped rather than interpreted.

    "Whole word" means the match must be delimited by regex word boundaries
    (`\\b`) on both sides.
    """
    # Escape the word so it cannot inject pattern syntax, and let the regex
    # engine handle case folding instead of lower-casing the pattern itself
    # (which would also lower-case escape sequences inside it).
    pattern = rf"\b{re.escape(word)}\b"
    return re.search(pattern, text, flags=re.IGNORECASE) is not None


def slugify(text):
    """
    Turn `text` into a URL-friendly slug.

    The text is lower-cased, every run of non-word characters and/or
    underscores collapses into a single dash, and dashes at either end are
    removed.
    """
    lowered = text.lower()
    dashed = re.sub(r"[\W_]+", "-", lowered)
    return dashed.strip("-")


class Note:
    """A helper type for a note, together with the title of its folder."""

    def __init__(self, id, parent_id, parent_title, title, body, updated_time, tags=None):
        """
        Create a note.

        `id` and `parent_id` are the identifiers of the note and of the
        folder containing it; `parent_title` is that folder's title.
        `title` and `body` are the note's title and body text.
        `updated_time` is a POSIX timestamp in seconds; it is stored as a
        naive local `datetime`.
        `tags` is a list of tag titles; when omitted, the note gets its own
        fresh empty list.
        """
        self.id = id
        self.parent_id = parent_id
        self.parent_title = parent_title
        self.title = title
        self.body = body
        self.updated_time = datetime.fromtimestamp(updated_time)
        # A `[]` default would be created once and shared by every note
        # constructed without tags, so build a new list per instance.
        self.tags = [] if tags is None else tags

    def get_url(self):
        """Return the note's relative URL: `<folder slug>/<title slug>`."""
        return slugify(self.parent_title) + "/" + slugify(self.title)


class JoplinExporter:
    """
    The main exporter class.

    Reads folders, tags, resources and notes from a Joplin SQLite database
    and writes one Markdown page per published note, plus one `_index.md`
    per folder, under `content_dir`. Resources are copied to `static_dir`.
    """

    # Where the generated Markdown pages are written.
    content_dir = Path("content")
    # Where the Joplin resource files are copied to.
    static_dir = Path("static/resources")
    # Joplin profile directory; must contain `database.sqlite` and `resources/`.
    joplin_dir = Path.home() / ".config/joplin-desktop"

    @staticmethod
    def _escape_toml(value: str) -> str:
        """Escape backslashes and double quotes for a quoted front-matter value."""
        return value.replace("\\", "\\\\").replace('"', '\\"')

    @staticmethod
    def _is_unpublished(note) -> bool:
        """Return True if the note is marked private/WIP by title or by tag."""
        return (
            contains_word("private", note.title)
            or contains_word("wip", note.title)
            or "wip" in note.tags
            or "private" in note.tags
        )

    @staticmethod
    def _render_folder_index(folder_title, weight, contents) -> str:
        """
        Render a folder's `_index.md`.

        `contents` is a list of `(note title, note URL)` pairs. Every item
        is numbered "1." and the Markdown renderer does the counting. An
        empty folder gets no list items at all.
        """
        title = JoplinExporter._escape_toml(folder_title)
        contents_list = "\n".join(
            f"1. [{note_title}](../../{url})" for note_title, url in contents
        )
        return f"""+++
title = "{title}"
weight = {weight}
sort_by = "weight"
insert_anchor_links = "right"
+++
## Contents

Click on a link in the list below to go to that page:

{contents_list}
"""

    def _render_note(self, note, weight) -> str:
        """Render a note's page: front matter, resolved body and footer."""
        title = self._escape_toml(note.title)
        body = self.resolve_note_links(note)
        return f"""+++
title = "{title}"
weight = {weight}
sort_by = "weight"
insert_anchor_links = "right"
+++
{body}

* * *

<p style="font-size:80%; font-style: italic">
Last updated on {note.updated_time:%B %d, %Y}. For any questions/feedback,
email me at <a href="mailto:hi@stavros.io">hi@stavros.io</a>.
</p>
"""

    def clean_content_dir(self):
        """
        Reset the content directory to a known state to begin.

        Both output directories are removed if present and recreated empty,
        so this also works on a first run when neither exists yet. A root
        `_index.md` that redirects to the welcome page is then written.
        """
        output_dirs = (self.content_dir, self.static_dir)
        for directory in output_dirs:
            # rmtree raises on a missing path, so only remove what exists.
            if directory.exists():
                rmtree(directory)
        for directory in output_dirs:
            # `static/resources` is nested; create missing parents too.
            directory.mkdir(parents=True, exist_ok=True)
        with open(
            self.content_dir / "_index.md", mode="w", encoding="utf-8"
        ) as outfile:
            outfile.write('+++\nredirect_to = "welcome/stavros-notes/"\n+++')

    def resolve_note_links(self, note: "Note") -> str:
        """
        Resolve the links between notes and replace them in the body.

        Each `](:/<32 hex chars>)` link target is rewritten to
        `](../../<url>)`, where `<url>` is the URL of the note with that ID,
        else the URL of the resource with that ID, else the ID itself.
        """

        def replacement(match):
            item_id = match.group(1)
            new_url = (
                self.get_note_url_by_id(item_id)
                or self.get_resource_url_by_id(item_id)
                or item_id
            )
            return f"](../../{new_url})"

        return re.sub(r"\]\(:/([a-f0-9]{32})\)", replacement, note.body)

    def get_note_url_by_id(self, note_id: str) -> Optional[str]:
        """Return a note's relative URL by its ID, or None if unknown."""
        note = self.note_lookup_dict.get(note_id)
        if not note:
            return None
        return note.get_url()

    def get_resource_url_by_id(self, resource_id: str) -> Optional[str]:
        """Return a resource's relative URL by its ID, or None if unknown."""
        resource = self.resources.get(resource_id)
        if not resource:
            return None
        _, extension = resource
        return "resources/" + resource_id + "." + extension

    def copy_resources(self):
        """Copy all the resources to the output directory."""
        source_dir = self.joplin_dir / "resources"
        for resource_id, (_, extension) in self.resources.items():
            copy(source_dir / f"{resource_id}.{extension}", self.static_dir)

    def read_data(self):
        """
        Read the data from the Joplin database.

        Populates:
        - `self.folders`: folder ID -> folder title.
        - `self.resources`: resource ID -> `(title, file extension)`.
        - `self.notes`: folder ID -> list of `Note` objects in that folder.
        - `self.note_lookup_dict`: note ID -> `Note`.

        Raises `KeyError` if a note refers to a folder, or a note/tag link
        refers to a tag, that is not present in the database.
        """
        conn = sqlite3.connect(self.joplin_dir / "database.sqlite")
        try:
            c = conn.cursor()

            c.execute("""SELECT id, title FROM folders;""")
            self.folders = {id: title for id, title in c.fetchall()}

            # Get the tags by ID.
            c.execute("""SELECT id, title FROM tags;""")
            tags = {id: title for id, title in c.fetchall()}
            # Get the tag titles for each note ID.
            c.execute("""SELECT note_id, tag_id FROM note_tags;""")
            note_tags = defaultdict(list)
            for note_id, tag_id in c.fetchall():
                note_tags[note_id].append(tags[tag_id])

            c.execute("""SELECT id, title, file_extension FROM resources;""")
            self.resources = {id: (title, ext) for id, title, ext in c.fetchall()}

            c.execute(
                """SELECT id, parent_id, title, body, updated_time FROM notes;"""
            )
            self.notes = defaultdict(list)
            self.note_lookup_dict = {}
            for id, parent_id, title, body, updated_time in c.fetchall():
                note = Note(
                    id,
                    parent_id,
                    self.folders[parent_id],
                    title,
                    body,
                    # The value is divided by 1000 to get the seconds that
                    # Note expects (presumably stored as milliseconds).
                    updated_time / 1000,
                    tags=note_tags[id],
                )
                self.notes[note.parent_id].append(note)
                self.note_lookup_dict[note.id] = note
        finally:
            # Release the database even if a query or a lookup above fails.
            conn.close()

    def export(self):
        """
        Export all the notes to a static site.

        Folders whose title contains the word "private" are skipped
        entirely. Within the remaining folders, notes marked private or WIP
        (by title word or by tag) are skipped. Folder and note weights are
        1-based positions in their sorted order.
        """
        self.read_data()

        # Private notes shouldn't be published.
        folder_list = [
            folder
            for folder in self.folders.items()
            if not contains_word("private", folder[1])
        ]

        # Sort "Welcome" first: its key "0" orders before alphabetic titles.
        folder_list.sort(key=lambda x: x[1].lower().strip() if x[1] != "Welcome" else "0")

        self.clean_content_dir()
        self.copy_resources()

        for folder_counter, (folder_id, folder_title) in enumerate(
            folder_list, start=1
        ):
            folder_dir = self.content_dir / slugify(folder_title)
            folder_dir.mkdir(parents=True)
            contents = []
            # Counts only published notes, so weights stay contiguous.
            note_counter = 0
            for note in sorted(self.notes[folder_id], key=lambda n: n.title):
                if self._is_unpublished(note):
                    print(
                        f"Note is unpublished, skipping: {folder_title} - {note.title}."
                    )
                    continue

                print(f"Exporting {folder_title} - {note.title}...")
                note_counter += 1
                note_url = note.get_url()
                contents.append((note.title, note_url))
                note_path = self.content_dir / (note_url + ".md")
                with note_path.open(mode="w", encoding="utf-8") as outfile:
                    outfile.write(self._render_note(note, note_counter))

            with (folder_dir / "_index.md").open(
                mode="w", encoding="utf-8"
            ) as outfile:
                outfile.write(
                    self._render_folder_index(folder_title, folder_counter, contents)
                )


if __name__ == "__main__":
    # Script entry point: announce the run, then export with the default
    # class-level paths.
    print("Exporting Joplin database...")
    exporter = JoplinExporter()
    exporter.export()
Initial commit 2020-11-20 13:22:00 -05:00			`#!/usr/bin/env python3`
			`import re`
			`import sqlite3`
			`from collections import defaultdict`
Style updated date 2020-11-30 07:49:56 -05:00			`from datetime import datetime`
Initial commit 2020-11-20 13:22:00 -05:00			`from pathlib import Path`
Updates 2020-11-20 14:29:02 -05:00			`from shutil import copy`
Initial commit 2020-11-20 13:22:00 -05:00			`from shutil import rmtree`
			`from typing import Optional`


Allow for private notes 2021-01-07 12:31:18 -05:00			`def contains_word(word: str, text: str) -> bool:`
			`"""`
			Check whether `text` contains `word`, as a whole word.

			`Case insensitive.`
			`"""`
			`return re.search(f"\\b{word}\\b".lower(), text.lower()) is not None`


Initial commit 2020-11-20 13:22:00 -05:00			`def slugify(text):`
Update docstrings 2021-01-07 16:17:37 -05:00			"""Convert `text` into a slug."""
Initial commit 2020-11-20 13:22:00 -05:00			`return re.sub(r"[\W_]+", "-", text.lower()).strip("-")`


			`class Note:`
Update docstrings 2021-01-07 16:17:37 -05:00			`"""A helper type for a note."""`

			`def __init__(self, id, parent_id, parent_title, title, body, updated_time, tags=[]):`
Initial commit 2020-11-20 13:22:00 -05:00			`self.id = id`
			`self.parent_id = parent_id`
			`self.parent_title = parent_title`
			`self.title = title`
			`self.body = body`
Style updated date 2020-11-30 07:49:56 -05:00			`self.updated_time = datetime.fromtimestamp(updated_time)`
Update docstrings 2021-01-07 16:17:37 -05:00			`self.tags = tags`
Initial commit 2020-11-20 13:22:00 -05:00
			`def get_url(self):`
Update docstrings 2021-01-07 16:17:37 -05:00			`"""Return the note's relative URL."""`
Initial commit 2020-11-20 13:22:00 -05:00			`return slugify(self.parent_title) + "/" + slugify(self.title)`


			`class JoplinExporter:`
Update docstrings 2021-01-07 16:17:37 -05:00			`"""The main exporter class."""`

Initial commit 2020-11-20 13:22:00 -05:00			`content_dir = Path("content")`
Update 2020-11-25 15:27:20 -05:00			`static_dir = Path("static/resources")`
Updates 2020-11-20 14:29:02 -05:00			`joplin_dir = Path.home() / ".config/joplin-desktop"`
Initial commit 2020-11-20 13:22:00 -05:00
			`def clean_content_dir(self):`
			`"""Reset the content directory to a known state to begin."""`
			`rmtree(self.content_dir)`
Updates 2020-11-20 14:29:02 -05:00			`rmtree(self.static_dir)`
Initial commit 2020-11-20 13:22:00 -05:00			`self.content_dir.mkdir()`
Updates 2020-11-20 14:29:02 -05:00			`self.static_dir.mkdir()`
Initial commit 2020-11-20 13:22:00 -05:00			`with open(self.content_dir / "_index.md", mode="w") as outfile:`
Fix redirect again 2020-11-20 14:54:18 -05:00			`outfile.write('+++\nredirect_to = "welcome/stavros-notes/"\n+++')`
Initial commit 2020-11-20 13:22:00 -05:00
			`def resolve_note_links(self, note: Note) -> str:`
Update docstrings 2021-01-07 16:17:37 -05:00			`"""Resolve the links between notes and replace them in the body."""`

Initial commit 2020-11-20 13:22:00 -05:00			`def replacement(match):`
Updates 2020-11-20 14:29:02 -05:00			`item_id = match.group(1)`
			`new_url = self.get_note_url_by_id(item_id)`
			`if not new_url:`
			`new_url = self.get_resource_url_by_id(item_id)`
			`if not new_url:`
			`new_url = item_id`
			`return f"](../../{new_url})"`
Initial commit 2020-11-20 13:22:00 -05:00
			`return re.sub(r"\]\(:/([a-f0-9]{32})\)", replacement, note.body)`

			`def get_note_url_by_id(self, note_id: str) -> Optional[str]:`
			`"""Return a note's relative URL by its ID."""`
			`note = self.note_lookup_dict.get(note_id)`
			`if not note:`
			`return None`
			`return note.get_url()`

Updates 2020-11-20 14:29:02 -05:00			`def get_resource_url_by_id(self, resource_id: str) -> Optional[str]:`
			`"""Return a resource's relative URL by its ID."""`
			`resource = self.resources.get(resource_id)`
			`if not resource:`
			`return None`
Update 2020-11-25 15:27:20 -05:00			`return "resources/" + resource_id + "." + resource[1]`
Updates 2020-11-20 14:29:02 -05:00
			`def copy_resources(self):`
			`"""Copy all the resources to the output directory."""`
			`for resource_id, resource in self.resources.items():`
			`title, extension = resource`
			`copy(`
Appease flake8 2020-11-20 14:32:13 -05:00			`self.joplin_dir / "resources" / (f"{resource_id}.{extension}"),`
Updates 2020-11-20 14:29:02 -05:00			`self.static_dir,`
			`)`

Initial commit 2020-11-20 13:22:00 -05:00			`def read_data(self):`
Updates 2020-11-20 14:29:02 -05:00			`"""Read the data from the Joplin database."""`
			`conn = sqlite3.connect(self.joplin_dir / "database.sqlite")`
Initial commit 2020-11-20 13:22:00 -05:00			`c = conn.cursor()`

			`c.execute("""SELECT id, title FROM folders;""")`
			`self.folders = {id: title for id, title in c.fetchall()}`

Update docstrings 2021-01-07 16:17:37 -05:00			`# Get the tags by ID.`
			`c.execute("""SELECT id, title FROM tags;""")`
			`tags = {id: title for id, title in c.fetchall()}`
			`# Get the tag IDs for each note ID.`
			`c.execute("""SELECT note_id, tag_id FROM note_tags;""")`
			`note_tags = defaultdict(list)`
			`for note_id, tag_id in c.fetchall():`
			`note_tags[note_id].append(tags[tag_id])`

Updates 2020-11-20 14:29:02 -05:00			`c.execute("""SELECT id, title, file_extension FROM resources;""")`
Appease flake8 2020-11-20 14:32:13 -05:00			`self.resources = {id: (title, ext) for id, title, ext in c.fetchall()}`
Initial commit 2020-11-20 13:22:00 -05:00
Update 2020-11-29 22:21:36 -05:00			`c.execute("""SELECT id, parent_id, title, body, updated_time FROM notes;""")`
Initial commit 2020-11-20 13:22:00 -05:00			`self.notes = defaultdict(list)`
			`self.note_lookup_dict = {}`
Update 2020-11-29 22:21:36 -05:00			`for id, parent_id, title, body, updated_time in c.fetchall():`
			`note = Note(`
Update docstrings 2021-01-07 16:17:37 -05:00			`id,`
			`parent_id,`
			`self.folders[parent_id],`
			`title,`
			`body,`
			`updated_time / 1000,`
			`tags=note_tags[id],`
Update 2020-11-29 22:21:36 -05:00			`)`
Initial commit 2020-11-20 13:22:00 -05:00			`self.notes[note.parent_id].append(note)`
			`self.note_lookup_dict[note.id] = note`

			`conn.close()`

			`def export(self):`
Update docstrings 2021-01-07 16:17:37 -05:00			`"""Export all the notes to a static site."""`
Updates 2020-11-20 14:29:02 -05:00			`self.read_data()`

Don't export private notes 2020-11-20 15:15:45 -05:00			`# Private notes shouldn't be published.`
			`folder_list = list(`
Allow for private notes 2021-01-07 12:31:18 -05:00			`i for i in self.folders.items() if not contains_word("private", i[1])`
Don't export private notes 2020-11-20 15:15:45 -05:00			`)`

Initial commit 2020-11-20 13:22:00 -05:00			`# Sort "Welcome" last.`
Update 2020-11-29 22:21:36 -05:00			`folder_list.sort(key=lambda x: x[1].lower().strip() if x[1] != "Welcome" else "0")`
Initial commit 2020-11-20 13:22:00 -05:00
			`self.clean_content_dir()`
Updates 2020-11-20 14:29:02 -05:00			`self.copy_resources()`
Initial commit 2020-11-20 13:22:00 -05:00
Fix script bug 2020-11-20 16:49:31 -05:00			`for folder_counter, folder in enumerate(folder_list, start=1):`
Initial commit 2020-11-20 13:22:00 -05:00			`folder_id, folder_title = folder`
Updates 2020-11-20 14:29:02 -05:00			`dir = self.content_dir / slugify(folder_title)`
Initial commit 2020-11-20 13:22:00 -05:00			`dir.mkdir(parents=True)`
Add content list 2020-11-20 15:10:56 -05:00			`contents = []`
Add "WIP" to the exclusion list 2021-01-07 15:51:04 -05:00			`note_counter = 0`
			`for note in sorted(self.notes[folder_id], key=lambda n: n.title):`
Update docstrings 2021-01-07 16:17:37 -05:00			`if (`
			`contains_word("private", note.title)`
			`or contains_word("wip", note.title)`
			`or "wip" in note.tags`
			`or "private" in note.tags`
Add "WIP" to the exclusion list 2021-01-07 15:51:04 -05:00			`):`
			`print(`
			`f"Note is unpublished, skipping: {folder_title} - {note.title}."`
			`)`
Allow for private notes 2021-01-07 12:31:18 -05:00			`continue`
Add "WIP" to the exclusion list 2021-01-07 15:51:04 -05:00
Initial commit 2020-11-20 13:22:00 -05:00			`print(f"Exporting {folder_title} - {note.title}...")`
Add "WIP" to the exclusion list 2021-01-07 15:51:04 -05:00			`note_counter += 1`
Add content list 2020-11-20 15:10:56 -05:00			`contents.append((note.title, note.get_url()))`
Updates 2020-11-20 14:29:02 -05:00			`with (self.content_dir / (note.get_url() + ".md")).open(`
			`mode="w"`
			`) as outfile:`
Initial commit 2020-11-20 13:22:00 -05:00			`outfile.write(`
			`f"""+++`
			`title = "{note.title}"`
Fix script bug 2020-11-20 16:49:31 -05:00			`weight = {note_counter}`
Initial commit 2020-11-20 13:22:00 -05:00			`sort_by = "weight"`
			`insert_anchor_links = "right"`
			`+++`
Update 2020-11-29 22:21:36 -05:00			`{self.resolve_note_links(note)}`

			`* * *`

Updates 2021-01-04 06:59:06 -05:00			`<p style="font-size:80%; font-style: italic">`
Updates 2021-01-16 15:35:08 -05:00			`Last updated on {note.updated_time:%B %d, %Y}. For any questions/feedback,`
Updates 2021-01-04 06:59:06 -05:00			`email me at <a href="mailto:hi@stavros.io">hi@stavros.io</a>.`
			`</p>`
Update 2020-11-29 22:21:36 -05:00			`"""`
Initial commit 2020-11-20 13:22:00 -05:00			`)`

Add content list 2020-11-20 15:10:56 -05:00			`with (dir / "_index.md").open(mode="w") as outfile:`
			`contents_list = "\n1. ".join(`
			`f"[{title}](../../{url})" for title, url in contents`
			`)`
			`outfile.write(`
			`f"""+++`
			`title = "{folder_title}"`
Fix script bug 2020-11-20 16:49:31 -05:00			`weight = {folder_counter}`
Add content list 2020-11-20 15:10:56 -05:00			`sort_by = "weight"`
			`insert_anchor_links = "right"`
			`+++`
			`## Contents`

			`Click on a link in the list below to go to that page:`

			`1. {contents_list}`
			`"""`
			`)`

Initial commit 2020-11-20 13:22:00 -05:00
			`if __name__ == "__main__":`
			`print("Exporting Joplin database...")`
Updates 2020-11-20 14:29:02 -05:00			`JoplinExporter().export()`