joplin-mdbooks-website/move_html_to_dir

117 lines
3.7 KiB
Plaintext
Raw Normal View History

2021-11-20 10:22:59 -05:00
#!/usr/bin/env python3
"""
Rename files like `foo/bar.html` to `foo/bar/index.html` for prettier URLs.
"""
import re
import sys
from pathlib import Path
from typing import Dict
2021-12-14 17:27:35 -05:00
def convert_relative_to_absolute(path: Path):
    """
    Convert relative links in files to absolute ones.

    Sometimes files contain links relative to their location, e.g.
    `public/test/foo.html` might contain a link to `bar/baz.html`, which makes it hard
    for us to match `public/test/bar/baz.html` that way. This function rewrites such a
    link as a root-absolute URL (`/test/bar/baz.html`, i.e. relative to `path`) so we
    can match it later.

    `path` - The root directory; every `*.html` file below it is rewritten in place.

    Links containing `://`, the bare link `/`, and pure `#anchor` links are left alone.
    Exits via `sys.exit` when a link points at a file that does not exist or that lies
    outside `path`. The page being processed is only written after all of its links
    were converted, so an early exit never leaves a page truncated.
    """
    # Link targets are resolved (symlinks followed) below, so the root must be resolved
    # the same way or `relative_to` would fail for a root reached through a symlink.
    root = path.resolve()

    link_pattern = re.compile(
        r"""
        ((?:href|src|root)\s*=\s*") # Various tags like href="
        ([^\"]+?) # Anything non-quote, non-greedily.
        (
        (?:\#[^\"]*|) # Either an anchor or nothing.
        ") # Ending quote.
        """,
        flags=re.VERBOSE,
    )

    def replace_wrapper(filename: Path):
        """Build the `re.sub` callback for links found inside `filename`."""

        def replace_link(match: re.Match) -> str:
            prefix, text, suffix = match.groups()
            if "://" in text or text == "/" or text.startswith("#"):
                # Not a valid filename, return it.
                return f"{prefix}{text}{suffix}"
            if text.startswith("/"):
                # Already root-absolute: look it up below the root directory.
                filepath = (root / text[1:]).resolve()
            else:
                filepath = (filename.parent / text).resolve()
            if not filepath.exists():
                sys.exit(f"Possible broken link in {filename}: {text}")
            try:
                replacement_path = filepath.relative_to(root).as_posix()
            except ValueError:
                sys.exit(f"Link in {filename} points outside {path}: {text}")
            return f"{prefix}/{replacement_path}{suffix}"

        return replace_link

    for filename in path.glob("**/*.html"):
        print(f"Converting relative links in {filename}...")
        original = filename.read_text(encoding="utf-8")
        # Transform first, write afterwards: the callback may abort the program.
        contents = link_pattern.sub(replace_wrapper(filename), original)
        if contents != original:
            filename.write_text(contents, encoding="utf-8")
2021-11-20 10:22:59 -05:00
def replace_links(path: Path, replacements: Dict[str, str]):
    """
    Convert links in files.

    `path` - The root of the repo; every `*.html` file below it is rewritten in place.
    `replacements` - A dictionary of replacement URLs in the form of
        `{"dir/file.html": "dir/file/"}`. Every occurrence of a key anywhere in a
        file's text is replaced by its value, in dictionary order.

    After the replacements, any leading run of `../` inside an `href`, `src` or `root`
    attribute is collapsed into a single `/`, unless the run is followed by a dot.
    """
    # Hoisted out of the per-file loop; the pattern never changes.
    parent_run = re.compile(r'((?:href|src|root)\s*=\s*")((?:\.\./)+)([^\.])')

    for filename in path.glob("**/*.html"):
        print(f"Converting links in {filename}...")
        # Read and write explicitly as UTF-8 instead of the locale's encoding.
        original = filename.read_text(encoding="utf-8")
        contents = original
        for source, target in replacements.items():
            contents = contents.replace(source, target)
        # Convert relative links to absolute.
        contents = parent_run.sub(r"\1/\3", contents)
        # Only touch the file once the new text is complete, and only if it changed.
        if contents != original:
            filename.write_text(contents, encoding="utf-8")
def main(path: Path):
    """
    Turn every `foo/bar.html` below `path` into `foo/bar/index.html` and fix the links.

    `path` - The root directory of the generated HTML.

    Steps: make all links root-absolute, move each page into its own directory, then
    rewrite links to the moved pages using the old-to-new mapping collected on the way.
    Files directly inside `path` and files already named `index.html` are not moved.
    Exits via `sys.exit` if a move would overwrite an existing `index.html`.
    """
    convert_relative_to_absolute(path)

    replacements: Dict[str, str] = {}
    # Snapshot the listing before the loop: it creates directories and moves files
    # below `path`, which must not happen while the lazy glob is still being consumed.
    for p in sorted(path.glob("**/*.html")):
        relative = p.relative_to(path)
        if len(relative.parts) == 1:
            # Don't convert top-level files (whatever `path` was given as).
            continue
        if p.name == "index.html":
            # Already the index page of its directory.
            continue

        print(f"Renaming {p}...")
        dir_path = p.parent / p.stem
        new_path = dir_path / "index.html"
        if new_path.exists():
            # `rename` would silently replace the existing page on POSIX.
            sys.exit(f"Cannot rename {p}: {new_path} already exists")
        dir_path.mkdir(parents=True, exist_ok=True)
        p.rename(new_path)

        # Construct the dictionary of replacements that have been done.
        # URLs always use forward slashes, so don't rely on the OS separator.
        replacements[relative.as_posix()] = f"{dir_path.relative_to(path).as_posix()}/"

    replace_links(path, replacements)
if __name__ == "__main__":
    # The only argument is the root directory of the generated HTML.
    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} <html-root-directory>")
    main(Path(sys.argv[1]))