#!/usr/bin/env python3
"""
Rename files like `foo/bar.html` to `foo/bar/index.html` for prettier URLs.

The script works in three passes over every `*.html` file under a root
directory:

1. Rewrite relative links into root-absolute ones (`/dir/file.html`).
2. Move each non-index, non-top-level `dir/file.html` to
   `dir/file/index.html`.
3. Rewrite links so that they point at the new locations (`dir/file/`).
"""
import re
import sys
from pathlib import Path
from typing import Callable, Dict

# Matches a double-quoted link attribute and splits it into three groups:
# the attribute prefix up to the opening quote, the link target, and the
# optional `#anchor` together with the closing quote.
_LINK_RE = re.compile(
    r"""
    ((?:href|src|root)\s*=\s*")  # Various attributes like href="
    ([^\"]+?)                    # Anything non-quote, non-greedily.
    (
        (?:\#[^\"]*|)            # Either an anchor or nothing.
    ")                           # Ending quote.
    """,
    re.VERBOSE,
)

# Matches a link attribute whose target starts with one or more `../`.
_PARENT_LINK_RE = re.compile(r'((?:href|src|root)\s*=\s*")((?:\.\./)+)([^\.])')


def _make_link_replacer(
    root: Path, resolved_root: Path, filename: Path
) -> Callable[["re.Match"], str]:
    """
    Build the `re.sub` callback that absolutizes links found in `filename`.

    `root` - The site root as given by the caller.
    `resolved_root` - `root.resolve()`, computed once by the caller.
    `filename` - The HTML file whose links are being rewritten.
    """

    def replace_link(match: "re.Match") -> str:
        prefix, text, suffix = match.groups()
        if "://" in text or text == "/" or text.startswith("#"):
            # External URL, site root or pure anchor: leave it untouched.
            return f"{prefix}{text}{suffix}"

        if text.startswith("/"):
            filepath = (root / text[1:]).resolve()
        else:
            filepath = (filename.parent / text).resolve()

        if not filepath.exists():
            # The link does not point at an existing file: abort the run.
            sys.exit(f"Possible broken link in {filename}: {text}")

        # `filepath` is fully resolved, so it has to be compared against the
        # resolved root; `Path.absolute()` keeps `..` and symlinks and would
        # make `relative_to` raise ValueError for such roots.
        replacement_path = filepath.relative_to(resolved_root)
        # `as_posix()` keeps forward slashes in the URL on every platform.
        return f"{prefix}/{replacement_path.as_posix()}{suffix}"

    return replace_link


def convert_relative_to_absolute(path: Path):
    """
    Convert relative links in files to absolute ones.

    Sometimes files contain links relative to their location, e.g.
    `public/test/foo.html` might contain a link to `bar/baz.html`, which makes
    it hard for us to match `public/test/bar/baz.html` that way. This function
    converts that into its full path (`/test/bar/baz.html`, relative to
    `path`) so we can match it later.

    `path` - The root directory containing the HTML files.

    Exits the process via `sys.exit` when a link does not point at an existing
    file. The file containing the broken link is left unmodified.
    """
    resolved_root = path.resolve()
    for filename in path.glob("**/*.html"):
        print(f"Converting relative links in {filename}...")
        contents = filename.read_text()
        # Compute the new contents *before* touching the file, so a broken
        # link (which exits) can never leave a truncated file behind.
        contents = _LINK_RE.sub(
            _make_link_replacer(path, resolved_root, filename), contents
        )
        filename.write_text(contents)


def replace_links(path: Path, replacements: Dict[str, str]):
    """
    Convert links in files.

    `path` - The root of the repo.
    `replacements` - A dictionary of replacement URLs in the form of
        `{"dir/file.html": "dir/file/"}`.
    """
    for filename in path.glob("**/*.html"):
        print(f"Converting links in {filename}...")
        contents = filename.read_text()
        for source, target in replacements.items():
            contents = contents.replace(source, target)
        # Convert relative links to absolute: strip any leading `../` run and
        # anchor the remainder at the site root.
        contents = _PARENT_LINK_RE.sub(r"\1/\3", contents)
        filename.write_text(contents)


def main(path: Path):
    """
    Prettify the URLs of every HTML file under `path`.

    `path` - The root directory of the generated site.

    Files directly inside `path` and files already named `index.html` are
    left where they are; every other `dir/file.html` is moved to
    `dir/file/index.html` and links to it are rewritten to `dir/file/`.
    """
    convert_relative_to_absolute(path)

    replacements: Dict[str, str] = {}
    # Snapshot the listing first: renaming creates directories and files
    # inside the very tree that is being walked.
    for p in list(path.glob("**/*.html")):
        if p.parent == path:
            # Don't convert top-level files.
            continue
        if p.name == "index.html":
            # Already an index page, nothing to prettify.
            continue

        print(f"Renaming {p}...")
        dir_path = p.parent / p.stem
        dir_path.mkdir(parents=True, exist_ok=True)
        new_path = dir_path / "index.html"
        p.rename(new_path)

        # Construct the dictionary of replacements that have been done. The
        # keys are matched against URLs, so always use forward slashes.
        old_url = p.relative_to(path).as_posix()
        replacements[old_url] = f"{new_path.parent.relative_to(path).as_posix()}/"

    replace_links(path, replacements)


if __name__ == "__main__":
    if len(sys.argv) != 2:
        sys.exit(f"Usage: {sys.argv[0]} <site-root>")
    main(Path(sys.argv[1]))