From 22388bed8f557bfdc3db091297796ef867cebcd3 Mon Sep 17 00:00:00 2001
From: Stavros Korokithakis
Date: Wed, 15 Dec 2021 00:27:35 +0200
Subject: [PATCH] Updates

---
 move_html_to_dir | 78 ++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 75 insertions(+), 3 deletions(-)

diff --git a/move_html_to_dir b/move_html_to_dir
index 67ebc03..98eadbe 100755
--- a/move_html_to_dir
+++ b/move_html_to_dir
@@ -8,8 +8,77 @@ from pathlib import Path
 from typing import Dict
 
 
-def replace_links(path: Path, replacements: Dict[str, str]):
+def convert_relative_to_absolute(path: Path):
+    """
+    Convert relative links in files to absolute ones.
+
+    Sometimes files contain links relative to their location, e.g.
+    `public/test/foo.html` might contain a link to `bar/baz.html`, which makes it hard
+    for us to match `public/test/bar/baz.html` that way. This function converts that
+    into its full path (`public/test/bar/baz.html`) so we can match it later.
+
+    `path` - The root of the repo. Every HTML file below it is rewritten in place.
+
+    Exits the program when a link points to a file that does not exist. A file is
+    only written once all of its links have been converted, so exiting never leaves
+    a truncated file behind.
+    """
+    # Resolve the root once so it is comparable with the resolved link targets.
+    root = path.resolve()
+
+    def replace_wrapper(filename: Path):
+        def replace_link(match: re.Match) -> str:
+            prefix, text, suffix = match.groups()
+            if "://" in text or text == "/" or text.startswith("#"):
+                # External URL, site root or bare anchor: leave it untouched.
+                return f"{prefix}{text}{suffix}"
+
+            if text.startswith("/"):
+                filepath = (root / text[1:]).resolve()
+            else:
+                filepath = (filename.parent / text).resolve()
+
+            if not filepath.exists():
+                # The target is missing: abort rather than rewrite a dead link.
+                sys.exit(f"Possible broken link in {filename}: {text}")
+
+            replacement_path = filepath.relative_to(root)
+            return f"{prefix}/{replacement_path}{suffix}"
+
+        return replace_link
+
     for filename in path.glob("**/*.html"):
+        print(f"Converting relative links in {filename}...")
+        with filename.open("r+") as f:
+            # Convert before touching the file: the callback may exit the program.
+            contents = re.sub(
+                r"""
+                ((?:href|src|root)\s*=\s*")  # Various tags like href="
+                ([^\"]+?)                    # Anything non-quote, non-greedily.
+                (
+                    (?:\#[^\"]*|)            # Either an anchor or nothing.
+                ")                           # Ending quote.
+                """,
+                replace_wrapper(filename),
+                f.read(),
+                flags=re.VERBOSE,
+            )
+            f.seek(0)
+            f.write(contents)
+            f.truncate()
+
+
+def replace_links(path: Path, replacements: Dict[str, str]):
+    """
+    Convert links in files.
+
+    `path` - The root of the repo.
+    `replacements` - A dictionary of replacement URLs in the form of
+                     `{"dir/file.html": "dir/file/"}`.
+    """
+
+    for filename in path.glob("**/*.html"):
+        print(f"Converting links in {filename}...")
         with filename.open("r+") as f:
             contents = f.read()
             f.truncate(0)
@@ -17,7 +86,7 @@ def replace_links(path: Path, replacements: Dict[str, str]):
             for source, target in replacements.items():
                 contents = contents.replace(source, target)
 
-            # Convert absolute links to relative.
+            # Convert relative links to absolute.
             contents = re.sub(
                 r'((?:href|src|root)\s*=\s*")((?:\.\./)+)([^\.])', r"\1/\3", contents
             )
@@ -26,6 +95,8 @@ def replace_links(path: Path, replacements: Dict[str, str]):
 
 
 def main(path: Path):
+    convert_relative_to_absolute(path)
+
     replacements: Dict[str, str] = {}
     for p in path.glob("**/*.html"):
         if str(p.parent) == ".":
@@ -36,13 +107,14 @@ def main(path: Path):
             # Don't convert top-level files.
             continue
 
-        print(f"Converting {p}...")
+        print(f"Renaming {p}...")
         dir_path = p.parent / p.stem
         dir_path.mkdir(parents=True, exist_ok=True)
 
         new_path = dir_path / "index.html"
         p.rename(new_path)
 
+        # Construct the dictionary of replacements that have been done.
         replacements[str(p.relative_to(path))] = f"{new_path.parent.relative_to(path)}/"
 
     replace_links(path, replacements)