diff options
| author | alex <alex@pdp7.net> | 2026-02-01 21:47:23 +0100 |
|---|---|---|
| committer | alexpdp7 <alex@corcoles.net> | 2026-02-01 22:03:42 +0100 |
| commit | c48ca0ca40ed9ac009d7f84ce213ef5c69536ca7 (patch) | |
| tree | 791845e06bb167794ec916fd6e2b04b6bba1234b /blog_v2/src | |
| parent | f94e0a1bc1aea34cb4eadd2305ddf724722cd9b9 (diff) | |
Migrate blog
Turns into a static site generator. All Gemini files with links have
been rewritten to be relative.
Closes: #731
Closes: #372
Closes: #28
Diffstat (limited to 'blog_v2/src')
| -rw-r--r-- | blog_v2/src/blog/.gitignore | 1 | ||||
| -rw-r--r-- | blog_v2/src/blog/__init__.py | 163 | ||||
| -rw-r--r-- | blog_v2/src/blog/gemtext.py | 348 | ||||
| -rw-r--r-- | blog_v2/src/blog/pretty.py | 5 |
4 files changed, 0 insertions, 517 deletions
diff --git a/blog_v2/src/blog/.gitignore b/blog_v2/src/blog/.gitignore deleted file mode 100644 index bee8a64b..00000000 --- a/blog_v2/src/blog/.gitignore +++ /dev/null @@ -1 +0,0 @@ -__pycache__ diff --git a/blog_v2/src/blog/__init__.py b/blog_v2/src/blog/__init__.py deleted file mode 100644 index 15d6e4f1..00000000 --- a/blog_v2/src/blog/__init__.py +++ /dev/null @@ -1,163 +0,0 @@ -import argparse -import datetime -import pathlib -import re -import shutil -import textwrap - -from feedgen import feed -import lxml - -from blog import gemtext - - -def migrate(from_: pathlib.Path, to: pathlib.Path): - to.mkdir(parents=True, exist_ok=False) - shutil.copytree(from_ / "content", to, dirs_exist_ok=True) - shutil.copytree(from_ / "static" / "about", to / "about", dirs_exist_ok=True) - - laspelis = to / "laspelis" - laspelis.mkdir() - - laspelis_index_lines = (from_ / "static" / "laspelis" / "index.gmi").read_text().splitlines() - laspelis_index_lines = ["".join(l.split("/", 1)) for l in laspelis_index_lines] - (to / "laspelis" / "index.gmi").write_text("\n".join(laspelis_index_lines) + "\n") - - for lp in (from_ / "static" / "laspelis").glob("*"): - if lp.name == "index.gmi": - continue - shutil.copy(lp / "mail", laspelis / f"{lp.name}.mail") - shutil.copy(lp / "index.gmi", laspelis / f"{lp.name}.gmi") - - (to / "about" / "index.gmi").replace(to / "about.gmi") - (to / "about").rmdir() - - # Make links relative - for g in to.glob("**/*.gmi"): - path_in_site = g.relative_to(to) - - lines = g.read_text().splitlines() - for i, l in enumerate(lines): - if not l.startswith("=> "): - continue - l = l.removeprefix("=> ") - url, *rest = l.split(maxsplit=1) - - ABSOLUTE_BLOG = "gemini://alex.corcoles.net/" - if url.startswith(ABSOLUTE_BLOG): - url = url.removeprefix(ABSOLUTE_BLOG) - url = url.removesuffix("/") - url = "../" * (len(path_in_site.parts) - 1) + url - gemini_link = " ".join((f"=> {url}", " ".join(rest))) - lines[i] = gemini_link - continue - - ABSOLUTE_LASPELIS = "/laspelis/" - if url.startswith(ABSOLUTE_LASPELIS): - url = "../" * (len(path_in_site.parts) - 1) + url[1:] - gemini_link = " ".join((f"=> {url}", " ".join(rest))) - lines[i] = gemini_link - continue - - reconstruct = "\n".join(lines) - if not reconstruct.endswith("\n"): - reconstruct += "\n" - g.write_text(reconstruct) - - # Build old URL slash removal redirect list - redirections = textwrap.dedent(""" - # This is a list of the pre-migration URLs of this blog. - # URLs in this list might had been linked with a trailing slash. - # Use Apache httpd's RewriteMap to preserve only old URLs - """).lstrip() - for g in to.glob("**/*.gmi"): - url = str(g.relative_to(to).with_suffix("")) - redirections += f"{url}/ {url}\n" - pathlib.Path(to / "redirections.txt").write_text(redirections) - - -def build(from_: pathlib.Path, to: pathlib.Path): - TITLE = "El blog es mío" - SUBTITLE = "Hay otros como él, pero este es el mío" - - to.mkdir(parents=True, exist_ok=False) - shutil.copytree(from_, to, dirs_exist_ok=True) - - dated_entries = [b for b in to.glob("**/*.gmi") if re.match(r"\d{4}-\d{2}-\d{2}", b.read_text().splitlines()[1])] - dated_entries.sort() - dated_entries.reverse() - - # Generate index.gmi - index = textwrap.dedent(f""" - # {TITLE} - - {SUBTITLE} - - Envíame email a alex arroba corcoles punto net. - - """).lstrip() - for e in dated_entries: - title, date, *_ = e.read_text().splitlines() - title = title.removeprefix("# ") - index += f"=> {'/'.join(e.parts[-3:]).removesuffix('.gmi')} {date} {title}\n" - - (to / "index.gmi").write_text(index) - - - # Convert to HTML - for gmi in to.glob("**/*.gmi"): - html = gmi.with_suffix(".html") - title = None - if gmi.relative_to(to).parts[0] == "laspelis": - title = "laspelis" - if gmi.name != "index.gmi": - subject = gmi.read_text().splitlines()[3] - assert subject.startswith("Subject: "), subject - title = subject.removeprefix("Subject: ") - html.write_text(gemtext.convert(gmi.read_text(), title, ("feed/", TITLE) if gmi.relative_to(to) == pathlib.Path("index.gmi") else None)) - - # Generate RSS - fg = feed.FeedGenerator() - fg.title(TITLE) - fg.subtitle(SUBTITLE) - fg.link(href="https://alex.corcoles.net", rel="self") - - for e in reversed(dated_entries[0:10]): - title, date, *_ = e.read_text().splitlines() - title = title.removeprefix("# ") - path = "/".join(e.parts[2:]).removesuffix('.gmi') - - fe = fg.add_entry() - url = f"https://alex.corcoles.net/{path}" - fe.link(href=url) - fe.published( - datetime.datetime.combine( - datetime.date.fromisoformat(date), - datetime.datetime.min.time(), - tzinfo=datetime.UTC, - ) - ) - fe.title(title) - fe.content(b"\n".join(list(map(lxml.html.tostring, lxml.html.document_fromstring(e.with_suffix(".html").read_bytes()).body[2:]))), type="html") - feed_dir = to / "feed" - feed_dir.mkdir() - (feed_dir / "index.rss").write_bytes(fg.rss_str(pretty=True)) - - -def main() -> None: - parser = argparse.ArgumentParser() - subparsers = parser.add_subparsers(required=True) - - subparser = subparsers.add_parser("migrate") - subparser.add_argument("from_", type=pathlib.Path) - subparser.add_argument("to", type=pathlib.Path) - subparser.set_defaults(command=migrate) - - subparser = subparsers.add_parser("build") - subparser.add_argument("from_", type=pathlib.Path) - subparser.add_argument("to", type=pathlib.Path) - subparser.set_defaults(command=build) - - args = vars(parser.parse_args()) - command = args.pop("command") - command(**args) diff --git a/blog_v2/src/blog/gemtext.py b/blog_v2/src/blog/gemtext.py deleted file mode 100644 index 3fe293a9..00000000 --- a/blog_v2/src/blog/gemtext.py +++ /dev/null @@ -1,348 +0,0 @@ -import dataclasses -import re -import textwrap - -import htmlgenerator as h - -from blog import pretty - - -def parse(s): - r""" - >>> parse('''# Header 1 - ... - ... ## Header 2 - ... - ... ### Header 3 - ... - ... * List 1 - ... * List 2 - ... - ... > First line quote. - ... > Second line of quote. - ... - ... ``` - ... Fenced - ... Lines - ... ``` - ... - ... Paragraph. - ... - ... Another paragraph. - ... ''') - [Header(level=1, text='Header 1'), - Line(text=''), - Header(level=2, text='Header 2'), - Line(text=''), - Header(level=3, text='Header 3'), - Line(text=''), - List(items=[ListItem(text='List 1'), - ListItem(text='List 2')]), - Line(text=''), - BlockQuote(lines=[BlockQuoteLine(text='First line quote.'), - BlockQuoteLine(text='Second line of quote.')]), - Line(text=''), - Pre(content='Fenced\nLines\n'), - Line(text=''), - Line(text='Paragraph.'), - Line(text=''), - Line(text='Another paragraph.')] - """ - lines = s.splitlines() - - i = 0 - gem = [] - - while i < len(lines): - line = parse_line(lines[i]) - - if isinstance(line, Link): - gem.append(line) - i = i + 1 - continue - - if isinstance(line, Header): - gem.append(line) - i = i + 1 - continue - - if isinstance(line, ListItem): - items = [] - while i < len(lines) and isinstance(parse_line(lines[i]), ListItem): - items.append(parse_line(lines[i])) - i = i + 1 - gem.append(List(items)) - continue - - if isinstance(line, BlockQuoteLine): - quotes = [] - while i < len(lines) and isinstance(parse_line(lines[i]), BlockQuoteLine): - quotes.append(parse_line(lines[i])) - i = i + 1 - gem.append(BlockQuote(quotes)) - continue - - if isinstance(line, PreFence): - content = "" - i = i + 1 - while i < len(lines) and not isinstance(parse_line(lines[i]), PreFence): - content += lines[i] - content += "\n" - i = i + 1 - gem.append(Pre(content)) - i = i + 1 - continue - - gem.append(line) - i = i + 1 - - return gem - - -def parse_line(l): - if Link.is_link(l): - return Link(l) - if Header.is_header(l): - return Header(l) - if ListItem.is_list_item(l): - return ListItem(l) - if BlockQuoteLine.is_block_quote_line(l): - return BlockQuoteLine(l) - if PreFence.is_pre_fence(l): - return PreFence() - return Line(l) - - -@dataclasses.dataclass -class Link: - """ - >>> Link("=> http://example.com") - Link(url='http://example.com', text=None) - - >>> Link("=> http://example.com Example text") - Link(url='http://example.com', text='Example text') - """ - - url: str - text: str | None - - def __init__(self, line: str): - assert Link.is_link(line) - parts = line.split(None, 2) - self.url = parts[1] - self.text = parts[2] if len(parts) > 2 else None # noqa: PLR2004, > 2 "magic constant" - - @staticmethod - def is_link(line: str): - return line.startswith("=>") - - -@dataclasses.dataclass -class Header: - """ - >>> Header("# Level one") - Header(level=1, text='Level one') - - >>> Header("## Level two") - Header(level=2, text='Level two') - - >>> Header("### Level three") - Header(level=3, text='Level three') - """ - - level: int - text: str - - def __init__(self, line: str): - assert Header.is_header(line) - hashes, self.text = line.split(None, 1) - self.level = len(hashes) - - @staticmethod - def is_header(line: str): - return re.match("#{1,3} .*", line) - - -@dataclasses.dataclass -class ListItem: - """ - >>> ListItem("* foo") - ListItem(text='foo') - """ - - text: str - - def __init__(self, line: str): - assert ListItem.is_list_item(line) - self.text = line[2:] - - @staticmethod - def is_list_item(line: str): - return line.startswith("* ") - - -@dataclasses.dataclass -class BlockQuoteLine: - """ - >>> BlockQuoteLine("> foo") - BlockQuoteLine(text='foo') - - >>> BlockQuoteLine(">foo") - BlockQuoteLine(text='foo') - """ - - text: str - - def __init__(self, line: str): - assert BlockQuoteLine.is_block_quote_line(line) - self.text = line[2:] if line.startswith("> ") else line[1:] - - @staticmethod - def is_block_quote_line(line: str): - return line.startswith(">") - - -class PreFence: - @staticmethod - def is_pre_fence(line: str): - return line == "```" - - -@dataclasses.dataclass -class Line: - text: str - - -@dataclasses.dataclass -class List: - items: list[ListItem] - - -@dataclasses.dataclass -class BlockQuote: - lines: list[BlockQuoteLine] - - -@dataclasses.dataclass -class Pre: - content: str - - -def convert(gemtext, title=None, feed=None): - content = parse(gemtext) - if not title: - title = content[0] - assert isinstance(title, Header), title - assert title.level == 1 - title = title.text - - if feed: - href, title = feed - feed = [ - h.LINK( - rel="alternate", - type="application/rss+xml", - title=title, - href=href, - ), - ] - else: - feed = [] - return pretty.pretty_html( - h.render( - h.HTML( - h.HEAD( - h.TITLE(title), - h.STYLE( - textwrap.dedent(""" - :root { - color-scheme: light dark; - } - body { - max-width: 40em; - margin-left: auto; - margin-right: auto; - padding-left: 2em; - padding-right: 2em; - } - p, blockquote, li { - /* from Mozilla reader mode */ - line-height: 1.6em; - font-size: 20px; - } - """).lstrip() - ), - *feed, - ), - h.BODY( - *gemini_to_html(content), - ), - doctype="html", - ), - {}, - ) - ) - - -def gemini_to_html(parsed): # noqa: C901, PLR0912, PLR0915 - i = 0 - result = [] - while i < len(parsed): - gem_element = parsed[i] - - if isinstance(gem_element, Header): - header = [h.H1, h.H2, h.H3, h.H4, h.H5, h.H6][gem_element.level - 1] - result.append(header(gem_element.text)) - i = i + 1 - continue - - if isinstance(gem_element, List): - result.append(h.UL(*[h.LI(i.text) for i in gem_element.items])) - i = i + 1 - continue - - if isinstance(gem_element, Link): - url = gem_element.url - if url.startswith("gemini://"): - url = url.replace("gemini://", "https://portal.mozz.us/gemini/") - - result.append(h.P(h.A(gem_element.text or gem_element.url, href=url))) - i = i + 1 - continue - - if gem_element == Line(""): - i = i + 1 - continue - - if isinstance(gem_element, BlockQuote): - content = [] - for line in gem_element.lines: - if line.text: - content.append(line.text) - content.append(h.BR()) - result.append(h.BLOCKQUOTE(*content)) - i = i + 1 - continue - - if isinstance(gem_element, Line): - paragraph = [gem_element.text] - i = i + 1 - while i < len(parsed): - gem_element = parsed[i] - if isinstance(gem_element, Line) and gem_element.text != "": - paragraph.append(h.BR()) - paragraph.append(gem_element.text) - i = i + 1 - else: - break - result.append(h.P(*paragraph)) - continue - - if isinstance(gem_element, Pre): - result.append(h.PRE(gem_element.content)) - i = i + 1 - continue - - assert False, f"unknown element {gem_element}" - - return result diff --git a/blog_v2/src/blog/pretty.py b/blog_v2/src/blog/pretty.py deleted file mode 100644 index 2ae916a7..00000000 --- a/blog_v2/src/blog/pretty.py +++ /dev/null @@ -1,5 +0,0 @@ -from lxml import etree, html - - -def pretty_html(s): - return etree.tostring(html.fromstring(s), pretty_print=True).decode("utf8") |
