From 504c0930bed2a274c4c468614555d378fb859a22 Mon Sep 17 00:00:00 2001
From: alex
Date: Sat, 24 Jan 2026 14:16:50 +0100
Subject: More porting to new system

---
Review notes (this section is ignored by git-am):

* __init__.py: import lxml.html explicitly. build() uses lxml.html.tostring
  and lxml.html.document_fromstring, which "import lxml" alone does not make
  available; it only worked because blog.pretty imports the submodule first.
* __init__.py: derive entry paths with e.relative_to(to) instead of
  e.parts[2:], which was only right when the output directory had exactly
  two path components.
* __init__.py: files with fewer than two lines no longer raise IndexError
  while the dated entries are collected.
* __init__.py: rewritten links without a label no longer get a trailing space.
* gemtext.py: Link accepts "=>url" (no whitespace after the arrow), which
  is_link() already admitted but __init__ mis-parsed.
* gemtext.py: Header.is_header returns a bool and uses a raw-string pattern.
* gemtext.py: only the leading "gemini://" is rewritten to the web proxy, and
  the unknown-element check no longer disappears under "python -O".

Line counts are unchanged, so the hunk headers and the diffstat below still
hold. The "index" blob ids are those of the commit as originally recorded.

 blog_v2/src/blog/__init__.py | 127 +++++++++++++++-
 blog_v2/src/blog/gemtext.py  | 336 +++++++++++++++++++++++++++++++++++++++++++
 blog_v2/src/blog/pretty.py   |   5 +
 3 files changed, 466 insertions(+), 2 deletions(-)
 create mode 100644 blog_v2/src/blog/gemtext.py
 create mode 100644 blog_v2/src/blog/pretty.py

(limited to 'blog_v2/src')

diff --git a/blog_v2/src/blog/__init__.py b/blog_v2/src/blog/__init__.py
index 03f48996..cdc15a8d 100644
--- a/blog_v2/src/blog/__init__.py
+++ b/blog_v2/src/blog/__init__.py
@@ -1,21 +1,139 @@
 import argparse
+import datetime
 import pathlib
+import re
 import shutil
+import textwrap
+
+from feedgen import feed
+import lxml.html  # build() uses lxml.html.*; a bare "import lxml" does not load it
+
+from blog import gemtext
 
 
 def migrate(from_: pathlib.Path, to: pathlib.Path):
     to.mkdir(parents=True, exist_ok=False)
     shutil.copytree(from_ / "content", to, dirs_exist_ok=True)
     shutil.copytree(from_ / "static" / "about", to / "about", dirs_exist_ok=True)
+
     laspelis = to / "laspelis"
     laspelis.mkdir()
+
+    laspelis_index_lines = (from_ / "static" / "laspelis" / "index.gmi").read_text().splitlines()
+    laspelis_index_lines = ["".join(l.split("/", 1)) for l in laspelis_index_lines]
+    (to / "laspelis" / "index.gmi").write_text("\n".join(laspelis_index_lines) + "\n")
+
     for lp in (from_ / "static" / "laspelis").glob("*"):
-        if not lp.is_dir():
-            print("skipping", lp)
+        if lp.name == "index.gmi":
             continue
         shutil.copy(lp / "mail", laspelis / f"{lp.name}.mail")
         shutil.copy(lp / "index.gmi", laspelis / f"{lp.name}.gmi")
 
+    (to / "about" / "index.gmi").replace(to / "about.gmi")
+    (to / "about").rmdir()
+
+    # Make links relative
+    for g in to.glob("**/*.gmi"):
+        path_in_site = g.relative_to(to)
+
+        lines = g.read_text().splitlines()
+        for i, l in enumerate(lines):
+            if not l.startswith("=> "):
+                continue
+            l = l.removeprefix("=> ")
+            url, *rest = l.split(maxsplit=1)
+
+            ABSOLUTE_BLOG = "gemini://alex.corcoles.net/"
+            if url.startswith(ABSOLUTE_BLOG):
+                url = url.removeprefix(ABSOLUTE_BLOG)
+                url = url.removesuffix("/")
+                url = "../" * (len(path_in_site.parts) - 1) + url
+                gemini_link = " ".join([f"=> {url}", *rest])  # no trailing space when the link has no label
+                lines[i] = gemini_link
+                continue
+
+            ABSOLUTE_LASPELIS = "/laspelis/"
+            if url.startswith(ABSOLUTE_LASPELIS):
+                url = "../" * (len(path_in_site.parts) - 1) + url[1:]
+                gemini_link = " ".join([f"=> {url}", *rest])  # no trailing space when the link has no label
+                lines[i] = gemini_link
+                continue
+
+        reconstruct = "\n".join(lines)
+        if not reconstruct.endswith("\n"):
+            reconstruct += "\n"
+        g.write_text(reconstruct)
+
+
+def build(from_: pathlib.Path, to: pathlib.Path):
+    TITLE = "El blog es mío"
+    SUBTITLE = "Hay otros como él, pero este es el mío"
+
+    to.mkdir(parents=True, exist_ok=False)
+    shutil.copytree(from_, to, dirs_exist_ok=True)
+
+    dated_entries = [b for b in to.glob("**/*.gmi") if re.match(r"\d{4}-\d{2}-\d{2}", "".join(b.read_text().splitlines()[1:2]))]
+    dated_entries.sort()
+    dated_entries.reverse()
+
+    # Generate index.gmi
+    index = textwrap.dedent(f"""
+        # {TITLE}
+
+        {SUBTITLE}
+
+        Envíame email a alex arroba corcoles punto net.
+
+    """).lstrip()
+    for e in dated_entries:
+        title, date, *_ = e.read_text().splitlines()
+        title = title.removeprefix("# ")
+        index += f"=> {e.relative_to(to).as_posix().removesuffix('.gmi')} {date} {title}\n"
+
+    (to / "index.gmi").write_text(index)
+
+
+    # Convert to HTML
+    for gmi in to.glob("**/*.gmi"):
+        html = gmi.with_suffix(".html")
+        title = None
+        if gmi.relative_to(to).parts[0] == "laspelis":
+            title = "laspelis"
+            if gmi.name != "index.gmi":
+                subject = gmi.read_text().splitlines()[3]
+                assert subject.startswith("Subject: "), subject
+                title = subject.removeprefix("Subject: ")
+        html.write_text(gemtext.convert(gmi.read_text(), title))
+
+
+    # Generate RSS
+    fg = feed.FeedGenerator()
+    fg.title(TITLE)
+    fg.subtitle(SUBTITLE)
+    fg.link(href="https://alex.corcoles.net", rel="self")
+
+    for e in reversed(dated_entries[0:10]):
+        title, date, *_ = e.read_text().splitlines()
+        print(date, datetime.date.fromisoformat(date))
+        title = title.removeprefix("# ")
+        path = e.relative_to(to).as_posix().removesuffix('.gmi')  # relative to the output root, whatever its depth
+
+        fe = fg.add_entry()
+        url = f"https://alex.corcoles.net/{path}"
+        fe.link(href=url)
+        fe.published(
+            datetime.datetime.combine(
+                datetime.date.fromisoformat(date),
+                datetime.datetime.min.time(),
+                tzinfo=datetime.UTC,
+            )
+        )
+        fe.title(title)
+        fe.content(b"\n".join(list(map(lxml.html.tostring, lxml.html.document_fromstring(e.with_suffix(".html").read_bytes()).body[2:]))), type="html")
+    feed_dir = to / "feed"
+    feed_dir.mkdir()
+    (feed_dir / "index.html").write_bytes(fg.rss_str(pretty=True))
+
 
 def main() -> None:
     parser = argparse.ArgumentParser()
@@ -26,6 +144,11 @@ def main() -> None:
     subparser.add_argument("to", type=pathlib.Path)
     subparser.set_defaults(command=migrate)
 
+    subparser = subparsers.add_parser("build")
+    subparser.add_argument("from_", type=pathlib.Path)
+    subparser.add_argument("to", type=pathlib.Path)
+    subparser.set_defaults(command=build)
+
     args = vars(parser.parse_args())
     command = args.pop("command")
     command(**args)
diff --git a/blog_v2/src/blog/gemtext.py b/blog_v2/src/blog/gemtext.py
new file mode 100644
index 00000000..254fdbe2
--- /dev/null
+++ b/blog_v2/src/blog/gemtext.py
@@ -0,0 +1,336 @@
+import dataclasses
+import re
+import textwrap
+
+import htmlgenerator as h
+
+from blog import gemtext
+from blog import pretty
+
+
+def parse(s):
+    r"""
+    >>> parse('''# Header 1
+    ...
+    ... ## Header 2
+    ...
+    ... ### Header 3
+    ...
+    ... * List 1
+    ... * List 2
+    ...
+    ... > First line quote.
+    ... > Second line of quote.
+    ...
+    ... ```
+    ... Fenced
+    ... Lines
+    ... ```
+    ...
+    ... Paragraph.
+    ...
+    ... Another paragraph.
+    ... ''')
+    [Header(level=1, text='Header 1'),
+     Line(text=''),
+     Header(level=2, text='Header 2'),
+     Line(text=''),
+     Header(level=3, text='Header 3'),
+     Line(text=''),
+     List(items=[ListItem(text='List 1'),
+                 ListItem(text='List 2')]),
+     Line(text=''),
+     BlockQuote(lines=[BlockQuoteLine(text='First line quote.'),
+                       BlockQuoteLine(text='Second line of quote.')]),
+     Line(text=''),
+     Pre(content='Fenced\nLines\n'),
+     Line(text=''),
+     Line(text='Paragraph.'),
+     Line(text=''),
+     Line(text='Another paragraph.')]
+    """
+    lines = s.splitlines()
+
+    i = 0
+    gem = []
+
+    while i < len(lines):
+        line = parse_line(lines[i])
+
+        if isinstance(line, Link):
+            gem.append(line)
+            i = i + 1
+            continue
+
+        if isinstance(line, Header):
+            gem.append(line)
+            i = i + 1
+            continue
+
+        if isinstance(line, ListItem):
+            items = []
+            while i < len(lines) and isinstance(parse_line(lines[i]), ListItem):
+                items.append(parse_line(lines[i]))
+                i = i + 1
+            gem.append(List(items))
+            continue
+
+        if isinstance(line, BlockQuoteLine):
+            quotes = []
+            while i < len(lines) and isinstance(parse_line(lines[i]), BlockQuoteLine):
+                quotes.append(parse_line(lines[i]))
+                i = i + 1
+            gem.append(BlockQuote(quotes))
+            continue
+
+        if isinstance(line, PreFence):
+            content = ""
+            i = i + 1
+            while i < len(lines) and not isinstance(parse_line(lines[i]), PreFence):
+                content += lines[i]
+                content += "\n"
+                i = i + 1
+            gem.append(Pre(content))
+            i = i + 1
+            continue
+
+        gem.append(line)
+        i = i + 1
+
+    return gem
+
+
+def parse_line(l):
+    if Link.is_link(l):
+        return Link(l)
+    if Header.is_header(l):
+        return Header(l)
+    if ListItem.is_list_item(l):
+        return ListItem(l)
+    if BlockQuoteLine.is_block_quote_line(l):
+        return BlockQuoteLine(l)
+    if PreFence.is_pre_fence(l):
+        return PreFence()
+    return Line(l)
+
+
+@dataclasses.dataclass
+class Link:
+    """
+    >>> Link("=> http://example.com")
+    Link(url='http://example.com', text=None)
+
+    >>> Link("=> http://example.com Example text")
+    Link(url='http://example.com', text='Example text')
+    """
+
+    url: str
+    text: str | None
+
+    def __init__(self, line: str):
+        assert Link.is_link(line)
+        parts = line.removeprefix("=>").split(None, 1)  # whitespace after "=>" is optional
+        self.url = parts[0]
+        self.text = parts[1] if len(parts) > 1 else None
+
+    @staticmethod
+    def is_link(line: str):
+        return line.startswith("=>")
+
+
+@dataclasses.dataclass
+class Header:
+    """
+    >>> Header("# Level one")
+    Header(level=1, text='Level one')
+
+    >>> Header("## Level two")
+    Header(level=2, text='Level two')
+
+    >>> Header("### Level three")
+    Header(level=3, text='Level three')
+    """
+
+    level: int
+    text: str
+
+    def __init__(self, line: str):
+        assert Header.is_header(line)
+        hashes, self.text = line.split(None, 1)
+        self.level = len(hashes)
+
+    @staticmethod
+    def is_header(line: str):
+        return re.match(r"#{1,3} ", line) is not None  # a bool, not a Match object
+
+
+@dataclasses.dataclass
+class ListItem:
+    """
+    >>> ListItem("* foo")
+    ListItem(text='foo')
+    """
+
+    text: str
+
+    def __init__(self, line: str):
+        assert ListItem.is_list_item(line)
+        self.text = line[2:]
+
+    @staticmethod
+    def is_list_item(line: str):
+        return line.startswith("* ")
+
+
+@dataclasses.dataclass
+class BlockQuoteLine:
+    """
+    >>> BlockQuoteLine("> foo")
+    BlockQuoteLine(text='foo')
+
+    >>> BlockQuoteLine(">foo")
+    BlockQuoteLine(text='foo')
+    """
+
+    text: str
+
+    def __init__(self, line: str):
+        assert BlockQuoteLine.is_block_quote_line(line)
+        self.text = line[2:] if line.startswith("> ") else line[1:]
+
+    @staticmethod
+    def is_block_quote_line(line: str):
+        return line.startswith(">")
+
+
+class PreFence:
+    @staticmethod
+    def is_pre_fence(line: str):
+        return line == "```"
+
+
+@dataclasses.dataclass
+class Line:
+    text: str
+
+
+@dataclasses.dataclass
+class List:
+    items: list[ListItem]
+
+
+@dataclasses.dataclass
+class BlockQuote:
+    lines: list[BlockQuoteLine]
+
+
+@dataclasses.dataclass
+class Pre:
+    content: str
+
+
+def convert(gemtext, title=None):
+    content = parse(gemtext)
+    if not title:
+        title = content[0]
+        assert isinstance(title, Header), title
+        assert title.level == 1
+        title = title.text
+
+    return pretty.pretty_html(
+        h.render(
+            h.HTML(
+                h.HEAD(
+                    h.TITLE(title),
+                    h.STYLE(
+                        textwrap.dedent("""
+                            :root {
+                                color-scheme: light dark;
+                            }
+                            body {
+                                max-width: 40em;
+                                margin-left: auto;
+                                margin-right: auto;
+                                padding-left: 2em;
+                                padding-right: 2em;
+                            }
+                            p, blockquote, li {
+                                /* from Mozilla reader mode */
+                                line-height: 1.6em;
+                                font-size: 20px;
+                            }
+                        """).lstrip()
+                    ),
+                ),
+                h.BODY(
+                    *gemini_to_html(content),
+                ),
+                doctype="html",
+            ),
+            {},
+        )
+    )
+
+
+def gemini_to_html(parsed):  # noqa: C901, PLR0912, PLR0915
+    i = 0
+    result = []
+    while i < len(parsed):
+        gem_element = parsed[i]
+
+        if isinstance(gem_element, gemtext.Header):
+            header = [h.H1, h.H2, h.H3, h.H4, h.H5, h.H6][gem_element.level - 1]
+            result.append(header(gem_element.text))
+            i = i + 1
+            continue
+
+        if isinstance(gem_element, gemtext.List):
+            result.append(h.UL(*[h.LI(item.text) for item in gem_element.items]))
+            i = i + 1
+            continue
+
+        if isinstance(gem_element, gemtext.Link):
+            url = gem_element.url
+            if url.startswith("gemini://"):
+                url = "https://portal.mozz.us/gemini/" + url.removeprefix("gemini://")  # rewrite the scheme only
+
+            result.append(h.P(h.A(gem_element.text or gem_element.url, href=url)))
+            i = i + 1
+            continue
+
+        if gem_element == gemtext.Line(""):
+            i = i + 1
+            continue
+
+        if isinstance(gem_element, gemtext.BlockQuote):
+            content = []
+            for line in gem_element.lines:
+                if line.text:
+                    content.append(line.text)
+                content.append(h.BR())
+            result.append(h.BLOCKQUOTE(*content))
+            i = i + 1
+            continue
+
+        if isinstance(gem_element, gemtext.Line):
+            paragraph = [gem_element.text]
+            i = i + 1
+            while i < len(parsed):
+                gem_element = parsed[i]
+                if isinstance(gem_element, gemtext.Line) and gem_element.text != "":
+                    paragraph.append(h.BR())
+                    paragraph.append(gem_element.text)
+                    i = i + 1
+                else:
+                    break
+            result.append(h.P(*paragraph))
+            continue
+
+        if isinstance(gem_element, gemtext.Pre):
+            result.append(h.PRE(gem_element.content))
+            i = i + 1
+            continue
+
+        raise AssertionError(f"unknown element {gem_element}")  # unlike "assert False", survives python -O
+
+    return result
diff --git a/blog_v2/src/blog/pretty.py b/blog_v2/src/blog/pretty.py
new file mode 100644
index 00000000..2ae916a7
--- /dev/null
+++ b/blog_v2/src/blog/pretty.py
@@ -0,0 +1,5 @@
+from lxml import etree, html
+
+
+def pretty_html(s):
+    return etree.tostring(html.fromstring(s), pretty_print=True).decode("utf8")
-- 
cgit v1.2.3