diff options
| author | alex <alex@pdp7.net> | 2026-02-01 21:47:23 +0100 |
|---|---|---|
| committer | alexpdp7 <alex@corcoles.net> | 2026-02-01 22:03:42 +0100 |
| commit | c48ca0ca40ed9ac009d7f84ce213ef5c69536ca7 (patch) | |
| tree | 791845e06bb167794ec916fd6e2b04b6bba1234b /blog/src | |
| parent | f94e0a1bc1aea34cb4eadd2305ddf724722cd9b9 (diff) | |
Migrate blog
Turns the blog into a static site generator. Links in all Gemini files
have been rewritten to be relative.
Closes: #731
Closes: #372
Closes: #28
Diffstat (limited to 'blog/src')
| -rw-r--r-- | blog/src/blog/.gitignore | 1 | ||||
| -rw-r--r-- | blog/src/blog/__init__.py | 211 | ||||
| -rw-r--r-- | blog/src/blog/__main__.py | 43 | ||||
| -rw-r--r-- | blog/src/blog/blog_pages.py | 168 | ||||
| -rw-r--r-- | blog/src/blog/gemtext.py | 125 | ||||
| -rw-r--r-- | blog/src/blog/html.py | 155 | ||||
| -rw-r--r-- | blog/src/blog/meta.py | 16 | ||||
| -rw-r--r-- | blog/src/blog/page.py | 36 |
8 files changed, 278 insertions, 477 deletions
diff --git a/blog/src/blog/.gitignore b/blog/src/blog/.gitignore new file mode 100644 index 00000000..bee8a64b --- /dev/null +++ b/blog/src/blog/.gitignore @@ -0,0 +1 @@ +__pycache__ diff --git a/blog/src/blog/__init__.py b/blog/src/blog/__init__.py index 3d2c4224..b50cb617 100644 --- a/blog/src/blog/__init__.py +++ b/blog/src/blog/__init__.py @@ -1,70 +1,163 @@ -import importlib.resources +import argparse +import datetime +import pathlib import re +import shutil +import textwrap -import bicephalus -import htmlgenerator as h +from feedgen import feed +import lxml -from blog import blog_pages, gemtext, html, page, pretty +from blog import gemtext -STATIC = importlib.resources.files("static").iterdir().__next__().parent +def migrate(from_: pathlib.Path, to: pathlib.Path): + to.mkdir(parents=True, exist_ok=False) + shutil.copytree(from_ / "content", to, dirs_exist_ok=True) + shutil.copytree(from_ / "static" / "about", to / "about", dirs_exist_ok=True) -class SimplePage(page.BasePage): - def __init__(self, request, url, title): - super().__init__(request) - self.url = url - self.title = title + laspelis = to / "laspelis" + laspelis.mkdir() - def get_gemini_content(self): - file = STATIC / self.url[1:] / "index.gmi" - return ( - bicephalus.Status.OK, - "text/gemini", - file.read_text(), - ) + laspelis_index_lines = (from_ / "static" / "laspelis" / "index.gmi").read_text().splitlines() + laspelis_index_lines = ["".join(l.split("/", 1)) for l in laspelis_index_lines] + (to / "laspelis" / "index.gmi").write_text("\n".join(laspelis_index_lines) + "\n") - def get_http_content(self): - return ( - bicephalus.Status.OK, - "text/html", - pretty.pretty_html( - h.render( - h.HTML( - h.HEAD( - h.TITLE(self.title), - ), - h.BODY( - *html.gemini_to_html( - gemtext.parse(self.get_gemini_content()[2]) - ) - ), - ), - {}, - ) - ), - ) + for lp in (from_ / "static" / "laspelis").glob("*"): + if lp.name == "index.gmi": + continue + shutil.copy(lp / "mail", laspelis / f"{lp.name}.mail") + 
shutil.copy(lp / "index.gmi", laspelis / f"{lp.name}.gmi") + + (to / "about" / "index.gmi").replace(to / "about.gmi") + (to / "about").rmdir() + + # Make links relative + for g in to.glob("**/*.gmi"): + path_in_site = g.relative_to(to) + + lines = g.read_text().splitlines() + for i, l in enumerate(lines): + if not l.startswith("=> "): + continue + l = l.removeprefix("=> ") + url, *rest = l.split(maxsplit=1) + + ABSOLUTE_BLOG = "gemini://alex.corcoles.net/" + if url.startswith(ABSOLUTE_BLOG): + url = url.removeprefix(ABSOLUTE_BLOG) + url = url.removesuffix("/") + url = "../" * (len(path_in_site.parts) - 1) + url + gemini_link = " ".join((f"=> {url}", " ".join(rest))) + lines[i] = gemini_link + continue + + ABSOLUTE_LASPELIS = "/laspelis/" + if url.startswith(ABSOLUTE_LASPELIS): + url = "../" * (len(path_in_site.parts) - 1) + url[1:] + gemini_link = " ".join((f"=> {url}", " ".join(rest))) + lines[i] = gemini_link + continue + + reconstruct = "\n".join(lines) + if not reconstruct.endswith("\n"): + reconstruct += "\n" + g.write_text(reconstruct) + + # Build old URL slash removal redirect list + redirections = textwrap.dedent(""" + # This is a list of the pre-migration URLs of this blog. + # URLs in this list might had been linked with a trailing slash. 
+ # Use Apache httpd's RewriteMap to preserve only old URLs + """).lstrip() + for g in to.glob("**/*.gmi"): + url = str(g.relative_to(to).with_suffix("")) + redirections += f"{url}/ {url}\n" + pathlib.Path(to / "redirections.txt").write_text(redirections) + + +def build(from_: pathlib.Path, to: pathlib.Path): + TITLE = "El blog es mío" + SUBTITLE = "Hay otros como él, pero este es el mío" + + to.mkdir(parents=True, exist_ok=False) + shutil.copytree(from_, to, dirs_exist_ok=True) + + dated_entries = [b for b in to.glob("**/*.gmi") if re.match(r"\d{4}-\d{2}-\d{2}", b.read_text().splitlines()[1])] + dated_entries.sort() + dated_entries.reverse() + # Generate index.gmi + index = textwrap.dedent(f""" + # {TITLE} -def handler(request: bicephalus.Request) -> bicephalus.Response: # noqa: PLR0911 - if not request.path.endswith("/"): - return bicephalus.Response( - request.path + "/", None, bicephalus.Status.PERMANENT_REDIRECTION + {SUBTITLE} + + Envíame email a alex arroba corcoles punto net. + + """).lstrip() + for e in dated_entries: + title, date, *_ = e.read_text().splitlines() + title = title.removeprefix("# ") + index += f"=> {'/'.join(e.parts[-3:]).removesuffix('.gmi')} {date} {title}\n" + + (to / "index.gmi").write_text(index) + + + # Convert to HTML + for gmi in to.glob("**/*.gmi"): + html = gmi.with_suffix(".html") + title = None + if gmi.parts[-2] == "laspelis": + title = "laspelis" + if gmi.name != "index.gmi": + subject = gmi.read_text().splitlines()[3] + assert subject.startswith("Subject: "), subject + title = subject.removeprefix("Subject: ") + html.write_text(gemtext.convert(gmi.read_text(), title, ("feed/", TITLE) if gmi.relative_to(to) == pathlib.Path("index.gmi") else None)) + + # Generate RSS + fg = feed.FeedGenerator() + fg.title(TITLE) + fg.subtitle(SUBTITLE) + fg.link(href="https://alex.corcoles.net", rel="self") + + for e in reversed(dated_entries[0:10]): + title, date, *_ = e.read_text().splitlines() + title = title.removeprefix("# ") + path = 
"/".join(e.parts[2:]).removesuffix('.gmi') + + fe = fg.add_entry() + url = f"https://alex.corcoles.net/{path}" + fe.link(href=url) + fe.published( + datetime.datetime.combine( + datetime.date.fromisoformat(date), + datetime.datetime.min.time(), + tzinfo=datetime.UTC, + ) ) - if request.path == "/": - return blog_pages.Root(request).response() - if re.match(r"/\d{4}/\d{2}/.*/", request.path): - blog_file = blog_pages.CONTENT / (request.path[1:-1] + ".gmi") - if blog_file.exists(): - return blog_pages.EntryPage(request, blog_file).response() - if request.path == "/feed/" and request.proto == bicephalus.Proto.HTTP: - return blog_pages.Root(request).feed() - if request.path == "/about/": - return SimplePage(request, request.path, "About Álex Córcoles").response() - if request.path == "/laspelis/": - return SimplePage(request, request.path, "laspelis").response() - if re.match(r"/laspelis/\d+/", request.path): - return SimplePage( - request, request.path.removesuffix("/") + "/", request.path - ).response() - - return page.NotFound(request).response() + fe.title(title) + fe.content(b"\n".join(list(map(lxml.html.tostring, lxml.html.document_fromstring(e.with_suffix(".html").read_bytes()).body[2:]))), type="html") + feed_dir = to / "feed" + feed_dir.mkdir() + (feed_dir / "index.rss").write_bytes(fg.rss_str(pretty=True)) + + +def main() -> None: + parser = argparse.ArgumentParser() + subparsers = parser.add_subparsers(required=True) + + subparser = subparsers.add_parser("migrate") + subparser.add_argument("from_", type=pathlib.Path) + subparser.add_argument("to", type=pathlib.Path) + subparser.set_defaults(command=migrate) + + subparser = subparsers.add_parser("build") + subparser.add_argument("from_", type=pathlib.Path) + subparser.add_argument("to", type=pathlib.Path) + subparser.set_defaults(command=build) + + args = vars(parser.parse_args()) + command = args.pop("command") + command(**args) diff --git a/blog/src/blog/__main__.py b/blog/src/blog/__main__.py deleted file 
mode 100644 index 9d1ed9e2..00000000 --- a/blog/src/blog/__main__.py +++ /dev/null @@ -1,43 +0,0 @@ -import argparse -import logging -import sys - -from bicephalus import main as bicephalus_main -from bicephalus import otel, ssl - -import blog -from blog import meta - - -def main(): - otel.configure(log_level=logging.INFO) - - parser = argparse.ArgumentParser() - parser.add_argument( - "--key-cert", - nargs=2, - metavar=( - "KEY", - "CERT", - ), - help="Path to a key and a file", - ) - parser.add_argument("schema") - parser.add_argument("host") - args = parser.parse_args() - meta.SCHEMA = args.schema - meta.HOST = args.host - - if args.key_cert: - key, cert = args.key_cert - with ssl.ssl_context_from_files(cert, key) as ssl_context: - bicephalus_main.main(blog.handler, ssl_context, 8000) - sys.exit(0) - - with ssl.temporary_ssl_context("localhost") as ssl_context: - bicephalus_main.main(blog.handler, ssl_context, 8000) - sys.exit(0) - - -if __name__ == "__main__": - main() diff --git a/blog/src/blog/blog_pages.py b/blog/src/blog/blog_pages.py deleted file mode 100644 index a7d71c7e..00000000 --- a/blog/src/blog/blog_pages.py +++ /dev/null @@ -1,168 +0,0 @@ -import datetime -import importlib.resources -import itertools -import pathlib -import textwrap - -import bicephalus -import htmlgenerator as h -from feedgen import feed - -from blog import gemtext, html, meta, page, pretty - -CONTENT = importlib.resources.files("content").iterdir().__next__().parent - - -def gemini_links(): - return "\n".join([f"=> {url} {text}" for text, url in meta.LINKS]) - - -class Entry: - def __init__(self, path: pathlib.Path): - assert path.is_relative_to(CONTENT), ( - f"bad path {path} not relative to {CONTENT}" - ) - self.path = path - self.content = path.read_text() - self.relative_path = path.relative_to(CONTENT) - - @property - def title(self): - return self.content.splitlines()[0][2:] - - @property - def posted(self): - return 
datetime.date.fromisoformat(self.content.splitlines()[1]) - - @property - def uri(self): - """ - >>> Entry(CONTENT / "2003/11/toda-saga-tiene-su-inicio.gmi").uri - '/2003/11/toda-saga-tiene-su-inicio/' - """ - return "/".join( - [ - "", - self.relative_path.parts[0], - self.relative_path.parts[1], - self.relative_path.stem, - "", - ] - ) - - @property - def edit_url(self): - return f"https://github.com/alexpdp7/alexpdp7/edit/master/blog/content{self.uri[:-1]}.gmi" - - def html(self): - parsed = gemtext.parse(self.content) - - assert isinstance(parsed[0], gemtext.Header) - assert parsed[0].level == 1 - assert isinstance(parsed[1], gemtext.Line) - assert parsed[2] == gemtext.Line("") - - result = html.gemini_to_html(parsed[3:]) - result += [ - h.P(meta.EMAIL_TEXT), - h.P(h.A("Editar", href=self.edit_url)), - ] - return result - - -class Root(page.BasePage): - def entries(self): - entries = map(Entry, CONTENT.glob("*/*/*.gmi")) - return sorted(entries, key=lambda e: e.posted, reverse=True) - - def get_gemini_content(self): - posts = "\n".join([f"=> {e.uri} {e.posted} {e.title}" for e in self.entries()]) - content = ( - textwrap.dedent( - f"""\ - # {meta.TITLE} - - ## {meta.SUBTITLE} - - """ - ) - + gemini_links() - + f"\n{meta.EMAIL_TEXT}\n" - + "\n" - + posts - ) - return bicephalus.Status.OK, "text/gemini", content - - def get_http_content(self): - posts = [ - (h.H3(h.A(f"{e.title} ({e.posted})", href=e.uri))) for e in self.entries() - ] - return ( - bicephalus.Status.OK, - "text/html", - html.html_template( - *itertools.chain(posts), path=self.request.path, full=True - ), - ) - - def feed(self): - fg = feed.FeedGenerator() - fg.title(meta.TITLE) - fg.subtitle(meta.SUBTITLE) - fg.link(href=f"{meta.SCHEMA}://{meta.HOST}", rel="self") - - for entry in self.entries()[0:10]: - fe = fg.add_entry() - url = f"{meta.SCHEMA}://{meta.HOST}/{entry.uri}" - fe.link(href=url) - fe.published( - datetime.datetime.combine( - entry.posted, - datetime.datetime.min.time(), - 
tzinfo=datetime.UTC, - ) - ) - fe.title(entry.title) - html = h.render(h.BaseElement(*entry.html()), {}) - html = pretty.pretty_html(html) - fe.content(html, type="html") - - return bicephalus.Response( - status=bicephalus.Status.OK, - content_type="application/rss+xml", - content=fg.rss_str(pretty=True), - ) - - -class EntryPage(page.BasePage): - def __init__(self, request, path): - super().__init__(request) - self.path = path - self.entry = Entry(path) - - def get_gemini_content(self): - content = ( - textwrap.dedent(f"""\ - => gemini://{meta.HOST} alex.corcoles.net - {meta.EMAIL_TEXT} - - """) - + self.entry.content - + textwrap.dedent(f"""\ - => {self.entry.edit_url} Editar - """) - ) - - return bicephalus.Status.OK, "text/gemini", content - - def get_http_content(self): - return ( - bicephalus.Status.OK, - "text/html", - html.html_template( - *self.entry.html(), - page_title=f"{self.entry.title} - {self.entry.posted}", - path=self.request.path, - full=False, - ), - ) diff --git a/blog/src/blog/gemtext.py b/blog/src/blog/gemtext.py index f79be9ee..3fe293a9 100644 --- a/blog/src/blog/gemtext.py +++ b/blog/src/blog/gemtext.py @@ -1,5 +1,10 @@ import dataclasses import re +import textwrap + +import htmlgenerator as h + +from blog import pretty def parse(s): @@ -221,3 +226,123 @@ class BlockQuote: @dataclasses.dataclass class Pre: content: str + + +def convert(gemtext, title=None, feed=None): + content = parse(gemtext) + if not title: + title = content[0] + assert isinstance(title, Header), title + assert title.level == 1 + title = title.text + + if feed: + href, title = feed + feed = [ + h.LINK( + rel="alternate", + type="application/rss+xml", + title=title, + href=href, + ), + ] + else: + feed = [] + return pretty.pretty_html( + h.render( + h.HTML( + h.HEAD( + h.TITLE(title), + h.STYLE( + textwrap.dedent(""" + :root { + color-scheme: light dark; + } + body { + max-width: 40em; + margin-left: auto; + margin-right: auto; + padding-left: 2em; + padding-right: 2em; + 
} + p, blockquote, li { + /* from Mozilla reader mode */ + line-height: 1.6em; + font-size: 20px; + } + """).lstrip() + ), + *feed, + ), + h.BODY( + *gemini_to_html(content), + ), + doctype="html", + ), + {}, + ) + ) + + +def gemini_to_html(parsed): # noqa: C901, PLR0912, PLR0915 + i = 0 + result = [] + while i < len(parsed): + gem_element = parsed[i] + + if isinstance(gem_element, Header): + header = [h.H1, h.H2, h.H3, h.H4, h.H5, h.H6][gem_element.level - 1] + result.append(header(gem_element.text)) + i = i + 1 + continue + + if isinstance(gem_element, List): + result.append(h.UL(*[h.LI(i.text) for i in gem_element.items])) + i = i + 1 + continue + + if isinstance(gem_element, Link): + url = gem_element.url + if url.startswith("gemini://"): + url = url.replace("gemini://", "https://portal.mozz.us/gemini/") + + result.append(h.P(h.A(gem_element.text or gem_element.url, href=url))) + i = i + 1 + continue + + if gem_element == Line(""): + i = i + 1 + continue + + if isinstance(gem_element, BlockQuote): + content = [] + for line in gem_element.lines: + if line.text: + content.append(line.text) + content.append(h.BR()) + result.append(h.BLOCKQUOTE(*content)) + i = i + 1 + continue + + if isinstance(gem_element, Line): + paragraph = [gem_element.text] + i = i + 1 + while i < len(parsed): + gem_element = parsed[i] + if isinstance(gem_element, Line) and gem_element.text != "": + paragraph.append(h.BR()) + paragraph.append(gem_element.text) + i = i + 1 + else: + break + result.append(h.P(*paragraph)) + continue + + if isinstance(gem_element, Pre): + result.append(h.PRE(gem_element.content)) + i = i + 1 + continue + + assert False, f"unknown element {gem_element}" + + return result diff --git a/blog/src/blog/html.py b/blog/src/blog/html.py deleted file mode 100644 index 19fe6ea1..00000000 --- a/blog/src/blog/html.py +++ /dev/null @@ -1,155 +0,0 @@ -import itertools -import textwrap - -import htmlgenerator as h - -from blog import gemtext, meta, pretty - - -def 
html_template(*content, page_title=None, path, full): - title = [h.A(meta.TITLE, href=f"{meta.SCHEMA}://{meta.HOST}")] - if page_title: - title += f" - {page_title}" - - title = h.BaseElement(*title) - - links = list( - itertools.chain(*[(h.A(text, href=href), ", ") for text, href in meta.LINKS]) - ) - - links += [h.BaseElement(f" {meta.EMAIL_TEXT}")] - - full_part = [] - if full: - full_part = [ - h.H2(meta.SUBTITLE), - h.P( - h.A( - "Buscar con DuckDuckGo en esta página", - href="https://html.duckduckgo.com/html/?q=site:alex.corcoles.net", - ) - ), - h.P(*links), - ] - - gemini_url = f"gemini://alex.corcoles.net{path}" - - return pretty.pretty_html( - h.render( - h.HTML( - h.HEAD( - h.TITLE(meta.TITLE + (f" - {page_title}" if page_title else "")), - h.LINK( - rel="alternate", - type="application/rss+xml", - title=meta.TITLE, - href=f"{meta.SCHEMA}://{meta.HOST}/feed/", - ), - h.STYLE( - textwrap.dedent(""" - :root { - color-scheme: light dark; - } - body { - max-width: 40em; - margin-left: auto; - margin-right: auto; - padding-left: 2em; - padding-right: 2em; - } - p, blockquote, li { - /* from Mozilla reader mode */ - line-height: 1.6em; - font-size: 20px; - } - """).lstrip() - ), - ), - h.BODY( - h.P( - "Contenido tambien disponible en Gemini en ", - h.A(gemini_url, href=gemini_url), - ". 
", - h.A( - "Información sobre Gemini.", - href="https://geminiprotocol.net/", - ), - ), - h.H1(title), - *full_part, - *content, - ), - doctype="html", - ), - {}, - ) - ) - - -def gemini_to_html(parsed): # noqa: C901, PLR0912, PLR0915 - i = 0 - result = [] - while i < len(parsed): - gem_element = parsed[i] - - if isinstance(gem_element, gemtext.Header): - header = [h.H1, h.H2, h.H3, h.H4, h.H5, h.H6][gem_element.level - 1] - result.append(header(gem_element.text)) - i = i + 1 - continue - - if isinstance(gem_element, gemtext.List): - result.append(h.UL(*[h.LI(i.text) for i in gem_element.items])) - i = i + 1 - continue - - if isinstance(gem_element, gemtext.Link): - url = gem_element.url - if url.startswith("gemini://"): - if url.startswith("gemini://alex.corcoles.net/"): - url = url.replace( - "gemini://alex.corcoles.net/", f"{meta.SCHEMA}://{meta.HOST}/" - ) - else: - url = url.replace("gemini://", "https://portal.mozz.us/gemini/") - - result.append(h.P(h.A(gem_element.text or gem_element.url, href=url))) - i = i + 1 - continue - - if gem_element == gemtext.Line(""): - i = i + 1 - continue - - if isinstance(gem_element, gemtext.BlockQuote): - content = [] - for line in gem_element.lines: - if line.text: - content.append(line.text) - content.append(h.BR()) - result.append(h.BLOCKQUOTE(*content)) - i = i + 1 - continue - - if isinstance(gem_element, gemtext.Line): - paragraph = [gem_element.text] - i = i + 1 - while i < len(parsed): - gem_element = parsed[i] - if isinstance(gem_element, gemtext.Line) and gem_element.text != "": - paragraph.append(h.BR()) - paragraph.append(gem_element.text) - i = i + 1 - else: - break - result.append(h.P(*paragraph)) - continue - - if isinstance(gem_element, gemtext.Pre): - result.append(h.PRE(gem_element.content)) - i = i + 1 - continue - - assert False, f"unknown element {gem_element}" - - return result diff --git a/blog/src/blog/meta.py b/blog/src/blog/meta.py deleted file mode 100644 index 673454f5..00000000 --- 
a/blog/src/blog/meta.py +++ /dev/null @@ -1,16 +0,0 @@ -TITLE = "El blog es mío" -SUBTITLE = "Hay otros como él, pero este es el mío" -HOST = None -SCHEMA = None - -LINKS = ( - ("@yo@alex.femto.pub", "https://alex.femto.pub/@yo@alex.femto.pub/"), - ("IMDb", "https://www.imdb.com/user/ur0633823/ratings/?sort=top_rated%2Cdesc"), - ("GitHub", "https://github.com/alexpdp7/"), - ("TVmaze", "https://www.tvmaze.com/users/35495/koala/stats"), - ("LinkedIn", "https://es.linkedin.com/in/alexcorcoles"), - ("Project Euler", "https://projecteuler.net/profile/koalillo.png"), - ("Stack Exchange", "https://stackexchange.com/users/13361/alex"), -) - -EMAIL_TEXT = "envíame mail al dominio que uso cambiando el primer punto por una arroba" diff --git a/blog/src/blog/page.py b/blog/src/blog/page.py deleted file mode 100644 index 5e9df638..00000000 --- a/blog/src/blog/page.py +++ /dev/null @@ -1,36 +0,0 @@ -import bicephalus - - -class BasePage: - def __init__(self, request): - self.request = request - - def response(self): - if self.request.proto == bicephalus.Proto.GEMINI: - status, content_type, content = self.get_gemini_content() - elif self.request.proto == bicephalus.Proto.HTTP: - status, content_type, content = self.get_http_content() - else: - assert False, f"unknown protocol {self.request.proto}" - - return bicephalus.Response( - content=content.encode("utf8"), - content_type=content_type, - status=status, - ) - - -class NotFound(BasePage): - def get_gemini_content(self): - return ( - bicephalus.Status.NOT_FOUND, - "text/gemini", - f"{self.request.path} not found", - ) - - def get_http_content(self): - return ( - bicephalus.Status.NOT_FOUND, - "text/html", - f"{self.request.path} not found", - ) |
