aboutsummaryrefslogtreecommitdiff
path: root/blog_v2/src
diff options
context:
space:
mode:
authoralex <alex@pdp7.net>2026-01-24 14:16:50 +0100
committeralexpdp7 <alex@corcoles.net>2026-01-24 17:51:44 +0000
commit504c0930bed2a274c4c468614555d378fb859a22 (patch)
tree25e602471c78b272119bd46503d38e752f94d093 /blog_v2/src
parentf82d54a8cc80ee9b567ad37f824d5ed346e73dc7 (diff)
More porting to new system
Diffstat (limited to 'blog_v2/src')
-rw-r--r--blog_v2/src/blog/__init__.py127
-rw-r--r--blog_v2/src/blog/gemtext.py336
-rw-r--r--blog_v2/src/blog/pretty.py5
3 files changed, 466 insertions, 2 deletions
diff --git a/blog_v2/src/blog/__init__.py b/blog_v2/src/blog/__init__.py
index 03f48996..cdc15a8d 100644
--- a/blog_v2/src/blog/__init__.py
+++ b/blog_v2/src/blog/__init__.py
@@ -1,21 +1,139 @@
import argparse
+import datetime
import pathlib
+import re
import shutil
+import textwrap
+
+from feedgen import feed
+import lxml
+
+from blog import gemtext
def _relativize_link(line: str, depth: int) -> str:
    """Return ``line`` with a site-absolute link target made relative.

    ``depth`` is the number of directories between the site root and the file
    that contains ``line``; that many ``../`` segments are prepended to the
    rewritten target.

    Only two kinds of link are rewritten: URLs under the absolute blog address
    (the prefix and a trailing ``/`` are dropped) and paths under
    ``/laspelis/`` (the leading ``/`` is dropped). Every other line, including
    a ``=> `` line with no target, is returned unchanged.
    """
    absolute_blog = "gemini://alex.corcoles.net/"
    absolute_laspelis = "/laspelis/"

    if not line.startswith("=> "):
        return line
    fields = line.removeprefix("=> ").split(maxsplit=1)
    if not fields:
        # "=> " with nothing after it: there is no target to rewrite.
        return line
    url, *label = fields

    if url.startswith(absolute_blog):
        target = url.removeprefix(absolute_blog).removesuffix("/")
    elif url.startswith(absolute_laspelis):
        target = url[1:]
    else:
        return line

    # Join only the parts that exist so an unlabelled link gets no trailing space.
    return " ".join([f"=> {'../' * depth}{target}", *label])


def migrate(from_: pathlib.Path, to: pathlib.Path):
    """Copy the old site layout under ``from_`` into a new tree at ``to``.

    ``to`` must not exist yet. The steps are:

    * ``content`` is copied to the root of ``to``;
    * ``static/about/index.gmi`` becomes ``about.gmi``;
    * every ``static/laspelis/<name>/`` directory is flattened into
      ``laspelis/<name>.mail`` and ``laspelis/<name>.gmi``, and the first
      ``/`` of each line of the laspelis index is removed (presumably so
      ``<name>/`` links point at the flattened files -- confirm);
    * links in every ``.gmi`` file are made relative.

    Raises:
        FileExistsError: if ``to`` already exists.
    """
    to.mkdir(parents=True, exist_ok=False)
    shutil.copytree(from_ / "content", to, dirs_exist_ok=True)
    shutil.copytree(from_ / "static" / "about", to / "about", dirs_exist_ok=True)

    old_laspelis = from_ / "static" / "laspelis"
    laspelis = to / "laspelis"
    laspelis.mkdir()

    index_lines = (old_laspelis / "index.gmi").read_text(encoding="utf-8").splitlines()
    index_lines = ["".join(line.split("/", 1)) for line in index_lines]
    (laspelis / "index.gmi").write_text("\n".join(index_lines) + "\n", encoding="utf-8")

    for lp in old_laspelis.glob("*"):
        if lp.name == "index.gmi":
            continue
        shutil.copy(lp / "mail", laspelis / f"{lp.name}.mail")
        shutil.copy(lp / "index.gmi", laspelis / f"{lp.name}.gmi")

    (to / "about" / "index.gmi").replace(to / "about.gmi")
    (to / "about").rmdir()

    # Make links relative
    for g in to.glob("**/*.gmi"):
        depth = len(g.relative_to(to).parts) - 1
        lines = [
            _relativize_link(line, depth)
            for line in g.read_text(encoding="utf-8").splitlines()
        ]

        reconstruct = "\n".join(lines)
        if not reconstruct.endswith("\n"):
            reconstruct += "\n"
        g.write_text(reconstruct, encoding="utf-8")
+
+
def build(from_: pathlib.Path, to: pathlib.Path):
    """Build the publishable site in ``to`` from the gemtext tree in ``from_``.

    ``to`` must not exist yet. After copying ``from_`` into it, this:

    * writes ``index.gmi`` listing every dated entry, newest path first (an
      entry is dated when the second line of its ``.gmi`` file starts with a
      ``YYYY-MM-DD`` date);
    * writes an ``.html`` file next to every ``.gmi`` file;
    * writes an RSS document for the ten first entries of that listing to
      ``feed/index.html``.

    Raises:
        FileExistsError: if ``to`` already exists.
    """
    # ``lxml.html`` is a submodule: a bare ``import lxml`` only exposes it if
    # some other module happened to import it first.
    import lxml.html

    TITLE = "El blog es mío"
    SUBTITLE = "Hay otros como él, pero este es el mío"
    SITE_URL = "https://alex.corcoles.net"

    to.mkdir(parents=True, exist_ok=False)
    shutil.copytree(from_, to, dirs_exist_ok=True)

    def site_path(entry: pathlib.Path) -> str:
        # Path of the entry inside the site, independent of how deep ``to`` is.
        return entry.relative_to(to).as_posix().removesuffix(".gmi")

    date_line = re.compile(r"\d{4}-\d{2}-\d{2}")
    dated_entries = []
    for candidate in to.glob("**/*.gmi"):
        head = candidate.read_text(encoding="utf-8").splitlines()[:2]
        # Files with fewer than two lines cannot carry a date line.
        if len(head) == 2 and date_line.match(head[1]):
            title, date = head
            dated_entries.append((candidate, title.removeprefix("# "), date))
    dated_entries.sort(key=lambda entry: entry[0], reverse=True)

    # Generate index.gmi
    index = textwrap.dedent(f"""
        # {TITLE}

        {SUBTITLE}

        Envíame email a alex arroba corcoles punto net.

        """).lstrip()
    index += "".join(
        f"=> {site_path(entry)} {date} {title}\n"
        for entry, title, date in dated_entries
    )
    (to / "index.gmi").write_text(index, encoding="utf-8")

    # Convert to HTML
    for gmi in to.glob("**/*.gmi"):
        source = gmi.read_text(encoding="utf-8")
        title = None
        if gmi.relative_to(to).parts[0] == "laspelis":
            title = "laspelis"
            if gmi.name != "index.gmi":
                # Non-index laspelis pages take their title from the
                # "Subject: " line expected on the fourth line.
                subject = source.splitlines()[3]
                assert subject.startswith("Subject: "), subject
                title = subject.removeprefix("Subject: ")
        gmi.with_suffix(".html").write_text(
            gemtext.convert(source, title), encoding="utf-8"
        )

    # Generate RSS
    fg = feed.FeedGenerator()
    fg.title(TITLE)
    fg.subtitle(SUBTITLE)
    fg.link(href=SITE_URL, rel="self")

    for entry, title, date in reversed(dated_entries[0:10]):
        fe = fg.add_entry()
        fe.link(href=f"{SITE_URL}/{site_path(entry)}")
        fe.published(
            datetime.datetime.combine(
                datetime.date.fromisoformat(date),
                datetime.datetime.min.time(),
                tzinfo=datetime.UTC,
            )
        )
        fe.title(title)
        # The first two children of <body> are left out of the feed content.
        document = lxml.html.document_fromstring(
            entry.with_suffix(".html").read_bytes()
        )
        fe.content(
            b"\n".join(map(lxml.html.tostring, document.body[2:])),
            type="html",
        )

    feed_dir = to / "feed"
    feed_dir.mkdir()
    (feed_dir / "index.html").write_bytes(fg.rss_str(pretty=True))
+
def main() -> None:
parser = argparse.ArgumentParser()
@@ -26,6 +144,11 @@ def main() -> None:
subparser.add_argument("to", type=pathlib.Path)
subparser.set_defaults(command=migrate)
+ subparser = subparsers.add_parser("build")
+ subparser.add_argument("from_", type=pathlib.Path)
+ subparser.add_argument("to", type=pathlib.Path)
+ subparser.set_defaults(command=build)
+
args = vars(parser.parse_args())
command = args.pop("command")
command(**args)
diff --git a/blog_v2/src/blog/gemtext.py b/blog_v2/src/blog/gemtext.py
new file mode 100644
index 00000000..254fdbe2
--- /dev/null
+++ b/blog_v2/src/blog/gemtext.py
@@ -0,0 +1,336 @@
+import dataclasses
+import re
+import textwrap
+
+import htmlgenerator as h
+
+from blog import gemtext
+from blog import pretty
+
+
def parse(s):
    r"""
    Parse gemtext source into a list of block-level elements.

    Consecutive list items are grouped into one ``List``, consecutive quote
    lines into one ``BlockQuote``, and the lines between two fences into one
    ``Pre``. Every other line becomes its own element.

    >>> parse('''# Header 1
    ...
    ... ## Header 2
    ...
    ... ### Header 3
    ...
    ... * List 1
    ... * List 2
    ...
    ... > First line quote.
    ... > Second line of quote.
    ...
    ... ```
    ... Fenced
    ... Lines
    ... ```
    ...
    ... Paragraph.
    ...
    ... Another paragraph.
    ... ''')
    [Header(level=1, text='Header 1'),
     Line(text=''),
     Header(level=2, text='Header 2'),
     Line(text=''),
     Header(level=3, text='Header 3'),
     Line(text=''),
     List(items=[ListItem(text='List 1'),
                 ListItem(text='List 2')]),
     Line(text=''),
     BlockQuote(lines=[BlockQuoteLine(text='First line quote.'),
                       BlockQuoteLine(text='Second line of quote.')]),
     Line(text=''),
     Pre(content='Fenced\nLines\n'),
     Line(text=''),
     Line(text='Paragraph.'),
     Line(text=''),
     Line(text='Another paragraph.')]
    """
    raw_lines = s.splitlines()
    total = len(raw_lines)

    elements = []
    pos = 0
    while pos < total:
        current = parse_line(raw_lines[pos])

        # Runs of list items / quote lines collapse into one container.
        if isinstance(current, (ListItem, BlockQuoteLine)):
            kind = type(current)
            run = []
            while pos < total:
                candidate = parse_line(raw_lines[pos])
                if not isinstance(candidate, kind):
                    break
                run.append(candidate)
                pos += 1
            elements.append(List(run) if kind is ListItem else BlockQuote(run))
            continue

        # Everything up to the closing fence (or end of input) is kept verbatim.
        if isinstance(current, PreFence):
            pos += 1
            body = []
            while pos < total and not isinstance(parse_line(raw_lines[pos]), PreFence):
                body.append(raw_lines[pos] + "\n")
                pos += 1
            elements.append(Pre("".join(body)))
            pos += 1  # skip the closing fence
            continue

        # Links, headers and plain lines stand on their own.
        elements.append(current)
        pos += 1

    return elements
+
+
def parse_line(l):
    """Classify one gemtext line and return the matching line-level object.

    Recognisers are tried in order: link, header, list item, quote line,
    preformatting fence. A line that matches none is returned as ``Line``.
    """
    recognisers = (
        (Link.is_link, Link),
        (Header.is_header, Header),
        (ListItem.is_list_item, ListItem),
        (BlockQuoteLine.is_block_quote_line, BlockQuoteLine),
    )
    for matches, build in recognisers:
        if matches(l):
            return build(l)

    # A fence carries no payload, so it is built without the line.
    if PreFence.is_pre_fence(l):
        return PreFence()
    return Line(l)
+
+
+@dataclasses.dataclass
+class Link:
+ """
+ >>> Link("=> http://example.com")
+ Link(url='http://example.com', text=None)
+
+ >>> Link("=> http://example.com Example text")
+ Link(url='http://example.com', text='Example text')
+ """
+
+ url: str
+ text: str | None
+
+ def __init__(self, line: str):
+ assert Link.is_link(line)
+ parts = line.split(None, 2)
+ self.url = parts[1]
+ self.text = parts[2] if len(parts) > 2 else None # noqa: PLR2004, > 2 "magic constant"
+
+ @staticmethod
+ def is_link(line: str):
+ return line.startswith("=>")
+
+
@dataclasses.dataclass
class Header:
    """
    A gemtext heading line: one to three ``#`` characters, a space, then the text.

    >>> Header("# Level one")
    Header(level=1, text='Level one')

    >>> Header("## Level two")
    Header(level=2, text='Level two')

    >>> Header("### Level three")
    Header(level=3, text='Level three')

    >>> Header("# ")
    Header(level=1, text='')
    """

    level: int
    text: str

    def __init__(self, line: str):
        assert Header.is_header(line)
        # is_header guarantees the hashes are followed by a space; partition
        # (unlike split) still yields a text part when the heading is empty.
        hashes, _, rest = line.partition(" ")
        self.level = len(hashes)
        self.text = rest.lstrip()

    @staticmethod
    def is_header(line: str) -> bool:
        return re.match(r"#{1,3} .*", line) is not None
+
+
@dataclasses.dataclass
class ListItem:
    """
    One unordered-list entry: a line introduced by ``"* "``.

    >>> ListItem("* foo")
    ListItem(text='foo')
    """

    text: str

    def __init__(self, line: str):
        assert ListItem.is_list_item(line)
        # Everything after the two-character marker is the item text.
        self.text = line.removeprefix("* ")

    @staticmethod
    def is_list_item(line: str):
        return line[:2] == "* "
+
+
@dataclasses.dataclass
class BlockQuoteLine:
    """
    One quoted line: ``>`` optionally followed by a single space, then the text.

    >>> BlockQuoteLine("> foo")
    BlockQuoteLine(text='foo')

    >>> BlockQuoteLine(">foo")
    BlockQuoteLine(text='foo')
    """

    text: str

    def __init__(self, line: str):
        assert BlockQuoteLine.is_block_quote_line(line)
        # Strip the marker, then at most one space directly after it.
        self.text = line.removeprefix(">").removeprefix(" ")

    @staticmethod
    def is_block_quote_line(line: str):
        return line[:1] == ">"
+
+
class PreFence:
    """Marker for a line that toggles preformatted mode."""

    @staticmethod
    def is_pre_fence(line: str) -> bool:
        # Any line *starting* with three backticks toggles preformatted mode;
        # whatever follows on that line is alt text (e.g. "```python").
        return line.startswith("```")
+
+
@dataclasses.dataclass
class Line:
    """A line that matched no other gemtext line type; ``text`` is the line as given."""

    text: str
+
+
@dataclasses.dataclass
class List:
    """A run of consecutive list items, grouped by ``parse``."""

    items: list[ListItem]
+
+
@dataclasses.dataclass
class BlockQuote:
    """A run of consecutive quote lines, grouped by ``parse``."""

    lines: list[BlockQuoteLine]
+
+
@dataclasses.dataclass
class Pre:
    """A preformatted block; ``content`` holds the fenced lines, each ending in a newline."""

    content: str
+
+
def convert(gemtext, title=None):
    """Render gemtext source as a complete, pretty-printed HTML document.

    ``title`` becomes the ``<title>`` of the page. When it is omitted (or
    empty) the document must open with a level-1 header, whose text is used
    instead.
    """
    content = parse(gemtext)
    if not title:
        first = content[0]
        assert isinstance(first, Header), first
        assert first.level == 1
        title = first.text

    stylesheet = textwrap.dedent("""
        :root {
            color-scheme: light dark;
        }
        body {
            max-width: 40em;
            margin-left: auto;
            margin-right: auto;
            padding-left: 2em;
            padding-right: 2em;
        }
        p, blockquote, li {
            /* from Mozilla reader mode */
            line-height: 1.6em;
            font-size: 20px;
        }
        """).lstrip()

    document = h.HTML(
        h.HEAD(
            h.TITLE(title),
            h.STYLE(stylesheet),
        ),
        h.BODY(
            *gemini_to_html(content),
        ),
        doctype="html",
    )
    return pretty.pretty_html(h.render(document, {}))
+
+
def gemini_to_html(parsed):  # noqa: C901, PLR0912, PLR0915
    """Translate elements produced by ``parse`` into htmlgenerator elements.

    * headers become ``H1``..``H3`` according to their level;
    * lists become ``UL``/``LI``;
    * links become a paragraph holding one anchor, with ``gemini://`` URLs
      routed through the portal.mozz.us proxy;
    * empty lines are dropped and act as paragraph separators;
    * quotes become one ``BLOCKQUOTE`` with a ``BR`` after each line;
    * consecutive non-empty lines are merged into one ``P`` separated by ``BR``;
    * preformatted blocks become ``PRE``.

    Raises:
        AssertionError: if ``parsed`` contains an element of an unknown type.
    """
    gemini_scheme = "gemini://"
    gemini_portal = "https://portal.mozz.us/gemini/"
    header_tags = [h.H1, h.H2, h.H3, h.H4, h.H5, h.H6]

    i = 0
    result = []
    while i < len(parsed):
        gem_element = parsed[i]

        if isinstance(gem_element, Header):
            header = header_tags[gem_element.level - 1]
            result.append(header(gem_element.text))
            i = i + 1
            continue

        if isinstance(gem_element, List):
            result.append(h.UL(*[h.LI(item.text) for item in gem_element.items]))
            i = i + 1
            continue

        if isinstance(gem_element, Link):
            url = gem_element.url
            if url.startswith(gemini_scheme):
                # Rewrite the scheme prefix only; a "gemini://" occurring later
                # in the URL (e.g. in a query string) must be left alone.
                url = gemini_portal + url.removeprefix(gemini_scheme)

            # The visible label falls back to the original, unproxied URL.
            result.append(h.P(h.A(gem_element.text or gem_element.url, href=url)))
            i = i + 1
            continue

        if gem_element == Line(""):
            i = i + 1
            continue

        if isinstance(gem_element, BlockQuote):
            content = []
            for line in gem_element.lines:
                if line.text:
                    content.append(line.text)
                content.append(h.BR())
            result.append(h.BLOCKQUOTE(*content))
            i = i + 1
            continue

        if isinstance(gem_element, Line):
            paragraph = [gem_element.text]
            i = i + 1
            # Absorb the following non-empty lines into the same paragraph.
            while i < len(parsed):
                gem_element = parsed[i]
                if isinstance(gem_element, Line) and gem_element.text != "":
                    paragraph.append(h.BR())
                    paragraph.append(gem_element.text)
                    i = i + 1
                else:
                    break
            result.append(h.P(*paragraph))
            continue

        if isinstance(gem_element, Pre):
            result.append(h.PRE(gem_element.content))
            i = i + 1
            continue

        # Raised explicitly: an ``assert`` would be stripped under ``-O`` and
        # the loop would then spin forever on the unknown element.
        raise AssertionError(f"unknown element {gem_element}")

    return result
diff --git a/blog_v2/src/blog/pretty.py b/blog_v2/src/blog/pretty.py
new file mode 100644
index 00000000..2ae916a7
--- /dev/null
+++ b/blog_v2/src/blog/pretty.py
@@ -0,0 +1,5 @@
+from lxml import etree, html
+
+
def pretty_html(s):
    """Parse the HTML markup ``s`` with lxml and return it pretty-printed as ``str``."""
    # NOTE(review): etree.tostring is called without a ``method`` argument;
    # confirm that its default serialization (rather than lxml.html's) is the
    # intended output format for empty elements and the doctype.
    tree = html.fromstring(s)
    serialized = etree.tostring(tree, pretty_print=True)
    return serialized.decode("utf8")