about summary refs log tree commit diff
path: root/gemini-to-web/src
diff options
context:
space:
mode:
author alex <alex@pdp7.net> 2026-02-15 10:42:00 +0100
committer alex <alex@pdp7.net> 2026-02-15 10:42:00 +0100
commit cc5d4e07146da033ded55c216b41378bca9a10b4 (patch)
tree 0fb4e8dff5d64a9c47d02706716e4377a81a6d04 /gemini-to-web/src
parent 144273180f8b4fdcc3356bbe72c7bdc799f2c6eb (diff)
Add RSS generation
Remove old deprecated CLI converter in __init__
Diffstat (limited to 'gemini-to-web/src')
-rw-r--r-- gemini-to-web/src/gemini_to_web/__init__.py 25
-rw-r--r-- gemini-to-web/src/gemini_to_web/indexer.py 8
-rw-r--r-- gemini-to-web/src/gemini_to_web/rss.py 63
3 files changed, 68 insertions, 28 deletions
diff --git a/gemini-to-web/src/gemini_to_web/__init__.py b/gemini-to-web/src/gemini_to_web/__init__.py
index 0b2b70e..827031f 100644
--- a/gemini-to-web/src/gemini_to_web/__init__.py
+++ b/gemini-to-web/src/gemini_to_web/__init__.py
@@ -1,25 +1,4 @@
-import argparse
-import pathlib
-import shutil
+import re
-import htmlgenerator
-from gemini_to_web import html
-
-def converter():
- parser = argparse.ArgumentParser()
- parser.add_argument("source", type=pathlib.Path)
- parser.add_argument("target", type=pathlib.Path)
- args = parser.parse_args()
-
- shutil.copytree(args.source, args.target)
- for gmi in args.target.glob("**/*.gmi"):
- html_path = gmi.with_suffix(".html")
- html_path.write_text(
- html.pretty(
- htmlgenerator.render(
- html.to_html(gmi.read_text()),
- {}
- )
- )
- )
+ENTRY_ELEMENT_TITLE = re.compile(r"^(\d{4}-\d{2}-\d{2}) (.*)$")
diff --git a/gemini-to-web/src/gemini_to_web/indexer.py b/gemini-to-web/src/gemini_to_web/indexer.py
index 3d463e3..a6ceee0 100644
--- a/gemini-to-web/src/gemini_to_web/indexer.py
+++ b/gemini-to-web/src/gemini_to_web/indexer.py
@@ -1,15 +1,12 @@
import argparse
import pathlib
-import re
import sys
+import gemini_to_web
from gemini_to_web import parser
from gemini_to_web import html
-ENTRY_ELEMENT_TITLE = re.compile(r"^(\d{4}-\d{2}-\d{2}) (.*)$")
-
-
def cli_indexer():
argument_parser = argparse.ArgumentParser()
argument_parser.add_argument("base_path", type=pathlib.Path)
@@ -25,9 +22,10 @@ def cli_indexer():
parsed = parser.parse(path.read_text())
parsed = list(parsed)
first_header_title = html.first_header_title_extractor(parsed)
- match = ENTRY_ELEMENT_TITLE.match(first_header_title)
+ match = gemini_to_web.ENTRY_ELEMENT_TITLE.match(first_header_title)
if match:
posts.append((match.group(1), path.relative_to(args.base_path), match.group(2)))
posts = sorted(posts, reverse=True)
for date, path, title in posts:
+ path = str(path).removesuffix(".gmi")
print(f"=> {path} {date} {title}")
diff --git a/gemini-to-web/src/gemini_to_web/rss.py b/gemini-to-web/src/gemini_to_web/rss.py
new file mode 100644
index 0000000..a03a4aa
--- /dev/null
+++ b/gemini-to-web/src/gemini_to_web/rss.py
@@ -0,0 +1,63 @@
+import argparse
+import datetime
+import pathlib
+import sys
+
+from feedgen import feed
+import htmlgenerator
+
+import gemini_to_web
+from gemini_to_web import html
+from gemini_to_web import parser
+
+
+def cli_to_rss():
+ argument_parser = argparse.ArgumentParser()
+ argument_parser.add_argument("--title", default="")
+ argument_parser.add_argument("--subtitle", default=None)
+ argument_parser.add_argument("--base-url", default="")
+ argument_parser.add_argument("base_dir", type=pathlib.Path)
+ args = argument_parser.parse_args()
+
+ fg = feed.FeedGenerator()
+ fg.title(args.title)
+ fg.subtitle(args.subtitle)
+ fg.link(href=args.base_url, rel="self")
+
+ input_ = sys.stdin.read()
+ parsed = parser.parse(input_)
+ parsed = list(parsed)
+
+ entries = []
+ for element in parsed:
+ match element:
+ case parser.LinkLine(url, link_name):
+ match = gemini_to_web.ENTRY_ELEMENT_TITLE.match(link_name)
+ if match:
+ entries.append((match.group(1), url, match.group(2)))
+ case _:
+ pass
+
+ entries = sorted(entries, reverse=True)
+ entries = entries[0:10]
+
+ for (date, url, title) in entries:
+ feed_entry = fg.add_entry()
+ feed_entry.link(href=args.base_url + "/" + url)
+ feed_entry.published(
+ datetime.datetime.combine(
+ datetime.date.fromisoformat(date),
+ datetime.datetime.min.time(),
+ tzinfo=datetime.UTC,
+ )
+ )
+ feed_entry.title(title)
+ parsed = parser.parse((args.base_dir / pathlib.Path(url).with_suffix(".gmi")).read_text())
+ parsed = list(parsed)
+ parsed = parsed[1:]
+ content = html.to_html(parsed)
+ rendered = htmlgenerator.render(content[1], {})
+ rendered = html.pretty(rendered)
+ feed_entry.content(rendered.encode("utf8"), type="html")
+
+ print(fg.rss_str(pretty=True).decode("utf8"), end="")