diff options
| field | value | date |
|---|---|---|
| author | alex <alex@pdp7.net> | 2026-02-15 10:42:00 +0100 |
| committer | alex <alex@pdp7.net> | 2026-02-15 10:42:00 +0100 |
| commit | cc5d4e07146da033ded55c216b41378bca9a10b4 (patch) | |
| tree | 0fb4e8dff5d64a9c47d02706716e4377a81a6d04 /gemini-to-web | |
| parent | 144273180f8b4fdcc3356bbe72c7bdc799f2c6eb (diff) | |
Add RSS generation
Remove old deprecated CLI converter in `__init__`
Diffstat (limited to 'gemini-to-web')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | `gemini-to-web/README.md` | 3 |
| -rwxr-xr-x | `gemini-to-web/example/build.sh` | 4 |
| -rw-r--r-- | `gemini-to-web/pyproject.toml` | 2 |
| -rw-r--r-- | `gemini-to-web/src/gemini_to_web/__init__.py` | 25 |
| -rw-r--r-- | `gemini-to-web/src/gemini_to_web/indexer.py` | 8 |
| -rw-r--r-- | `gemini-to-web/src/gemini_to_web/rss.py` | 63 |
| -rw-r--r-- | `gemini-to-web/uv.lock` | 33 |
7 files changed, 109 insertions, 29 deletions
diff --git a/gemini-to-web/README.md b/gemini-to-web/README.md index 503fc8c..2c492db 100644 --- a/gemini-to-web/README.md +++ b/gemini-to-web/README.md @@ -33,4 +33,7 @@ $ uv run coppewebite-to-html <<EOT `coppewebite-indexer` outputs a list of gemtext links, sorted in reverse chronological order, suitable for use as a Gemini page subscription. You can use `coppewebite-indexer` to create a gemlog index automatically. + +`coppewebite-to-rss` reads from standard input a gemtext file and produces the equivalent RSS to the gemtext subscription. + Refer to the [`example`](example) directory for an example. diff --git a/gemini-to-web/example/build.sh b/gemini-to-web/example/build.sh index 93346a8..e792215 100755 --- a/gemini-to-web/example/build.sh +++ b/gemini-to-web/example/build.sh @@ -5,6 +5,8 @@ set -eu rm -rf target mkdir target +cp -r source/* target/ + { cat <<HEAD # Welcome to my blog @@ -13,4 +15,4 @@ HEAD find . -path './source/2???/??/*.gmi' -type f -print0 | uv --project .. run coppewebite-indexer source/ } >target/index.gmi -cp -r source/* target/ +uv --project .. 
run coppewebite-to-rss <target/index.gmi >target/index.rss --title Example --subtitle Subtitle --base-url https://www.example.com target/ diff --git a/gemini-to-web/pyproject.toml b/gemini-to-web/pyproject.toml index bf58d08..5226714 100644 --- a/gemini-to-web/pyproject.toml +++ b/gemini-to-web/pyproject.toml @@ -6,6 +6,7 @@ authors = [ ] requires-python = ">=3.12" dependencies = [ + "feedgen>=1.0.0", "htmlgenerator>=1.2.32", "lxml>=6.0.2", ] @@ -14,6 +15,7 @@ dependencies = [ coppewebite-parse = "gemini_to_web.parser:cli_parse" coppewebite-to-html = "gemini_to_web.html:cli_to_html" coppewebite-indexer = "gemini_to_web.indexer:cli_indexer" +coppewebite-to-rss = "gemini_to_web.rss:cli_to_rss" [build-system] requires = ["uv_build>=0.9.30,<0.10.0"] diff --git a/gemini-to-web/src/gemini_to_web/__init__.py b/gemini-to-web/src/gemini_to_web/__init__.py index 0b2b70e..827031f 100644 --- a/gemini-to-web/src/gemini_to_web/__init__.py +++ b/gemini-to-web/src/gemini_to_web/__init__.py @@ -1,25 +1,4 @@ -import argparse -import pathlib -import shutil +import re -import htmlgenerator -from gemini_to_web import html - -def converter(): - parser = argparse.ArgumentParser() - parser.add_argument("source", type=pathlib.Path) - parser.add_argument("target", type=pathlib.Path) - args = parser.parse_args() - - shutil.copytree(args.source, args.target) - for gmi in args.target.glob("**/*.gmi"): - html_path = gmi.with_suffix(".html") - html_path.write_text( - html.pretty( - htmlgenerator.render( - html.to_html(gmi.read_text()), - {} - ) - ) - ) +ENTRY_ELEMENT_TITLE = re.compile(r"^(\d{4}-\d{2}-\d{2}) (.*)$") diff --git a/gemini-to-web/src/gemini_to_web/indexer.py b/gemini-to-web/src/gemini_to_web/indexer.py index 3d463e3..a6ceee0 100644 --- a/gemini-to-web/src/gemini_to_web/indexer.py +++ b/gemini-to-web/src/gemini_to_web/indexer.py @@ -1,15 +1,12 @@ import argparse import pathlib -import re import sys +import gemini_to_web from gemini_to_web import parser from gemini_to_web import html 
-ENTRY_ELEMENT_TITLE = re.compile(r"^(\d{4}-\d{2}-\d{2}) (.*)$") - - def cli_indexer(): argument_parser = argparse.ArgumentParser() argument_parser.add_argument("base_path", type=pathlib.Path) @@ -25,9 +22,10 @@ def cli_indexer(): parsed = parser.parse(path.read_text()) parsed = list(parsed) first_header_title = html.first_header_title_extractor(parsed) - match = ENTRY_ELEMENT_TITLE.match(first_header_title) + match = gemini_to_web.ENTRY_ELEMENT_TITLE.match(first_header_title) if match: posts.append((match.group(1), path.relative_to(args.base_path), match.group(2))) posts = sorted(posts, reverse=True) for date, path, title in posts: + path = str(path).removesuffix(".gmi") print(f"=> {path} {date} {title}") diff --git a/gemini-to-web/src/gemini_to_web/rss.py b/gemini-to-web/src/gemini_to_web/rss.py new file mode 100644 index 0000000..a03a4aa --- /dev/null +++ b/gemini-to-web/src/gemini_to_web/rss.py @@ -0,0 +1,63 @@ +import argparse +import datetime +import pathlib +import sys + +from feedgen import feed +import htmlgenerator + +import gemini_to_web +from gemini_to_web import html +from gemini_to_web import parser + + +def cli_to_rss(): + argument_parser = argparse.ArgumentParser() + argument_parser.add_argument("--title", default="") + argument_parser.add_argument("--subtitle", default=None) + argument_parser.add_argument("--base-url", default="") + argument_parser.add_argument("base_dir", type=pathlib.Path) + args = argument_parser.parse_args() + + fg = feed.FeedGenerator() + fg.title(args.title) + fg.subtitle(args.subtitle) + fg.link(href=args.base_url, rel="self") + + input_ = sys.stdin.read() + parsed = parser.parse(input_) + parsed = list(parsed) + + entries = [] + for element in parsed: + match element: + case parser.LinkLine(url, link_name): + match = gemini_to_web.ENTRY_ELEMENT_TITLE.match(link_name) + if match: + entries.append((match.group(1), url, match.group(2))) + case _: + pass + + entries = sorted(entries, reverse=True) + entries = entries[0:10] + + 
for (date, url, title) in entries: + feed_entry = fg.add_entry() + feed_entry.link(href=args.base_url + "/" + url) + feed_entry.published( + datetime.datetime.combine( + datetime.date.fromisoformat(date), + datetime.datetime.min.time(), + tzinfo=datetime.UTC, + ) + ) + feed_entry.title(title) + parsed = parser.parse((args.base_dir / pathlib.Path(url).with_suffix(".gmi")).read_text()) + parsed = list(parsed) + parsed = parsed[1:] + content = html.to_html(parsed) + rendered = htmlgenerator.render(content[1], {}) + rendered = html.pretty(rendered) + feed_entry.content(rendered.encode("utf8"), type="html") + + print(fg.rss_str(pretty=True).decode("utf8"), end="") diff --git a/gemini-to-web/uv.lock b/gemini-to-web/uv.lock index 1a55425..8810df8 100644 --- a/gemini-to-web/uv.lock +++ b/gemini-to-web/uv.lock @@ -3,16 +3,28 @@ revision = 3 requires-python = ">=3.12" [[package]] +name = "feedgen" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "lxml" }, + { name = "python-dateutil" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6b/59/be0a6f852b5dfbf19e6c8e962c8f41407697f9f52a7902250ed98683ae89/feedgen-1.0.0.tar.gz", hash = "sha256:d9bd51c3b5e956a2a52998c3708c4d2c729f2fcc311188e1e5d3b9726393546a", size = 258496, upload-time = "2023-12-25T18:04:08.421Z" } + +[[package]] name = "gemini-to-web" version = "0.0.0" source = { editable = "." 
} dependencies = [ + { name = "feedgen" }, { name = "htmlgenerator" }, { name = "lxml" }, ] [package.metadata] requires-dist = [ + { name = "feedgen", specifier = ">=1.0.0" }, { name = "htmlgenerator", specifier = ">=1.2.32" }, { name = "lxml", specifier = ">=6.0.2" }, ] @@ -105,3 +117,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fe/1a/b248b355834c8e32614650b8008c69ffeb0ceb149c793961dd8c0b991bb3/lxml-6.0.2-cp314-cp314t-win_amd64.whl", hash = "sha256:252a22982dca42f6155125ac76d3432e548a7625d56f5a273ee78a5057216eca", size = 4406837, upload-time = "2025-09-22T04:03:34.027Z" }, { url = "https://files.pythonhosted.org/packages/92/aa/df863bcc39c5e0946263454aba394de8a9084dbaff8ad143846b0d844739/lxml-6.0.2-cp314-cp314t-win_arm64.whl", hash = "sha256:bb4c1847b303835d89d785a18801a883436cdfd5dc3d62947f9c49e24f0f5a2c", size = 3822205, upload-time = "2025-09-22T04:03:36.249Z" }, ] + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload-time = "2024-03-01T18:36:20.211Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, +] + +[[package]] +name = "six" +version = "1.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = 
"sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031, upload-time = "2024-12-04T17:35:28.174Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, +] |
