diff options
| -rw-r--r-- | gemini-to-web/README.md | 7 | ||||
| -rw-r--r-- | gemini-to-web/example/.gitignore | 1 | ||||
| -rwxr-xr-x | gemini-to-web/example/build.sh | 16 | ||||
| -rw-r--r-- | gemini-to-web/example/source/2024/12/old-post.gmi | 3 | ||||
| -rw-r--r-- | gemini-to-web/example/source/2025/12/new-post.gmi | 3 | ||||
| -rw-r--r-- | gemini-to-web/pyproject.toml | 1 | ||||
| -rw-r--r-- | gemini-to-web/src/gemini_to_web/indexer.py | 33 |
7 files changed, 64 insertions, 0 deletions
diff --git a/gemini-to-web/README.md b/gemini-to-web/README.md
index 419beef..503fc8c 100644
--- a/gemini-to-web/README.md
+++ b/gemini-to-web/README.md
@@ -27,3 +27,10 @@ $ uv run coppewebite-to-html <<EOT
 </body>
 </html>
 ```
+
+`coppewebite-indexer` reads a `\0`-separated list of files from standard input (such as the output of `find -print0`).
+`coppewebite-indexer` parses each file as gemtext, extracts its first header, and matches the header text against the Gemini page subscription entry label format (`YYYY-MM-DD title`).
+`coppewebite-indexer` outputs a list of gemtext links, sorted in reverse chronological order, suitable for use as a Gemini page subscription.
+
+You can use `coppewebite-indexer` to create a gemlog index automatically.
+Refer to the [`example`](example) directory for an example.
diff --git a/gemini-to-web/example/.gitignore b/gemini-to-web/example/.gitignore
new file mode 100644
index 0000000..eb5a316
--- /dev/null
+++ b/gemini-to-web/example/.gitignore
@@ -0,0 +1 @@
+target
diff --git a/gemini-to-web/example/build.sh b/gemini-to-web/example/build.sh
new file mode 100755
index 0000000..93346a8
--- /dev/null
+++ b/gemini-to-web/example/build.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+set -eu
+
+rm -rf target
+mkdir target
+
+{
+    cat <<HEAD
+# Welcome to my blog
+
+HEAD
+    find . -path './source/2???/??/*.gmi' -type f -print0 | uv --project .. run coppewebite-indexer source/
+} >target/index.gmi
+
+cp -r source/* target/
diff --git a/gemini-to-web/example/source/2024/12/old-post.gmi b/gemini-to-web/example/source/2024/12/old-post.gmi
new file mode 100644
index 0000000..bdfd94c
--- /dev/null
+++ b/gemini-to-web/example/source/2024/12/old-post.gmi
@@ -0,0 +1,3 @@
+# 2024-12-03 Old post
+
+This is the old post.
"""Build a gemlog index from a NUL-separated list of gemtext files.

Registered in ``pyproject.toml`` as the ``coppewebite-indexer`` console
script (``gemini_to_web.indexer:cli_indexer``).
"""

import argparse
import pathlib
import re
import sys

from gemini_to_web import parser
from gemini_to_web import html


# Header text of an indexable post: a ``YYYY-MM-DD`` date, a single space,
# then the title -- e.g. the header "# 2025-12-03 New post" yields the text
# "2025-12-03 New post".
ENTRY_ELEMENT_TITLE = re.compile(r"^(\d{4}-\d{2}-\d{2}) (.*)$")


def cli_indexer():
    """Print gemtext links for dated posts, newest first.

    Command line: one positional argument, ``base_path``; every printed link
    is made relative to it.

    Standard input: a ``\\0``-separated list of file paths (such as the
    output of ``find -print0``). Each file is parsed as gemtext and its first
    header title is matched against ``ENTRY_ELEMENT_TITLE``; files whose
    title does not match are skipped.

    Standard output: one ``=> <relative path> <date> <title>`` line per
    matching file, sorted in reverse order of ``(date, path, title)``.

    Raises:
        ValueError: if a listed path is not located under ``base_path``
            (raised by ``pathlib.Path.relative_to``).
        OSError: if a listed file cannot be read.
    """
    argument_parser = argparse.ArgumentParser()
    argument_parser.add_argument("base_path", type=pathlib.Path)
    args = argument_parser.parse_args()

    # Keep every non-empty field rather than unconditionally discarding the
    # last one: `find -print0` terminates each path with NUL (leaving one
    # empty trailing field), but input lacking the final NUL would otherwise
    # silently lose its last path.
    paths = [field for field in sys.stdin.read().split("\0") if field]

    posts = []

    for raw_path in paths:
        path = pathlib.Path(raw_path)
        # Gemtext is UTF-8; do not depend on the locale's default encoding.
        parsed = list(parser.parse(path.read_text(encoding="utf-8")))
        first_header_title = html.first_header_title_extractor(parsed)
        # NOTE(review): presumably the extractor returns a non-string (e.g.
        # None) for a file without any header -- confirm against
        # gemini_to_web.html. Such files are skipped instead of crashing
        # inside the regex match.
        if not isinstance(first_header_title, str):
            continue
        match = ENTRY_ELEMENT_TITLE.match(first_header_title)
        if match:
            posts.append(
                (match.group(1), path.relative_to(args.base_path), match.group(2))
            )

    # The YYYY-MM-DD date sorts chronologically as plain text, so a reverse
    # sort on the tuples lists the newest post first.
    posts = sorted(posts, reverse=True)
    for date, path, title in posts:
        print(f"=> {path} {date} {title}")
