about summary refs log tree commit diff
path: root/gemini-to-web/src
diff options
context:
space:
mode:
author alex <alex@pdp7.net> 2026-02-15 09:55:22 +0100
committer alex <alex@pdp7.net> 2026-02-15 09:56:28 +0100
commit 144273180f8b4fdcc3356bbe72c7bdc799f2c6eb (patch)
tree 490a985f2317cee2d4442f89b5f66e0f0c5c135a /gemini-to-web/src
parent a0239f0024e07aca71de35828c3f47df77abf9ff (diff)
Add coppewebite-indexer with example
Diffstat (limited to 'gemini-to-web/src')
-rw-r--r-- gemini-to-web/src/gemini_to_web/indexer.py 33
1 files changed, 33 insertions, 0 deletions
diff --git a/gemini-to-web/src/gemini_to_web/indexer.py b/gemini-to-web/src/gemini_to_web/indexer.py
new file mode 100644
index 0000000..3d463e3
--- /dev/null
+++ b/gemini-to-web/src/gemini_to_web/indexer.py
@@ -0,0 +1,33 @@
+import argparse
+import pathlib
+import re
+import sys
+
+from gemini_to_web import parser
+from gemini_to_web import html
+
+
+# Matches a title of the form "<4 digits>-<2 digits>-<2 digits> <rest>":
+# group 1 is the date-shaped prefix (digits only; not validated as a real
+# calendar date), group 2 is everything after the single separating space.
+ENTRY_ELEMENT_TITLE = re.compile(r"^(\d{4}-\d{2}-\d{2}) (.*)$")
+
+
+def cli_indexer():
+    """Print a Gemini link index of dated posts, most recent date first.
+
+    Takes one positional command-line argument, ``base_path``, and reads a
+    NUL-separated list of file paths from standard input (for example the
+    output of ``find -print0``). Each file is read, parsed with
+    ``parser.parse`` and its first header title is obtained through
+    ``html.first_header_title_extractor``. Files whose title matches
+    ``ENTRY_ELEMENT_TITLE`` are kept; all others are skipped.
+
+    For every kept file one line ``=> <path> <date> <title>`` is printed,
+    where ``<path>`` is relative to ``base_path``. Lines are emitted in
+    descending order of ``(date, path, title)``.
+
+    Raises:
+        ValueError: if a kept file is not located under ``base_path``
+            (raised by ``Path.relative_to``).
+        OSError: if one of the listed files cannot be read.
+    """
+    argument_parser = argparse.ArgumentParser()
+    argument_parser.add_argument("base_path", type=pathlib.Path)
+    args = argument_parser.parse_args()
+
+    # Filter out empty entries instead of unconditionally discarding the
+    # last one: the final path is kept even when the input has no trailing
+    # NUL, and the empty string after a trailing NUL is still ignored.
+    paths = [pathlib.Path(raw) for raw in sys.stdin.read().split("\0") if raw]
+
+    posts = []
+    for path in paths:
+        # Decode explicitly as UTF-8 rather than relying on the locale.
+        elements = list(parser.parse(path.read_text(encoding="utf-8")))
+        first_header_title = html.first_header_title_extractor(elements)
+        # NOTE(review): presumably the extractor can return None when the
+        # document has no header -- confirm against its implementation.
+        if first_header_title is None:
+            continue
+        match = ENTRY_ELEMENT_TITLE.match(first_header_title)
+        if match:
+            date, title = match.groups()
+            posts.append((date, path.relative_to(args.base_path), title))
+
+    # The date strings are zero-padded year-month-day, so a reverse
+    # lexicographic sort lists the most recent dates first.
+    for date, path, title in sorted(posts, reverse=True):
+        print(f"=> {path} {date} {title}")