diff options
| author | alex <alex@pdp7.net> | 2026-02-15 09:55:22 +0100 |
|---|---|---|
| committer | alex <alex@pdp7.net> | 2026-02-15 09:56:28 +0100 |
| commit | 144273180f8b4fdcc3356bbe72c7bdc799f2c6eb (patch) | |
| tree | 490a985f2317cee2d4442f89b5f66e0f0c5c135a /gemini-to-web/src/gemini_to_web/indexer.py | |
| parent | a0239f0024e07aca71de35828c3f47df77abf9ff (diff) | |
Add coppewebite-indexer with example
Diffstat (limited to 'gemini-to-web/src/gemini_to_web/indexer.py')
| -rw-r--r-- | gemini-to-web/src/gemini_to_web/indexer.py | 33 |
1 files changed, 33 insertions, 0 deletions
"""Command-line indexer that lists dated posts as ``=> path date title`` lines.

Source file: gemini-to-web/src/gemini_to_web/indexer.py
(added as a new file, mode 100644, blob 3d463e3).
"""

import argparse
import pathlib
import re
import sys

from gemini_to_web import parser
from gemini_to_web import html


# A post title is a YYYY-MM-DD date, a single space, then the free-form title.
ENTRY_ELEMENT_TITLE = re.compile(r"^(\d{4}-\d{2}-\d{2}) (.*)$")


def cli_indexer():
    """Print a newest-first index of the dated posts named on standard input.

    Command line:
        base_path -- directory that the printed paths are made relative to.

    Standard input:
        NUL-separated file paths (presumably produced by something like
        ``find -print0`` -- verify against the caller). A trailing NUL is
        accepted but not required.

    Each file is parsed with ``parser.parse`` and its first header title is
    obtained from ``html.first_header_title_extractor``. Files whose title
    matches ``ENTRY_ELEMENT_TITLE`` produce one output line of the form
    ``=> <relative path> <date> <title>``; all other files are skipped.
    Lines are printed in descending (date, path, title) order.

    Raises:
        OSError: if a listed file cannot be read.
        ValueError: if a listed file is not located under ``base_path``
            (raised by ``Path.relative_to``).
    """
    argument_parser = argparse.ArgumentParser()
    argument_parser.add_argument("base_path", type=pathlib.Path)
    args = argument_parser.parse_args()

    # Drop empty entries instead of blindly discarding the last element:
    # the final path is kept even when the input has no trailing NUL, and
    # an empty entry never turns into Path("") (the current directory).
    raw_paths = [entry for entry in sys.stdin.read().split("\0") if entry]

    posts = []
    for raw_path in raw_paths:
        path = pathlib.Path(raw_path)
        # Decode explicitly as UTF-8 so the result does not depend on the
        # locale of the machine running the indexer.
        parsed = list(parser.parse(path.read_text(encoding="utf-8")))
        first_header_title = html.first_header_title_extractor(parsed)
        # NOTE(review): the extractor is defined elsewhere; this guard assumes
        # it may return None for a file without a header -- confirm. Such a
        # file is skipped like any other file without a dated title.
        if first_header_title is None:
            continue
        match = ENTRY_ELEMENT_TITLE.match(first_header_title)
        if match:
            date, title = match.group(1), match.group(2)
            posts.append((date, path.relative_to(args.base_path), title))

    # Tuples sort by date first; the date format makes lexical order
    # chronological, so reverse=True lists the newest post first.
    for date, path, title in sorted(posts, reverse=True):
        print(f"=> {path} {date} {title}")
