aboutsummaryrefslogtreecommitdiff
path: root/gemini-to-web/src/gemini_to_web/indexer.py
blob: a6ceee0a5ba145606467231c886ad29a9011e4e9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import argparse
import pathlib
import sys

import gemini_to_web
from gemini_to_web import parser
from gemini_to_web import html


def cli_indexer():
    """Print an index of dated posts, newest first.

    Command line: one positional argument, ``base_path``, the directory
    that the emitted link targets are made relative to.

    Standard input: a list of file paths separated by NUL characters
    (the format produced by e.g. ``find -print0``). Empty entries, such as
    the one following a trailing NUL, are ignored.

    Each file is parsed with ``parser.parse`` and the title of its first
    header is matched against ``gemini_to_web.ENTRY_ELEMENT_TITLE``. Files
    whose title does not match are skipped. For the others, group 1 of the
    match is used as the date and group 2 as the title.

    Standard output: one line per post of the form
    ``=> <path without .gmi suffix> <date> <title>``, sorted in descending
    order of ``(date, path, title)``.

    Raises:
        ValueError: if a listed path is not located under ``base_path``
            (raised by ``pathlib.Path.relative_to``).
        OSError: if a listed file cannot be read.
        UnicodeDecodeError: if a listed file is not valid UTF-8.
    """
    argument_parser = argparse.ArgumentParser()
    argument_parser.add_argument("base_path", type=pathlib.Path)
    args = argument_parser.parse_args()

    # Drop empty entries instead of blindly discarding the last element:
    # this keeps the final path when the input lacks a trailing NUL and
    # avoids turning an empty entry into Path(""), i.e. the current directory.
    paths = [entry for entry in sys.stdin.read().split("\0") if entry]

    posts = []

    for raw_path in paths:
        path = pathlib.Path(raw_path)
        # Decode explicitly as UTF-8 so the result does not depend on the
        # locale of the process running the indexer.
        parsed = list(parser.parse(path.read_text(encoding="utf-8")))
        first_header_title = html.first_header_title_extractor(parsed)
        match = gemini_to_web.ENTRY_ELEMENT_TITLE.match(first_header_title)
        if match:
            # The date goes first so that tuple ordering sorts by date.
            posts.append(
                (match.group(1), path.relative_to(args.base_path), match.group(2))
            )

    posts.sort(reverse=True)
    for date, path, title in posts:
        link_target = str(path).removesuffix(".gmi")
        print(f"=> {link_target} {date} {title}")