diff options
Diffstat (limited to 'gemini-to-web/src/gemini_to_web')
| -rw-r--r-- | gemini-to-web/src/gemini_to_web/__init__.py | 25 | ||||
| -rw-r--r-- | gemini-to-web/src/gemini_to_web/indexer.py | 8 | ||||
| -rw-r--r-- | gemini-to-web/src/gemini_to_web/rss.py | 63 |
3 files changed, 68 insertions, 28 deletions
diff --git a/gemini-to-web/src/gemini_to_web/__init__.py b/gemini-to-web/src/gemini_to_web/__init__.py index 0b2b70e..827031f 100644 --- a/gemini-to-web/src/gemini_to_web/__init__.py +++ b/gemini-to-web/src/gemini_to_web/__init__.py @@ -1,25 +1,4 @@ -import argparse -import pathlib -import shutil +import re -import htmlgenerator -from gemini_to_web import html - -def converter(): - parser = argparse.ArgumentParser() - parser.add_argument("source", type=pathlib.Path) - parser.add_argument("target", type=pathlib.Path) - args = parser.parse_args() - - shutil.copytree(args.source, args.target) - for gmi in args.target.glob("**/*.gmi"): - html_path = gmi.with_suffix(".html") - html_path.write_text( - html.pretty( - htmlgenerator.render( - html.to_html(gmi.read_text()), - {} - ) - ) - ) +ENTRY_ELEMENT_TITLE = re.compile(r"^(\d{4}-\d{2}-\d{2}) (.*)$") diff --git a/gemini-to-web/src/gemini_to_web/indexer.py b/gemini-to-web/src/gemini_to_web/indexer.py index 3d463e3..a6ceee0 100644 --- a/gemini-to-web/src/gemini_to_web/indexer.py +++ b/gemini-to-web/src/gemini_to_web/indexer.py @@ -1,15 +1,12 @@ import argparse import pathlib -import re import sys +import gemini_to_web from gemini_to_web import parser from gemini_to_web import html -ENTRY_ELEMENT_TITLE = re.compile(r"^(\d{4}-\d{2}-\d{2}) (.*)$") - - def cli_indexer(): argument_parser = argparse.ArgumentParser() argument_parser.add_argument("base_path", type=pathlib.Path) @@ -25,9 +22,10 @@ def cli_indexer(): parsed = parser.parse(path.read_text()) parsed = list(parsed) first_header_title = html.first_header_title_extractor(parsed) - match = ENTRY_ELEMENT_TITLE.match(first_header_title) + match = gemini_to_web.ENTRY_ELEMENT_TITLE.match(first_header_title) if match: posts.append((match.group(1), path.relative_to(args.base_path), match.group(2))) posts = sorted(posts, reverse=True) for date, path, title in posts: + path = str(path).removesuffix(".gmi") print(f"=> {path} {date} {title}") diff --git a/gemini-to-web/src/gemini_to_web/rss.py b/gemini-to-web/src/gemini_to_web/rss.py new file mode 100644 index 0000000..a03a4aa --- /dev/null +++ b/gemini-to-web/src/gemini_to_web/rss.py @@ -0,0 +1,63 @@ +import argparse +import datetime +import pathlib +import sys + +from feedgen import feed +import htmlgenerator + +import gemini_to_web +from gemini_to_web import html +from gemini_to_web import parser + + +def cli_to_rss(): + argument_parser = argparse.ArgumentParser() + argument_parser.add_argument("--title", default="") + argument_parser.add_argument("--subtitle", default=None) + argument_parser.add_argument("--base-url", default="") + argument_parser.add_argument("base_dir", type=pathlib.Path) + args = argument_parser.parse_args() + + fg = feed.FeedGenerator() + fg.title(args.title) + fg.subtitle(args.subtitle) + fg.link(href=args.base_url, rel="self") + + input_ = sys.stdin.read() + parsed = parser.parse(input_) + parsed = list(parsed) + + entries = [] + for element in parsed: + match element: + case parser.LinkLine(url, link_name): + match = gemini_to_web.ENTRY_ELEMENT_TITLE.match(link_name) + if match: + entries.append((match.group(1), url, match.group(2))) + case _: + pass + + entries = sorted(entries, reverse=True) + entries = entries[0:10] + + for (date, url, title) in entries: + feed_entry = fg.add_entry() + feed_entry.link(href=args.base_url + "/" + url) + feed_entry.published( + datetime.datetime.combine( + datetime.date.fromisoformat(date), + datetime.datetime.min.time(), + tzinfo=datetime.UTC, + ) + ) + feed_entry.title(title) + parsed = parser.parse((args.base_dir / pathlib.Path(url).with_suffix(".gmi")).read_text()) + parsed = list(parsed) + parsed = parsed[1:] + content = html.to_html(parsed) + rendered = htmlgenerator.render(content[1], {}) + rendered = html.pretty(rendered) + feed_entry.content(rendered.encode("utf8"), type="html") + + print(fg.rss_str(pretty=True).decode("utf8"), end="") |
