diff options
| author | alex <alex@pdp7.net> | 2026-03-22 19:28:12 +0100 |
|---|---|---|
| committer | alex <alex@pdp7.net> | 2026-03-22 19:28:12 +0100 |
| commit | 5b34b41bd33b0a8602625ae4b9946c95d0839502 (patch) | |
| tree | 21e00df5eb9c13fc81521f63067e2e6baeebfc1c /parse-feed.py | |
| parent | 5a560cb4c986b70e54955587671807457bb61b84 (diff) | |
Diffstat (limited to 'parse-feed.py')
| -rw-r--r-- | parse-feed.py | 25 |
1 files changed, 25 insertions, 0 deletions
"""Extract dated feed entries from a gemtext page.

Usage: parse-feed.py BASE_URL PAGE_TITLE < page.gmi

Reads a gemtext document on standard input and, for every link line whose
label starts with an ISO date (YYYY-MM-DD), prints one line of the form
``DATE ABSOLUTE_URL PAGE_TITLE - ENTRY_TITLE`` on standard output.
"""
import fileinput
import re
import sys
import urllib.parse

# urllib only resolves relative references for schemes it knows about;
# register "gemini" so urljoin() handles gemini:// base URLs.
urllib.parse.uses_relative += ["gemini"]
urllib.parse.uses_netloc += ["gemini"]

# A link label of the form "<YYYY-MM-DD><whitespace><title>".
_DATED_LABEL = re.compile(r"(\d{4}-\d{2}-\d{2})\s+(.*)")


def parse_feed(lines, base_url, page_title):
    """Yield one output line per dated link found in *lines*.

    Args:
        lines: iterable of gemtext lines without trailing newlines.
        base_url: URL that relative link targets are resolved against.
        page_title: text prefixed to every entry title.

    Yields:
        Strings of the form ``"DATE URL PAGE_TITLE - TITLE"``.

    Lines that are not links, links without a label, and links whose label
    does not start with a date are skipped.
    """
    for line in lines:
        # Whitespace after the "=>" marker is optional in gemtext.
        if not line.startswith("=>"):
            continue
        parts = line[2:].split(maxsplit=1)
        if len(parts) < 2:
            # Bare marker or link without a label: there is no date to read,
            # and unpacking would otherwise raise ValueError.
            continue
        target, label = parts

        match = _DATED_LABEL.match(label)
        if not match:
            continue
        date, title = match.groups()

        url = urllib.parse.urljoin(base_url, target)
        yield f"{date} {url} {page_title} - {title}"


def main():
    """Run the command-line entry point; see the module docstring."""
    if len(sys.argv) < 3:
        sys.exit("usage: parse-feed.py BASE_URL PAGE_TITLE < page.gmi")
    base_url = sys.argv[1]
    page_title = sys.argv[2]

    for entry in parse_feed(sys.stdin.read().splitlines(), base_url, page_title):
        print(entry)


if __name__ == "__main__":
    main()
