"""Turn a date-indexed HTML page into an Atom feed written to stdout.

Usage:
    gemini2rss.py URL FEED_SIZE TITLE ID AUTHOR_NAME AUTHOR_EMAIL

The page at URL is fetched and parsed.  Every ``<a>`` whose text has the
shape ``YYYY-MM-DD <title>`` becomes one feed entry, up to FEED_SIZE
entries.  The entry body is the markup that follows the link's parent
element, up to (but excluding) the next paragraph that holds such a dated
link, or the ``<h1>`` reading "Sobre mí", or the end of the document.
"""

import datetime
import re
import sys
import urllib.request

import bs4
from feedgen import feed

# Text of a post link: a date-shaped prefix, one space, then the post title.
POST_LINK_RE = re.compile(r"(....-..-..) (.*)")

# Text of the <h1> that terminates the content of the last post.
END_HEADING = "Sobre mí"


def _match_post_link(anchor):
    """Return the match for a dated post link, or None if *anchor* is not one."""
    if anchor is None:
        return None
    text = anchor.string
    # .string is None when the tag does not have exactly one string child;
    # such a link cannot be a post link and must not reach the regex.
    if text is None:
        return None
    return POST_LINK_RE.fullmatch(text)


def _is_content_boundary(node):
    """Tell whether *node* ends the content of the post being collected."""
    if not isinstance(node, bs4.Tag):
        return False
    if node.name == "p":
        return _match_post_link(node.a) is not None
    if node.name == "h1":
        return node.string == END_HEADING
    return False


def _collect_content(anchor):
    """Return the HTML that follows *anchor*'s parent, up to the next boundary.

    The sibling directly after the parent is skipped, as the loop advances
    before it looks at a node.  Running off the end of the document ends the
    content instead of raising.
    """
    parts = []
    node = anchor.parent.next_sibling
    while node is not None:
        node = node.next_sibling
        if node is None or _is_content_boundary(node):
            break
        if isinstance(node, bs4.NavigableString):
            parts.append(str(node))
        else:
            parts.append(node.prettify())
    return "".join(parts)


def main(argv):
    """Build the feed described by *argv* and print it as Atom XML.

    *argv* follows ``sys.argv`` layout: program name, then URL, FEED_SIZE,
    TITLE, ID, AUTHOR_NAME and AUTHOR_EMAIL.  Exits with a usage message when
    arguments are missing.  A link whose date part matches the pattern but is
    not a valid ``%Y-%m-%d`` date still raises ``ValueError``.
    """
    if len(argv) < 7:
        sys.exit(
            f"usage: {argv[0] if argv else 'gemini2rss.py'} "
            "URL FEED_SIZE TITLE ID AUTHOR_NAME AUTHOR_EMAIL"
        )

    url = argv[1]
    feed_size = int(argv[2])
    feed_title = argv[3]
    feed_id = argv[4]
    author_name = argv[5]
    author_email = argv[6]

    with urllib.request.urlopen(url) as response:
        page = response.read().decode("utf8")

    soup = bs4.BeautifulSoup(page, features="lxml")

    generator = feed.FeedGenerator()
    generator.title(feed_title)
    generator.id(feed_id + "/")

    posts = 0
    for anchor in soup.find_all("a"):
        if posts == feed_size:
            break
        match = _match_post_link(anchor)
        if not match:
            continue

        # Entries are stamped at midnight UTC of the date in the link text.
        updated = datetime.datetime.strptime(match.group(1), "%Y-%m-%d").replace(
            tzinfo=datetime.timezone.utc
        )

        entry = generator.add_item()
        entry.title(match.group(2))
        entry.id(url + anchor["href"])
        entry.link(href=anchor["href"])
        entry.author(name=author_name, email=author_email)
        entry.updated(updated)
        entry.content(_collect_content(anchor), type="html")

        posts += 1

    print(generator.atom_str(pretty=True).decode("utf8"))


if __name__ == "__main__":
    main(sys.argv)