aboutsummaryrefslogtreecommitdiff
path: root/gemini2rss/gemini2rss.py
diff options
context:
space:
mode:
Diffstat (limited to 'gemini2rss/gemini2rss.py')
-rw-r--r--gemini2rss/gemini2rss.py54
1 files changed, 54 insertions, 0 deletions
diff --git a/gemini2rss/gemini2rss.py b/gemini2rss/gemini2rss.py
new file mode 100644
index 00000000..673ea7e2
--- /dev/null
+++ b/gemini2rss/gemini2rss.py
@@ -0,0 +1,54 @@
+import datetime
+import re
+import urllib.request
+
+import bs4
+from feedgen import feed
+
+
+url = "http://localhost:8080/"
+feed_size = 10
+title = "El blog es mío"
+id = "https://foo.bar"
+
+with urllib.request.urlopen(url) as f:
+ content = f.read().decode("utf8")
+
+
+soup = bs4.BeautifulSoup(content, features="lxml")
+
+posts = 0
+
+f = feed.FeedGenerator()
+f.title(title)
+f.id(id)
+
+for a in soup.find_all("a"):
+ if posts == feed_size:
+ break
+ match = re.fullmatch("(....-..-..) (.*)", a.string)
+ if not match:
+ continue
+ title = match.group(2)
+ date = datetime.datetime.strptime(match.group(1), "%Y-%m-%d").date()
+
+ fi = f.add_item()
+ fi.title(title)
+ fi.id(a["href"])
+ fi.link(href=a["href"])
+ fi.updated(datetime.datetime.combine(date, datetime.datetime.min.time(), tzinfo=datetime.timezone.utc))
+
+ node = a.parent.next_sibling
+ content = ""
+ while True:
+ node = node.next_sibling
+ if node and node.name == "p" and node.a and node.a.string and re.fullmatch("(....-..-..) (.*)", node.a.string):
+ break
+ if node and node.name == "h1" and node.string and node.string == "Sobre mí":
+ break
+ content += node if isinstance(node, bs4.NavigableString) else node.prettify()
+ fi.content(content)
+
+ posts += 1
+
+print(f.atom_str(pretty=True).decode("utf8")) \ No newline at end of file