diff options
| author | alex <alex@pdp7.net> | 2021-03-13 19:29:57 +0100 |
|---|---|---|
| committer | alex <alex@pdp7.net> | 2021-03-13 19:29:57 +0100 |
| commit | e3f57c49d64cbd057f87206c2340575b3bba2566 (patch) | |
| tree | a928f62247c4827e3bc7e26b3078764b093b88d2 /gemini2rss/gemini2rss.py | |
| parent | 5c74711c33d9c75a31e01c161e645797ee669a15 (diff) | |
WIP
Diffstat (limited to 'gemini2rss/gemini2rss.py')
| -rw-r--r-- | gemini2rss/gemini2rss.py | 54 |
1 files changed, 54 insertions, 0 deletions
"""Build an Atom feed from the dated post links found on an HTML page.

The page at ``url`` is fetched and parsed; every ``<a>`` whose text looks
like ``"YYYY-MM-DD <title>"`` becomes one feed entry.  The entry body is the
markup that follows the link's parent element, up to the next dated link
paragraph or the "Sobre mí" heading.  The resulting Atom document is printed
to standard output.

NOTE(review): judging by the script name, the page is presumably an HTTP
rendering of a Gemini capsule index -- confirm against the deployment.
"""

import datetime
import re
import urllib.request

import bs4
from feedgen import feed


url = "http://localhost:8080/"
feed_size = 10
title = "El blog es mío"
# Named ``feed_id`` rather than ``id`` so the builtin ``id()`` is not shadowed.
feed_id = "https://foo.bar"

# Link text of a post: a date-shaped prefix, one space, then the post title.
POST_LINK_RE = re.compile(r"(....-..-..) (.*)")
# Text of the <h1> that marks the end of the post listing.
END_OF_POSTS_HEADING = "Sobre mí"


def fetch_page(page_url):
    """Return the body of *page_url* decoded as UTF-8 text."""
    with urllib.request.urlopen(page_url) as response:
        return response.read().decode("utf8")


def parse_post_link(text):
    """Split ``"YYYY-MM-DD title"`` link text into ``(date, title)``.

    Returns ``None`` when *text* is ``None`` (BeautifulSoup yields ``None``
    for ``.string`` when a tag has more than one child), when it does not
    match the expected shape, or when the date part is not a real date.
    """
    if text is None:
        return None
    match = POST_LINK_RE.fullmatch(text)
    if match is None:
        return None
    try:
        date = datetime.datetime.strptime(match.group(1), "%Y-%m-%d").date()
    except ValueError:
        # Date-shaped but not a valid calendar date: not a post link.
        return None
    return date, match.group(2)


def is_post_boundary(node):
    """Tell whether *node* ends the content of the current post.

    A boundary is either a ``<p>`` whose first ``<a>`` has post-link text,
    or the ``<h1>`` whose text is ``END_OF_POSTS_HEADING``.
    """
    if not isinstance(node, bs4.Tag):
        return False
    if node.name == "p":
        link = node.a
        return bool(
            link is not None
            and link.string
            and POST_LINK_RE.fullmatch(link.string)
        )
    if node.name == "h1":
        return node.string == END_OF_POSTS_HEADING
    return False


def collect_post_content(link):
    """Return the markup belonging to the post announced by *link*.

    Walks the siblings that follow the link's parent element until a
    boundary (see ``is_post_boundary``) or the end of the siblings is
    reached.  Text nodes are copied verbatim; tags are pretty-printed.
    """
    parts = []
    # The sibling immediately after the parent is skipped, as the original
    # script did (presumably the whitespace between the two elements).
    node = link.parent.next_sibling
    while node is not None:
        node = node.next_sibling
        # Running out of siblings ends the post instead of crashing.
        if node is None or is_post_boundary(node):
            break
        if isinstance(node, bs4.NavigableString):
            parts.append(node)
        else:
            parts.append(node.prettify())
    return "".join(parts)


def build_feed(html):
    """Return a ``FeedGenerator`` holding at most ``feed_size`` posts of *html*."""
    soup = bs4.BeautifulSoup(html, features="lxml")

    generator = feed.FeedGenerator()
    generator.title(title)
    generator.id(feed_id)

    posts = 0
    for link in soup.find_all("a"):
        if posts >= feed_size:
            break
        parsed = parse_post_link(link.string)
        href = link.get("href")
        # Skip anchors that are not dated post links or that lack a target.
        if parsed is None or not href:
            continue
        post_date, post_title = parsed

        entry = generator.add_item()
        entry.title(post_title)
        entry.id(href)
        entry.link(href=href)
        # Posts only carry a date; publish them as midnight UTC of that day.
        entry.updated(
            datetime.datetime.combine(
                post_date,
                datetime.datetime.min.time(),
                tzinfo=datetime.timezone.utc,
            )
        )
        # The collected body is HTML markup, so label it as such; without a
        # type Atom consumers treat the content as plain text.
        entry.content(collect_post_content(link), type="html")

        posts += 1

    return generator


def main():
    """Fetch the page, build the feed and print it as pretty Atom XML."""
    generator = build_feed(fetch_page(url))
    print(generator.atom_str(pretty=True).decode("utf8"))


if __name__ == "__main__":
    main()
\ No newline at end of file |
