blob: 673ea7e23db843d2770a07040db3b98eb3ea83c5 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
|
import datetime
import re
import urllib.request

import bs4
from feedgen import feed

# Index page to scrape and static feed metadata.
URL = "http://localhost:8080/"
FEED_SIZE = 10  # maximum number of posts to include in the feed
FEED_TITLE = "El blog es mío"
FEED_ID = "https://foo.bar"

# Post links on the index page read "YYYY-MM-DD Title"; compiled once
# because it is matched against every anchor (and sibling) on the page.
POST_RE = re.compile(r"(....-..-..) (.*)")


def _is_post_link(node):
    """Return True if *node* is a <p> wrapping an anchor whose text looks
    like a post heading ("YYYY-MM-DD Title")."""
    return bool(
        node is not None
        and node.name == "p"
        and node.a
        and node.a.string
        and POST_RE.fullmatch(node.a.string)
    )


def main():
    """Fetch the blog index at URL and print an Atom feed of the newest posts.

    Each post entry's content is everything between its heading paragraph and
    the next post heading (or the trailing "Sobre mí" section).
    """
    with urllib.request.urlopen(URL) as resp:
        page = resp.read().decode("utf8")
    soup = bs4.BeautifulSoup(page, features="lxml")

    fg = feed.FeedGenerator()
    fg.title(FEED_TITLE)
    fg.id(FEED_ID)

    posts = 0
    for a in soup.find_all("a"):
        if posts == FEED_SIZE:
            break
        # Anchors with no direct string (e.g. wrapping markup) or whose text
        # is not "<date> <title>" are navigation links; skip them.
        # (The original crashed with TypeError on a.string is None.)
        match = POST_RE.fullmatch(a.string) if a.string else None
        if not match:
            continue

        post_title = match.group(2)
        date = datetime.datetime.strptime(match.group(1), "%Y-%m-%d").date()

        entry = fg.add_item()
        entry.title(post_title)
        entry.id(a["href"])
        entry.link(href=a["href"])
        # Posts only carry a date; timestamp them at midnight UTC so the
        # feed is timezone-aware.
        entry.updated(
            datetime.datetime.combine(
                date, datetime.datetime.min.time(), tzinfo=datetime.timezone.utc
            )
        )

        # Collect the post body: walk siblings after the heading paragraph
        # until the next post heading or the "Sobre mí" section.
        node = a.parent.next_sibling
        body = ""
        while node is not None:
            node = node.next_sibling
            if node is None:
                # Ran off the end of the document — stop instead of raising
                # AttributeError on None (bug in the original walk).
                break
            if _is_post_link(node):
                break
            if node.name == "h1" and node.string == "Sobre mí":
                break
            body += node if isinstance(node, bs4.NavigableString) else node.prettify()
        entry.content(body)
        posts += 1

    print(fg.atom_str(pretty=True).decode("utf8"))


if __name__ == "__main__":
    main()
|