about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author alex <alex@pdp7.net> 2022-03-12 14:19:54 +0100
committer alex <alex@pdp7.net> 2022-03-12 14:19:54 +0100
commit a467deb7c389f2b6cdafd1c333b7689933d796db (patch)
tree 8e5303f26e9d5803a5d0f50649b2d4693fe2f002
parent fa3de0b9c71a385beb880d4d425e1d96b4293e71 (diff)
Polish old blog importer
-rw-r--r-- from_old_blog.py 15
1 files changed, 12 insertions, 3 deletions
diff --git a/from_old_blog.py b/from_old_blog.py
index 6f56c01c..096eb378 100644
--- a/from_old_blog.py
+++ b/from_old_blog.py
@@ -1,5 +1,6 @@
import pathlib
import re
+import subprocess
import yaml
@@ -7,7 +8,7 @@ def slug(s):
orig = s
s = re.sub(r"""[*\^/\-":+#.,¿?¡!()[\]'%]+""", " ", s)
s = s.strip()
- s = re.sub(" +", "_", s)
+ s = re.sub(" +", "-", s)
s = s.lower()
acc = {
"áà": "a",
@@ -21,7 +22,7 @@ def slug(s):
for ds, r in acc.items():
for d in ds:
s = s.replace(d, r)
- assert re.match("^[a-z0-9_]+$", s), (orig, s)
+ assert re.match(r"^[a-z0-9\-]+$", s), (orig, s)
return s
@@ -34,6 +35,14 @@ for post in posts:
post["post"] = post["post"].encode("iso-8859-1").decode("utf8")
post["title"] = post["title"].encode("iso-8859-1").decode("utf8")
+ t = post["title"]
+
y,m,d = post["posted"].split(" ")[0].split("-")
- p = pathlib.Path("content") / y / m / d / slug(post["title"])
+ p: pathlib.Path = pathlib.Path("content") / y / m / (slug(post["title"]) + ".gmi")
+ p.parent.mkdir(parents=True, exist_ok=True)
+
+ with open(p, "w") as f:
+ f.write(f"# {t}\n")
+ f.write(f"{y}-{m}-{d}\n\n")
+ f.write(subprocess.run(["/home/alex/go/bin/html2gmi", "-mn"], input=post["post"].encode("utf8"), stdout=subprocess.PIPE).stdout.decode("utf8"))