aboutsummaryrefslogtreecommitdiff
path: root/gemini-to-web/src/gemini_to_web/html.py
diff options
context:
space:
mode:
Diffstat (limited to 'gemini-to-web/src/gemini_to_web/html.py')
-rw-r--r--gemini-to-web/src/gemini_to_web/html.py106
1 files changed, 106 insertions, 0 deletions
diff --git a/gemini-to-web/src/gemini_to_web/html.py b/gemini-to-web/src/gemini_to_web/html.py
new file mode 100644
index 0000000..9594155
--- /dev/null
+++ b/gemini-to-web/src/gemini_to_web/html.py
@@ -0,0 +1,106 @@
+import typing
+
+import htmlgenerator
+from lxml import etree, html
+
+from gemini_to_web import parser
+
+
def first_header_title_extractor(parsed: list[parser.GemElement]):
    """Return the text of the first heading line in *parsed*.

    Returns ``None`` when *parsed* contains no ``parser.HeadingLine``.
    """
    for candidate in parsed:
        if isinstance(candidate, parser.HeadingLine):
            return candidate.heading_text
    return None
+
+
+def to_html(parsed: list[parser.GemElement], title_extractor=first_header_title_extractor):
+ body = []
+ building_element = None
+ building_content = None
+
+ def close(body, building_element, building_content):
+ if building_element and building_content:
+ body.append(building_element(*building_content))
+ return (body, None, None)
+
+ head = []
+
+ if title_extractor:
+ head.append(htmlgenerator.TITLE(title_extractor(parsed)))
+
+ for item in parsed:
+ match item:
+ case parser.HeadingLine(level, heading_text):
+ if building_element:
+ body, building_element, building_content = close(body, building_element, building_content)
+ headers = [htmlgenerator.H1, htmlgenerator.H2, htmlgenerator.H3]
+ body.append(headers[level-1](heading_text))
+ case parser.QuoteLine(text):
+ # https://geminiprotocol.net/docs/gemtext.gmi#blockquotes says:
+ #
+ # > The quoted content is written as a single long line [...]
+ if building_element:
+ body, building_element, building_content = close(body, building_element, building_content)
+ body.append(htmlgenerator.BLOCKQUOTE(text))
+ case parser.PreformattingToggleLine(alt_text):
+ if building_element == htmlgenerator.PRE:
+ assert not alt_text, f"Closing preformatting toggle line with alt text {alt_text}"
+ body, building_element, building_content = close(body, building_element, building_content)
+ else:
+ body, building_element, building_content = close(body, building_element, building_content)
+ building_element = htmlgenerator.PRE
+ building_content = ""
+ case parser.PreformattedTextLine(text):
+ assert building_element == htmlgenerator.PRE
+ building_content += text
+ building_content += "\n"
+ case parser.TextLine(""):
+ if building_element:
+ body, building_element, building_content = close(body, building_element, building_content)
+ case parser.TextLine(text):
+ if building_element == htmlgenerator.P:
+ building_content += [htmlgenerator.BR(), text]
+ continue
+ elif building_element is not None and building_element != htmlgenerator.P:
+ body, building_element, building_content = close(body, building_element, building_content)
+ building_element = htmlgenerator.P
+ building_content = [text]
+ case parser.LinkLine(url, link_name):
+ if building_element == htmlgenerator.P:
+ building_content += [htmlgenerator.BR(), htmlgenerator.A(link_name, href=url)]
+ continue
+ elif building_element is not None and building_element != htmlgenerator.P:
+ body, building_element, building_content = close(body, building_element, building_content)
+ building_element = htmlgenerator.P
+ building_content = [htmlgenerator.A(link_name, href=url)]
+ case parser.ListItem(text):
+ if building_element == htmlgenerator.UL:
+ building_content.append(htmlgenerator.LI(text))
+ continue
+ elif building_element is not None and building_element != htmlgenerator.UL:
+ body, building_element, building_content = close(body, building_element, building_content)
+ building_element = htmlgenerator.UL
+ building_content = [htmlgenerator.LI(text)]
+ case _:
+ assert False, f"unknown element {item}"
+
+ close(body, building_element, building_content)
+ html = htmlgenerator.HTML(
+ htmlgenerator.HEAD(*head),
+ htmlgenerator.BODY(*body),
+ )
+ return html
+
+
def pretty(s):
    """Return the HTML markup *s* re-serialised with pretty-printing.

    The markup is parsed with ``lxml.html`` and written back with the HTML
    serialiser.  XML serialisation (the ``etree.tostring`` default) would
    emit empty elements as self-closing tags such as ``<title/>``, which
    HTML parsers treat as an unclosed start tag.

    Args:
        s: HTML markup to reformat.

    Returns:
        The reformatted markup as a ``str``.
    """
    tree = html.fromstring(s)
    return html.tostring(tree, pretty_print=True).decode("utf8")
+
+
def cli_to_html():
    """Read gemtext from standard input and print it as pretty-printed HTML."""
    import sys

    elements = list(parser.parse(sys.stdin.read()))
    document = to_html(elements)
    markup = htmlgenerator.render(document, {})
    print(pretty(markup))