aboutsummaryrefslogtreecommitdiff
path: root/gemini-to-web/src/gemini_to_web/html.py
blob: 959415533195fb9e9fda7439c69701f206cb435a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import typing

import htmlgenerator
from lxml import etree, html

from gemini_to_web import parser


def first_header_title_extractor(parsed: list[parser.GemElement]):
    """Return the text of the first heading line in *parsed*.

    Args:
        parsed: Parsed gemtext elements, in document order.

    Returns:
        The ``heading_text`` of the first ``parser.HeadingLine`` found, or
        ``None`` when *parsed* contains no heading line.
    """
    # Stop at the first match instead of collecting every heading.
    return next(
        (
            element.heading_text
            for element in parsed
            if isinstance(element, parser.HeadingLine)
        ),
        None,
    )


def to_html(parsed: list[parser.GemElement], title_extractor=first_header_title_extractor):
    body = []
    building_element = None
    building_content = None

    def close(body, building_element, building_content):
        if building_element and building_content:
            body.append(building_element(*building_content))
        return (body, None, None)

    head = []

    if title_extractor:
       head.append(htmlgenerator.TITLE(title_extractor(parsed)))

    for item in parsed:
        match item:
            case parser.HeadingLine(level, heading_text):
                if building_element:
                    body, building_element, building_content = close(body, building_element, building_content)
                headers = [htmlgenerator.H1, htmlgenerator.H2, htmlgenerator.H3]
                body.append(headers[level-1](heading_text))
            case parser.QuoteLine(text):
                # https://geminiprotocol.net/docs/gemtext.gmi#blockquotes says:
                #
                # > The quoted content is written as a single long line [...]
                if building_element:
                    body, building_element, building_content = close(body, building_element, building_content)
                body.append(htmlgenerator.BLOCKQUOTE(text))
            case parser.PreformattingToggleLine(alt_text):
                if building_element == htmlgenerator.PRE:
                    assert not alt_text, f"Closing preformatting toggle line with alt text {alt_text}"
                    body, building_element, building_content = close(body, building_element, building_content)
                else:
                    body, building_element, building_content = close(body, building_element, building_content)
                    building_element = htmlgenerator.PRE
                    building_content = ""
            case parser.PreformattedTextLine(text):
                assert building_element == htmlgenerator.PRE
                building_content += text
                building_content += "\n"
            case parser.TextLine(""):
                if building_element:
                    body, building_element, building_content = close(body, building_element, building_content)
            case parser.TextLine(text):
                if building_element == htmlgenerator.P:
                    building_content += [htmlgenerator.BR(), text]
                    continue
                elif building_element is not None and building_element != htmlgenerator.P:
                    body, building_element, building_content = close(body, building_element, building_content)
                building_element = htmlgenerator.P
                building_content = [text]
            case parser.LinkLine(url, link_name):
                if building_element == htmlgenerator.P:
                    building_content += [htmlgenerator.BR(), htmlgenerator.A(link_name, href=url)]
                    continue
                elif building_element is not None and building_element != htmlgenerator.P:
                    body, building_element, building_content = close(body, building_element, building_content)
                building_element = htmlgenerator.P
                building_content = [htmlgenerator.A(link_name, href=url)]
            case parser.ListItem(text):
                if building_element == htmlgenerator.UL:
                    building_content.append(htmlgenerator.LI(text))
                    continue
                elif building_element is not None and building_element != htmlgenerator.UL:
                    body, building_element, building_content = close(body, building_element, building_content)
                building_element = htmlgenerator.UL
                building_content = [htmlgenerator.LI(text)]
            case _:
                assert False, f"unknown element {item}"

    close(body, building_element, building_content)
    html = htmlgenerator.HTML(
        htmlgenerator.HEAD(*head),
        htmlgenerator.BODY(*body),
    )
    return html


def pretty(s):
    """Return the HTML markup *s* re-serialized with indentation.

    Args:
        s: HTML markup to reformat.

    Returns:
        The markup as a ``str``, parsed by ``lxml.html`` and written back
        with ``pretty_print`` enabled.
    """
    tree = html.fromstring(s)
    # Serialize as HTML, not XML: the default XML method collapses empty
    # elements into self-closing tags (e.g. ``<title/>``), which an HTML
    # parser treats as an unclosed start tag.
    serialized = etree.tostring(tree, pretty_print=True, method="html")
    return serialized.decode("utf8")


def cli_to_html():
    """Read gemtext from standard input and print it as indented HTML.

    The input is parsed with ``parser.parse``, converted with ``to_html``,
    rendered by ``htmlgenerator.render`` with an empty context, and passed
    through ``pretty`` before being printed.
    """
    import sys

    elements = list(parser.parse(sys.stdin.read()))
    document = to_html(elements)
    markup = htmlgenerator.render(document, {})
    print(pretty(markup))