File size: 7,251 Bytes
b6a38d7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 |
-- HTMLParser converts a small subset of HTML into text that uses <color ...> and
-- <style ...> markup. The properties hold the colours and heading style names
-- that the conversion methods read.
DefineClass.HTMLParser = {
__parents = { "PropertyObject" },
properties = {
-- colour wrapped around the whole converted text (read by ConvertText)
{ category = "General", id = "TextColor", editor = "color", default = RGB(80, 80, 80) },
-- colour used for <strong>/<b> runs and for headings (read by EndTag)
{ category = "General", id = "BoldColor", editor = "color", default = RGB(0, 0, 0) },
-- hyperlink colour; links are currently emitted as plain "text [url]", so this
-- colour does not appear in the output
{ category = "General", id = "HyperlinkColor", editor = "color", default = RGB(0, 0, 238) },
-- style names for heading levels 1..4 (EndTag looks them up as "HeadingFont" .. level)
{ category = "General", id = "HeadingFont1", editor = "text", default = "Heading1" },
{ category = "General", id = "HeadingFont2", editor = "text", default = "Heading2" },
{ category = "General", id = "HeadingFont3", editor = "text", default = "Heading3" },
{ category = "General", id = "HeadingFont4", editor = "text", default = "Heading4" },
},
-- running item number used while the items of an <ol> are being formatted
numbered_entries = 0,
-- false until Error() records the first message; after that a list of messages
errors = false,
}
-- Default formatting for the content of an <li> element.
-- BeginTag replaces this method on the instance while a <ul>/<ol> is open.
--   content: already converted inner text of the list item
--   returns: the content on its own line, or "" when the content is empty
function HTMLParser:MakeListItem(content)
	if #content <= 0 then
		return ""
	end
	local line_break = "\n"
	return line_break .. content .. line_break
end
-- Named character references decoded by HTMLParser:HandleText.
-- nbsp is spelled as the UTF-8 bytes of U+00A0 so the invisible character cannot
-- be silently turned into a plain space by an editor.
-- NOTE(review): assumes this file and the text renderer use UTF-8 - confirm.
local html_named_entities = {
	lt = "<",
	gt = ">",
	amp = "&",
	nbsp = "\194\160",
}

-- Converts a run of text found between tags.
-- Decodes the supported character references (&lt; &gt; &amp; &nbsp; and decimal
-- &#NN; for visible ASCII, codes 32..126) and then collapses every whitespace
-- sequence into a single space.
-- References that are not recognised are kept verbatim, together with the text
-- around them (previously the text in front of such a reference was dropped).
--   text: raw text between two tags
--   returns: the decoded, whitespace-normalised text
function HTMLParser:HandleText(text)
	-- a nil/false result from the callback tells gsub to keep the original match
	local decoded = text:gsub("&([#%w]+);", function(content)
		if content:sub(1, 1) == "#" then
			-- numeric reference: allow only visible ASCII characters
			local codepoint = tonumber(content:sub(2))
			if codepoint and 31 < codepoint and codepoint < 127 then
				return string.char(codepoint)
			end
			return nil
		end
		return html_named_entities[content]
	end)
	-- sequences of whitespace collapse into a single space
	local collapsed = decoded:gsub("%s+", " ")
	return collapsed
end
-- Called by CloseHTMLTag when an opening tag is entered.
-- For <ul>/<ol> it swaps self.MakeListItem for a bullet / numbered formatter and
-- returns the values needed to undo that; EndTag receives them as `state`.
-- An <ol> now restarts its numbering at 1: the previous counter is saved in the
-- state but the counter itself was never reset, so a nested <ol> used to
-- continue the numbering of its parent.
--   tag: upper-case tag name (nil for the top-level call)
--   attributes: attribute table of the tag (not used here)
--   state: not used here
--   returns: { previous MakeListItem [, previous numbered_entries] } for UL/OL,
--            nothing for every other tag
function HTMLParser:BeginTag(tag, attributes, state)
	if tag == "UL" then
		local old_state = { self.MakeListItem }
		self.MakeListItem = function(self, content)
			return "\n•" .. content
		end
		return old_state
	end
	if tag == "OL" then
		local old_state = { self.MakeListItem, self.numbered_entries }
		-- every ordered list counts from the start; EndTag restores the outer counter
		self.numbered_entries = 0
		self.MakeListItem = function(self, content)
			self.numbered_entries = self.numbered_entries + 1
			return string.format("\n%s. %s", self.numbered_entries, content)
		end
		return old_state
	end
end
-- Called by CloseHTMLTag when the closing tag of an element is reached (and for
-- every <br>). Produces the output text for the whole element.
--   tag: upper-case tag name
--   attributes: attribute table of the opening tag
--   state: value returned by BeginTag for this element
--   original_inner_html: unconverted text passed by the caller (not used here)
--   processed_html: already converted content of the element
--   returns: converted text for the element; "" for tags that are not handled
function HTMLParser:EndTag(tag, attributes, state, original_inner_html, processed_html)
	-- anchored so that only H<digits> is treated as a heading
	local level = string.match(tag, "^H(%d+)$")
	if level then
		level = tonumber(level)
		-- only HeadingFont1..4 exist; any other level falls back to level 1
		if not level or level < 1 or level > 4 then level = 1 end
		local fontstyle = self["HeadingFont" .. level]
		local r, g, b, a = GetRGBA(self.BoldColor)
		return string.format("\n<style %s><color %s %s %s %s>", fontstyle, r, g, b, a) .. processed_html .. "</color></style>\n"
	end
	if tag == "P" then
		-- a paragraph becomes a single line surrounded by line breaks
		local single_line = processed_html:gsub("\n", "")
		return "\n" .. single_line .. "\n"
	end
	if tag == "BR" then
		return "</br>" -- keep BRs as they will be replaced much later to new lines.
	end
	if tag == "STRONG" or tag == "B" then
		local r, g, b, a = GetRGBA(self.BoldColor)
		return string.format("<color %s %s %s %s>", r, g, b, a) .. processed_html .. "</color>"
	end
	if tag == "A" then
		local link_text = processed_html or ""
		local link_ref = attributes.href
		-- an anchor without href (e.g. <a name="...">) keeps its visible text
		if not link_ref then return link_text end
		link_ref = link_ref:gsub(" ", "+")
		-- Links are emitted as plain text. The clickable/coloured markup is disabled;
		-- it used to be:
		-- "<color r g b><h OpenUrl link_ref r g b underline>" .. text .. "</h></color>"
		if link_text ~= "" then
			return string.format("%s [%s]", link_text, link_ref)
		end
		return link_ref
	end
	if tag == "UL" or tag == "OL" then
		-- indent the list items and undo what BeginTag changed
		local indented = processed_html:gsub("\n", "\n ")
		self.MakeListItem = state[1]
		if tag == "OL" then
			self.numbered_entries = state[2]
		end
		return "\n" .. indented .. "\n"
	end
	if tag == "LI" then
		return self:MakeListItem(processed_html)
	end
	-- content of any other tag is discarded
	return ""
end
-- Runs string.find for every pattern and returns the result of the match that
-- starts earliest in `str`. When several patterns match at the same position the
-- one listed first wins, so callers can order patterns from most to least
-- specific.
-- (The previous version sorted the results with a comparator that used two
-- different fallback values for "no match" and relied on table.sort, which is
-- not stable; ties and misses could therefore come out in the wrong order.)
--   str: string to search
--   patterns: array of Lua patterns
--   returns: start index, end index and the captures of the winning match,
--            or nil when no pattern matches
local function closest_find(str, patterns)
	local best
	for _, pattern in ipairs(patterns) do
		local found = table.pack(string.find(str, pattern))
		-- strict "<" keeps the earlier pattern on equal start positions
		if found[1] and (not best or found[1] < best[1]) then
			best = found
		end
	end
	if not best then
		return nil
	end
	return table.unpack(best, 1, best.n)
end
-- Splits the inside of an opening tag into its name and attributes.
-- "a href='qwert'" => "A", {href = "qwert"}
-- Attribute values may be double-quoted, single-quoted or unquoted.
-- A bare name followed only by whitespace and/or a slash ("p ", "br/") now
-- yields just the name; previously the whole text was upper-cased ("P ", "BR/"),
-- which could never match a closing tag.
--   tag: text between "<" and ">" of an opening tag
--   returns: upper-case tag name, table of attribute name => value
function HTMLParser:ExtractAttributes(tag)
	local name, rest = string.match(tag, "(%w+)%s+(.+)")
	if not name then
		local bare_name = string.match(tag, "^%s*(%w+)[%s/]*$")
		return string.upper(bare_name or tag), {}
	end
	-- ordered from most to least specific
	local value_patterns = {
		"(%w+)%s*=%s*\"([^\"]*)\"",
		"(%w+)%s*=%s*\'([^\']*)\'",
		"(%w+)%s*=%s*([^%s]+)",
	}
	local attributes = {}
	while #rest > 0 do
		local start_idx, end_idx, key, value = closest_find(rest, value_patterns)
		if not start_idx then
			break
		end
		attributes[key] = value
		-- continue after the attribute that was just consumed
		rest = rest:sub(end_idx + 1)
	end
	return string.upper(name), attributes
end
-- Records a conversion problem on this parser.
-- self.errors is turned into a list the first time a message is recorded.
--   err: message to append to self.errors
function HTMLParser:Error(err)
	local messages = self.errors or {}
	self.errors = messages
	messages[#messages + 1] = err
end
-- Converts the content of one element: consumes `rest_of_text` up to the closing
-- tag that matches `tag_to_close`, recursing for every nested opening tag.
-- Fixes over the previous version:
--   * the last character of the text was skipped when it directly followed a tag
--     (loop condition was "pos < length" with a 1-based position);
--   * when the text ended without the closing tag, the returned position was not
--     advanced, so the caller converted the same text a second time;
--   * a stray closing tag at top level (tag_to_close == nil) raised a
--     "concatenate a nil value" error while building the error message.
-- NOTE: when the text ends before the closing tag is found, EndTag is not called
-- for this element and the content is returned as converted so far.
--   tag_to_close: upper-case name of the element being converted, nil at top level
--   attributes: attribute table of that element
--   rest_of_text: text following the element's opening tag
--   returns: converted text, index in rest_of_text of the first unconsumed character
function HTMLParser:CloseHTMLTag(tag_to_close, attributes, rest_of_text)
	local text_len = #rest_of_text
	local pos = 1
	local buffer = ""
	local state = self:BeginTag(tag_to_close, attributes)
	while pos <= text_len do
		local next_tag_start, next_tag_end = string.find(rest_of_text, "<[^>]+>", pos)
		if not next_tag_start then
			-- no more tags: everything left is plain text and is now consumed
			buffer = buffer .. self:HandleText(rest_of_text:sub(pos))
			pos = text_len + 1
			break
		end
		local tag = rest_of_text:sub(next_tag_start + 1, next_tag_end - 1)
		local slashed, tag_str = string.match(tag, "^(/?)%s*(%w+).*$")
		tag_str = tag_str and string.upper(tag_str)
		-- text between the current position and the tag
		buffer = buffer .. self:HandleText(rest_of_text:sub(pos, next_tag_start - 1))
		if (slashed and #slashed > 0) or tag_str == "BR" then
			if tag_str == tag_to_close then
				-- our own closing tag: finish this element
				buffer = self:EndTag(tag_str, attributes, state, rest_of_text:sub(1, next_tag_start - 1), buffer)
				pos = next_tag_end + 1
				break
			elseif tag_str == "BR" then
				-- <br> has no content, it is handled in place
				buffer = buffer .. self:EndTag(tag_str, {}, false, rest_of_text:sub(next_tag_start - 1), "")
				pos = next_tag_end + 1
			else
				-- closing tag of some other element: report it and skip it
				self:Error("Expected " .. (tag_to_close or "no closing tag") .. " found " .. tag)
				pos = next_tag_end + 1
			end
		else
			-- nested element: convert it recursively on the text after its opening tag
			local new_tag_str, new_attributes = self:ExtractAttributes(tag)
			local processed_html, next_pos = self:CloseHTMLTag(new_tag_str, new_attributes, rest_of_text:sub(next_tag_end + 1))
			-- next_pos is relative to the text after the opening tag
			pos = next_pos + next_tag_end
			buffer = buffer .. processed_html
		end
	end
	return buffer, pos
end
-- Converts a whole HTML string.
-- The converted text is wrapped in the TextColor colour tag, runs of whitespace
-- that end in line breaks are reduced to one line break, and the <br> markers
-- left by EndTag are turned into line breaks.
--   input: HTML text
--   returns: converted text, followed by the substitution count of the final
--            gsub (callers normally use only the first value)
function HTMLParser:ConvertText(input)
	local red, green, blue, alpha = GetRGBA(self.TextColor)
	local converted = self:CloseHTMLTag(nil, {}, input)
	local color_open = string.format("<color %s %s %s %s>", red, green, blue, alpha)
	local wrapped = color_open .. converted .. "</color>"
	local squeezed = wrapped:gsub("[%s]*[\n]+", "\n")
	return squeezed:gsub("</?br%s*/?>", "\n")
end
-- Convenience entry point: converts `input` with a fresh HTMLParser.
--   input: HTML text
--   properties: optional table; a copy of it is passed to HTMLParser:new
--   returns: converted text, and the parser's `errors` field (a list of messages
--            once Error() has been called during the conversion)
function ParseHTML(input, properties)
	local init = properties and table.copy(properties)
	if not init then
		init = {}
	end
	local parser = HTMLParser:new(init)
	local converted = parser:ConvertText(input)
	return converted, parser.errors
end