|
DefineClass.HTMLParser = { |
|
__parents = { "PropertyObject" }, |
|
|
|
properties = { |
|
{ category = "General", id = "TextColor", editor = "color", default = RGB(80, 80, 80) }, |
|
{ category = "General", id = "BoldColor", editor = "color", default = RGB(0, 0, 0) }, |
|
{ category = "General", id = "HyperlinkColor", editor = "color", default = RGB(0, 0, 238) }, |
|
|
|
{ category = "General", id = "HeadingFont1", editor = "text", default = "Heading1" }, |
|
{ category = "General", id = "HeadingFont2", editor = "text", default = "Heading2" }, |
|
{ category = "General", id = "HeadingFont3", editor = "text", default = "Heading3" }, |
|
{ category = "General", id = "HeadingFont4", editor = "text", default = "Heading4" }, |
|
}, |
|
|
|
numbered_entries = 0, |
|
errors = false, |
|
} |
|
|
|
function HTMLParser:MakeListItem(content) |
|
if #content > 0 then return "\n" .. content .. "\n" end |
|
return "" |
|
end |
|
|
|
function HTMLParser:HandleText(text) |
|
|
|
local text_pieces = { } |
|
while #text > 0 do |
|
local first, last, content = string.find(text, "&([#%w%d]+);") |
|
if not first or not last or #content == 0 then |
|
table.insert(text_pieces, text) |
|
break |
|
end |
|
|
|
if string.starts_with(content, '#') then |
|
|
|
local codepoint = tonumber(string.sub(content, 2)) |
|
if codepoint and 31 < codepoint and codepoint < 127 then |
|
table.insert(text_pieces, string.sub(text, 1, first - 1)) |
|
table.insert(text_pieces, string.char(codepoint)) |
|
else |
|
table.insert(text_pieces, string.sub(text, first, last)) |
|
end |
|
else |
|
local char |
|
if content == "lt" then char = '<' end |
|
if content == "gt" then char = '>' end |
|
if content == "amp" then char = '&' end |
|
if content == "nbsp" then char = ' ' end |
|
|
|
if char then |
|
table.insert(text_pieces, string.sub(text, 1, first - 1)) |
|
table.insert(text_pieces, char) |
|
else |
|
table.insert(text_pieces, string.sub(text, first, last)) |
|
end |
|
end |
|
|
|
text = string.sub(text, last + 1) |
|
end |
|
text = table.concat(text_pieces, "") |
|
|
|
|
|
text = text:gsub("%s+", " ") |
|
|
|
return text |
|
end |
|
|
|
function HTMLParser:BeginTag(tag, attributes, state) |
|
if tag == "UL" then |
|
local old_state = {self.MakeListItem} |
|
self.MakeListItem = function(self, content) return "\n•" .. content end |
|
return old_state |
|
end |
|
if tag == "OL" then |
|
local old_state = {self.MakeListItem, self.numbered_entries} |
|
self.MakeListItem = function(self, content) |
|
self.numbered_entries = self.numbered_entries + 1 |
|
return string.format("\n%s. %s", self.numbered_entries, content) |
|
end |
|
return old_state |
|
end |
|
end |
|
|
|
function HTMLParser:EndTag(tag, attributes, state, original_inner_html, processed_html) |
|
local level = string.match(tag, "H(%d+)") |
|
if level then |
|
level = tonumber(level) |
|
if not level or level < 1 or level > 4 then level = 1 end |
|
|
|
local fontstyle = self["HeadingFont" .. level] |
|
local r, g, b, a = GetRGBA(self.BoldColor) |
|
return string.format("\n<style %s><color %s %s %s %s>", fontstyle, r, g, b, a) .. processed_html .. "</color></style>\n" |
|
end |
|
|
|
if tag == "P" then |
|
return "\n" .. processed_html:gsub("\n", "") .. "\n" |
|
end |
|
|
|
if tag == "BR" then |
|
return "</br>" |
|
end |
|
|
|
if tag == "STRONG" or tag == "B" then |
|
local r, g, b, a = GetRGBA(self.BoldColor) |
|
return string.format("<color %s %s %s %s>", r, g, b, a) .. processed_html .. "</color>" |
|
end |
|
|
|
if tag == "A" then |
|
local r, g, b, a = GetRGBA(self.HyperlinkColor) |
|
local link_ref = attributes.href |
|
if not link_ref then return "" end |
|
link_ref = link_ref:gsub(" ", "+") |
|
if (processed_html or "") ~= "" then |
|
return string.format("%s [%s]", processed_html, link_ref) |
|
else |
|
return link_ref |
|
end |
|
|
|
end |
|
|
|
if tag == "UL" then |
|
processed_html = processed_html:gsub("\n", "\n ") |
|
self.MakeListItem = state[1] |
|
return "\n" .. processed_html .. "\n" |
|
end |
|
|
|
if tag == "OL" then |
|
processed_html = processed_html:gsub("\n", "\n ") |
|
self.MakeListItem = state[1] |
|
self.numbered_entries = state[2] |
|
return "\n" .. processed_html .. "\n" |
|
end |
|
|
|
if tag == "LI" then |
|
return self:MakeListItem(processed_html) |
|
end |
|
|
|
return "" |
|
end |
|
|
|
local function closest_find(str, patterns) |
|
local results = {} |
|
for key, value in ipairs(patterns) do |
|
table.insert(results, table.pack(string.find(str, value))) |
|
end |
|
|
|
table.sort(results, function(a,b) return (a[1] or 10000) < (b[1] or 100000) end) |
|
return table.unpack(results[1]) |
|
end |
|
|
|
function HTMLParser:ExtractAttributes(tag) |
|
|
|
local name, rest = string.match(tag, "(%w+)%s+(.+)") |
|
if not name then return string.upper(tag), {} end |
|
|
|
local attributes = {} |
|
while #rest > 0 do |
|
local start_idx, end_idx, key, value = closest_find(rest, {"(%w+)%s*=%s*\"([^\"]*)\"", "(%w+)%s*=%s*\'([^\']*)\'", "(%w+)%s*=%s*([^%s]+)"}) |
|
|
|
if start_idx then |
|
attributes[key] = value |
|
rest = rest:sub(end_idx + 1) |
|
else |
|
break |
|
end |
|
end |
|
return string.upper(name), attributes |
|
end |
|
|
|
function HTMLParser:Error(err) |
|
self.errors = self.errors or {} |
|
table.insert(self.errors, err) |
|
end |
|
|
|
function HTMLParser:CloseHTMLTag(tag_to_close, attributes, rest_of_text) |
|
local pos = 0 |
|
local buffer = "" |
|
local state = self:BeginTag(tag_to_close, attributes) |
|
while pos < #rest_of_text do |
|
local next_tag_start, next_tag_end = string.find(rest_of_text, "<[^>]+>", pos) |
|
if not next_tag_start then |
|
buffer = buffer .. self:HandleText(rest_of_text:sub(pos)) |
|
break |
|
end |
|
local tag = rest_of_text:sub(next_tag_start + 1, next_tag_end - 1) |
|
local slashed, tag_str = string.match(tag, "^(/?)%s*(%w+).*$") |
|
tag_str = tag_str and string.upper(tag_str) |
|
|
|
buffer = buffer .. self:HandleText(rest_of_text:sub(pos, next_tag_start - 1)) |
|
if (slashed and #slashed > 0) or tag_str == "BR" then |
|
if tag_str == tag_to_close then |
|
buffer = self:EndTag(tag_str, attributes, state, rest_of_text:sub(1, next_tag_start-1), buffer) |
|
pos = next_tag_end + 1 |
|
break |
|
elseif tag_str == "BR" then |
|
buffer = buffer .. self:EndTag(tag_str, {}, false, rest_of_text:sub(next_tag_start-1), "") |
|
pos = next_tag_end + 1 |
|
else |
|
self:Error("Expected " .. tag_to_close .. " found " .. tag) |
|
pos = next_tag_end + 1 |
|
end |
|
else |
|
local new_tag_str, attributes = self:ExtractAttributes(tag) |
|
local processed_html, next_pos = self:CloseHTMLTag(new_tag_str, attributes, rest_of_text:sub(next_tag_end + 1)) |
|
pos = next_pos + next_tag_end |
|
buffer = buffer .. processed_html |
|
end |
|
|
|
end |
|
return buffer, pos |
|
end |
|
|
|
function HTMLParser:ConvertText(input) |
|
local r, g, b, a = GetRGBA(self.TextColor) |
|
local text, reached_pos = self:CloseHTMLTag(nil, {}, input) |
|
local final_text = string.format("<color %s %s %s %s>", r, g, b, a) .. text .. "</color>" |
|
return final_text:gsub("[%s]*[\n]+", "\n"):gsub("</?br%s*/?>", "\n") |
|
end |
|
|
|
function ParseHTML(input, properties) |
|
properties = properties and table.copy(properties) or {} |
|
local parser = HTMLParser:new(properties) |
|
return parser:ConvertText(input), parser.errors |
|
end |