local load_module = "Module:load"
local string_char_module = "Module:string/char"
local find = string.find
local gsub = string.gsub
local match = string.match
local require = require
local tonumber = tonumber
-- Lazy stub for the function exported by the module named in
-- `string_char_module`. The first call loads that module, rebinds `u` to the
-- loaded function so later calls bypass this stub, and forwards the arguments.
local function u(...)
	local loaded = require(string_char_module)
	-- Replace this stub with the real implementation.
	u = loaded
	return loaded(...)
end
-- Cache for the entity data table; stays nil until first needed.
local entities
-- Loads "Module:data/entities" through the loader module's `load_data`,
-- stores the result in `entities`, and returns it. The function then clears
-- itself, since every later lookup can use the cached `entities` directly.
local function get_entities()
	local data = require(load_module).load_data("Module:data/entities")
	entities = data
	get_entities = nil
	return data
end
--- Replacement callback for gsub(): decodes one entity candidate.
-- @param hash "#" for a numeric character reference, "" otherwise.
-- @param x    Optional prefix capture: when `hash` is "#", an empty value
--             means `code` is decimal and any other value means hexadecimal
--             (presumably "x" or "X" - confirm against the calling pattern).
-- @param code The remaining text of the candidate, before the ";".
-- @return The decoded string, or nil so that gsub() keeps the original text.
local function decode_entity(hash, x, code)
	-- With no "#", this is either a named entity or a false match. Anything
	-- captured in `x` is really the start of the name, so re-attach it before
	-- looking the name up. The lookup must return the table *entry*: handing
	-- the whole table back to gsub() raises "invalid replacement value".
	-- Unknown names yield nil, which leaves the text untouched.
	-- NOTE(review): assumes the data table is keyed by the bare entity name
	-- (no "&" or ";") - confirm against Module:data/entities.
	if hash == "" then
		return (entities or get_entities())[x .. code]
	end
	-- Exclude numbers that don't fit the expected format. The "()" position
	-- capture makes match() return a number instead of copying the string.
	local cp
	if x == "" then
		cp = match(code, "^()%d+$") and tonumber(code)
	else
		cp = match(code, "^()%x+$") and tonumber(code, 16)
	end
	-- Exclude surrogates (U+D800 to U+DFFF) and codepoints that are too high.
	-- The trailing "or nil" turns a false result into nil.
	return cp and (
		cp <= 0xD7FF or
		cp >= 0xE000 and cp <= 0x10FFFF
	) and u(cp) or nil
end
--- Decodes HTML entities in a string.
-- Candidates of the form "&name;", "&#123;" and "&#x1F;" are passed to
-- decode_entity(); any candidate it rejects is left exactly as written.
-- @param str The input string.
-- @return `str` with recognised entities replaced (a single value).
return function(str)
	-- As an optimisation, only do a full search with gsub() if plain searches
	-- for "&" and ";" find anything.
	local amp = find(str, "&", nil, true)
	-- Search for ";" after the point "&" was found.
	if not (amp and find(str, ";", amp + 1, true)) then
		return str
	end
	-- Captures: optional "#", optional "x"/"X" hex marker, then the body.
	-- Non-ASCII characters aren't valid in proper HTML named entities, but
	-- MediaWiki uses them in some nonstandard aliases (which have also been
	-- included in the entity data), so bytes 128-255 are accepted as well.
	-- The parentheses discard gsub()'s second result (the match count).
	return (gsub(str, "&(#?)([xX]?)([%w\128-\255]+);", decode_entity))
end