-- Reimplementation of mw.ustring.char in pure Lua, which is faster up to
-- around 15 arguments, and much faster if given only one argument.
local math_module = "Module:math"
local char = string.char
local error = error
local format = string.format
local pcall = pcall
local select = select
local tonumber = tonumber
local type = type
-- Lazy loader for the `to_hex` function exported by the module named in
-- `math_module`. The module is only required the first time this is called.
local function to_hex(...)
	local loaded = require(math_module).to_hex
	-- Rebind the local so that later calls go straight to the real function
	-- instead of passing through this loader again.
	to_hex = loaded
	return loaded(...)
end
-- Raises the "codepoint out of range" error for argument number `i`.
--   cp: the offending numeric value.
--   i:  1-based position of the argument; also used to compute the error
--       level, so that the error is attributed `i + 3` frames up the stack.
-- Never returns.
local function codepoint_err(cp, i)
	-- to_hex can only return integers, so the bad value is only included in
	-- the message when it converts cleanly into something that looks like a
	-- codepoint; any failure inside to_hex is swallowed by pcall.
	local ok, hex = pcall(to_hex, cp, true)
	local got = ""
	if ok then
		got = "; got " .. hex
	end
	local message = format(
		"bad argument #%d to 'string/char' (codepoint between 0x0 and 0x10FFFF expected%s)",
		i, got)
	error(message, i + 3)
end
-- Converts a list of codepoints into their UTF-8 byte values, returned as
-- multiple results (one number per byte, in order).
--   n:   total number of codepoint arguments.
--   i:   1-based index of the argument currently being encoded.
--   v:   the current argument; anything accepted by tonumber().
--   ...: the arguments that follow `v`.
-- Returns the bytes for `v`, followed by the bytes of every later argument.
-- Raises an error if `v` is not convertible to a number, or if it lies outside
-- the range 0x0 to 0x10FFFF (NaN included).
--
-- Every returned byte is a whole number: the fractional part of the codepoint
-- is dropped first, and the bytes are then peeled off with `%` and an exact
-- division. string.char is only specified for integers, so handing it values
-- such as 0xC0 + cp / 0x40 would make the result depend on how the host
-- converts floats to integers (or fail where non-integers are rejected).
--
-- The recursive call is always the last item of a longer return list, so each
-- argument keeps its own stack frame; the error levels used here and in
-- codepoint_err count those frames to reach the outside caller.
local function utf8_char(n, i, v, ...)
	local cp = tonumber(v)
	if cp == nil then
		-- Skip the `i` utf8_char frames plus the function that started the
		-- recursion, so the error points at that function's caller.
		error(format("bad argument #%d to 'char' (number expected; got %s)", i, type(v)), i + 2)
	elseif cp < 0 then
		codepoint_err(cp, i)
	elseif cp < 0x80 then
		-- One byte: 0xxxxxxx.
		cp = cp - cp % 1
		if i == n then
			return cp
		end
		return cp, utf8_char(n, i + 1, ...)
	elseif cp < 0x800 then
		-- Two bytes: 110xxxxx 10xxxxxx.
		cp = cp - cp % 1
		local low = cp % 0x40
		local b1 = 0xC0 + (cp - low) / 0x40
		local b2 = 0x80 + low
		if i == n then
			return b1, b2
		end
		return b1, b2, utf8_char(n, i + 1, ...)
	elseif cp < 0x10000 then
		-- Three bytes: 1110xxxx 10xxxxxx 10xxxxxx.
		-- Don't return "?" for surrogates, like mw.ustring.char does, as they
		-- have legitimate uses (e.g. in JSON).
		cp = cp - cp % 1
		local low = cp % 0x40
		local b3 = 0x80 + low
		cp = (cp - low) / 0x40
		low = cp % 0x40
		local b2 = 0x80 + low
		local b1 = 0xE0 + (cp - low) / 0x40
		if i == n then
			return b1, b2, b3
		end
		return b1, b2, b3, utf8_char(n, i + 1, ...)
	elseif cp < 0x110000 then
		-- Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
		cp = cp - cp % 1
		local low = cp % 0x40
		local b4 = 0x80 + low
		cp = (cp - low) / 0x40
		low = cp % 0x40
		local b3 = 0x80 + low
		cp = (cp - low) / 0x40
		low = cp % 0x40
		local b2 = 0x80 + low
		local b1 = 0xF0 + (cp - low) / 0x40
		if i == n then
			return b1, b2, b3, b4
		end
		return b1, b2, b3, b4, utf8_char(n, i + 1, ...)
	end
	-- Too large, or NaN (which fails every comparison above).
	codepoint_err(cp, i)
end
-- Module export: takes zero or more codepoints and returns the string formed
-- by their UTF-8 encodings, concatenated in order.
-- Raises an error (via utf8_char) if any argument is not convertible to a
-- number or is not a codepoint between 0x0 and 0x10FFFF.
return function(...)
	local n = select("#", ...)
	if n == 0 then
		-- Match string.char and mw.ustring.char, which both yield the empty
		-- string when called without arguments, instead of returning no
		-- value at all.
		return ""
	end
	return char(utf8_char(n, 1, ...))
end