local export = {}
local append = require("Module:table").append
local codepoint = mw.ustring.codepoint
local concat = table.concat
local explode_utf8 = require("Module:string utilities").explode_utf8
local floor = math.floor
local format = string.format
local insert = table.insert
local lower = mw.ustring.lower
local pattern_escape = require("Module:string utilities").pattern_escape
local sortkey
local upper = mw.ustring.upper
local data = require("Module:User:Theknightwho/sortkey/serialized")
-- Lua pattern matching exactly one UTF-8 encoded character: a lead byte
-- (ASCII, or 0xC2-0xF4) followed by any number of continuation bytes
-- (0x80-0xBF). `%z` stands for the NUL byte, which Lua 5.1 patterns cannot
-- contain literally. The previous value "*" only matched a literal asterisk,
-- so the weight captures built from this pattern could never match real data.
local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*"
--- Build a sort key string for `text`.
-- Every character contributes up to three levels of numeric weights. The
-- non-zero weights of each level are collected separately, the three levels
-- are joined (all primary weights, then secondary, then tertiary), and each
-- weight is rendered as at least four lowercase hexadecimal digits.
-- @param text a string, or a table from which `text.term.term` is read
-- @return the concatenated hexadecimal sort key (empty string if no weights)
function export.sortkey(text)
	if type(text) == "table" then
		-- NOTE(review): the shape of this table is not visible in this file;
		-- the doubled `.term.term` access should be confirmed against callers.
		text = text.term.term
	end
	local chars = explode_utf8(text)

	-- Appends `v` to `t` unless it is zero; zero weights never reach the key.
	local function table_insert(t, v)
		if v ~= 0 then
			insert(t, v)
		end
	end

	local primary = {}
	local secondary = {}
	local tertiary = {}

	-- Adds one collation element (primary, secondary, tertiary weight).
	local function insert_weights(w1, w2, w3)
		table_insert(primary, w1)
		table_insert(secondary, w2)
		table_insert(tertiary, w3)
	end

	for _, char in ipairs(chars) do
		local cp = codepoint(char)
		-- The first five branches derive weights arithmetically from the code
		-- point: a lead element (base constant, 0x20, 2) followed by a trail
		-- element whose primary weight always has bit 0x8000 set.
		-- NOTE(review): the ranges and base constants appear to follow the
		-- implicit-weight table of UTS #10 (Tangut, Nushu, Khitan, core Han,
		-- other Han) -- confirm against the targeted Unicode version.
		if (cp >= 0x17000 and cp <= 0x18AFF) or (cp >= 0x18D00 and cp <= 0x18D8F) then
			insert_weights(0xFB00, 0x20, 2)
			insert_weights((cp - 0x17000) % 0x8000 + 0x8000, 0, 0)
		elseif cp >= 0x1B170 and cp <= 0x1B2FF then
			insert_weights(0xFB01, 0x20, 2)
			insert_weights((cp - 0x1B170) % 0x8000 + 0x8000, 0, 0)
		elseif cp >= 0x18B00 and cp <= 0x18CFF then
			insert_weights(0xFB02, 0x20, 2)
			insert_weights((cp - 0x18B00) % 0x8000 + 0x8000, 0, 0)
		elseif (cp >= 0x4E00 and cp <= 0x9FFF) or (cp >= 0xF900 and cp <= 0xFAFF) then
			insert_weights(0xFB40 + floor(cp / 0x8000), 0x20, 2)
			insert_weights(cp % 0x8000 + 0x8000, 0, 0)
		elseif (cp >= 0x3400 and cp <= 0x4DBF) or (cp >= 0x20000 and cp <= 0x2A6DF) or (cp >= 0x2A700 and cp <= 0x2EBEF) or (cp >= 0x30000 and cp <= 0x323AF) then
			insert_weights(0xFB80 + floor(cp / 0x8000), 0x20, 2)
			insert_weights(cp % 0x8000 + 0x8000, 0, 0)
		else
			-- Everything else is looked up in the serialized data string,
			-- whose entries are delimited by "\255" bytes.
			if char == "\0" then
				-- NOTE(review): "%z" is then passed through pattern_escape;
				-- confirm that helper leaves the NUL class intact.
				char = "%z"
			end
			-- NOTE(review): the "+" quantifier binds to the last item of the
			-- escaped character, so this matches repetitions of the character
			-- rather than the character followed by its weight data. This
			-- looks unintended; confirm against the serialized data format.
			local char_data = data:match("\255(" .. pattern_escape(char) .. "+)\255")
			if not char_data then
				-- No entry: fall back to weights derived from the code point.
				insert_weights(0xFBC0 + floor(cp / 0x8000), 0x20, 2)
				insert_weights(cp % 0x8000 + 0x8000, 0, 0)
			else
				-- Each run of three UTF-8 characters encodes one collation
				-- element; the code point of each character is the weight.
				-- The leading "()" position capture is unused.
				for _, w1, w2, w3 in char_data:gmatch("()(" .. UTF8_char .. ")(" .. UTF8_char .. ")(" .. UTF8_char .. ")") do
					insert_weights(codepoint(w1), codepoint(w2), codepoint(w3))
				end
			end
		end
	end

	local key = append(primary, secondary, tertiary)
	for k, v in ipairs(key) do
		-- Replace each weight in place with its hex form. Assigning to `key`
		-- itself (as was done before) turned the table into a string and made
		-- the concat below raise an error.
		key[k] = format("%04x", v)
	end
	return concat(key)
end

-- Bind the file-level forward-declared local so that the helpers below can
-- call the function without going through the export table.
sortkey = export.sortkey
--- Sort key variant that swaps letter case before computing the key.
-- * If `k` has no case distinction (its upper- and lower-cased forms are
--   equal), the key of `k` itself is returned.
-- * If `k` is already entirely upper case, the key of its lower-cased form
--   is returned.
-- * Otherwise the key of its upper-cased form is returned.
-- @param k the string to build a key for
-- @return the sort key produced by `sortkey` for the chosen form
local function cap_first_sortkey(k)
	local uppercased = upper(k)
	local lowercased = lower(k)

	local chosen
	if uppercased == lowercased then
		chosen = k
	elseif uppercased == k then
		chosen = lowercased
	else
		chosen = uppercased
	end
	return sortkey(chosen)
end
--- Sort the list `t` in place by sort key, using the stable sort module.
-- Each item's key is computed at most once and cached for the duration of
-- the call.
-- @param t the list to sort; it is modified in place
-- @param cap_first if truthy, keys come from `cap_first_sortkey` instead of
--   `sortkey`
-- @return `t`, the same table that was passed in
function export.sort(t, cap_first)
	local memo = {}
	local func = cap_first and cap_first_sortkey or sortkey

	-- Returns the cached key for `item`, computing and storing it on first
	-- use. The cache must be indexed by item: the earlier code read and
	-- overwrote `memo` as a whole, so nothing was ever computed and the
	-- comparator ended up comparing two tables.
	local function key_of(item)
		local key = memo[item]
		if key == nil then
			key = func(item)
			memo[item] = key
		end
		return key
	end

	require("Module:stable sort")(t, function(k1, k2)
		return key_of(k1) < key_of(k2)
	end)
	return t
end
return export