local export = {}
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
local u = mw.ustring.char
local ugsub = mw.ustring.gsub
local ulower = mw.ustring.lower
local uupper = mw.ustring.upper
-- Combining diacritical marks, built from their Unicode code points with
-- mw.ustring.char. They are concatenated into patterns further down, which is
-- why the text is decomposed with toNFD before matching.
local grave = u(0x0300) -- U+0300 COMBINING GRAVE ACCENT
local acute = u(0x0301) -- U+0301 COMBINING ACUTE ACCENT
local tilde = u(0x0303) -- U+0303 COMBINING TILDE
local macron = u(0x0304) -- U+0304 COMBINING MACRON
local dotabove = u(0x0307) -- U+0307 COMBINING DOT ABOVE
local caron = u(0x030C) -- U+030C COMBINING CARON
local ogonek = u(0x0328) -- U+0328 COMBINING OGONEK
-- Pattern character class matching one accent mark that is stripped from entry
-- names. It has to be a bracketed class: it is used both as `accents .. "+"`
-- and directly after a `%f` frontier item, and `%f` must be followed by `[`.
-- An empty string here makes the `%f` pattern malformed and turns the
-- `accents .. "+"` pattern into a match for a literal "+".
-- NOTE(review): the set is restricted to grave, acute and tilde on the
-- assumption that macron, caron, ogonek and dot above are parts of distinct
-- letters and must survive in entry names -- confirm against the upstream module.
local accents = "[" .. grave .. acute .. tilde .. "]"
-- Maps the dotless letters to their ordinary dotted counterparts. The keys are
-- written as UTF-8 byte escapes so the table does not depend on how the file
-- itself is encoded.
local dotless_to_dotted = {
	["\196\177"] = "i", -- U+0131 LATIN SMALL LETTER DOTLESS I
	["\200\183"] = "j", -- U+0237 LATIN SMALL LETTER DOTLESS J
}

--- Rebuild a base letter plus whatever mark was captured below it.
-- Used as a gsub replacement callback: anything matched but not passed in
-- (the combining dot above) is thereby dropped from the text.
-- @param base  the captured base letter (dotted or dotless)
-- @param below the captured mark following the base, or "" if there was none
-- @return the dotted form of `base` (or `base` unchanged when it has no table
--         entry) followed by `below`
local function char_to_dotted_form(base, below)
	-- Index the table by `base`; using the table itself as the operand would
	-- always be truthy and make the concatenation raise an error.
	return (dotless_to_dotted[base] or base) .. below
end
--- Remove the combining dot above from "i"/"j" and undo dotless forms.
-- Matches a base letter (i, j, U+0131 dotless i or U+0237 dotless j), an
-- optional ogonek, and a following combining dot above, then lets
-- char_to_dotted_form rebuild the sequence without the dot. Only sequences
-- that actually contain the dot above are touched, so the input is expected
-- to be decomposed (callers pass toNFD output).
-- @param text decomposed string
-- @return the rewritten string (the gsub replacement count is discarded)
local function dots_to_entryname_form(text)
	-- The first capture must be the base letter itself: an empty `()` is a
	-- position capture and would hand the callback a number instead.
	-- "\196\177" and "\200\183" are the UTF-8 encodings of U+0131 and U+0237.
	local pattern = "([ij\196\177\200\183])(" .. ogonek .. "?)" .. dotabove
	return (ugsub(text, pattern, char_to_dotted_form))
end
--- gsub callback for a base letter (plus optional mark) that precedes an accent.
-- @param base  the captured base letter
-- @param below the captured mark following the base, or ""
-- @return the replacement text for the matched sequence
local function char_to_accent_form(base, below)
	local is_dotted = base == "i" or base == "j"
	if not is_dotted then
		-- Any other base reaching this point is handed to
		-- char_to_dotted_form, so a dotless letter combining with an accent
		-- still normalizes properly. This shouldn't happen, but just in case.
		return char_to_dotted_form(base, below)
	end
	-- Plain "i"/"j": append an explicit 'dot above' after the base and its
	-- lower mark.
	return base .. below .. dotabove
end
--- Build the display form of `text`.
-- Dots are first normalised to the entry-name form (accents are kept), then an
-- explicit combining dot above is inserted between "i"/"j" (with optional
-- ogonek) and a directly following accent.
-- @param text the input string
-- @param lang unused by this function
-- @param sc   unused by this function
-- @return the result recomposed with toNFC
function export.makeDisplayText(text, lang, sc)
	-- Normalize any dots to the entryname form (while retaining accents).
	text = dots_to_entryname_form(toNFD(text))
	-- Add a 'dot above' between "i" or "j" and an accent. The first capture
	-- has to be the base letter (char_to_accent_form compares it with "i" and
	-- "j"); an empty `()` would be a position capture yielding a number.
	-- "\196\177" and "\200\183" are the UTF-8 encodings of U+0131 (dotless i)
	-- and U+0237 (dotless j). `%f` must be followed by a bracketed class, so
	-- `accents` is expected to be one.
	local pattern = "([ij\196\177\200\183])(" .. ogonek .. "?)%f" .. accents
	text = ugsub(text, pattern, char_to_accent_form)
	return toNFC(text)
end
--- Reduce `text` to its entry-name form, still decomposed.
-- @param text the input string
-- @return the decomposed string with accents removed and dots normalised
local function entryname_form(text)
	local decomposed = toNFD(text)
	-- Delete every run of characters matched by `accents`.
	local without_accents = ugsub(decomposed, accents .. "+", "")
	-- Then bring the dots into their entry-name form.
	return dots_to_entryname_form(without_accents)
end
--- Build the entry name for `text`.
-- @param text the input string
-- @param lang unused by this function
-- @param sc   unused by this function
-- @return the entry-name form of `text`, recomposed with toNFC
function export.makeEntryName(text, lang, sc)
	local stripped = entryname_form(text)
	return toNFC(stripped)
end
-- Replacement table handed to gsub in export.makeSortKey. The values are
-- Private Use Area code points U+F000..U+F003, plus "i" followed by U+F004.
-- NOTE(review): every entry below is missing its key (`= value` on its own is
-- not valid Lua), so the file cannot load as it stands. The keys appear to
-- have been lost and must be restored from the original module; which key
-- belongs to which value cannot be determined from this file.
-- NOTE(review): the combining marks that are defined at the top of the file
-- but not used in any pattern (macron, caron) are plausible keys -- TODO confirm.
local sortkey_substitutes = {
= u(0xF000),
= u(0xF001),
= u(0xF002),
= u(0xF003),
= "i" .. u(0xF004),
}
--- Build the sort key for `text`.
-- The text is lowercased, reduced to its (decomposed) entry-name form, passed
-- character by character through sortkey_substitutes, then uppercased and
-- recomposed with toNFC.
-- @param text the input string
-- @param lang unused by this function
-- @param sc   unused by this function
-- @return the sort key string
function export.makeSortKey(text, lang, sc)
	-- Normalize to the entryname form.
	text = entryname_form(ulower(text))
		-- Match one UTF-8 character at a time (a lead byte followed by its
		-- continuation bytes \128-\191) so that each character is looked up
		-- in the table on its own; characters without an entry are left as
		-- they are. A pattern of ".*" would look up the whole string as a
		-- single key and never hit a per-character entry.
		:gsub(".[\128-\191]*", sortkey_substitutes)
	return toNFC(uupper(text))
end
return export