local export = {}
-- internal encoding: every special letter or digraph is represented by a single uppercase ASCII letter (A-Z)
-- Maps each special orthographic letter or digraph to its single-letter
-- internal code. The entries are the exact inverse of export.decode.
-- NOTE(review): the keys were restored from the values of export.decode;
-- confirm their normalization form, since export.syllabify converts its
-- input to NFD before looking letters up here.
export.encode = {
	["ã"] = "A",
	["ɓ"] = "B",
	["cʼ"] = "C",
	["ɗ"] = "D",
	["ɛ"] = "E",
	["ɠ"] = "G",
	["ĩ"] = "I",
	["ɲ"] = "N",
	["ɔ"] = "O",
	["sh"] = "S",
	["tʼ"] = "T",
	["ʔ"] = "Q",
}
-- Reverse lookup for the internal encoding: each single uppercase ASCII
-- code letter maps to the orthographic letter or digraph it stands for.
-- export.combine passes this table to gsub as the replacement.
export.decode = {
	["A"] = "ã",
	["B"] = "ɓ",
	["C"] = "cʼ",
	["D"] = "ɗ",
	["E"] = "ɛ",
	["G"] = "ɠ",
	["I"] = "ĩ",
	["N"] = "ɲ",
	["O"] = "ɔ",
	["S"] = "sh",
	["T"] = "tʼ",
	["Q"] = "ʔ",
}
-- Sort-key substitutions: each internal code letter is replaced by the
-- plain form used for ordering. export.makeSortKey passes this table to
-- gsub as the replacement.
export.sortkey = {
	-- codes that sort as a plain vowel letter
	["A"] = "a",
	["E"] = "e",
	["I"] = "i",
	["O"] = "o",
	-- codes that sort as a plain consonant letter
	["B"] = "b",
	["C"] = "c",
	["D"] = "d",
	["G"] = "g",
	["N"] = "n",
	["T"] = "t",
	-- codes that keep a multi-byte or multi-letter form
	["S"] = "sh",
	["Q"] = "ʔ",
}
-- Looks up one orthographic unit in export.encode. The NFC form is tried as
-- well because export.syllabify works on NFD text while the table keys may
-- be stored precomposed. Returns nil when there is no code, in which case
-- gsub keeps the matched text unchanged.
local function encodeUnit(unit)
	return export.encode[unit] or export.encode[mw.ustring.toNFC(unit)]
end

-- Parse a word into syllables using the internal encoding.
-- Returns a table with extra info, e.g. "Wucʼê" becomes:
-- { "wu", "Ce", accent=2, cap=true, falling=true }
--
-- @param word  string, one word in the normal orthography
-- @return      sequence of syllables (tone mark removed) with fields:
--                accent  = index of the syllable that carried the tone mark,
--                          0 when the word has none
--                cap     = true when lowercasing changed the word
--                falling = true when the tone mark was a circumflex
-- Raises an error when the word has more than one tone mark, or contains a
-- ʼ that does not follow c or t.
function export.syllabify(word)
	-- All patterns below are byte-level patterns over UTF-8 text.
	local TONE = "\204[\129\130]" -- U+0301 (acute) or U+0302 (circumflex)
	local VOWEL = "[aeiouAEIO]" -- plain vowels plus the codes for ã ɛ ĩ ɔ
	local CONSONANT = "[bcdfghjklmnpqrstvwxyzBCDGNQST]"

	word = mw.ustring.toNFD(word)
	local lowered = word:ulower()
	local cap = lowered ~= word
	word = lowered

	local accent = word:match(TONE)
	local _, count = word:gsub(TONE, "")
	if count > 1 then error("More than one diacritic found.") end

	-- Ejective digraphs first, so that any ʼ left over is known to be stray.
	word = word:gsub("[ct]ʼ", export.encode)
	if word:match("ʼ") then error("Uncoupled ʼ found.") end
	word = word:gsub("sh", "S")
	-- After NFD, ã and ĩ are a base letter followed by U+0303 (\204\131).
	word = word:gsub("[ai]\204\131", encodeUnit)
	-- Every remaining multi-byte character; tone marks have no code and are
	-- therefore left in place.
	word = word:gsub("[\194-\244][\128-\191]*", encodeUnit)

	-- NOTE(review): the syllable shape used here (one optional onset
	-- consonant plus a run of vowels, trailing consonants staying with the
	-- preceding syllable) is inferred from the two documented examples;
	-- confirm against the language's phonotactics.
	word = word:gsub(CONSONANT .. "?" .. VOWEL .. "+", ".%0")
		:gsub("(" .. VOWEL .. ")(" .. VOWEL .. TONE .. ")", "%1.%2") -- e.g. tiá -> ti.á
		:gsub("^%.", "")
		:gsub("%.%.+", ".")

	local syllables = mw.text.split(word, ".", true)
	local accented = 0
	for i, syl in ipairs(syllables) do
		local stripped, found = syl:gsub(TONE, "")
		if found > 0 then
			-- Store the syllable without its tone mark; at most one syllable
			-- can carry it, so the search stops here.
			syllables[i] = stripped
			accented = i
			break
		end
	end
	syllables.accent = accented
	syllables.cap = cap
	syllables.falling = accent == "\204\130"
	return syllables
end
-- Inverse of export.syllabify: rebuilds the written word from a syllable
-- table.
--
-- @param syllables  sequence of internally encoded syllables with the fields
--                   accent (index of the syllable that receives the tone
--                   mark; no mark is added when no index matches),
--                   cap (capitalise the first letter) and
--                   falling (use a circumflex instead of an acute)
-- @return           the word in the normal orthography, NFC-normalised
-- The input table is not modified.
function export.combine(syllables)
	local accentIndex = syllables.accent
	-- U+0302 (circumflex) for a falling tone, U+0301 (acute) otherwise.
	local diacritic = syllables.falling and "\204\130" or "\204\129"

	-- Collect into a fresh table so the caller's syllables stay untouched.
	local parts = {}
	for i, syl in ipairs(syllables) do
		if i == accentIndex then
			-- The combining mark goes directly after the first vowel.
			syl = syl:gsub("[aeiouAEIO]", "%0" .. diacritic, 1)
		end
		parts[i] = syl
	end

	-- Expand every internal code letter back to its orthographic form.
	local word = table.concat(parts):gsub("[A-Z]", export.decode)
	if syllables.cap then
		-- Uppercase exactly one UTF-8 character at the start of the word.
		word = word:gsub("^[%z\1-\127\194-\244][\128-\191]*", string.uupper, 1)
	end
	return mw.ustring.toNFC(word)
end
-- Generates the sort key for categorization.
-- wucʼê --> wuce2'
-- (2: accent on second syllable)
-- (apostrophe at the end: falling tone)
--
-- @param text  string to build a key for; words are separated by spaces
-- @param lang  language code; only "amf" is handled
-- @param sc    script code; only "Latn" is handled
-- @return      the sort key. For any other language or script the text is
--              returned unchanged; a word that export.syllabify rejects is
--              kept as it is. Both cases are recorded through the tracking
--              call below.
function export.makeSortKey(text, lang, sc)
	if lang ~= "amf" or sc ~= "Latn" then
		require("Module:debug").track("amf-utilities/sort")
		return text
	end
	local words = mw.text.split(text, " ", true)
	for i, word in ipairs(words) do
		local success, syllables = pcall(export.syllabify, word)
		if success then
			-- A single local keeps only gsub's first result (the string).
			local plain = table.concat(syllables):gsub("[A-Z]", export.sortkey)
			words[i] = plain
				.. syllables.accent
				.. (syllables.falling and "'" or "")
		else
			require("Module:debug").track("amf-utilities/sort")
		end
	end
	return table.concat(words, " ")
end
return export