-- For testcases, see Module:Bopo-convert/testcases.
-- Table of functions exposed by this module; it is returned at the end of the file.
local export = {}
-- Maps each Bopomofo initial (consonant) letter to its Pinyin spelling.
-- NOTE(review): the keys were missing from the source (entries read `="b"`),
-- which is not valid Lua. They are reconstructed here from the standard
-- Zhuyin-Pinyin correspondence and the Unicode Bopomofo block; confirm
-- against the module's testcases.
local Bopo2pin_initials = {
	--labials
	["ㄅ"]="b", ["ㄆ"]="p", ["ㄇ"]="m", ["ㄈ"]="f",
	--dentals
	["ㄉ"]="d", ["ㄊ"]="t", ["ㄋ"]="n", ["ㄌ"]="l",
	--velars/gutturals
	["ㄍ"]="g", ["ㄎ"]="k", ["ㄏ"]="h",
	--palatals
	["ㄐ"]="j", ["ㄑ"]="q", ["ㄒ"]="x",
	--retroflex
	["ㄓ"]="zh", ["ㄔ"]="ch", ["ㄕ"]="sh", ["ㄖ"]="r",
	--dental sibilants
	["ㄗ"]="z", ["ㄘ"]="c", ["ㄙ"]="s",
	--other languages
	["ㄪ"]="v", ["ㄫ"]="ng", ["ㄬ"]="gn",
}
-- Spelling of a medial glide when it opens a syllable that has no initial
-- consonant (Pinyin writes these as y/w/yu instead of i/u/ü).
-- NOTE(review): the keys were missing from the source (entries read `="y"`);
-- they are reconstructed from the standard Zhuyin-Pinyin correspondence.
local Bopo2pin_medials_naked = {
	["ㄧ"]="y",
	["ㄨ"]="w",
	["ㄩ"]="yu",
}
-- Maps medial glides and simple vowels to their Pinyin letters when they
-- follow an initial consonant (or another medial).
-- NOTE(review): the keys were missing from the source (entries read `="i"`);
-- they are reconstructed from the standard Zhuyin-Pinyin correspondence.
local Bopo2pin_medials = {
	["ㄧ"]="i",
	["ㄨ"]="u",
	["ㄩ"]="ü",
	["ㄚ"]="a", ["ㄛ"]="o", ["ㄜ"]="e", ["ㄝ"]="e", --"ye" ?
}
-- Maps compound finals to Pinyin. The entries "i", "n" and "ng" are stored
-- without a leading "e": the converter prepends it only when no medial (or
-- a bare "w") precedes them, so the same entry yields both "ben" and "yin".
-- NOTE(review): the keys were missing from the source (entries read `="ai"`);
-- they are reconstructed from the standard Zhuyin-Pinyin correspondence.
local Bopo2pin_finals = {
	["ㄞ"]="ai", ["ㄟ"]="i", ["ㄠ"]="ao", ["ㄡ"]="ou",
	["ㄢ"]="an", ["ㄣ"]="n", ["ㄤ"]="ang", ["ㄥ"]="ng", ["ㄦ"]="r",
};
-- Maps a Zhuyin tone letter to the UTF-8 bytes of the combining diacritic
-- that Pinyin places on the vowel: acute (U+0301), caron (U+030C),
-- grave (U+0300), nothing for the neutral-tone dot, and macron (U+0304)
-- when no tone letter is written (first tone).
-- NOTE(review): the keys were missing from the source; they are
-- reconstructed from the diacritics they map to.
local Bopo2pin_tones = {
	["ˊ"]="\204\129", ["ˇ"]="\204\140", ["ˋ"]="\204\128", ["˙"]="", [""] = "\204\132"
}
--- Convert Zhuyin (Bopomofo) text to Pinyin with tone diacritics.
-- Each syllable is matched as initial + medial(s) + final + tone letter +
-- optional erhua "ㄦ"; text that is not Bopomofo is left untouched.
-- @param text a string, or a table with an `args` field (e.g. a Scribunto
--   frame), in which case `args[1]` is converted.
-- @return the converted text, normalised to Unicode NFC.
-- NOTE(review): the bracketed character classes and table index expressions
-- were missing from the source; they are reconstructed here from the keys of
-- the lookup tables. Confirm against the module's testcases.
function export.Bopo_to_pinyin(text)
	-- Accept a frame-like table: take its first positional argument.
	if type(text) == "table" then text = text.args[1] end
	text = mw.ustring.gsub(text, '([ㄅㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏㄐㄑㄒㄓㄔㄕㄖㄗㄘㄙㄪㄫㄬ]?)([ㄧㄨㄩㄚㄛㄜㄝ]*)([ㄞㄟㄠㄡㄢㄣㄤㄥㄦ]?)([ˊˇˋ˙]?)(ㄦ?)', function (initial, medial, final, tone, erhua)
		-- should probably scan the string manually...
		-- Every part of the pattern is optional, so it also matches the empty
		-- string (or a stray tone letter); returning nothing keeps the text.
		if (#initial + #medial + #final) == 0 then
			return
		end
		mw.log('matched:', initial, medial, final, erhua)
		-- Look up the captures; an empty capture yields nil.
		initial = Bopo2pin_initials[initial]
		final = Bopo2pin_finals[final]
		if medial == "" then
			medial = nil
		else
			if (initial == "j") or (initial == "q") or (initial == "x") then
				-- After j/q/x Pinyin writes ü without the diaeresis.
				medial = mw.ustring.gsub(medial, '^ㄩ', "u")
			elseif not initial then
				-- Syllable starts with the glide: use the y/w/yu spelling.
				medial = mw.ustring.gsub(medial, '^.', Bopo2pin_medials_naked)
			end
			-- Letters already converted above are not table keys and are kept.
			medial = mw.ustring.gsub(medial, '.', Bopo2pin_medials)
		end
		if (final == "ng") or (final == "n") or (final == "i") or (final == "r") then
			if not medial or (medial == "w") then
				-- These finals are stored without their "e" (en, eng, ei, er).
				final = "e" .. final
			elseif medial == "y" then
				medial = "yi"
			elseif (final == "ng") then
				-- u + ng is spelled "ong" ("iong" after j/q/x); yu + ng is "yong".
				if medial == "u" then
					medial = ((initial == "j") or (initial == "q") or (initial == "x")) and "io" or "o"
				elseif medial == "yu" then
					medial = "yo"
				end
			end
		elseif (final == "ou") and (medial == "i") then
			-- i + ou is abbreviated to "iu".
			final = "u"
		end
		if not final then
			if medial == "y" then
				medial = "yi"
			elseif medial == "w" then
				medial = "wu"
			elseif not medial then
				-- Bare initial (e.g. zh, ch, sh, r, z, c, s): Pinyin adds "i".
				medial = "i"
			end
		end
		if erhua ~= "" then
			final = (final or "") .. "r"
		end
		mw.log('initial conversion: ', initial, medial, final)
		-- place the tone diacritic
		local tone_mark = Bopo2pin_tones[tone]
		local repls
		-- Put the mark after the first vowel of the final, if it has one.
		final, repls = string.gsub(final or "", "^(.-[aeiou])", "%1" .. tone_mark)
		if repls == 0 then
			-- since all medials end with a vowel
			medial = medial .. tone_mark
		end
		return (initial or "") .. (medial or "") .. (final or "")
	end)
	-- Compose base letters with the combining tone marks.
	return mw.ustring.toNFC(text)
end
-- Hand the table of exported functions back to whoever loads this module.
return export