This module does romanisation conversion, IPA conversion, etc. for Nanjing Mandarin. See {{zh-pron}}.
-- Nanjing Mandarin
local export = {}

-- Initial consonants: internal one-character code -> IPA.
-- Keys use the module's internal encoding (Z = zh, C = ch, S = sh; see
-- digraph_encode further down). The empty-string key stands for a syllable
-- with no initial, so looking up "" yields an empty onset rather than nil.
-- It has to be written as a bracketed key: a bare `= ""` is not valid Lua.
local initials = {
	b = "p", p = "pʰ", m = "m", f = "f",
	d = "t", t = "tʰ", l = "l",
	g = "k", k = "kʰ", h = "x",
	j = "t͡ɕ", q = "t͡ɕʰ", x = "ɕ",
	Z = "ʈ͡ʂ", C = "ʈ͡ʂʰ", S = "ʂ", r = "ʐ",
	z = "t͡s", c = "t͡sʰ", s = "s",
	[""] = "",
}
-- Finals (rimes): internal code -> IPA.
-- Keys use the module's internal encoding (U = ü, N = ng; see digraph_encode
-- further down). A trailing q marks the checked finals (IPA ʔ), a trailing r
-- the rhotacised ones.
-- `in` and `or` are Lua reserved words and cannot be used as bare field names,
-- so those two entries must be written with bracketed string keys.
local finals = {
	ii = "z̩", iU = "ʐ̩", i = "i", u = "u", U = "y",
	a = "a", ia = "ia", ua = "ua",
	o = "o",
	e = "e", E = "ə", ie = "ie", Ue = "ye",
	ai = "ɛ", iai = "iɛ", uai = "uɛ",
	ei = "əi", ui = "uəi",
	ao = "ɔ", iao = "iɔ",
	ou = "əɯ", iu = "iəɯ",
	an = "ã", ian = "iã", uan = "uã",
	ien = "iẽ", Uen = "yẽ",
	en = "ə̃", ["in"] = "ĩ", un = "uə̃", Un = "yĩ",
	on = "oŋ", ion = "ioŋ",
	iUq = "ʐ̩ʔ", iq = "iʔ", uq = "uʔ", Uq = "yʔ",
	aq = "aʔ", iaq = "iaʔ", uaq = "uaʔ",
	eq = "əʔ", ieq = "ieʔ", ueq = "ueʔ", Ueq = "yeʔ",
	oq = "oʔ", ioq = "ioʔ",
	er = "ər", ir = "iər", ur = "uər", Ur = "yər",
	ar = "ar", iar = "iar", uar = "uar",
	["or"] = "or", ior = "ior",
	ier = "ier",
	air = "ɛr", iair = "iɛr", uair = "uɛr",
	aor = "ɔr", iaor = "iɔr",
	anr = "ãr", ianr = "iãr", uanr = "uãr",
	enr = "ɵ̃r",
	m = "m̩", n = "n̩", N = "ŋ̍",
}
-- Tone values: the strings here are what py_to_ipa appends after the final.
-- NOTE(review): every entry below starts with a bare `=`. The bracketed string
-- keys appear to have been lost from this text, so the table is not valid Lua as
-- written. The trailing comments identify the first six entries as T1-T5 and T0.
-- The remaining seven entries are presumably keyed by a tone combination (sandhi
-- values), but their keys cannot be determined from here -- TODO restore them
-- from the authoritative copy of the module.
local tones = {
= "31", -- dark level / yinping (T1)
= "24", -- light level / yangping (T2)
= "11", -- rising / shang (T3)
= "44", -- departing / qu (T4)
= "5", -- checked / ru (T5)
= "", -- toneless (T0)
="33",
="11", ="11",
="12", ="12",
="42",
="3",
}
-- Superscript forms of the tone digits used in the IPA output.
local tone_superscript_digits = {
	["1"] = '¹', ["2"] = '²', ["3"] = '³', ["4"] = '⁴', ["5"] = '⁵',
}

-- Replaces every ASCII digit 1-5 in `text` with its Unicode superscript form.
-- All other characters (including any other digit) are left untouched.
-- Returns exactly one value: the converted string.
local function tone_superscript(text)
	-- The pattern must be a digit class and the table keys must be the digit
	-- strings; with an empty pattern and key-less entries nothing could match.
	-- The outer parentheses drop gsub's second result (the substitution count).
	return (text:gsub("[1-5]", tone_superscript_digits))
end
-- Sandhi lookup used by export.py_process: the value found here is written after
-- a syllable in place of its own tone digit, and the syllable is wrapped in the
-- 6...7 markers.
-- NOTE(review): every entry below starts with a bare `=`. The bracketed string
-- keys appear to have been lost from this text, so the table is not valid Lua as
-- written. The keys are presumably tone combinations (current tone plus following
-- tone) -- TODO restore them from the authoritative copy of the module.
local tone_sandhi_num = {
="4",
="3", ="3",
="2", ="2",
="1",
}
-- internal use, encode and decode digraphs
-- Maps multi-character romanisation units to the one-character internal codes:
--   * consonant digraphs ng/zh/ch/sh -> N/Z/C/S
--   * combining tone diacritics (as UTF-8 byte pairs, i.e. after NFD
--     decomposition) -> tone digits 1-5
-- The diacritic keys are the exact inverse of the byte sequences stored in
-- digraph_decode, e.g. Lánjìn Be̊qhuā -> lan2 jin1 beq5 hua4. They are not
-- identifiers, so they must be written as bracketed string keys.
local digraph_encode = {
	ng = "N", zh = "Z", ch = "C", sh = "S",
	["\204\128"] = "1", -- U+0300 combining grave accent
	["\204\129"] = "2", -- U+0301 combining acute accent
	["\204\140"] = "3", -- U+030C combining caron
	["\204\132"] = "4", -- U+0304 combining macron
	["\204\138"] = "5", -- U+030A combining ring above
}
-- Inverse of digraph_encode, plus the display-only codes:
--   * N/Z/C/S/U -> ng/zh/ch/sh/ü
--   * "0" (toneless) -> nothing
--   * "1"-"5" -> the combining tone diacritic, as a UTF-8 byte pair
--   * "6"/"7" -> opening/closing highlight span; export.py_process inserts
--     these two digits around syllables whose tone changes
-- Digit strings are not identifiers, so they must be written as bracketed keys.
local digraph_decode = {
	N = "ng", Z = "zh", C = "ch", S = "sh", U = "ü",
	["0"] = "",
	["1"] = "\204\128", -- U+0300 combining grave accent
	["2"] = "\204\129", -- U+0301 combining acute accent
	["3"] = "\204\140", -- U+030C combining caron
	["4"] = "\204\132", -- U+0304 combining macron
	["5"] = "\204\138", -- U+030A combining ring above
	["6"] = '<span style="background-color:#F5DEB3">',
	["7"] = "</span>",
}
-- Converts romanised input into the module's internal encoding.
-- The text is decomposed with mw.ustring.toNFD and then run through four
-- substitutions in order. Only the resulting string is kept (the single-target
-- assignment discards gsub's match count).
-- NOTE(review): two of the patterns below are empty strings and one capture is
-- an empty `()`. Bracketed character classes appear to have been dropped from
-- this text, so the intended match sets cannot be read off here -- TODO restore
-- them before relying on this function.
local function encode(text)
text = mw.ustring.toNFD(text)
-- each match is replaced by "^" followed by its lowercase form
-- (presumably the matches are capital letters -- confirm)
:gsub("",function(c) return "^"..c:lower() end)
-- u + U+0308 combining diaeresis (decomposed ü) becomes the internal code U
:gsub("u\204\136","U")
-- matches found as keys in digraph_encode are replaced by their codes
:gsub("",digraph_encode)
-- "n", a captured item, then "g" is rewritten as "N" plus the captured item
:gsub("n()g","N%1")
return text
end
-- Maps internal codes in `text` back through digraph_decode and returns the
-- resulting string. In this file it is called only when building error messages.
-- NOTE(review): the pattern is an empty string as written; a bracketed character
-- class appears to have been dropped from this text -- TODO restore.
local function decode_error(text)
text = text:gsub("",digraph_decode)
return text
end
-- Converts internally encoded text back to display form and returns it
-- recomposed with mw.ustring.toNFC.
-- NOTE(review): the second pattern is empty and two captures are empty `()`.
-- Bracketed character classes appear to have been dropped from this text, so
-- the exact match sets are unknown -- TODO restore.
local function decode(text)
text = text
-- "N" plus a captured item becomes "n", the captured item, then "g"
-- (the reverse of the last substitution in encode)
:gsub("N()","n%1g")
-- matches found as keys in digraph_decode are replaced by their values
:gsub("",digraph_decode)
-- the capture following a "^" marker is passed to string.upper, and the
-- marker itself is consumed
:gsub("%^()",string.upper)
return mw.ustring.toNFC(text)
end
-- check that the text is a valid input e.g. ^lan2jin1 ^beq5hua4
-- Every match of the syllable pattern is deleted from `text`; if anything is
-- left over, an error is raised whose message includes the leftover text.
-- Returns nothing on success.
-- NOTE(review): the pattern below consists only of quantifiers and "r?";
-- the bracketed character classes they applied to appear to have been dropped
-- from this text -- TODO restore.
local function check_syllable_format(text)
local check = text:gsub("???*?r?","")
if check ~= "" then error("Nanjing: Invalid syllable(s): "..check) end
end
-- inverse of py_divide_syllables
-- Takes internally encoded, tone-numbered text and returns display text via
-- decode().
-- NOTE(review): several patterns below contain empty captures `()` and bare
-- quantifiers; bracketed character classes appear to have been dropped from
-- this text, so the exact match sets are unknown -- TODO restore.
local function py_join_syllables(text)
text = text
-- a = first capture, b = second capture, c = third capture. Going by the
-- `c~="0"` test and the output order, c is presumably the tone digit.
:gsub("(?)(*?r?)()", function(a,b,c)
-- try to split b into a leading part d and trailing lowercase letters e
local d,e = b:match("^(?)(%l*)$")
if d then
-- tone goes between d and e; tone "0" is written as nothing.
-- A "'" separator is put in front of every syllable.
return "'"..a..d..(c~="0" and c or "")..e
else
-- no split possible: tone goes at the end of the syllable
return "'"..a..b..(c~="0" and c or "")
end
end)
-- drop the "'" separator in front of the captured item
:gsub("'()","%1")
-- drop the "'" separator at a frontier position, keeping an optional "6"
:gsub("%f(6?)'","%1")
return decode(text)
end
-- Lánjìn Be̊qhuā --> ^lan2jin1 ^beq5hua4
-- Encodes `text`, rearranges each syllable, validates the result, and returns
-- the internally encoded string.
-- Raises an error if check_syllable_format rejects the result, or if joining
-- the result back with py_join_syllables does not reproduce `text` exactly
-- (the message then shows the spelling the round trip produced).
-- NOTE(review): the patterns below contain empty captures `()` and bare
-- quantifiers; bracketed character classes appear to have been dropped from
-- this text, so the exact match sets are unknown -- TODO restore.
local function py_divide_syllables(text)
local res = encode(text)
-- insert "'" between a captured item and a following N at a frontier,
-- turning the N back into "n" + "g"
:gsub("()N%f","%1n'g")
-- put a "'" separator before the captured item (replacing one if present)
:gsub("'?()","'%1")
-- reorder the captures to a..c..b, writing "0" when b is empty and
-- removing the separator
:gsub("'?(??)(?)(*?r?)",
function(a,b,c) return a..c..(b~="" and b or "0") end)
check_syllable_format(res)
-- round-trip check: the input must already be in the form this module produces
local check = py_join_syllables(res)
if text ~= check then error("Nanjing: input should be "..check) end
return res
end
-- Builds the numbered display form: each match of the first pattern is wrapped
-- in <sup>...</sup>, then matches of the second pattern are replaced through
-- digraph_decode. Returns the resulting string.
-- NOTE(review): both patterns are empty strings as written; bracketed character
-- classes appear to have been dropped from this text -- TODO restore.
local function py_numbered(text)
text = text:gsub("","<sup>%0</sup>")
:gsub("",digraph_decode)
return text
end
-- canonize to adhere to pinyin rules, e.g. jü -> ju
-- Applies a fixed sequence of rewrites to internally encoded text and returns
-- the result. py_normalize calls it on its own output to verify a round trip.
-- The order of the substitutions matters, since later ones see the output of
-- earlier ones.
-- NOTE(review): the frontier patterns (%f) below have no set after them and
-- several captures are empty `()`. Bracketed character classes appear to have
-- been dropped from this text, so the contexts in which each rewrite applies
-- cannot be read off here -- TODO restore.
local function py_canonize(text)
text = text
-- U after the captured item is written as u
:gsub("()U","%1u")
-- matches found in the table are rewritten: u -> w, ui -> wei, un -> wen
:gsub("%fu?",{u="w",ui="wei",un="wen"})
-- a bare w becomes wu
:gsub("%fw%f","wu")
-- matches found in the table are rewritten: i -> y, iu -> you, iU -> rii
:gsub("%fi?",{i="y",iu="you",iU="rii"})
-- a bare y becomes yi
:gsub("%fy%f","yi")
-- any remaining iU is written as ii
:gsub("iU","ii")
-- U at a frontier becomes yu
:gsub("%fU","yu")
:gsub("()i%f","%1ii") -- give error for zhi
-- internal E is written as plain e
:gsub("E","e")
return text
end
-- normalize to initial+final, e.g. ju -> jü
-- Applies a fixed sequence of rewrites to internally encoded text and returns
-- the result, in the form that py_to_ipa looks up in the initials and finals
-- tables.
-- Raises an error if running py_canonize on the result does not give back
-- `text`; the message shows both spellings passed through decode_error.
-- NOTE(review): the frontier patterns (%f) below have no set after them and
-- several captures are empty `()`. Bracketed character classes appear to have
-- been dropped from this text, so the contexts in which each rewrite applies
-- cannot be read off here -- TODO restore.
local function py_normalize(text)
local res = text
-- u after the captured item is written as U
:gsub("()u","%1U")
-- matches found in the table are rewritten: wu -> u, wei -> ui, wen -> un
:gsub("w?",{wu="u",wei="ui",wen="un"})
-- any remaining w becomes u
:gsub("w","u")
-- matches found in the table are rewritten: yi -> i, yu -> U, you -> iu
:gsub("%fyu?",{yi="i",yu="U",you="iu"})
-- any remaining y at a frontier becomes i
:gsub("%fy","i")
-- ii after the captured item is written as iU
:gsub("()ii","%1iU")
-- riU at a frontier loses its r
:gsub("riU%f","iU")
-- e followed by tone 0, after the captured item, becomes the internal E
:gsub("()e0","%1E0")
-- round-trip check against the inverse transformation
local check = py_canonize(res)
if text ~= check then
error("Nanjing: "..decode_error(text).." should be "..decode_error(check))
end
return res
end
-- Converts normalized, internally encoded text to an IPA string wrapped in
-- slashes. Each matched syllable is split into four captures a, b, c, d; the
-- error messages show that a is the initial and b the final. c and d are
-- presumably the syllable's tone and an optional following tone -- confirm.
-- Raises an error for a syllable that does not match the pattern, or for an
-- unknown initial or final.
-- NOTE(review): the patterns below have lost their bracketed character classes,
-- and the table lookups have lost their index expressions: as written,
-- `initials`, `finals` and `tones` are used as whole tables. Presumably the
-- lookups were indexed by a, b, c and some combination of c and d -- TODO
-- restore from the authoritative copy.
local function py_to_ipa(text)
text = text:gsub("+",function(syllable)
local a,b,c,d = syllable:match("^(?)(*?r?)()(?)$")
if not a then error("Nanjing: Invalid syllable: " .. decode_error(syllable)) end
-- e is only looked up when the fourth capture is non-empty
local e = d~="" and tones
return (initials or error("Nanjing: Invalid initial: " .. decode_error(a)))
.. (finals or error ("Nanjing: Invalid final: " .. decode_error(b)))
.. tones
-- when e is set it is appended after a superscript minus sign
.. (e and ("⁻"..e) or "")
end)
return "/" .. text .. "/"
end
-- Entry point. Splits `text` on "/" into alternative readings, processes each
-- one, and returns four strings:
--   1. display text     (readings joined with " / ")
--   2. hidden text      (readings joined with " / ")
--   3. numbered text    (readings joined with " / ")
--   4. IPA              (readings joined with ", ", tone digits superscripted)
-- Errors raised by the helper functions for malformed input propagate to the
-- caller.
-- NOTE(review): this function has lost every bracketed span. The assignments to
-- conv_display / conv_hidden / conv_numbered / conv_ipa presumably had an index
-- (the counter i is incremented but never used in the visible text); as written
-- they overwrite the tables that table.concat expects at the end. The lookup in
-- tone_sandhi_num has no index, many patterns have empty captures or bare
-- quantifiers, and the two string concatenations ending in `" "` look
-- truncated. TODO restore from the authoritative copy before relying on it.
function export.py_process(text)
local conv_display = {}
local conv_hidden = {}
local conv_numbered = {}
local conv_ipa = {}
local i = 0
-- third argument true: "/" is treated as a plain separator
for reading in mw.text.gsplit(text,"/",true) do
i = i + 1
reading = py_divide_syllables(reading)
-- display form: whatever follows ">" is shown as a superscript with an arrow
conv_display = py_join_syllables(reading:gsub(">(+)","<sup>→%1</sup>"))
-- `original` keeps the part before ">", `phonetic` the part after it,
-- wrapped in the 6...7 markers
local original = reading:gsub("(+)>+","%1")
local phonetic = reading:gsub("+>(+)","6%17")
-- remove the "^" capitalisation markers from the phonetic form
phonetic = phonetic:gsub("%^","")
-- copy the second capture in front of the first as well as after it
reading = phonetic:gsub("%f(7?6?+)()","%2%1%2")
-- where tone_sandhi_num has an entry, write the syllable with the changed
-- tone inside 6...7 markers; otherwise keep the syllable with its own tone
phonetic = reading:gsub("(+)()()",function(a,b,c)
local d = tone_sandhi_num
return d and ('6'..a..d..'7') or (a..b)
end)
-- syllables ending in r3 are marked and rewritten to r2
phonetic = phonetic:gsub("(+)r3","6%1r27")
-- collapse repeated markers
phonetic = phonetic:gsub("6+","6"):gsub("7+","7")
-- NOTE(review): the second gsub here replaces "r2" with itself as written
reading = reading:gsub("(+)r3","%1r2"):gsub("r2","r2")
-- space-separated forms, with the trailing space removed
local original_num = original:gsub("() ?","%1 "):gsub(" $",""):gsub("%^","")
local phonetic_num = phonetic:gsub("(7?) ?","%1 "):gsub(" $","")
-- a "6" marker means at least one syllable was marked above
if phonetic:find("6") then
conv_hidden = py_join_syllables(original) .. " "
conv_numbered = py_numbered(original_num) .. " "
else
conv_hidden = py_join_syllables(original)
conv_numbered = py_numbered(original_num)
end
-- prepare the space-separated reading for IPA conversion
reading = reading:gsub("",""):gsub("(?) ?","%1 "):gsub(" $","")
reading = py_normalize(reading)
conv_ipa = py_to_ipa(reading)
end
return table.concat(conv_display, " / "),
table.concat(conv_hidden, " / "),
table.concat(conv_numbered, " / "),
tone_superscript(table.concat(conv_ipa, ", "))
end
return export