local export = {}
local m_string_utils = require("Module:string utilities")
local m_table = require("Module:table")
local m_data = require("Module:cpx-pron/data")
local sub = m_string_utils.sub
local find = m_string_utils.find
local gsub = m_string_utils.gsub
local match = m_string_utils.match
local toNFD = mw.ustring.toNFD
local toNFC = mw.ustring.toNFC
-- Single-character markers recognised inside a syllable's input notation.
-- CAPITALIZATION and SPACE_AFTER are consumed by get_syllable_markers;
-- NO_ASSIMILATION, NO_SANDHI and MANUAL_CHANGE are consumed by split_syllable.
local SPECIAL_MARKERS = {
NO_ASSIMILATION = "*", -- leading: sets no_assimilation on the syllable
NO_SANDHI = "#", -- trailing: sets no_sandhi on the syllable
MANUAL_CHANGE = ">", -- separates the original form from a manually given changed form
CAPITALIZATION = "^", -- leading: sets capitalize (used for BUC output)
SPACE_AFTER = "\\" -- trailing backslash: sets space_after (used for BUC output)
}
-- Names of the output format modes.
-- NOTE(review): nothing in the part of the module visible here reads this
-- table; confirm where these values are consumed.
local FORMAT_MODES = {
BRIEF = "brief",
COMPLETE = "complete",
DEMO = "demo"
}
-- Supported dialect codes (pt, nr, jk, xy, ft, yy) mapped to a display string.
-- Referenced by split_dialect_codes when validating dialect codes.
-- NOTE(review): every value here is the single character "]", which looks like
-- the remains of a wiki link whose bracketed text was lost — confirm against
-- the deployed module.
local dialects = {
pt = "]",
nr = "]",
jk = "]",
xy = "]",
ft = "]",
yy = "]"
}
-- Per-dialect IPA values for syllable initials; get_ipa_value reads this table
-- for lookups of type "initials".
-- NOTE(review): the key expressions are absent in this copy (entries read
-- `= "p"` with nothing on the left), so the table does not parse as written.
-- Restore the bracketed keys from the deployed module before editing values.
local initials = {
pt = {
= "p", = "pʰ", = "m",
= "t", = "tʰ", = "n", = "l",
= "t͡s", = "t͡sʰ", = "ɬ",
= "k", = "kʰ", = "ŋ", = "h",
= ""
},
jk = {
= "p", = "pʰ", = "m",
= "t", = "tʰ", = "n", = "l",
= "t͡s", = "t͡sʰ", = "ɬ",
= "k", = "kʰ", = "ŋ", = "h",
= ""
},
nr = {
= "p", = "pʰ", = "m",
= "t", = "tʰ", = "n", = "l",
= "t͡s", = "t͡sʰ", = "ɬ",
= "k", = "kʰ", = "ŋ", = "h",
= ""
},
xy = {
= "p", = "pʰ", = "m",
= "t", = "tʰ", = "n", = "l",
= "t͡s", = "t͡sʰ", = "ɬ",
= "k", = "kʰ", = "ŋ", = "h",
= "",
= "β",
},
yy = {
= "p", = "pʰ", = "m",
= "t", = "tʰ", = "n", = "l",
= "t͡s", = "t͡sʰ", = "θ",
= "k", = "kʰ", = "ŋ", = "h",
= "",
= "β",
},
ft = {
= "p", = "pʰ", = "m",
= "t", = "tʰ", = "n", = "l",
= "t͡s", = "t͡sʰ", = "ɬ",
= "k", = "kʰ", = "ŋ", = "h",
= "",
= "β",
},
}
-- Per-dialect IPA values for syllable finals; get_ipa_value reads this table
-- for lookups of type "finals".
-- NOTE(review): the key expressions are absent in this copy (entries read
-- `= "a"` with nothing on the left), so the table does not parse as written.
-- Restore the bracketed keys from the deployed module before editing values.
local finals = {
pt = {
= "a", = "ɛ", = "e", = "i", = "o",
= "ø", = "ɒ", = "u", = "y",
= "ai", = "au", = "ia", = "ieu", = "iu",
= "ɔu", = "ua", = "uei", = "ui", = "yɒ",
= "aŋ", = "ɒŋ", = "ɛŋ", = "œŋ", = "ɔŋ",
= "iŋ", = "iæŋ", = "uŋ", = "uaŋ", = "yŋ",
= "yɒŋ", = "ŋ̍",
= "aʔ", = "ɒʔ", = "ɛʔ", = "œʔ", = "ɔʔ",
= "iʔ", = "iæʔ", = "uʔ", = "uaʔ", = "iɛʔ",
= "uoʔ", = "yʔ", = "yɒʔ",
},
jk = {
= "a", = "e", = "ɛ", = "ø", = "œ",
= "ɒ", = "o", = "i", = "u", = "y",
= "ie", = "iɐu", = "iu", = "ai", = "au",
= "ou", = "uo", = "uɐi", = "ui", = "yø",
= "aŋ", = "ɛŋ", = "iŋ", = "uŋ", = "iɛŋ",
= "ɒŋ", = "œŋ", = "uoŋ", = "ŋ",
= "aʔ", = "ɛʔ", = "eʔ", = "oʔ", = "ɒʔ",
= "œʔ", = "uoʔ"
},
nr = {
= "a", = "e", = "ø", = "ɒ", = "o",
= "i", = "u", = "y", = "ia", = "ieu",
= "iu", = "ai", = "au", = "ɔ", = "ua",
= "uei", = "ui",
= "aŋ", = "ᴇŋ", = "iŋ", = "oŋ",
= "ɒŋ", = "œŋ", = "uəŋ", = "yŋ", = "ŋ",
= "aʔ", = "ᴇʔ", = "iʔ", = "oʔ", = "ɒʔ",
= "œʔ", = "uəʔ", = "yʔ"
},
xy = {
= "a", = "ɛ", = "e", = "i", = "o",
= "ø", = "ɒ", = "u", = "y",
= "ai", = "au", = "ia", = "ieu", = "iu",
= "ɔu", = "ua", = "uei", = "ui", = "ya",
= "aŋ", = "ɒŋ", = "ɛŋ",
= "iŋ", = "iɛŋ", = "yŋ",
= "yøŋ", = "uoŋ", = "ŋ̍",
= "aʔ", = "ɒʔ", = "ɛʔ",
= "iʔ", = "iɛʔ", = "uʔ",
= "uoʔ", = "yʔ", = "yøʔ",
= "iaʔ", = "uaʔ", -- iah, uah only for the pronoun checked tone (S3)
= "ã", = "ĩ", = "ỹ", = "ɒ̃", = "ãĩ",
= "ãũ", = "ĩã", = "ĩũ", = "ũã", = "ũĩ",
= "ỹã",
},
yy = {
= "a", = "e", = "ø", = "ɒ", = "o",
= "i", = "u", = "y", = "ia", = "iəu",
= "iu", = "ai", = "au", = "ou", = "ua",
= "uai", = "oi", = "ui", = "ya",
= "aŋ", = "ɛŋ", = "iŋ", = "oŋ", = "uŋ",
= "iɛŋ", = "ɒŋ", = "œŋ", = "uaŋ",
= "yɐŋ", = "yŋ", = "ŋ",
= "aʔ", = "ɛʔ", = "iʔ", = "oʔ", = "ɒʔ",
= "œʔ", = "iɛʔ", = "uaʔ", = "yɐʔ", = "yʔ",
= "ã", = "ẽ", = "ø̃", = "ɒ̃",
= "ĩã", = "ĩũ", = "ũã", = "ũĩ", = "ỹã"
},
ft = {
= "a", = "e", = "ɒ", = "ɤ", = "i",
= "u", = "ia", = "iəu", = "iu", = "ai",
= "au", = "ou", = "ua", = "uei", = "ui",
= "aŋ", = "ɛŋ", = "iŋ", = "ɒŋ",
= "ieŋ", = "ɯəŋ", = "ŋ",
= "aʔ", = "ɛʔ", = "iʔ", = "ɒʔ",
= "ieʔ", = "ɯəʔ",
= "ã", = "ĩ", = "ɒ̃",
= "ĩã", = "ĩũ", = "ũã", = "ũĩ"
}
}
-- Tone categories:
-- 1 dark level (yin ping) | 2 light level (yang ping) | 3 rising (shang) |
-- 4 dark departing (yin qu) | 5 light departing (yang qu) |
-- 6A dark entering A | 6B dark entering B | 7A light entering A | 7B light entering B
-- S1: variant of 1 in non-final position
-- S3: pronoun checked tone, sounds like the rising tone in both Putian and Xianyou after tone sandhi
--
-- Per-dialect tone-letter strings (superscript digits); get_ipa_value reads
-- this table for lookups of type "tones".
-- NOTE(review): the key expressions are absent in this copy (entries read
-- `= "⁵³³"` with nothing on the left), so the table does not parse as written.
-- Restore the bracketed keys from the deployed module before editing values.
local tones = {
pt = {
= "⁵³³", = "¹³", = "⁴⁵³", = "⁴²",
= "²¹", = "¹", = "²¹", = "⁴", = "¹³",
= "⁵⁵", = "³²", = "⁴⁵"
},
jk = {
= "⁵³³", = "¹³", = "⁴⁵³", = "⁴²",
= "²¹", = "¹", = "²¹", = "⁴", = "⁴⁵³",
= "⁵⁵", = "³²", = "⁴⁵"
},
nr = {
= "⁵³³", = "¹³", = "⁴⁵³", = "⁴²",
= "²¹", = "¹", = "²¹", = "⁴", = "¹³",
= "⁵⁵", = "³²", = "⁴⁵"
},
xy = {
= "⁵³³", = "¹³", = "³³²", = "⁴²",
= "²¹", = "¹", = "²¹", = "⁴", = "¹³",
= "⁵⁵", = "³²"
},
yy = {
= "⁵³³", = "¹³", = "³³²", = "⁴²",
= "²¹", = "¹", = "²¹", = "⁴", = "¹³",
= "⁵⁵", = "³²"
},
ft = {
= "⁵³³", = "¹³", = "³³²", = "⁴²",
= "²¹", = "¹", = "²¹", = "⁴", = "¹³",
= "⁵⁵", = "³²"
},
}
-- Suggested replacement spellings for finals. get_ipa_value consults the
-- `common` sub-table and the per-dialect sub-tables when a final is not found
-- in `finals`, and uses the result in its "Please use ... instead" error text.
-- NOTE(review): the key expressions are absent in this copy (entries read
-- `= "ao"` with nothing on the left), so the table does not parse as written.
-- Restore the bracketed keys from the deployed module before editing values.
local corrections = {
common = {
= "ao", = "ieo", = "ieo"
},
pt = {
= "ieo", = "yor", = "uei", = "uei",
= "yorh", = "yorng",
},
jk = {
= "ie", = "uo", = "uai", = "uai",
= "oe", = "oe", = "yoe",
},
nr = {
= "ieo", = "oo", = "uei", = "ua",
= "ua", = "ng", = "uerng", = "uerh",
},
xy = {
= "ieo", = "uei", = "yoeh",
= "yoeng", = "ieng", = "ng",
},
yy = {
= "ieo", = "oi", -- or "uai"
= "oi", -- or "uai"
},
ft = {
= "er", = "ieo", = "uei", = "ia",
= "ng", = "uerng", = "uerng", = "uerh",
}
}
--- Strip a trailing nasalization marker from a final.
-- Two spellings are recognised: the superscript "ⁿ" and the digraph "nn".
-- @param final string romanised final
-- @return the final without its trailing marker (unchanged if it has none)
-- @return the matched "ⁿ" if the final ended with it, otherwise nil
-- @return the matched "nn" if the final ended with it, otherwise nil
local function handle_nasalization(final)
	local old_mark = final:match("ⁿ$")
	local new_mark = final:match("nn$")

	local stem = final
	if old_mark then
		-- Parentheses keep only the string and drop gsub's replacement count.
		stem = (final:gsub("ⁿ$", ""))
	elseif new_mark then
		stem = (final:gsub("nn$", ""))
	end

	return stem, old_mark, new_mark
end
-- Tone sandhi lookup referenced by get_sandhi_tone. Each dialect holds ten
-- rows of nine result tones.
-- NOTE(review): presumably rows are keyed by the current syllable's tone and
-- columns by the following syllable's tone — the key expressions at both
-- levels are absent in this copy (entries read `= {="5", ...}`), so the table
-- does not parse as written. Restore the bracketed keys from the deployed
-- module before editing values.
local sandhi_rules = {
pt = {
= {="5", ="5", ="5", ="2", ="2", ="2", ="2", ="5", ="5"},
= {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
= {="5", ="2", ="5", ="5", ="2", ="2", ="2", ="5", ="2"},
= {="S1", ="4", ="S1", ="S1", ="4", ="4", ="4", ="S1", ="4"},
= {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
= {="S7", ="S7", ="S7", ="S7", ="4", ="4", ="4", ="S7", ="S7"},
= {="S1", ="S1", ="S1", ="S1", ="4", ="4", ="4", ="S1", ="S1"},
= {="6A", ="6A", ="6A", ="7A", ="4", ="4", ="4", ="6A", ="6A"},
= {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
= {="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A"},
},
jk = {
= {="5", ="5", ="5", ="2", ="2", ="2", ="2", ="5", ="5"},
= {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
= {="5", ="2", ="5", ="5", ="2", ="2", ="2", ="5", ="2"},
= {="S1", ="4", ="S1", ="S1", ="4", ="4", ="4", ="S1", ="4"},
= {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
= {="S7", ="S7", ="S7", ="S7", ="4", ="4", ="4", ="S7", ="S7"},
= {="S1", ="S1", ="S1", ="S1", ="4", ="4", ="4", ="S1", ="S1"},
= {="6A", ="6A", ="6A", ="7A", ="4", ="4", ="4", ="6A", ="6A"},
= {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
= {="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A"},
},
nr = {
= {="5", ="5", ="5", ="5", ="2", ="2", ="2", ="5", ="5"},
= {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
= {="5", ="2", ="5", ="5", ="2", ="2", ="2", ="5", ="2"},
= {="S1", ="S1", ="S1", ="S1", ="4", ="4", ="4", ="S1", ="S1"},
= {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
= {="S7", ="S7", ="S7", ="S7", ="4", ="4", ="4", ="S7", ="S7"},
= {="S1", ="S1", ="S1", ="S1", ="4", ="4", ="4", ="S1", ="S1"},
= {="6A", ="6A", ="6A", ="7A", ="4", ="4", ="4", ="6A", ="6A"},
= {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
= {="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A"},
},
xy = {
= {="5", ="5", ="5", ="2", ="2", ="2", ="2", ="5", ="5"},
= {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
= {="5", ="S1", ="5", ="5", ="2", ="2", ="2", ="5", ="S1"},
= {="S1", ="S1", ="S1", ="S1", ="4", ="4", ="4", ="S1", ="S1"},
= {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
= {="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A"},
= {="S1", ="S1", ="S1", ="S1", ="4", ="4", ="4", ="S1", ="S1"},
= {="6A", ="6A", ="6A", ="7A", ="7A", ="7A", ="7A", ="6A", ="6A"},
= {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
= {="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A"},
},
yy = {
= {="5", ="5", ="5", ="2", ="2", ="2", ="2", ="5", ="5"},
= {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
= {="5", ="S1", ="5", ="5", ="2", ="2", ="2", ="5", ="S1"},
= {="S1", ="S1", ="S1", ="S1", ="4", ="4", ="4", ="S1", ="S1"},
= {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
= {="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A"},
= {="S1", ="S1", ="S1", ="S1", ="4", ="4", ="4", ="S1", ="S1"},
= {="6A", ="6A", ="6A", ="7A", ="7A", ="7A", ="7A", ="6A", ="6A"},
= {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
= {="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A"},
},
ft = {
= {="5", ="5", ="5", ="2", ="2", ="2", ="2", ="5", ="5"},
= {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
= {="5", ="S1", ="5", ="5", ="2", ="2", ="2", ="5", ="S1"},
= {="S1", ="S1", ="S1", ="S1", ="4", ="4", ="4", ="S1", ="S1"},
= {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
= {="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A"},
= {="S1", ="S1", ="S1", ="S1", ="4", ="4", ="4", ="S1", ="S1"},
= {="6A", ="6A", ="6A", ="7A", ="7A", ="7A", ="7A", ="6A", ="6A"},
= {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
= {="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A"},
}
}
-- Initial assimilation lookup referenced by apply_initial_assimilation.
-- For each dialect the rules are grouped by the type of the preceding
-- syllable's final; the group names match the strings returned by
-- get_final_type ("nasal_final", "nasalized_final", "glottal_final",
-- "other_final"). pt, jk and nr define no "nasalized_final" group.
-- An empty group means the initial is left unchanged.
-- NOTE(review): the key expressions inside each group are absent in this copy
-- (entries read `= "m"` with nothing on the left), so the table does not parse
-- as written. Restore the bracketed keys from the deployed module before
-- editing values.
local initial_assimilation_rules = {
pt = {
nasal_final = {
= "m", = "m", = "m",
= "n", = "n", = "n", = "n", = "n", = "n", = "n",
= "ng", = "ng", = "ng", = "ng", = "ng"
},
glottal_final = {}, -- remain unchanged
other_final = {
= "", = "",
= "m", = "n", = "l", = "ng",
= "l", = "l", = "l", = "l", = "l",
= "", = "", = "", = ""
}
},
jk = {
nasal_final = {
= "m", = "m", = "m",
= "n", = "n", = "n", = "n", = "n", = "n", = "n",
= "ng", = "ng", = "ng", = "ng", = "ng"
},
glottal_final = {},
other_final = {
= "", = "",
= "m", = "n", = "l", = "ng",
= "l", = "l", = "l", = "l", = "l",
= "", = "", = "", = ""
}
},
nr = {
nasal_final = {
= "m", = "m", = "m",
= "n", = "n", = "n", = "n", = "n", = "n", = "n",
= "ng", = "ng", = "ng", = "ng", = "ng"
},
glottal_final = {},
other_final = {
= "", = "",
= "m", = "n", = "l", = "ng",
= "l", = "l", = "l", = "l", = "l",
= "", = "", = "", = ""
}
},
xy = {
nasal_final = {
= "m", = "m", = "m",
= "n", = "n", = "n", = "n", = "n", = "n", = "n",
= "ng", = "ng", = "ng", = "ng", = "ng"
},
nasalized_final = {
= "m", = "m", = "m",
= "n", = "n", = "n", = "n", = "n", = "n", = "n",
= "", = "", = "",
= "ng",
= ""
},
glottal_final = {},
other_final = {
= "w", = "w",
= "m", = "n", = "l", = "ng",
= "l", = "l", = "l", = "l", = "l",
= "", = "", = "", = ""
}
},
yy = {
nasal_final = {
= "m", = "m", = "m",
= "n", = "n", = "n", = "n", = "n", = "n", = "n",
= "ng", = "ng", = "ng", = "ng", = "ng"
},
nasalized_final = {
= "m", = "m", = "m",
= "n", = "n", = "n", = "n", = "n", = "n", = "n",
= "", = "", = "",
= "ng",
= ""
},
glottal_final = {},
other_final = {
= "w", = "w",
= "m", = "n", = "l", = "ng",
= "l", = "l", = "l", = "l", = "l",
= "", = "", = "", = ""
}
},
ft = {
nasal_final = {
= "m", = "m", = "m",
= "n", = "n", = "n", = "n", = "n", = "n", = "n",
= "ng", = "ng", = "ng", = "ng", = "ng"
},
nasalized_final = {
= "m", = "m", = "m",
= "n", = "n", = "n", = "n", = "n", = "n", = "n",
= "", = "", = "",
= "ng",
= ""
},
glottal_final = {}, -- remain unchanged
other_final = {
= "w", = "w",
= "m", = "n", = "l", = "ng",
= "l", = "l", = "l", = "l", = "l",
= "", = "", = "", = ""
}
},
}
-- BUC spellings of syllable initials, referenced by convert_to_buc_syllable.
-- NOTE(review): the key expressions are absent in this copy (entries read
-- `= "b"` with nothing on the left), so the table does not parse as written.
-- Restore the bracketed keys from the deployed module before editing values.
local buc_initials = {
= "b",
= "p",
= "m",
= "d",
= "t",
= "n",
= "l",
= "c",
= "ch",
= "s",
= "g",
= "k",
= "ng",
= "h",
= ""
}
-- Candidate BUC spellings of finals, referenced by convert_to_buc_syllable.
-- Each entry is a list of {spelling, tone_position} pairs; tone_position is
-- the 1-based character index that combine_buc_syllable attaches the tone
-- diacritic to.
-- NOTE(review): the key expressions are absent in this copy (entries read
-- `= {{"a", 1}, ...}` with nothing on the left), so the table does not parse
-- as written. Restore the bracketed keys from the deployed module before
-- editing values.
local buc_finals = {
= {{"a", 1}, {"aⁿ", 1}, {"ah", 1}},
= {{"e", 1}},
= {{"ah", 1}},
= {{"ai", 1}},
= {{"ang", 1}},
= {{"au", 1}},
= {{"a̤", 1}, {"a̤ⁿ", 1}, {"a̤h", 1}},
= {{"eh", 1}},
= {{"eng", 1}},
= {{"i", 1}, {"ih", 1}},
= {{"ia", 2}, {"iaⁿ", 2}, {"iah", 2}},
= {{"iah", 2}},
= {{"iah", 2}},
= {{"iang", 2}},
= {{"a̤u", 2}, {"a̤uⁿ", 2}, {"a̤uh", 2}}, -- on `u`
= {{"ih", 1}},
= {{"ing", 1}},
= {{"iu", 2}},
= {{"ng", 1}}, -- actually in the middle of `n` and `g`
= {{"eo", 2}, {"eoh", 2}},
= {{"e̤", 1}, {"e̤ⁿ", 1}},
= {{"e̤h", 1}},
= {{"e̤ng", 1}},
= {{"eoh", 2}},
= {{"eong", 2}},
= {{"o̤", 1}, {"o̤ⁿ", 1}, {"o̤h", 1}},
= {{"o̤h", 1}},
= {{"o̤ng", 1}},
= {{"o", 1}},
= {{"u", 1}},
= {{"ua", 2}, {"uaⁿ", 2}, {"uah", 2}},
= {{"uah", 2}},
= {{"uang", 2}},
= {{"oi", 1}, {"uai", 2}, {"oiⁿ", 1}, {"oih", 1}}, -- on `o`
= {{"uh", 1}},
= {{"ui", 1}}, -- on `u`
= {{"ng", 1}}, -- actually in the middle of `n` and `g`
= {{"ṳ", 1}},
= {{"ṳh", 1}},
= {{"ṳng", 1}},
= {{"io̤", 2}, {"io̤ⁿ", 2}, {"io̤h", 2}},
= {{"io̤h", 2}},
= {{"io̤ng", 2}}
}
-- Combining diacritic appended to a BUC final for each tone, referenced by
-- combine_buc_syllable. An empty string means no diacritic is added.
-- NOTE(review): the key expressions are absent in this copy (entries read
-- `= ""` with nothing on the left), so the table does not parse as written.
-- Restore the bracketed keys from the deployed module before editing values.
local buc_tones = {
= "", -- dark level (yin ping): no diacritic
= "́", -- light level (yang ping): u+0301
= "̂", -- rising (shang): u+0302
= "̍", -- dark departing (yin qu): u+030D
= "̄", -- light departing (yang qu): u+0304
= "", -- dark entering A: -h
= "̄", -- dark entering B
= "̍", -- light entering A: -h + u+030D
= "̍", -- light entering B: -h + u+030D
}
--- Split a delimited string of dialect codes into a list, validating each.
-- Codes are taken to be maximal runs of ASCII letters, so any non-letter
-- character (comma, slash, whitespace, ...) acts as a separator.
-- NOTE(review): the separator callers actually use is not visible from this
-- function; confirm that letter-run tokenisation matches it.
-- @param code_string string of dialect codes, e.g. "pt,xy"
-- @return array of dialect codes in input order
-- @raise error "Unsupported dialect: <code>" if a code has no entry in `dialects`
local function split_dialect_codes(code_string)
	local codes = {}
	for code in code_string:gmatch("%a+") do
		-- Look up the individual code; testing the `dialects` table itself is
		-- always truthy and would let every code through.
		if not dialects[code] then
			error("Unsupported dialect: " .. code)
		end
		codes[#codes + 1] = code
	end
	return codes
end
--- Strip the BUC-related markers from a raw syllable and report them.
-- Markers are removed in this order: a leading capitalization marker, a
-- trailing space-after marker, a trailing comma, and finally one
-- brace-delimited manual BUC form (`{...}`) anywhere in what remains.
-- @param syllable string raw syllable token
-- @return table with fields capitalize, space_after, comma_after (booleans)
--         and manual_buc (text between the braces, or nil)
-- @return the syllable with all recognised markers removed
local function get_syllable_markers(syllable)
	local markers = {
		capitalize = false,
		space_after = false,
		comma_after = false,
		manual_buc = nil
	}
	if syllable:sub(1, 1) == SPECIAL_MARKERS.CAPITALIZATION then
		markers.capitalize = true
		syllable = syllable:sub(2)
	end
	if syllable:sub(-1) == SPECIAL_MARKERS.SPACE_AFTER then
		markers.space_after = true
		syllable = syllable:sub(1, -2)
	end
	if syllable:sub(-1) == "," then
		markers.comma_after = true
		syllable = syllable:sub(1, -2)
	end
	-- Manual BUC: "{" + one or more non-"}" characters + "}". The capture
	-- returns the inner text directly.
	local manual_start, manual_end, manual_text = syllable:find("{([^}]+)}")
	if manual_start then
		markers.manual_buc = manual_text
		syllable = syllable:sub(1, manual_start - 1) .. syllable:sub(manual_end + 1)
	end
	return markers, syllable
end
--- Split a romanised syllable form (without tone) into initial and final.
-- A bare "ng" is a syllable with an empty initial; "ng" followed by more text
-- is the initial "ng". Otherwise the initial is a single leading consonant
-- letter optionally followed by "h", or empty if the form starts with
-- anything else.
-- NOTE(review): the consonant set below is a reconstruction; the character
-- class is not present in the copy this was reviewed from. The optional "h"
-- lets "bh" be split off as an initial so get_ipa_value can reject it with
-- its dedicated message. Confirm against the deployed module.
-- @param options table with field `form` (string)
-- @return initial (possibly "")
-- @return final (never empty)
-- @raise error if `form` is missing or no final can be extracted
local function split_initial_final(options)
	if not options or not options.form then
		error("split_initial_final: form is required")
	end
	local form = options.form
	local initial, final
	if form == "ng" then
		initial, final = "", form
	elseif form:sub(1, 2) == "ng" and #form > 2 then
		initial, final = "ng", form:sub(3)
	else
		initial = form:match("^[bpmdtnlzcsgkhw]h?") or ""
		final = form:sub(#initial + 1)
	end
	if not final or final == "" then
		error("Invalid form: " .. form .. " (unable to extract final)")
	end
	return initial, final
end
-- Phonological rule application functions
--- Classify a final for the purpose of initial assimilation.
-- Returns one of:
--   "nasal_final"     final ends in "ng"
--   "glottal_final"   final ends in "h"
--   "nasalized_final" final ends in "nn", or (dialect "xy" only) the syllable
--                     has a nasal initial (m, n, ng) and a final that would
--                     otherwise be "other_final"
--   "other_final"     everything else
-- The "xy" nasal-initial case is reported to the tracking module.
-- The classification is done in one pass; no recursive call is needed, which
-- also avoids re-entering the nasal-initial branch indefinitely.
-- @param options table with fields `final` (required), `initial`, `dialect`
-- @return one of the four strings above
-- @raise error if options is not a table or `final` is nil
local function get_final_type(options)
	if not options or type(options) ~= "table" then
		error("get_final_type: options must be a table")
	end
	local initial = options.initial
	local final = options.final
	local dialect = options.dialect
	if not final then
		error("get_final_type: final cannot be nil")
	end

	-- The suffixes compared are pure ASCII, so byte-wise sub() is sufficient.
	if final:sub(-2) == "ng" then
		return "nasal_final"
	end
	if final:sub(-1) == "h" then
		return "glottal_final"
	end
	if final:sub(-2) == "nn" then
		return "nasalized_final"
	end

	-- From here on the final is an "other" final on its own merits.
	local has_nasal_initial = initial == "m" or initial == "n" or initial == "ng"
	if dialect == "xy" and has_nasal_initial then
		require("Module:debug").track('cpx-pron/xy-nasal-initial/default-rule')
		return "nasalized_final"
	end
	return "other_final"
end
--- Report a BUC conversion problem to the tracking module.
-- @param reason string appended to the 'cpx-pron/' tracking prefix
local function track_buc_issue(reason)
	local tracker = require("Module:debug")
	tracker.track('cpx-pron/' .. reason)
end
--- Assemble one BUC syllable from its initial, final and tone.
-- The tone's combining diacritic (from `buc_tones`) is appended to the
-- character of the final at index `tone_position`, and the result is
-- NFC-normalised.
-- @param options table with fields:
--   initial       string BUC initial
--   final         string BUC final
--   tone          tone label used as the key into `buc_tones`
--   tone_position 1-based character index within `final`
-- @return the composed syllable string
-- @raise error if the tone is unknown or tone_position is out of range
local function combine_buc_syllable(options)
	local initial = options.initial
	local final = options.final
	local tone = options.tone
	local tone_position = options.tone_position

	local tone_mark = buc_tones[tone]
	if not tone_mark then
		error("Invalid tone: " .. tostring(tone))
	end

	-- Split the final into code points so the diacritic lands on the right one.
	local chars = {}
	for char in mw.ustring.gmatch(final, ".") do
		chars[#chars + 1] = char
	end

	if type(tone_position) ~= "number" or tone_position < 1 or tone_position > #chars then
		error("Invalid tone position: " .. tostring(tone_position))
	end
	chars[tone_position] = chars[tone_position] .. tone_mark

	return mw.ustring.toNFC(initial .. table.concat(chars))
end
--- Look up the recorded BUC readings for one character.
-- @param char string a single character
-- @return the entry stored under `char` in `m_data.buc`, or nil when the
--         data table is missing or has no entry for that character
local function lookup_char_readings(char)
	local readings_by_char = m_data.buc
	if not readings_by_char then
		return nil
	end
	-- Return only this character's entry, not the whole data table.
	return readings_by_char[char]
end
--- Convert a single PSP syllable to BUC.
-- Resolution order:
--   1. A manually supplied BUC form is validated and returned as-is.
--   2. The candidate BUC finals for the syllable's final are filtered by tone
--      and each survivor is combined with the BUC initial.
--   3. If exactly one candidate remains it is returned.
--   4. Without a character to check against (`options.char` is nil) the
--      result is only returned when the final has a single BUC spelling.
--   5. Otherwise the candidates are matched against the readings recorded for
--      the character; exactly one match is required.
-- Every failure path reports a reason via track_buc_issue and returns nil.
-- NOTE(review): `validate_manual_buc` is not defined in the part of the file
-- visible here; confirm it is in scope at this point.
-- @param options table with fields `syllable_info` (table) and `char`
--        (single character or nil)
-- @return BUC syllable string, or nil if it cannot be determined uniquely
local function convert_to_buc_syllable(options)
	local syllable_info = options.syllable_info
	local char = options.char

	-- If BUC is manually specified, verify it first.
	if syllable_info.manual_buc then
		local is_valid = validate_manual_buc(syllable_info.manual_buc)
		if not is_valid then
			track_buc_issue("manual form incorrect")
			return nil
		end
		return syllable_info.manual_buc
	end

	local lookup_tone = syllable_info.original_tone
	local lookup_final = syllable_info.original_final
	-- S3 is looked up as tone 3 on the final without its trailing "h".
	if lookup_tone == "S3" then
		lookup_tone = "3"
		if lookup_final:sub(-1) == "h" then
			lookup_final = lookup_final:sub(1, -2)
		end
	end

	local possible_finals = buc_finals[lookup_final]
	if not possible_finals then
		track_buc_issue("no final found")
		return nil
	end

	local initial = buc_initials[syllable_info.original_initial]
	if not initial then
		track_buc_issue("no initial found")
		return nil
	end

	-- Filter the candidate finals; special check for BUC tone 7B, which
	-- merged into tone 2.
	local psp_has_h = syllable_info.original_final:match("h$")
	local filtered_finals = {}
	for _, final_info in ipairs(possible_finals) do
		local final, tone_position = final_info[1], final_info[2]
		local final_has_h = final:match("h$")
		local should_keep = true
		local use_tone = lookup_tone

		if lookup_tone == "7B" then
			if final_has_h then
				final = final .. "*"
			else
				should_keep = false
			end
		end
		if final_has_h and not psp_has_h then
			if lookup_tone == "2" then
				use_tone = "7B"
				final = final .. "*"
			elseif lookup_tone ~= "7B" then
				should_keep = false
			end
		end

		if should_keep then
			filtered_finals[#filtered_finals + 1] = {
				final = final,
				tone_position = tone_position,
				tone = use_tone
			}
		end
	end

	local candidates = {}
	for _, final_info in ipairs(filtered_finals) do
		candidates[#candidates + 1] = combine_buc_syllable({
			initial = initial,
			final = final_info.final,
			tone = final_info.tone,
			tone_position = final_info.tone_position
		})
	end
	if #candidates == 1 then
		return candidates[1]
	end

	-- No need to look up the Hanzi-BUC table if the Hanzi and PSP counts
	-- don't match (the caller passes char = nil in that case).
	if not char then
		if #possible_finals > 1 then
			track_buc_issue("contraction and multiple final found")
			return nil
		end
		return combine_buc_syllable({
			initial = initial,
			final = possible_finals[1][1],
			tone = lookup_tone,
			tone_position = possible_finals[1][2]
		})
	end

	local char_readings = lookup_char_readings(char)
	if not char_readings then
		track_buc_issue("cannot look up table")
		return nil
	end

	local matches = {}
	for _, candidate in ipairs(candidates) do
		for _, reading in ipairs(char_readings) do
			if candidate == reading then
				matches[#matches + 1] = candidate
			end
		end
	end

	if #matches == 0 then
		track_buc_issue("no matching reading found")
		return nil
	elseif #matches > 1 then
		track_buc_issue("multiple matching readings found")
		return nil
	end

	-- temp: track tone-2 syllables that resolved to a 7B spelling
	if syllable_info.original_tone == "2" and matches[1]:match("h%*$") then
		require("Module:debug").track('cpx-pron/2-to-7B')
	end
	return matches[1]
end
--- Build the BUC transcription for a whole word (Putian only).
-- Each syllable is converted with convert_to_buc_syllable unless it carries a
-- manual BUC form. Syllables are joined with ", " after a comma-marked
-- syllable, " " after a space-marked syllable, and "-" otherwise.
-- The page title is used as the character source: when its length equals the
-- number of syllables, the i-th character is passed along for disambiguation.
-- Manual BUC forms are inserted verbatim; capitalization is applied only to
-- converted syllables.
-- @param options table with fields `syllable_infos` (array, required),
--        `dialect`, `word`
-- @return BUC string, or nil if the dialect is not "pt" or any syllable
--         cannot be converted
-- @raise error if `syllable_infos` is missing
local function generate_buc(options)
	if not options.syllable_infos then
		error("Missing required syllable_infos in generate_buc")
	end
	if options.dialect ~= "pt" then
		return nil
	end

	local syllable_infos = options.syllable_infos
	local page_title = mw.title.getCurrentTitle().text
	-- NOTE(review): both the pattern and the replacement are empty strings in
	-- the copy this was reviewed from, which makes this call a no-op; confirm
	-- which characters were meant to be stripped from the title.
	local chars = mw.ustring.gsub(page_title, "", "")
	local char_count = mw.ustring.len(chars)
	local check_char_table = (#syllable_infos == char_count)

	local buc_syllables = {}
	for i, syllable_info in ipairs(syllable_infos) do
		if syllable_info.manual_buc then
			buc_syllables[#buc_syllables + 1] = syllable_info.manual_buc
		else
			local syllable_result = convert_to_buc_syllable({
				syllable_info = syllable_info,
				char = check_char_table and mw.ustring.sub(chars, i, i) or nil,
				word = options.word
			})
			-- Give up if any syllable cannot be uniquely identified.
			if not syllable_result then
				return nil
			end
			if syllable_info.capitalize then
				-- Decompose first so only the base letter is upper-cased and
				-- its diacritics are carried over unchanged.
				local normalized = mw.ustring.toNFD(syllable_result)
				local first_char = mw.ustring.sub(normalized, 1, 1)
				syllable_result = mw.ustring.toNFC(
					mw.ustring.upper(first_char) ..
					mw.ustring.sub(normalized, 2)
				)
			end
			buc_syllables[#buc_syllables + 1] = syllable_result
		end
	end

	-- Join the syllables, choosing each separator from the flags of the
	-- syllable that precedes it.
	local result = {}
	for i = 1, #buc_syllables do
		result[#result + 1] = buc_syllables[i]
		if i < #buc_syllables then
			local info = syllable_infos[i]
			if info.comma_after then
				result[#result + 1] = ", "
			elseif info.space_after then
				result[#result + 1] = " "
			else
				result[#result + 1] = "-"
			end
		end
	end
	return table.concat(result)
end
-- Parse one raw syllable token into its components.
-- Steps, in order: strip the BUC markers (get_syllable_markers), strip a
-- leading no-assimilation marker, strip a trailing no-sandhi marker, separate
-- the form(s) from the tone part, split each form into initial and final,
-- then normalise the tone label.
-- Returns the `components` table declared below; raises an error when the
-- syllable is empty or cannot be parsed.
local function split_syllable(syllable)
-- Initialize result table
local components = {
orig_form = nil,
changed_form = nil,
tone_part = nil,
orig_initial = nil,
orig_final = nil,
changed_initial = nil,
changed_final = nil,
orig_tone = nil,
manual_sandhi_tone = nil,
no_sandhi = false,
no_assimilation = false,
-- BUC (only for Putian)
capitalize = false,
space_after = false,
comma_after = false,
manual_buc = nil
}
if not syllable or syllable == "" then
error("Invalid syllable: " .. tostring(syllable))
end
local markers, cleaned_syllable = get_syllable_markers(syllable)
components.capitalize = markers.capitalize
components.space_after = markers.space_after
components.comma_after = markers.comma_after
components.manual_buc = markers.manual_buc
syllable = cleaned_syllable
components.no_assimilation = syllable:sub(1, 1) == SPECIAL_MARKERS.NO_ASSIMILATION
if components.no_assimilation then
syllable = syllable:sub(2)
end
components.no_sandhi = syllable:sub(-1) == SPECIAL_MARKERS.NO_SANDHI
if components.no_sandhi then
syllable = syllable:sub(1, -2)
end
-- With a manual-change marker the syllable reads "orig>changed" followed by
-- the tone part; without it the changed form defaults to the original form.
-- NOTE(review): in both patterns below the tone capture reads `(+.*)`, which
-- only matches text starting with a literal "+". A character class appears to
-- be missing in front of the `+`; confirm against the deployed module.
if syllable:find(SPECIAL_MARKERS.MANUAL_CHANGE) then
components.orig_form, components.changed_form, components.tone_part =
syllable:match("(.-)>(.-)(+.*)$")
else
components.orig_form, components.tone_part =
syllable:match("(.-)(+.*)$")
components.changed_form = components.orig_form
end
-- If the segmentation is not correct
if not components.orig_form or not components.tone_part then
error("Invalid syllable format: " .. syllable)
end
-- Process form components
components.orig_initial, components.orig_final =
split_initial_final({form = components.orig_form})
components.changed_initial, components.changed_final =
split_initial_final({form = components.changed_form})
-- Process tone components
-- A "-" in the tone part separates the original tone from a manually given
-- sandhi tone. If the pattern fails, orig_tone stays nil and the final
-- validation below raises.
-- NOTE(review): both captures read `(+)`, i.e. a single literal "+"; the
-- character classes appear to be missing here as well — confirm.
if components.tone_part:find("-") then
components.orig_tone, components.manual_sandhi_tone =
components.tone_part:match("^(+)%-(+)$")
require("Module:debug").track('cpx-pron/manual sandhi tone')
else
components.orig_tone = components.tone_part
end
-- Special tone processing
-- Tone 3 on a changed final ending in "h" is relabelled S3.
if components.orig_tone == "3" and components.changed_final:sub(-1) == "h" then
components.orig_tone = "S3"
end
-- Tones 6 and 7 are split into A/B by whether the original final ends in "h".
if components.orig_tone == "6" then
if components.orig_final:sub(-1) == "h" then
components.orig_tone = "6A"
else
components.orig_tone = "6B"
end
elseif components.orig_tone == "7" then
if components.orig_final:sub(-1) == "h" then
components.orig_tone = "7A"
else
components.orig_tone = "7B"
end
end
-- final validation
if not (components.orig_initial and components.orig_final and components.orig_tone) then
error("Unable to parse syllable: " .. syllable)
end
return components
end
--- Build the working record for one syllable.
-- Parses `options.syllable` with split_syllable and copies the parsed fields
-- into the layout used by the later sandhi, assimilation and output steps.
-- @param options table with fields `syllable` (raw token) and
--        `is_first_syllable` (boolean)
-- @return table describing the syllable
local function create_syllable_info(options)
	local parsed = split_syllable(options.syllable)

	local info = {}
	-- Phonological components
	info.original_initial = parsed.orig_initial
	info.original_final = parsed.orig_final
	info.original_tone = parsed.orig_tone
	info.changed_initial = parsed.changed_initial
	info.changed_final = parsed.changed_final
	-- The changed tone starts out equal to the original tone.
	info.changed_tone = parsed.orig_tone
	-- Processing flags
	info.no_sandhi = parsed.no_sandhi
	info.no_assimilation = parsed.no_assimilation
	info.is_first_syllable = options.is_first_syllable
	info.manual_sandhi_tone = parsed.manual_sandhi_tone
	-- BUC
	info.capitalize = parsed.capitalize
	info.space_after = parsed.space_after
	info.comma_after = parsed.comma_after
	info.manual_buc = parsed.manual_buc
	return info
end
-- Syllable processing functions
--- Build the list of syllable records for a word.
-- `options.word` is split on whitespace; each token becomes one record, and
-- the first record is flagged with is_first_syllable = true.
-- @param options table with field `word` (string)
-- @return array of syllable records in input order
local function create_syllable_infos(options)
	local infos = {}
	for token in options.word:gmatch("%S+") do
		local is_first = (#infos == 0)
		infos[#infos + 1] = create_syllable_info({
			syllable = token,
			is_first_syllable = is_first
		})
	end
	return infos
end
--- Tidy up a syllable after initial assimilation (modifies it in place).
-- 1. If the changed initial is nasal (m, n, ng), a trailing "nn" on the
--    changed final is dropped, since the nasalization would be marked twice.
-- 2. If both the changed initial and the changed final are "ng", the initial
--    is cleared.
-- NOTE(review): the nasal-initial test is a reconstruction — in the copy this
-- was reviewed from, the pattern had lost its character class and matched
-- every initial. Confirm against the deployed module.
-- @param options table with field `syllable` (syllable record)
local function post_process_nasalization(options)
	local syllable = options.syllable

	-- Remove duplicate nasalization
	if syllable.changed_initial:match("^[mn]g?") then
		if syllable.changed_final:match("nn$") then
			-- Parentheses drop gsub's second return value (the match count).
			syllable.changed_final = (syllable.changed_final:gsub("nn$", ""))
		end
	end

	-- Simplify ng-initial syllables
	if syllable.changed_initial == "ng" and syllable.changed_final == "ng" then
		syllable.changed_initial = ""
	end
end
--- Determine the sandhi tone of a syllable from the tone that follows it.
-- Priority: a manually specified sandhi tone wins; a syllable with no
-- successor keeps its original tone; otherwise the result is looked up in
-- `sandhi_rules` by dialect, current tone and next tone. If no rule exists
-- at any level, the original tone is returned.
-- @param options table with fields `curr_syllable`, `next_syllable` (may be
--        nil) and `dialect`
-- @return tone label (string)
local function get_sandhi_tone(options)
	local curr_syllable = options.curr_syllable
	local next_syllable = options.next_syllable
	local dialect = options.dialect

	-- Handle manual tone specification
	if curr_syllable.manual_sandhi_tone then
		return curr_syllable.manual_sandhi_tone
	end

	-- Handle final syllable
	if not next_syllable then
		return curr_syllable.original_tone
	end

	-- Apply sandhi rules; each level is checked so that an unknown dialect or
	-- tone falls back to the original tone instead of raising an index error.
	local current_tone = curr_syllable.original_tone
	local next_tone = next_syllable.original_tone
	local dialect_rules = sandhi_rules[dialect]
	local tone_rules = dialect_rules and dialect_rules[current_tone]
	return (tone_rules and tone_rules[next_tone]) or current_tone
end
--- Compute the changed (sandhi) tone of every syllable in a word, in place.
-- A syllable keeps its original tone when it carries the no-sandhi marker,
-- is followed by a comma, or is the last syllable; a following space does
-- not block sandhi. Otherwise get_sandhi_tone decides. A resulting tone "3"
-- on a changed final ending in "h" is relabelled "S3".
-- Every non-final syllable is reported to the tracking module as
-- 'cpx-pron/sandhi/<dialect>/<tone>+<next tone>/<changed tone>'.
-- @param options table with fields `dialect` and `syllable_infos` (array of
--        syllable records, modified in place)
local function apply_sandhi(options)
	local dialect = options.dialect
	local syllable_infos = options.syllable_infos

	-- For tracking, S1 and S3 are kept as they are; any other "S"-prefixed
	-- tone is reported without the prefix.
	local function format_tone_for_tracking(tone)
		if tone ~= "S1" and tone ~= "S3" and tone:sub(1, 1) == "S" then
			return tone:sub(2)
		end
		return tone
	end

	for i = 1, #syllable_infos do
		local curr_syllable = syllable_infos[i]
		local next_syllable = syllable_infos[i + 1]
		local original_tone = curr_syllable.original_tone

		if curr_syllable.no_sandhi or
			curr_syllable.comma_after or
			not next_syllable then
			curr_syllable.changed_tone = original_tone
		else
			curr_syllable.changed_tone = get_sandhi_tone({
				curr_syllable = curr_syllable,
				next_syllable = next_syllable,
				dialect = dialect
			})
		end

		-- Special tone adjustment for glottal finals
		if curr_syllable.changed_tone == '3' and
			curr_syllable.changed_final:sub(-1) == 'h' then
			curr_syllable.changed_tone = 'S3'
		end

		-- Tracking
		if next_syllable then
			require("Module:debug").track('cpx-pron/sandhi/' .. dialect .. '/' ..
				format_tone_for_tracking(original_tone) .. '+' ..
				format_tone_for_tracking(next_syllable.original_tone) .. '/' ..
				format_tone_for_tracking(curr_syllable.changed_tone))
		end
	end
end
--- Apply initial assimilation across the syllables of a word.
-- The first syllable is passed through unchanged (and flagged as first). For
-- each later syllable, the type of the previous syllable's changed final
-- selects a rule group in `initial_assimilation_rules`, and the current
-- original initial is mapped through that group. Assimilation is skipped
-- when the syllable carries the no-assimilation marker, when the previous
-- syllable is followed by a comma, or when the changed initial was already
-- set manually (differs from the original initial). Afterwards
-- post_process_nasalization tidies the syllable.
-- Syllable records are modified in place; the returned array holds the same
-- records in the same order.
-- @param options table with fields `dialect` and `syllable_infos`
-- @return array of syllable records (empty if the input is empty)
local function apply_initial_assimilation(options)
	local dialect = options.dialect
	local syllable_infos = options.syllable_infos
	local result = {}
	if #syllable_infos == 0 then
		return result
	end

	local function is_nasal_initial(initial)
		return initial == "m" or initial == "n" or initial == "ng"
	end

	-- Handle first syllable
	result[1] = syllable_infos[1]
	result[1].is_first_syllable = true

	local dialect_rules = initial_assimilation_rules[dialect]

	-- Process subsequent syllables
	for i = 2, #syllable_infos do
		local prev_syllable = result[i - 1]
		local curr_syllable = syllable_infos[i]
		-- Store original initial for tracking
		local original_initial = curr_syllable.original_initial
		local rule_applied = false

		-- Manual override: in xy/yy/ft, the previous syllable has a nasal
		-- initial with a plain final, and the editor supplied a changed
		-- initial for the current syllable.
		local prev_final = prev_syllable.changed_final
		local is_manual_override = (dialect == "xy" or dialect == "yy" or dialect == "ft") and
			is_nasal_initial(prev_syllable.changed_initial) and
			not (prev_final:sub(-2) == "ng" or
				prev_final:sub(-1) == "h" or
				prev_final:sub(-2) == "nn") and
			curr_syllable.changed_initial ~= curr_syllable.original_initial
		if is_manual_override then
			require("Module:debug").track('cpx-pron/xy-nasal-initial/manual-override')
		end

		local original_final_type = get_final_type({
			initial = prev_syllable.changed_initial,
			final = prev_final,
			dialect = dialect
		})

		if not curr_syllable.no_assimilation and
			not prev_syllable.comma_after and
			curr_syllable.changed_initial == curr_syllable.original_initial then
			local final_type = original_final_type

			-- Special rule for nasalized finals: after a plain final, a
			-- syllable whose own final is nasalized uses the nasal_final rules.
			-- NOTE(review): the pattern "^" matches every string; a character
			-- class restricting which initials qualify appears to be missing.
			-- Left as found — confirm against the deployed module.
			local should_apply_nasal_rule =
				final_type == "other_final" and
				curr_syllable.original_initial:match("^") and
				get_final_type({
					initial = curr_syllable.original_initial,
					final = curr_syllable.original_final,
					dialect = dialect
				}) == "nasalized_final"
			if should_apply_nasal_rule then
				final_type = "nasal_final"
			end

			rule_applied = true

			-- Look up the rule; a missing dialect, group or entry leaves the
			-- initial unchanged (pt/jk/nr define no nasalized_final group).
			local group = dialect_rules and dialect_rules[final_type]
			curr_syllable.changed_initial =
				(group and group[curr_syllable.original_initial]) or
				curr_syllable.original_initial

			require("Module:debug").track('cpx-pron/assimilation/' .. dialect .. '/' .. original_final_type .. '/' .. original_initial .. '/' .. curr_syllable.changed_initial)
		end

		-- Track assimilation for manual override cases
		if not rule_applied and curr_syllable.changed_initial ~= original_initial then
			require("Module:debug").track('cpx-pron/assimilation/' .. dialect .. '/' .. original_final_type .. '/' .. original_initial .. '/' .. curr_syllable.changed_initial)
		end

		-- Post-process nasalization
		post_process_nasalization({
			syllable = curr_syllable,
			dialect = dialect
		})
		result[i] = curr_syllable
	end
	return result
end
--- Render the post-sandhi, post-assimilation romanisation of a word.
-- Each syllable is written as changed initial + changed final + changed tone;
-- syllables are separated by a single space.
-- (This function was previously defined twice with identical bodies; the
-- redundant copy has been removed.)
-- @param syllable_infos array of syllable records
-- @return space-separated string, "" for an empty array
local function generate_actual_pronunciation(syllable_infos)
	local pronunciations = {}
	for i, syllable in ipairs(syllable_infos) do
		pronunciations[i] = syllable.changed_initial ..
			syllable.changed_final ..
			syllable.changed_tone
	end
	return table.concat(pronunciations, " ")
end
--- Look up the IPA value of an initial, final or tone for a dialect.
-- For finals that are not found, the `corrections` tables (common first, then
-- dialect-specific) and the deprecated "ⁿ" nasal spelling are used to build a
-- "Please use ... instead" error message.
-- @param options table with fields:
--   type          "initials", "finals" or "tones"
--   dialect       dialect code
--   value         romanised value to look up
--   syllable_info optional syllable record, used only to show a full
--                 corrected syllable in the error message
-- @return IPA string (may be "" for the empty initial)
-- @raise error on missing parameters, unknown type, the rejected spellings
--        "bh" and "S5", or a value with no entry for the dialect
local function get_ipa_value(options)
	-- Validation
	if not options.type or not options.dialect or not options.value then
		error("Missing required parameter for IPA lookup")
	end

	-- Select the lookup table for the requested type. The local is not named
	-- `table`, so the standard library stays reachable.
	local lookup_tables = {
		initials = initials,
		finals = finals,
		tones = tones
	}
	local lookup = lookup_tables[options.type]
	if not lookup then
		error("Invalid lookup type: " .. options.type)
	end

	if options.type == "initials" and options.value == "bh" then
		error(string.format(
			'Invalid initial "bh" for %s dialect. Please use "w" instead.',
			options.dialect
		))
	end
	if options.type == "tones" and options.value == "S5" then
		error('Invalid tone S5. Please use "6" instead.')
	end

	local dialect_values = lookup[options.dialect]
	local result = dialect_values and dialect_values[options.value]

	if options.type == "finals" then
		local base_final, has_old_nasal, has_new_nasal = handle_nasalization(options.value)
		local nasal_suffix = ""
		if has_old_nasal then
			nasal_suffix = "nn"
			require("Module:debug").track('cpx-pron/deprecated-nasalization')
		elseif has_new_nasal then
			nasal_suffix = "nn"
		end

		if not result then
			-- Prefer a common correction, then a dialect-specific one; failing
			-- both, an old-style "ⁿ" final is corrected to the "nn" spelling.
			local dialect_corrections = corrections[options.dialect]
			local corrected_final = corrections.common[base_final]
				or (dialect_corrections and dialect_corrections[base_final])
			if not corrected_final and has_old_nasal then
				corrected_final = base_final
			end

			if corrected_final then
				local corrected_value = corrected_final .. nasal_suffix
				local full_syllable = ""
				if options.syllable_info then
					full_syllable = options.syllable_info.original_initial ..
						corrected_value ..
						options.syllable_info.original_tone
				end
				-- NOTE(review): the gsub below has an empty pattern and an
				-- empty replacement in the copy this was reviewed from, so it
				-- leaves the string unchanged; confirm what it was meant to
				-- strip from the suggested syllable.
				error(string.format(
					'Invalid final "%s" for %s dialect. Please use "%s" instead.',
					options.value,
					options.dialect,
					full_syllable ~= "" and full_syllable:gsub("", "") or corrected_value
				))
			end
		end
	end

	if not result then
		error(string.format(
			"Invalid %s %s for %s.",
			options.type:sub(1, -2),
			options.value,
			options.dialect
		))
	end
	return result
end
--- Collect the IPA initial, final and tone of one syllable.
-- The initial and final are taken from the changed (post-assimilation)
-- values. The tone is the original tone; if the changed tone differs, it is
-- appended after a superscript minus sign.
-- @param options table with fields `syllable_info` and `dialect`
-- @return table with string fields `initial`, `final` and `tone`
local function get_ipa_components(options)
	local info = options.syllable_info
	local dialect = options.dialect

	-- Lookups run in the order initial, final, tone, so the first invalid
	-- component is the one that gets reported.
	local ipa_initial = get_ipa_value({
		type = "initials",
		dialect = dialect,
		value = info.changed_initial,
		syllable_info = info
	})
	local ipa_final = get_ipa_value({
		type = "finals",
		dialect = dialect,
		value = info.changed_final,
		syllable_info = info
	})
	local ipa_tone = get_ipa_value({
		type = "tones",
		dialect = dialect,
		value = info.original_tone,
		syllable_info = info
	})

	-- Handle tone change
	if info.changed_tone ~= info.original_tone then
		local sandhi_tone = get_ipa_value({
			type = "tones",
			dialect = dialect,
			value = info.changed_tone
		})
		if not sandhi_tone then
			error("Invalid sandhi tone: " .. info.changed_tone ..
				" for dialect: " .. dialect)
		end
		ipa_tone = ipa_tone .. "⁻" .. sandhi_tone
	end

	return {
		initial = ipa_initial,
		final = ipa_final,
		tone = ipa_tone
	}
end
-- Build the superscript prefix that shows a syllable's original initial.
-- Returns "" for a first syllable or when the initial did not change.
-- Otherwise returns "<sup>(Ø-)</sup>" for an empty original initial, or the
-- IPA of the original initial wrapped as "<sup>(…-)</sup>".
local function get_original_initial_display(options)
	local info = options.syllable_info
	local before = info.original_initial

	if info.is_first_syllable then
		return ""
	end
	if before == info.changed_initial then
		return ""
	end

	-- The lookup is performed even for an empty initial.
	local ipa_before = get_ipa_value({
		type = "initials",
		dialect = options.dialect,
		value = before,
		syllable_info = info
	})

	if before == "" then
		return "<sup>(Ø-)</sup>"
	end
	return "<sup>(" .. ipa_before .. "-)</sup>"
end
-- Render one syllable as an IPA string.
-- The result is the original-initial marker (possibly empty) followed by the
-- IPA initial, final and tone.
local function syllable_to_ipa(options)
	-- Both helpers read only these two fields.
	local args = {
		syllable_info = options.syllable_info,
		dialect = options.dialect
	}

	-- Components are resolved before the marker is built.
	local parts = get_ipa_components(args)
	local marker = get_original_initial_display(args)

	local body = parts.initial .. parts.final
	return marker .. body .. parts.tone
end
-- Produce the IPA transcription of a whole word.
-- options.syllable_infos is the list of syllable info tables and
-- options.dialect the dialect code. Each syllable is converted with
-- syllable_to_ipa and the pieces are joined with single spaces.
-- Raises an error when options or options.syllable_infos is missing.
local function generate_ipa(options)
	if not (options and options.syllable_infos) then
		error("Missing required syllable_infos in generate_ipa")
	end

	local dialect_code = options.dialect
	local pieces = {}
	for position, info in ipairs(options.syllable_infos) do
		pieces[position] = syllable_to_ipa({
			syllable_info = info,
			dialect = dialect_code
		})
	end
	return table.concat(pieces, " ")
end
-- Process one pronunciation entry for every dialect it names.
-- options carries dialect_codes, word and index. The word is split into
-- syllable infos once; each dialect then works on its own deep copy, which
-- goes through initial assimilation and sandhi before the actual
-- pronunciation and IPA are generated. BUC is generated only for "pt".
-- Returns a table with dialect_codes, word, index and a `processed` list
-- holding one entry per dialect.
local function process_pronunciation(options)
	local word = options.word
	local entry_index = options.index
	local processed = {}
	local outcome = {
		dialect_codes = options.dialect_codes,
		word = word,
		processed = processed,
		index = entry_index
	}

	local dialect_list = split_dialect_codes(options.dialect_codes)
	local base_infos = create_syllable_infos({
		word = word,
		is_first_syllable = true
	})

	for position, code in ipairs(dialect_list) do
		-- Each dialect works on a private copy of the syllable infos.
		local private_infos = m_table.deepCopy(base_infos)
		local syllables = apply_initial_assimilation({
			dialect = code,
			syllable_infos = private_infos
		})
		apply_sandhi({
			dialect = code,
			syllable_infos = syllables
		})

		local entry = {
			dialect = code,
			dialect_position = position,
			original = word,
			index = entry_index,
			syllable_infos = syllables
		}
		entry.actual = generate_actual_pronunciation(syllables)
		entry.ipa = generate_ipa({
			syllable_infos = syllables,
			dialect = code
		})

		-- BUC is produced for the "pt" dialect only.
		if code == "pt" then
			entry.buc = generate_buc({
				syllable_infos = syllables,
				dialect = code,
				word = word
			})
		end

		processed[#processed + 1] = entry
	end

	return outcome
end
-- Formatting helper functions
-- Wrap `text` in a span carrying the "zhpron-monospace" class.
-- (A commented-out earlier variant used an inline Consolas font-family.)
local function font_consolas(text)
	local opening = '<span class="zhpron-monospace">'
	local closing = '</span>'
	return opening .. text .. closing
end
-- Wrap `text` in slashes inside a span carrying the "IPA" class.
local function font_ipa(text)
	local slashed = "/" .. text .. "/"
	return '<span class="IPA">' .. slashed .. '</span>'
end
-- Clean up a pronunciation string for display by applying the chained
-- substitutions below in order.
-- Returns "" when `text` is nil or false. Otherwise the function returns the
-- result of the last gsub call directly, so the substitution count comes back
-- as a second value after the cleaned string; callers that need exactly one
-- value should wrap the call in parentheses.
-- NOTE(review): the patterns "+", "{+}" and "" below do not look able to do
-- what their trailing comments describe -- confirm them against the intended
-- character classes.
local function clear_pinging_format(text)
if not text then
return ""
end
return text:gsub("%-S?%d", "") -- remove tone sandhi
:gsub(">+", "") -- remove irregular sound change
:gsub("+", "") -- remove special symbols
:gsub("{+}", "") -- remove manual BUC
:gsub("", "") -- remove A/B
:gsub("(%d)", "<sup>%1</sup>") -- superscript tone numbers
:gsub("S", "") -- remove "S" in special tones
end
-- Output formatting functions
-- Build the "demo" output: one line per processed pronunciation showing the
-- original syllables, " → ", the changed syllables, then "<br/>" and the IPA
-- wrapped by font_ipa. Lines are joined with a blank line ("\n\n").
-- options.results is the list of results whose `processed` entries are shown.
-- options.type selects what is bolded (defaults to "default", i.e. nothing):
--   "assim"  - the non-empty initial of every syllable except the first
--   "sandhi" - the tone of every syllable except the last
local function format_demo_output(options)
local results = options.results
local highlight_type = options.type or "default"
local output = {}
for _, result in ipairs(results) do
for _, processed in ipairs(result.processed) do
local syllable_infos = processed.syllable_infos
local orig_parts = {}
local actual_parts = {}
-- Process each syllable
for i, syllable_info in ipairs(syllable_infos) do
local is_first = (i == 1)
local is_last = (i == #syllable_infos)
-- Process original syllable
local orig_initial = syllable_info.original_initial
local orig_final = syllable_info.original_final
-- Only the first gsub result (the string) is kept; "S" is dropped from the tone
local orig_tone = syllable_info.original_tone:gsub("S", "")
local orig_text = orig_initial .. orig_final
-- Apply bold formatting based on highlight_type
if highlight_type == "assim" and not is_first and orig_initial ~= "" then
orig_text = "'''" .. orig_initial .. "'''" .. orig_final
elseif highlight_type == "sandhi" and not is_last then
orig_tone = "'''" .. orig_tone .. "'''"
end
-- Add tone as superscript and clear format
-- NOTE(review): the trailing gsub has an empty pattern here -- confirm the intended pattern
orig_text = orig_text .. orig_tone:gsub("(%d)", "<sup>%1</sup>"):gsub("", "")
table.insert(orig_parts, orig_text)
-- Process actual syllable
local actual_initial = syllable_info.changed_initial
local actual_final = syllable_info.changed_final
local actual_tone = syllable_info.changed_tone:gsub("S", "")
local actual_text = actual_initial .. actual_final
-- Apply bold formatting based on highlight_type
if highlight_type == "assim" and not is_first and actual_initial ~= "" then
-- Bold non-first syllable initials for assim type
actual_text = "'''" .. actual_initial .. "'''" .. actual_final
elseif highlight_type == "sandhi" and not is_last then
-- Bold non-last syllable tones for sandhi type
actual_tone = "'''" .. actual_tone .. "'''"
end
-- Add tone as superscript and clear format
-- NOTE(review): same empty-pattern gsub as for the original syllable above
actual_text = actual_text .. actual_tone:gsub("(%d)", "<sup>%1</sup>"):gsub("", "")
table.insert(actual_parts, actual_text)
end
-- Build the output line
local line = table.concat(orig_parts, " ")
line = line .. " → " .. table.concat(actual_parts, " ")
-- Add IPA
line = line .. "<br/>" .. font_ipa(processed.ipa)
table.insert(output, line)
end
end
return table.concat(output, "\n\n")
end
-- Build the "brief" output: a small italic label followed by the cleaned
-- pronunciations joined with " / " inside a font_consolas span.
-- options.results is the list of results produced by process_pronunciation.
-- Results whose `processed` list is missing or empty are ignored.
-- NOTE(review): several index expressions and link strings in this function
-- look truncated in this copy (e.g. the lookups on `seen_pronunciations`,
-- `dialect_codes` and `dialects`) -- confirm against the intended code before
-- relying on the descriptions below.
local function format_brief_output(options)
local results = options.results
local output_parts = {}
local dialect_codes = {}
local seen_pronunciations = {}
local order = {}
-- Collect pronunciations and dialect codes in their original order
for _, result in ipairs(results) do
if result.processed and #result.processed > 0 then
local original = result.processed.original
local cleared_text = clear_pinging_format(original)
-- If the cleaned pronunciation has not appeared before, record its order
if not seen_pronunciations then
seen_pronunciations = {
original = original,
index = result.index
}
table.insert(order, cleared_text)
end
-- Collect dialect codes
for _, processed in ipairs(result.processed) do
if not dialect_codes then
dialect_codes = true
end
end
end
end
-- Flatten the collected dialect codes into a list (pairs order is unspecified)
local dialect_codes_array = {}
for code, _ in pairs(dialect_codes) do
table.insert(dialect_codes_array, code)
end
local output = " " -- "Puxian Min" already written in zh-pron
-- The dialect name is only spelled out when exactly one dialect is involved
if #dialect_codes_array == 1 then
output = output .. "<small>(<i>" .. dialects] .. ", "
else
output = output .. "<small>(<i>"
end
output = output .. "]</i>): </small>"
-- Generate the pronunciation parts in the original order
if #order > 0 then
local formatted = {}
for _, cleared_text in ipairs(order) do
table.insert(formatted, cleared_text)
end
output = output .. font_consolas(table.concat(formatted, " / "))
end
return output
end
-- Build the "complete" output: for every result (sorted by `index`), the
-- processed entries are grouped by original|actual|ipa and each group is
-- rendered as a "**" bullet listing its dialects, followed by "***" bullets
-- for the romanisation (plus a "Phonetic" note when the cleaned original and
-- actual forms differ), the BUC form (only when the group's dialect is "pt"
-- and a BUC value exists) and the IPA.
-- options.results is the list of results; it is copied before sorting so the
-- caller's list keeps its order. Returns all pieces concatenated.
-- Declared `local`, like the other formatters, so the function does not leak
-- into the global environment; it is called from export.rom_display, which is
-- defined further down in this file.
-- NOTE(review): several index expressions and link strings below look
-- truncated in this copy (e.g. the lookups on `grouped` and `dialects`) --
-- confirm against the intended code.
local function format_complete_output(options)
local results = options.results
local output = {}
-- Sort a copy so the input list is left untouched
local ordered_results = {}
for _, result in ipairs(results) do
table.insert(ordered_results, result)
end
table.sort(ordered_results, function(a, b)
return a.index < b.index
end)
for _, result in ipairs(ordered_results) do
local grouped = {}
local group_keys = {}
for _, processed in ipairs(result.processed) do
-- Entries with identical original, actual and IPA forms share one group
local key = processed.original .. "|" .. processed.actual .. "|" .. processed.ipa
if not grouped then
grouped = {
data = {
original = processed.original,
actual = processed.actual,
ipa = processed.ipa,
buc = processed.buc,
dialect = processed.dialect
},
dialects = {}
}
table.insert(group_keys, key)
elseif not grouped.data.buc and processed.buc then
-- A later entry may supply the BUC form the group was created without
grouped.data.buc = processed.buc
grouped.data.dialect = processed.dialect
end
table.insert(grouped.dialects, {
code = processed.dialect,
position = processed.dialect_position
})
end
-- group_keys preserves first-seen order of the groups
for _, key in ipairs(group_keys) do
local group = grouped
table.sort(group.dialects, function(a, b)
return a.position < b.position
end)
local dialect_names = {}
for _, dialect_info in ipairs(group.dialects) do
table.insert(dialect_names, dialects)
end
table.insert(output, "\n** <small>(<i>" .. table.concat(dialect_names, ", ") .. "</i>)</small>")
-- Pouseng Ping'ing
table.insert(output, "\n*** <small><i>]</i></small>: " ..
font_consolas(clear_pinging_format(group.data.original)))
if clear_pinging_format(group.data.original) ~= clear_pinging_format(group.data.actual) then
table.insert(output, font_consolas(
" [<small>Phonetic</small>: " .. clear_pinging_format(group.data.actual)) ..
"]")
end
-- BUC
if group.data.dialect == "pt" and group.data.buc then
-- Only the first gsub result (the string) is kept; "*" is not displayed
local displayed_buc = group.data.buc:gsub("%*", "")
table.insert(output, "\n*** <small><i>]</i></small>: " ..
font_consolas(displayed_buc))
end
-- IPA
table.insert(output, '\n*** <small>Sinological ] ' ..
'<sup>(])</sup></small>: ' .. font_ipa(group.data.ipa))
end
end
return table.concat(output)
end
-- Main entry point.
-- May be called either with positional values (text, mode, highlight_type) or
-- with a single table as `text`, in which case the values are read from its
-- `args` field (highlight_type from `args.type`).
-- `mode` defaults to FORMAT_MODES.BRIEF and `highlight_type` to "default".
-- Each piece of `text` yielded by gmatch must have the form
-- "<dialect codes>:<word>"; because both captures are greedy, the split
-- happens at the last colon of the piece. Every piece is run through
-- process_pronunciation with a running 1-based index, and the collected
-- results are handed to the formatter selected by `mode`.
-- Raises an error for missing/empty text, for a piece without the
-- "codes:word" shape, and for an unsupported mode.
-- NOTE(review): the `text.args` reads and the gmatch pattern look truncated
-- in this copy -- confirm against the intended code.
function export.rom_display(text, mode, highlight_type)
if type(text) == "table" then
highlight_type = text.args.type
mode = text.args or mode
text = text.args
end
-- Parameter validation
if not text or text == "" then
error("Invalid input: text must be a non-empty string")
end
mode = mode or FORMAT_MODES.BRIEF
highlight_type = highlight_type or "default"
-- Shared options table passed to whichever formatter is selected
local pronunciation_data = {
results = {},
mode = mode,
type = highlight_type
}
-- Process each pronunciation in the input
local index = 1
for pronunciation in text:gmatch("+") do
local dialect_codes, word = pronunciation:match("^(.+):(.+)$")
if not dialect_codes or not word then
error("Invalid input format: " .. pronunciation)
end
local pron_options = {
dialect_codes = dialect_codes,
word = word,
index = index
}
table.insert(pronunciation_data.results,
process_pronunciation(pron_options))
index = index + 1
end
-- Format output according to the specified mode
if mode == FORMAT_MODES.BRIEF then
return format_brief_output(pronunciation_data)
elseif mode == FORMAT_MODES.COMPLETE then
return format_complete_output(pronunciation_data)
elseif mode == FORMAT_MODES.DEMO then
return format_demo_output(pronunciation_data)
else
error("Unsupported mode: " .. mode)
end
end
-- Convert single BUC syllable to PSP.
-- `input` is either the syllable itself or a table whose `args` field is used
-- as the syllable. A nil or empty syllable is returned unchanged.
-- The conversion runs inside pcall: the tone number is derived from the
-- combining diacritic found on the NFD-decomposed syllable (together with a
-- trailing "h" or "h*"), the diacritic is removed, and the initial and final
-- are translated through the two lookup tables below. The result is
-- initial .. final .. tone, lower-cased. When the final is not in the table,
-- or the conversion raises an error, the original syllable is returned.
-- NOTE(review): the keys of both lookup tables and the patterns/indices used
-- to extract the initial look truncated in this copy -- confirm against the
-- intended code.
local function syllable_to_psp(input)
-- BUC initial -> PSP initial
local buc_to_psp_initials = {
= "b", = "c", = "z",
= "d", = "g", = "h",
= "k", = "l", = "m",
= "ng", = "n", = "p",
= "s", = "t", = ""
}
-- BUC final (tone mark already removed) -> PSP final
local buc_to_psp_finals = {
= "a",
= "a",
= "ah",
= "a",
= "ai",
= "ang",
= "ao",
= "e",
= "e",
= "eh",
= "e",
= "ae",
= "eh",
= "eng",
= "oe",
= "oe",
= "oeh",
= "oeng",
= "i",
= "ih",
= "i",
= "ing",
= "ia",
= "ia",
= "iah",
= "ia",
= "ieng",
= "iu",
= "ou",
= "or",
= "or",
= "orh",
= "or",
= "orng",
= "o",
= "oh",
= "o",
= "ong",
= "u",
= "uh",
= "ua",
= "ua",
= "uah",
= "ua",
= "uang",
= "ui",
= "uei",
= "uei",
= "uei",
= "uei",
= "uei",
= "y",
= "yh",
= "yng",
= "yor",
= "yor",
= "yorh",
= "yor",
= "yorng",
= "ng",
= "ieo",
= "ieo",
= "ieoh",
= "ieo"
}
-- Handle input parameter
local syllable
if type(input) == "table" then
syllable = input.args
else
syllable = input
end
if not syllable or syllable == "" then
return syllable
end
-- Try to convert the syllable, return original if any error occurs
local success, result = pcall(function()
-- Decompose the syllable and check for validity
local decomposed = mw.ustring.toNFD(syllable)
if not decomposed then
return syllable
end
-- Extract and remove tone marks
-- (each `decomposed = decomposed:gsub(...)` keeps only the string result)
local tone = ""
if decomposed:find("́") then -- Tone 2: COMBINING ACUTE ACCENT
tone = "2"
decomposed = decomposed:gsub("́", "")
elseif decomposed:find("̂") then -- Tone 3: COMBINING CIRCUMFLEX ACCENT
tone = "3"
decomposed = decomposed:gsub("̂", "")
elseif decomposed:find("̍") then -- Tone 4/7: COMBINING VERTICAL LINE ABOVE
if decomposed:find("h%*$") then -- Special case: -h* ending -> tone 2
-- NOTE(review): this inner check can never fire, because this branch is
-- only entered when the vertical line was already found above.
if not decomposed:find("̍") then -- If has h* but no vertical line
return syllable
end
tone = "2"
elseif decomposed:find("h$") then
tone = "7"
else
tone = "4"
end
decomposed = decomposed:gsub("̍", "")
elseif decomposed:find("̄") then -- Tone 5: COMBINING MACRON
tone = "5"
decomposed = decomposed:gsub("̄", "")
else
-- No tone mark: either tone 1 (no -h) or tone 6 (with -h)
if decomposed:find("h$") and not decomposed:find("h%*$") then
tone = "6"
else
tone = "1"
end
end
-- Recompose and check validity
local normalized = mw.ustring.toNFC(decomposed)
if not normalized then
return syllable
end
-- Special case: standalone `ng` syllable after tone removal
if normalized == "ng" then
return "ng" .. tone
end
-- Extract initial
-- The first two branches strip a two-byte initial, the third a one-byte one;
-- if none matches, the initial stays empty.
local initial = ""
if normalized:match("^") then
initial = normalized:sub(1, 2):lower()
normalized = normalized:sub(3)
elseif normalized:match("^") then
initial = normalized:sub(1, 2):lower()
normalized = normalized:sub(3)
elseif normalized:match("^") then
initial = normalized:sub(1, 1):lower()
normalized = normalized:sub(2)
end
local psp_initial = buc_to_psp_initials or ""
-- Process final
-- Remove -h* marker if present (affects tone but not final lookup)
local final = normalized:gsub("h%*$", "")
-- Look up PSP final
local psp_final = buc_to_psp_finals
if not psp_final then
return syllable
end
-- Combine all parts to form complete PSP syllable
return (psp_initial .. psp_final .. tone):lower()
end)
-- Return original syllable if conversion failed
-- (also falls back to the original when the converted result is nil or false)
return success and result or syllable
end
-- Convert BUC to PSP (both single syllable and text).
-- `input` is either the text itself or a table whose `args` field is used as
-- the text. Nil or empty text is returned unchanged.
-- The text is cut into parts at every match of `pattern`, keeping each
-- delimiter as its own part; the parts are then converted and joined back
-- together without any separator.
-- NOTE(review): `pattern`, the delimiter test and the indexing of `parts` in
-- the final loop look truncated in this copy -- confirm against the intended
-- code.
function export.buc_to_psp(input)
-- Handle input parameter
local text
if type(input) == "table" then
text = input.args
else
text = input
end
if not text or text == "" then
return text
end
-- Split text into parts by delimiters while keeping delimiters
local parts = {}
local last_pos = 1
local pattern = ""
-- The empty "()" capture yields the position at which each delimiter starts
for pos, delimiter in mw.ustring.gmatch(text, "()("..pattern..")") do
if pos > last_pos then
table.insert(parts, mw.ustring.sub(text, last_pos, pos - 1))
end
table.insert(parts, delimiter)
last_pos = pos + mw.ustring.len(delimiter)
end
-- Handle the last part
if last_pos <= mw.ustring.len(text) then
table.insert(parts, mw.ustring.sub(text, last_pos))
end
-- Convert syllables and keep delimiters
for i = 1, #parts do
if not parts:match("^$") then
parts = syllable_to_psp(parts)
end
end
return table.concat(parts)
end
return export