-- Generates IPA based on romanisation for Wu Chinese. See {{zh-pron}}.
-- Rimes and tones are found in Module:wuu-pron/data.
local export = {}
local data = mw.loadData("Module:wuu-pron/data")
--[=[
TODO:
- do IPA for glottalised nasal initials (currently the glottal stop is dropped)
- FIND DATA FOR 3+ SYLLABLE SANDHI AND RPS!!! (we can settle for trisyllabics right)
- northern wu data: FIRST PRIORITY should be
- CHUANSHA 川沙 (sandhi found in 當代, needs understanding)
- NEW KUNSHAN 崑山新派 (trisyllabic sandhi needed)
- HUZHOU 湖州 (trisyllabic sandhi needed)
- XIAOSHAN 蕭山 (大西博子 source doesn't exactly line up with wugniu 市志)
- CIXI 慈溪 (慈溪方言研究 PDF exists but currently unretrieved, please inform @ND381 if a working PDF is in your possession ASAP!! zia-zia)
- any other northern wu points are good!! pls note the source u used in the EDIT SUMMARY if u do add one zia-zia
- MODULES for TAIZHOU & WENZHOU
→ and then we're done (probably)
]=]--
-- Display name of every supported lect, keyed by its two-letter location code
-- (the same codes used in `order`, `wiki_names` and the input prefixes).
-- The trailing columns record which data exist for each lect, in this order:
-- Basic (initials/finals/tones), Disyll LPS, Trisyll LPS, Quad+ LPS, RPS, Logic
local loc_names = {
	--NORTHERN
	sh = 'Shanghai',  -- + + + + + +
	jd = 'Jiading',   -- + + + - - +
	sj = 'Songjiang', -- + + + - - +
	cs = 'Chuansha',  -- + + + - - -
	cm = 'Chongming', -- + + + - + +
	sz = 'Suzhou',    -- + + + + + +
	ks = 'Kunshan',   -- + + - - - - Wugniu "新派" (new variety)
	yx = 'Yixing',    -- + - - - - -
	cz = 'Changzhou', -- + + + - - +
	jj = 'Jingjiang', -- + + - - - -
	jx = 'Jiaxing',   -- + + + - - +
	tx = 'Tongxiang', -- + + + - - +
	hn = 'Haining',   -- + + + - + + Xiashi locality
	hy = 'Haiyan',    -- + + + - - +
	dq = 'Deqing',    -- + - - - - -
	hz = 'Hangzhou',  -- + + + + + +
	xs = 'Xiaoshan',  -- + - - - - -
	sx = 'Shaoxing',  -- + + + - + +
	cx = 'Cixi',      -- + + - - - -
	nb = 'Ningbo',    -- + + + - + +
	zs = 'Zhoushan',  -- + - - - - -
	-- Jinhua + + + + + +
	-- TAIZHOU
	-- Huangyan + + - - -
	-- Linhai + + - - -
	-- Sanmen + - - - -
	-- Tiantai +/- + - - -
	-- Wenling + + - - -
	-- Wenzhou need to reconcile differences between sources
}
-- Wikipedia article name for each lect, keyed by location code.
-- A lect with no entry here defaults to "<loc_name> dialect".
local wiki_names = {}
for _, code in ipairs({'jd', 'sj', 'cs'}) do
	wiki_names[code] = 'Shanghainese#Classification'
end
for _, code in ipairs({'yx', 'jj', 'jx', 'tx', 'hn', 'hy', 'dq', 'xs', 'cx', 'zs'}) do
	wiki_names[code] = 'Taihu Wu'
end
-- Set of flags (every value is `true`) that export.make checks before it
-- produces a MiniDict romanisation line.
-- NOTE(review): the keys of all nine entries are missing in this copy, so the
-- table does not parse as written. They are presumably location codes, since
-- export.make consults the table inside its per-location loop — restore them
-- from the upstream module.
local minidict = {
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true
}
-- Location codes in the order in which export.make emits them.
local order = {
	"sh", "jd", "sj", "cm", "cs", "sz", "ks",
	"yx", "cz", "jj", "jx", "tx", "hn", "hy",
	"dq", "hz", "xs", "sx", "cx", "nb", "zs",
}
-- Default IPA value of each Wugniu initial, shared by all lects unless a
-- per-location override replaces it. The empty key is the zero initial.
local ipa_initial = {
	["p"] = "p", ["ph"] = "pʰ", ["b"] = "b", ["m"] = "m", ["f"] = "f", ["v"] = "v",
	["t"] = "t", ["th"] = "tʰ", ["d"] = "d", ["n"] = "n", ["l"] = "l",
	["ts"] = "t͡s", ["tsh"] = "t͡sʰ", ["s"] = "s", ["z"] = "z", ["c"] = "t͡ɕ", ["ch"] = "t͡ɕʰ",
	["dz"] = "d͡z", ["j"] = "d͡ʑ", ["gn"] = "n̠ʲ", ["sh"] = "ɕ", ["zh"] = "ʑ",
	["k"] = "k", ["kh"] = "kʰ", ["g"] = "ɡ", ["ng"] = "ŋ", ["h"] = "h", ["gh"] = "ɦ",
	[""] = "",
}
-- Per-location overrides for the IPA value of initials.
-- NOTE(review): every key is missing in this copy — both the outer keys that
-- select a location and the inner keys that name an initial — so the table
-- does not parse as written and the entries cannot be attributed to a lect
-- from here. Restore the keys from the upstream module.
local ipa_initial_override = {
--this always takes priority over the table above
--additional unique initials can also be defined here
--a question mark means the initial does not exist
--there must be empty tables for all locations, even if there is nothing there
= {
= "?"
},
= {
= "?", = "?"
},
= {
= "ɓ", = "ɗ",
= "ɸ", = "β",
= "cʰ", = "c", = "ɟ", = "ç",
= "?", = "?"
},
= {
= "fv", = "sz", = "ɕʑ", = "hɦ",
},
= {
= "ɓ", = "ɗ",
= "ɸ", = "β",
= "?"
},
= {
= "?", = "?"
},
= {
= "?", = "x"
},
= {
},
= {
},
= {
},
= {
= "?", = "ʔv"
},
= {
},
= {
},
= {
= "?"
},
= {
= "?"
},
= {
= "?"
},
= {
},
= {
},
= {
},
= {
},
= {
},
}
-- Returns the IPA for the Wugniu initial `initial` in the lect `loc`.
-- The per-location override is consulted first and the shared default second;
-- an override value (including the "?" placeholder) is returned unchanged.
-- Raises an error when neither table knows the initial.
local function get_initial(initial, loc)
	-- Tolerate a location that has no override table at all.
	local overrides = ipa_initial_override[loc]
	return (overrides and overrides[initial])
		or ipa_initial[initial]
		or error('Invalid initial: "' .. initial .. '"')
end
-- Returns the IPA for the Wugniu final `final` in the lect `loc`.
-- Raises an error when the data module has no such final for that lect.
-- NOTE(review): assumes data.ipa_final is keyed by location code and then by
-- final, as the error message implies — confirm against Module:wuu-pron/data.
local function get_final(final, loc)
	local finals_for_loc = data.ipa_final[loc]
	return (finals_for_loc and finals_for_loc[final])
		or error('Invalid final for ' .. loc .. ' : "' .. final .. '"')
end
-- IPA for syllables that consist solely of a syllabic nasal.
local ipa_syllabic = {
	["m"] = "m̩", ["n"] = "n̩", ["ng"] = "ŋ̍",
}
-- Diagnoses a tone lookup failure and raises the most specific error it can.
-- This function never returns normally.
--   word_length        number of syllables in the word
--   loc                location code of the lect
--   text               the romanised word, quoted in the error message
--   tone, tone2, tone3 tone marks parsed from the word ('' or nil when absent)
local function diagnose_tones(word_length, loc, text, tone, tone2, tone3)
	-- A failed parse yields nil; treat it as "not given" so that the counts and
	-- the final message are still produced instead of a concatenation error.
	tone, tone2, tone3 = tone or '', tone2 or '', tone3 or ''
	-- the cap on number of syllables (lects absent from the table have no cap)
	local syl_cap = ({sh=5,sj=3,cm=3,cs=3,sz=4,ks=2,yx=1,cz=3,jj=2,jx=3,tx=3,hn=3,hy=3,dq=1,hz=5,xs=1,sx=3,cx=2,nb=3,zs=1})[loc]
	if syl_cap and word_length > syl_cap then
		error(("Maximum %d syllables supported for %s."):format(syl_cap, loc))
	end
	-- the cap on number of specified tones
	local tone_cap = ({sj=3,ks=2,cz=3,jx=3,tx=3,hn=3,hy=2})[loc]
	if tone_cap then
		local expected = math.min(tone_cap, word_length)
		-- the first tone is always counted; the others only when present
		local received = 1 + (tone2 ~= '' and 1 or 0) + (tone3 ~= '' and 1 or 0)
		if received ~= expected then
			error(('Expected %d tones, but received %d: "%s:%s".'):format(expected, received, loc, text))
		end
	elseif loc == 'sz' or loc == 'sx' then
		-- sz: tone is 7 or 8, but second tone not provided
		error("For " .. loc .. ", second tone must be specified.")
	end
	-- NOTE(review): the link target after "See" is missing from this message in
	-- this copy; restore it from the upstream module.
	error(('Incorrect tone notation "%s" for %s. See ].'):format(tone..tone2..tone3, loc))
end
-- Replaces the tone-level digits 1-5 in `text` with superscript digits.
-- Any other character, including "0", is left untouched.
-- Returns the results of string.gsub: the converted string, then the number
-- of digits matched.
local function tone_superscript(text)
	return text:gsub('[1-5]', {['1']='¹',['2']='²',['3']='³',['4']='⁴',['5']='⁵'})
end
-- Resolves the tone contour(s) for one romanised word in the lect `loc`.
-- Returns the contour string with its digits converted by tone_superscript.
-- When no contour is found, control passes to diagnose_tones, which raises.
-- NOTE(review): several patterns and every index into data.tone_contours are
-- missing in this copy; restore them from the upstream module before relying
-- on the details below.
local function get_tone(text, loc)
-- NOTE(review): the gsub pattern has lost its character class. The value is
-- used as the syllable count below, so this presumably counts separators
-- and adds one.
local word_length = text:gsub("+", ""):len() + 1
-- tone: the first character plus any uppercase letters that follow it.
-- tone2 / tone3: an optional digit and uppercase letter captured further
-- along the string (empty strings when absent).
local tone, tone2, tone3 = text:match("^(.%u*)%w+ ?(%d?%u?)%w* ?(%d?%u?)")
if loc == "jx" and tone == "3" then
-- jx splits tone 3 into 3B when the text starts with "3h" or "3sh", else 3A
tone = text:find("^3s?h") and "3B" or "3A"
elseif loc == "cm" then
local result = nil
-- NOTE(review): the pattern below is empty in this copy.
if tone:find("") then -- Verb + Motion / Verb + Pronoun
if word_length ~= 2 then error("cm: Unsupported word length.") end
result = data.tone_contours or error("cm: Wrong motion/pronoun format.")
elseif tone:find("R",1,true) then -- Reduplication
-- The trailing %3 requires the second syllable to repeat the first word.
-- NOTE(review): the second capture has lost its character class; as written
-- "()" is a position capture, yet redup_type is compared with "N" below.
local main_tone, redup_type, word, sub_tone = text:match("^(%d)R()(%l+) (%d)%3$")
main_tone, sub_tone = tonumber(main_tone), tonumber(sub_tone)
-- For type "N" with an even main tone and a matching word shape, the second
-- tone is expected to be one lower; otherwise it must equal the main tone.
local conv_tone = (redup_type == "N" and main_tone%2==0 and word:find("^g?") and main_tone-1) or main_tone
if sub_tone ~= conv_tone then error("cm: Wrong reduplication format.") end
result = data.tone_contours
end
if result then
return tone_superscript(result)
end
elseif loc == "sx" and tone:find("^%dA$") then
-- sx: a tone written as a digit followed by "A" is looked up directly
return tone_superscript(data.tone_contours)
elseif loc == "cs" and word_length == 3 then
-- cs: the second tone is discarded for three-syllable words
tone2 = ""
end
-- NOTE(review): the three alternatives below are identical in this copy
-- because their indexes are missing; presumably they were progressively less
-- specific lookups.
local result = data.tone_contours
or data.tone_contours
or data.tone_contours
return result and tone_superscript(result) or diagnose_tones(word_length, loc, text, tone, tone2, tone3)
end
-- Looks up a tone contour in data.tone_contours, choosing the lookup by
-- whether word_length is 1, and returns it converted by tone_superscript.
-- NOTE(review): every index into data.tone_contours is missing in this copy,
-- so `tone` and `loc` are unused as written; restore from upstream.
local function RPS_tone_determ(word_length, tone, loc)
local result
if word_length == 1 then
-- monosyllables: first lookup, with a fallback
result = data.tone_contours or data.tone_contours
else
result = data.tone_contours
end
return tone_superscript(result)
end
-- Validates that `text` is plausible Wugniu romanisation for the location
-- prefix `locs`. Raises an error on the first violation found, records
-- tracking hits through Module:debug, and returns nil.
-- NOTE(review): several patterns have lost their character classes in this
-- copy (bare "%f", empty strings, a bare "+"); restore them from upstream.
local function rom_check(text, locs) --this checks wugniu
-- Spellings that belong to another romanisation ("ny", "hh", a final "h").
if text:match("%f") or text:match('ny') or text:match('hh') or text:match("h$") then
error('Invalid syllable: ' .. text ..'. Wugniu expected, but another romanisation is being provided.')
end
-- "ghi" and "ghu" are accepted only when the prefix is exactly "cm".
if text:match('ghi') and locs ~= 'cm' then
error('Invalid initial "ghi". Use "yi" instead.')
end
if text:match('ghu') and locs ~= 'cm' then
error('Invalid initial "ghu". Use "wu" instead.')
end
if text:match('%fy%f') then
error('Invalid syllable "y"')
end
if text:match('%fy') then
error('Invalid syllable "yn" or "yq"')
end
if text:match('gn') then
error('Palatalization expected. Insert an "i" after the "gn".')
end
if text:match('uw') then
error(('Invalid syllable in "%s".'):format(text))
end
-- Here the prefix is searched rather than compared, so "cm" may appear
-- anywhere within it.
if locs:find('cm') and (text:find('ueu') or text:find('uon') or text:find('ui')) then
error('cm: Mutation-only final found.')
end
-- Tracked only; the corresponding error is commented out.
if locs:find('sh') and text:match('') then
require("Module:debug").track("wuu-pron/sh-tone-234")
--error('sh: Incorrect tone number used.')
end
-- Track syllables that carry no digit, with a second tracker when the
-- prefix is anything other than exactly "sh".
for syl in text:gmatch("+") do
if not syl:match("%d") then
require("Module:debug").track("wuu-pron/no-tone")
if locs ~= "sh" then
require("Module:debug").track("wuu-pron/no-tone-other")
end
end
end
return nil
end
-- Converts one romanised syllable into IPA segments, without the tone.
--   text       the syllable
--   loc        location code
--   initials   function (initial, loc) returning the IPA of an initial
--   finals     function (final, loc) returning the IPA of a final
--   syllabics  source of IPA for syllabic consonants
--   i          position of the syllable within its word
--   main_tone  tone mark of the word (read only in the cm branch)
--   tone       tone of this syllable (read only in the cm branch)
-- NOTE(review): the split pattern, the lookup into `syllabics` and two
-- character classes in the cm branch are missing in this copy.
function export.ipa_syl_conv(text, loc, initials, finals, syllabics, i, main_tone, tone)
-- get ipa from tables
local initial, final = text:match("^(??)(.+)$")
-- NOTE(review): presumably a lookup of `text` in `syllabics`; as written it
-- is the whole value and therefore always truthy.
local if_syllabic = syllabics
if loc == 'sx' and text == 'gn' then if_syllabic = "ɲ̩" end
-- No initial could be split off, or the syllable is a syllabic consonant:
-- treat the whole text as the final.
if not initial or if_syllabic then
initial, final = '', text
end
if loc == 'cm' then -- mutation
-- a "z" initial in a non-first syllable is emitted directly as "z"
local mutated_initial = i > 1 and initial == "z" and "z"
local preglottal = ""
-- For syllables whose tone is not "0", a prefix is added when the initial is
-- mutated, matches the pattern below, or is empty: "ʔ" or "ɦ".
if tone ~= "0" and (mutated_initial or initial:find("^g?") or initial == "") then
preglottal = (i > 1 or main_tone:find("^$")) and "ʔ" or "ɦ"
end
return preglottal
.. (mutated_initial or initials(initial,loc))
.. (if_syllabic or finals(final,loc))
end
return initials(initial,loc) .. (if_syllabic or finals(final,loc))
end
-- Converts romanised input for the lect `loc` into IPA with tone contours.
-- The input is split on "," into readings, each reading on "&" into
-- components, each component on "+" into independent words, and each word on
-- spaces into syllables. The readings are returned joined by "/, /".
--   initials, finals, syllabics  passed through to export.ipa_syl_conv
--   tones                        function (text, loc) returning the contours
-- NOTE(review): all table indexes (e.g. into `reading`, `components`,
-- `indep_words`, `syllable`, `syl_tone`) and some pattern classes are missing
-- in this copy; restore them from upstream.
function export.wugniu_to_ipa(original_text, loc, initials, finals, syllabics, tones)
-- conv_text is not referenced again in this function
local text, conv_text = "", ""
local tone_number = ""
-- In syllables that follow a space, move a trailing tone mark (a digit plus
-- an optional uppercase letter) in front of the letters.
original_text = original_text:gsub(" (%l+)(%d%u?)", ' %2%1')
-- Rewrite y-/w- spellings: to "i"/"u" for cm, to "ghi"/"ghu" elsewhere.
if loc == 'cm' then
original_text = original_text:gsub("%fyi?","i"):gsub("%fwu?","u")
else
original_text = original_text:gsub("%fyi?","ghi"):gsub("%fwu?","ghu")
end
local reading = mw.text.split(original_text, ",", true)
-- these two locals are shadowed by the same names declared inside the loop
local syllable = {}
local syl_tone = {}
for reading_index = 1, #reading, 1 do
local components = mw.text.split(reading, "&", true)
for component_index = 1, #components do
local indep_words = mw.text.split(components, "+", true)
for indep_index = 1, #indep_words do
text = indep_words
-- the first character of the word is taken as its tone number
tone_number = text:sub(1, 1)
local tone = tones(text, loc)
-- NOTE(review): pattern class missing; presumably strips the tone marks
text = text:gsub("+", "")
local syllable = mw.text.split(text, " ", true)
local syl_tone = mw.text.split(tone, " ", true)
for i = 1, #syllable, 1 do
--RPS
-- on the last syllable, unless the contour is "³³", the tone is replaced by
-- the value from RPS_tone_determ
if i == #syllable and indep_words and tone ~= "³³" then
syl_tone = RPS_tone_determ(#syllable, tone_number, loc)
end
-- segments followed by the tone; a tone of "0" is emitted as nothing
syllable = (syllable ~= "" and export.ipa_syl_conv(syllable, loc, initials, finals, syllabics, i, tone_number, syl_tone) or "")
.. (syl_tone == "0" and "" or syl_tone)
end
indep_words = table.concat(syllable, " ")
end
components = table.concat(indep_words, " ")
end
reading = table.concat(components, " ")
end
return table.concat(reading, "/, /")
end
-- Converts the legacy Wiktionary romanisation to Wugniu through a chain of
-- substitutions, and records the tracking hit "wuu-pron/legacy".
-- `text` may be a string or a table, in which case `text.args` is used.
-- Returns the results of the final gsub (the string, then its match count).
-- NOTE(review): several patterns and replacement-table keys are missing in
-- this copy, and `.args` is used as a string, so an index may also have been
-- lost there.
function export.wikt_to_wugniu(text)
require("Module:debug").track("wuu-pron/legacy")
if type(text) == "table" then text = text.args end
return text
--initials
:gsub("'+", {=""})
:gsub("%f?", {j="c", jj="j", q="ch", x="sh", xx="zh"})
:gsub("%fny", "gn")
:gsub("%fhh", "gh")
--vowels
:gsub("un", "uen")
:gsub("yoe", "ioe")
:gsub("y", "iu")
:gsub("aan", "aon")
:gsub("%fr", "y")
--syllabics
:gsub("g?h?mm", "m")
:gsub("g?h?ngg", "ng")
--tones
:gsub("", {='5', ='6', ='7', ='8'})
--gh rules
:gsub("ghi", "yi")
:gsub("yi%f", "y")
:gsub("ghu", "wu")
:gsub("wu%f", "w")
end
-- Converts Wugniu romanisation to the legacy Wiktionary romanisation through
-- a chain of substitutions, then formats the result with
-- export.wugniu_format.
-- `text` may be a string or a table, in which case `text.args` is used.
-- NOTE(review): several patterns and replacement-table keys are missing in
-- this copy, and `.args` is used as a string, so an index may also have been
-- lost there.
local function wugniu_to_wikt(text)
if type(text) == "table" then text = text.args end
--initials
return export.wugniu_format(text
:gsub("%f?", {c="j", ch="q", j="jj", sh="x", zh="xx", gn="ny", gh="hh"})
:gsub("%fyi?", "hhi")
:gsub("wu?", "hhu")
--vowels
:gsub("y%f", "r")
:gsub("uen", "un")
:gsub("ioe", "yoe")
:gsub("iu", "y")
:gsub("aon", "aan")
--syllabics
:gsub("%fg?%f", {m="mm",n="nn",ng="ngg"})
--initial hh and '
:gsub("()()", "%1'%2")
:gsub("()(g?)%f", "%1hh%2")
--tones
:gsub("", {='2', ='3', ='4', ='5'}))
end
--[[學堂拼音 → 錢拼
local function wugniu_to_qian(text)
if type(text) == "table" then text = text.args end
--initials
return export.wugniu_format(text
gn → n
sh zh → x xh
gh w y → hh wh yh
b d g m n l j → bh dh gh mh nh lh jh
ng → nhg
v z → fh sh
ts tsh c ch → z c j q
'mh 'nh 'lh 'nhg → m n l ng
--rimes
q → k
aon on en → ang ong eng
au eu → ao ou
(ae/e split, but no way to do this on enwikt)
word-initial i and u → yi wu (eg. 一 iq → yik)
iuk → yuik
the rules for the i in iu behave exactly like those for the diaeresis in Pinyin ü
--syllabics: NO CHANGE: m n ng er → m n ng er
--tones: 5, 6, 7, 8 UNMARKED, 1: APOSTROPHE IN FRONT
天: 'ti, 去 qu, 定 dhin, 不 bek, 日 nik
SPACES between syllables, not dashes
]]
-- Formats romanised text for display, wrapping a tone written before a
-- syllable in <sup> and a tone written after it in <sub>.
-- `loc` is accepted but not used in the body.
-- Returns the results of the final gsub (the string, then its match count).
-- NOTE(review): the pattern and the replacement-table keys of the first gsub
-- are missing in this copy; restore them from upstream.
function export.wugniu_format(text, loc)
-- 1a a 1a 1a3 a1 -> ^1a-a-a_1-^1a_3-a_1
-- 1a3-3a5 -> ^1a_3-^3a_5
return text
:gsub("", {="", ="-", =" ", =" ", ="; "})
:gsub("(%-?)(%d?%u?)('?%l+)(%d?%u?)", function(dash, tone1, main, tone2)
-- After a dash, a lone leading tone is treated as the trailing tone.
if dash == '-' and tone2 == '' then
tone1, tone2 = tone2, tone1
end
if tone1 ~= '' then
tone1 = '<sup>' .. tone1 .. '</sup>'
end
if tone2 ~= '' then
tone2 = '<sub>' .. tone2 .. '</sub>'
end
return dash .. tone1 .. main .. tone2
end)
end
-- Formats text in the legacy Wiktionary romanisation; this is the same
-- formatting that export.wugniu_format applies, called without a location.
local function wikt_format(text)
	local formatted, replacements = export.wugniu_format(text)
	return formatted, replacements
end
-- Formats romanised text in the MiniDict style: dashes and the digit 0 are
-- removed, "," becomes "; ", and tone marks are moved into <sup> tags after
-- the syllable.
-- Returns the results of the final gsub (the string, then its match count).
-- NOTE(review): most patterns have lost their character classes in this copy,
-- and the keys of the final replacement table (whose values are tone-category
-- names) are missing; restore them from upstream.
local function minidict_format(text)
-- 1A3 3B5 3C D3 E -> A^3 B^5 C^3 D^3 E
-- 1A B -> A^1 B
return text
:gsub("-", "")
:gsub("", " ")
:gsub(",", "; ")
:gsub("0", "")
:gsub("?(%l+)()", '%1<sup>%2</sup>')
:gsub("()(%l+)", '%2<sup>%1</sup>')
:gsub("%f(%l*)(<sup></sup>)", "'%1%2")
:gsub("",{
="平",="平",
="上",="上",
="去",="去",
="入",="入",
})
end
-- Converts Wugniu romanisation to MiniDict romanisation for the lect `loc`.
-- `text` may be a string or a table, in which case `text.args` is used.
-- A location-specific substitution is applied first, then the shared final
-- and syllabic substitutions, then minidict_format, and finally the initial
-- substitutions on the formatted result.
-- NOTE(review): several patterns have lost their character classes in this
-- copy (bare "%f", a bare "+", empty "()" captures); restore from upstream.
function export.wugniu_to_minidict(text, loc)
if type(text) == "table" then text = text.args end
-- expand the bare syllables "y" and "w" to "yi" and "wu"
text = text:gsub('%f', {y = 'yi', w = 'wu'})
if loc == 'sx' then
text = text:gsub("+",{een="en",en="eon",iq="ieq"})
elseif loc == 'hz' then -- are we dealing with mergers?
text = text:gsub("+q?%f",{eu="ei",ieu="iu",aq="eq",iaq="ieq",iq="ieq",uaq="ueq"})
elseif loc == 'sz' or loc == 'cz' then
text = text:gsub("%fyie%f", "yiie") -- ye > yie
elseif loc == 'nb' then
text = text:gsub("yu%f", "oe")
elseif loc == 'sh' then
text = text:gsub("ie%f", "iae")
elseif loc == 'cm' then
-- drop the mutation annotation (a <sup> that starts with "→")
text = text:gsub("<sup>→%l+</sup>", "")
elseif loc == 'jd' then
text = text:gsub("ue%f", "uie")
end
-- The call to minidict_format closes after the "q" -> "h" substitution; the
-- substitutions below it therefore run on the formatted text.
return minidict_format(text
--finals & syllabic
:gsub("iu()", "iui%1")
:gsub("gher", "r")
:gsub("er", "r")
:gsub("q", "h"))
--initials
--Glottal stops? text = text:gsub("", "'")
:gsub("gn", "ny")
:gsub("nyi%f", "ny")
:gsub('yi()', 'y%1')
:gsub('wu()', 'w%1')
end
-- various boilerplates
-- Returns the italicised label for a lect.
-- NOTE(review): the returned literal is truncated in this copy — a wiki link
-- appears to have been lost — so neither `name` nor `wiki` is used as
-- written. Restore the string from the upstream module.
function export.name_boilerplate(name, wiki)
return '<i>]</i>'
end
-- Wraps `text` in a span carrying the "zhpron-monospace" class.
function export.consolas(text)
	local opening_tag = '<span class="zhpron-monospace">'
	local closing_tag = '</span>'
	return opening_tag .. text .. closing_tag
end
-- Returns one third-level list line: a small italic label followed by `text`
-- in the monospace span produced by export.consolas.
-- NOTE(review): the label is truncated in this copy (a wiki link appears to
-- have been lost); restore it from upstream.
function export.wugniu_boilerplate(text)
return '\n*** <small><i>]</i></small>: '
.. export.consolas(text)
end
-- Returns one third-level list line: a small italic label followed by `text`
-- in the monospace span produced by export.consolas.
-- NOTE(review): the label is truncated in this copy (a wiki link appears to
-- have been lost), which makes this line identical to the one built by
-- export.wugniu_boilerplate; restore it from upstream.
function export.minidict_boilerplate(text)
return '\n*** <small><i>]</i></small>: '
.. export.consolas(text)
end
-- Returns one third-level list line whose small italic label ends in
-- "(Shanghai)", followed by `text` in the monospace span produced by
-- export.consolas.
-- NOTE(review): the start of the label is truncated in this copy (a wiki link
-- appears to have been lost); restore it from upstream.
function export.wikt_boilerplate(text)
return '\n*** <small><i>] (Shanghai)</i></small>: '
.. export.consolas(text)
end
-- Returns one third-level list line holding the IPA for a lect: a small
-- "Sinological" label, the lect label from export.name_boilerplate, and
-- `text` between slashes inside a span with class "IPA".
-- Each matched chunk of `text` is first wrapped in a no-wrap span.
-- NOTE(review): the chunk pattern has lost its character classes and the
-- label literal is truncated in this copy; restore both from upstream.
function export.IPA_boilerplate(text, name, wiki)
text = text:gsub("(/?*/*/?)", '<span style="white-space: nowrap;">%1</span>')
return '\n*** <small>Sinological ]'
.. ' (' .. export.name_boilerplate(name, wiki) .. ')</small>: '
.. '<span class="IPA">/' .. text .. '/</span>'
end
-- Applies location-specific rewrites to the input before IPA conversion.
-- For locations other than hz, sx and cm the text is returned unchanged.
-- The hz, sx and cm branches return the results of gsub (the string, then
-- its match count).
-- NOTE(review): several patterns have lost their character classes in this
-- copy; restore them from upstream.
local function preprocess_IPA(text, loc)
if loc == 'hz' then
-- appends "w" to a matched syllable that ends in "u"
return text:gsub("%f(?h?u)%f", "%1w")
elseif loc == 'sx' then
-- rewrites each "+"-joined chain of two toned words
return text:gsub("+%++", function(chain)
local tone1,mode,word1,tone2,word2 = chain:match("^(%d)(?)(%l+)%+(%d)(%l+)$")
if not tone1 then error("sx: Wrong chain format.") end
-- an omitted mode defaults to 'O'
if mode == '' then mode = 'O' end
-- mode 'A': the two words become separate components joined by "&"
if mode == 'A' then
return tone1..'A'..word1..'&'..tone2..word2
end
-- otherwise: one word, with the mode letter attached to the second tone
return tone1..word1..' '..tone2..mode..word2
end):gsub("#(%d)","%1N")
elseif loc == 'cm' then
-- replaces the letters before a "<...>" annotation with the annotated letters
return text:gsub("%f%l+<(%l*)>","%1")
end
return text
end
-- Applies location-specific rewrites to the text that will be displayed as
-- Wugniu romanisation. For locations other than jx, cm and sx the text is
-- returned unchanged; those three branches return the results of gsub.
-- NOTE(review): the jx, sx and first cm patterns have lost their character
-- classes in this copy (the jx line is a no-op as written); restore them
-- from upstream.
local function preprocess_wugniu(text, loc)
if loc == 'jx' then
return text:gsub("3","3")
elseif loc == 'cm' then
-- turns a "<...>" mutation annotation into a superscript "→" note placed
-- after the syllable and its optional tone digit
return text:gsub("?","")
:gsub("%f(%l*)<(%l*)>(%l*)(%d?)","%1%3%4<sup>→%2%3</sup>")
elseif loc == 'sx' then
return text:gsub("","")
end
return text
end
-- Inserts "<...>" initial-mutation annotations into non-first syllables when
-- the location prefix `locs` contains "cm", and raises an error when the text
-- contains "<" while the prefix is anything other than exactly "cm".
-- Returns the (possibly annotated) text.
-- NOTE(review): the syllable pattern has lost its character classes and the
-- mutation table has lost its index in this copy; restore from upstream.
local function preprocess_mutation(text, locs)
if locs:find('cm') then
text = text:gsub(" (%d?C?)(h?)(+)", function(tone, initial, final)
-- NOTE(review): presumably indexed by `initial`; as written this is the
-- whole table.
local mutated_initial = ({v="u",zh="",gh=""})
-- a mutated "u" is dropped when the final already starts with "u"
if mutated_initial == "u" and final:find("^u") then
mutated_initial = ""
-- "d" before a final starting with "i" mutates to "l"
elseif initial == "d" and final:find("^i") then
mutated_initial = "l"
end
-- leave the syllable alone if it is already annotated or has no mutation
if final:find("<") or not mutated_initial then
return " "..tone..initial..final
end
return " "..tone..initial.."<"..mutated_initial..">"..final
end)
end
if text:find("<") and locs ~= "cm" then
error("cm: Mutation is incompatible with collapsing.")
end
return text
end
-- Entry point: builds the pronunciation output for Wu Chinese.
--   text     one or more "prefix:romanisation" groups separated by ";", where
--            the prefix is a comma-separated list of location codes; input
--            with no ":" is treated as Shanghainese
--   w_count  optional number; selects the layout of the visible line
-- Returns two strings: `show` (the visible line with all romanisations) and
-- `hide` (the detail lines: lect names, Wugniu, MiniDict, the legacy
-- Wiktionary line when sh is included, and IPA).
-- NOTE(review): all table indexes and the wiki-link literals are missing in
-- this copy, which leaves stray "]" characters in two statements; restore
-- them from upstream.
function export.make(text, w_count)
if not text:match(':') then -- assume Shanghainese
text = 'sh:'..text
end
local show = ""
local hide = ""
local roms = {}
local input_seen, duplicated = {}, false
text = mw.text.split(text, ';', true)
local show_name = "<i>]</i>"
-- with exactly one group and a two-character prefix, label the visible line
-- with that lect's name
if #text == 1 and text:find("^..:") then -- single locality
local loc = text:sub(1,2)
show_name = export.name_boilerplate(loc_names, wiki_names)
end
for i = 1,#text,1 do
local s = mw.text.split(text, ':', true)
-- remember whether any input was repeated; tracked at the end
if not duplicated then
if input_seen] then duplicated = true end
input_seen] = true
end
-- each group must split into exactly two parts on ":"
if #s ~= 2 or #s == 0 then
error("Wugniu: prefix is required or too many prefixes")
end
local locs, t = mw.text.split(s, ',', true), s
rom_check(t, s)
t = preprocess_mutation(t, s)
local list = {}
local format_text = t
-- validate every location code and apply its display preprocessing
for _, loc in ipairs(locs) do
if loc_names then
list = true
else
error('Wugniu: prefix "' .. loc .. '" is not recognized')
end
format_text = preprocess_wugniu(format_text, loc)
end
local wugniu_text = export.wugniu_format(format_text, locs)
table.insert(roms,wugniu_text)
local names = {}
local minidicts = {}
local minidicts_seen = {}
local IPAs = {}
-- walk the lects in the fixed order and collect the output for each
for _, loc in ipairs(order) do if list then
table.insert(names, export.name_boilerplate(loc_names, wiki_names))
if minidict then
local minidict_result = export.wugniu_to_minidict(format_text, loc)
-- add a MiniDict line only if it has not been added already
if not minidicts_seen then
table.insert(minidicts, minidict_result)
minidicts_seen = true
end
end
local ipa_text = preprocess_IPA(t, loc)
ipa_text = export.wugniu_to_ipa(ipa_text, loc, get_initial, get_final, ipa_syllabic, get_tone)
table.insert(IPAs,export.IPA_boilerplate(ipa_text, loc_names, wiki_names))
end end
hide = hide .. '\n** <small>(<i>]</i>: ' .. table.concat(names,', ') .. ')</small>'
hide = hide .. export.wugniu_boilerplate(wugniu_text)
for _,minidict_text in ipairs(minidicts) do
hide = hide .. export.minidict_boilerplate(minidict_text)
end
-- the legacy Wiktionary romanisation is shown only when sh is included
if list.sh then
hide = hide .. export.wikt_boilerplate(wugniu_to_wikt(format_text))
end
hide = hide .. table.concat(IPAs, '')
end
-- w_count absent or above 1: the visible line is its own list item;
-- otherwise it continues the current line
if not w_count or w_count > 1 then
show = '\n** <small>('..show_name..')</small>: ' .. export.consolas(table.concat(roms, ' / '))
else
show = ' <small>('..show_name..', <i>]</i>)</small>: ' .. export.consolas(table.concat(roms, ' / '))
end
if duplicated then
require("Module:debug").track("wuu-pron/duplicated")
end
return show, hide
end
return export