Hakka pronunciation module. See {{zh-pron}}.
-- Module table; public functions are attached to it and it is returned at the
-- end of the file.
local export = {}
-- String helpers, aliased as locals for use throughout the file:
-- gsub/match/find/len/lower come from Module:string utilities,
-- sub/toNFD come from mw.ustring.
local m_string_utils = require("Module:string utilities")
local gsub = m_string_utils.gsub
local sub = mw.ustring.sub
local match = m_string_utils.match
local find = m_string_utils.find
local len = m_string_utils.len
local lower = m_string_utils.lower
local toNFD = mw.ustring.toNFD
-- Builds the wikitext display of Hakka romanisations for the given input.
--
-- text:     either a string of ';'-separated `tag=value` items, or a table
--           with an `args` field from which text and convtype are taken.
-- convtype: '' selects the expanded multi-line list output produced by the
--           handlers; any other value selects the compact output assembled
--           at the end of the function.
-- Returns the accumulated `display` string.
-- Raises an error for items not of the form `tag=value`, for empty values,
-- for tags with no handler, and (hrs) for readings without a ':' label.
--
-- NOTE(review): many table keys, index expressions, bracketed pattern classes
-- and wikilink targets in this function appear to be missing (e.g.
-- `{ = '' }`, `pfs_readings + 1] = ...`, `']'`) - presumably lost in
-- extraction. The code is left exactly as found; restore it from the
-- canonical source before relying on it.
function export.rom_display(text,convtype)
-- Table input: both values are read out of text.args.
if type(text) == 'table' then text,convtype = text.args,(text.args or '') end
-- `display` accumulates the returned wikitext; `show` holds the per-system
-- strings used by the compact output at the end of the function.
local display = ''
local show = { = '', = '', = '', = '' }
-- Normalise spacing around '/' to ' / ', then split the input on literal ';'.
local decomp = mw.text.split(gsub(text,'%s*/%s*',' / '),';',true)
local m_table = require('Module:table')
-- One handler per supported tag; each receives the text after `tag=`.
local handlers = {
-- pfs: collects every reading together with its export.hrs,
-- export.pfs_to_hpy and export.ipa conversions, then formats them.
pfs = function(value)
local pfs_readings = { = {}, = {} }
local hrs_readings = { = {}, = {} }
local hpy_readings = { = {}, = {} }
local ipa_readings = { = {}, = {} }
-- Formats one labelled group as nested wiki list items: three romanisation
-- lines followed by the IPA line(s). For style 'ns' the IPA is emitted once
-- when the northern and southern strings are equal, otherwise as separate
-- "Northern" and "Southern" lines.
local function display_format(style)
local label = { = 'Northern ', = 'Southern ', = '' }
local city = {
= ']',
= ']',
= '] and ]'
}
local text = string.format("\n** <small>(''], incl. %s'')</small>", label, city)
text = text .. "\n*** <small>'']''</small>: <span class=\"zhpron-monospace\">" .. table.concat(pfs_readings, ' / ') .. '</span>'
text = text .. "\n*** <small>'']''</small>: <span class=\"zhpron-monospace\">" .. table.concat(hrs_readings, ' / ') .. '</span>'
text = text .. "\n*** <small>'']''</small>: <span class=\"zhpron-monospace\">" .. table.concat(hpy_readings, ' / ') .. '</span>'
local ipa = '\n*** <small>Sinological ]'
local span = '</small>: <span class="IPA">/'
text = text .. ipa
if style == 'ns' then
local north = table.concat(ipa_readings, '/, /')
local south = table.concat(ipa_readings, '/, /')
if north == south then
text = text .. span .. north .. '/</span>'
else
text = text .. " (''Northern, incl. " .. city .. "'')" .. span .. north .. "/</span>"
text = text .. ipa .. " (''Southern, incl. " .. city .. "'')" .. span .. south .. "/</span>"
end
else
text = text .. span .. table.concat(ipa_readings, '/, /') .. '/</span>'
end
return text
end
-- Derives the southern form of a reading by running `convert` over a series
-- of gsub patterns; `convert` rebuilds each match from its captures and the
-- e_a lookup.
local function southern(text)
local function convert(a, b, c)
local e_a = { = 'a', = 'â', = 'á', = 'à' }
return a .. e_a .. c
end
text = gsub(text, '()()(̍?)', convert)
text = gsub(text, '(gi)()(̍?)', convert)
text = gsub(text, '(h?i)()(̍?)', convert)
text = gsub(text, '^(i)()(̍?)', convert)
text = gsub(text, '(i)()(̍?)', convert)
return text
end
-- Appends a reading and its three conversions to the reading lists.
local function add(style, reading)
pfs_readings + 1] = reading
hrs_readings + 1] = export.hrs(reading, style)
hpy_readings + 1] = export.pfs_to_hpy(reading)
ipa_readings + 1] = export.ipa(reading, style)
end
-- `ns` stays true only while no reading forces separate northern and
-- southern groups.
local ns = true
-- Readings are separated by ' / '. A reading containing ':' is split on ':';
-- any other reading is added for both 'n' and 's', using southern(reading)
-- for 's' when the elseif condition below holds.
for _, reading in ipairs(mw.text.split(value, ' / ')) do
if match(reading,':') then
local pair = mw.text.split(reading, ':')
if pair == 'ns' then
add('n', pair)
add('s', pair)
else
ns = false
add(pair, pair)
end
elseif match(reading,'') or reading ~= southern(reading) then
ns = false
add('n', reading)
add('s', southern(reading))
else
add('n', reading)
add('s', reading)
end
end
if convtype == '' then
-- Expanded output: one combined group, or separate 'n' and 's' groups.
if ns then
display = display .. display_format('ns')
else
if #pfs_readings ~= 0 then display = display .. display_format('n') end
if #pfs_readings ~= 0 then display = display .. display_format('s') end
end
else
-- Compact output: de-duplicated readings joined with ' / '.
for i, reading in ipairs(pfs_readings) do
pfs_readings + 1] = pfs_readings
end
show = table.concat(m_table.removeDuplicates(pfs_readings), ' / ')
end
end,
-- hrs: every reading must contain ':'; the part before it is a
-- comma-separated dialect list and the part after it is the romanisation.
hrs = function(value)
local supported = { "h" }
local hrs_readings = { = {}, = {}, = {}, = {} }
for _, reading in ipairs(mw.text.split(value, ' / ')) do
if find(reading, ':') then
local dialects, rom = match(reading, '^(+):(.+)$')
for _, dialect in ipairs(mw.text.split(dialects, ',')) do
table.insert(hrs_readings, rom)
end
else
error("Missing dialect label for Hakka Romanization.")
end
end
local dialect_link = {
= "], incl. ]",
= "]",
= "]",
= "]",
}
if convtype == '' then
-- Expanded output: a header, a romanisation line and an IPA line for each
-- entry of `supported`, both produced by export.hrs_process.
for _, dialect in ipairs(supported) do
display = display .. string.format("\n** <small>(''%s'')</small>", dialect_link)
display = display .. "\n*** <small>'']''</small>: <span class=\"zhpron-monospace\">" .. export.hrs_process(hrs_readings, dialect, "rom") .. '</span>'
display = display .. '\n*** <small>Sinological ]</small>: <span class="IPA">/' .. export.hrs_process(hrs_readings, dialect, "ipa") .. "/</span>"
end
else
show = export.hrs_process(hrs_readings, 'h', "rom") --TO-DO: multiple dialects
end
end,
-- gd: wraps the captured pattern matches in <sup> tags; the expanded output
-- adds an IPA line from export.gd_to_ipa.
gd = function(value)
local gd_formatted = gsub(value, '()', '<sup>%1</sup>')
if convtype == '' then
display = display .. "\n** <small>('']'')</small>"
display = display .. "\n*** <small>'']''</small>: <span class=\"zhpron-monospace\">" .. gd_formatted .. '</span>'
display = display .. '\n*** <small>Sinological ]</small>: <span class="IPA">/' .. export.gd_to_ipa(value) .. "/</span>"
else
show = gd_formatted
end
end,
-- ct: same shape as gd, labelled "Changting Pinyin" and using
-- export.ct_to_ipa for the IPA line.
ct = function(value)
local ct_formatted = gsub(value, '()', '<sup>%1</sup>')
if convtype == '' then
display = display .. "\n** <small>('']'')</small>"
display = display .. "\n*** <small>''Changting Pinyin''</small>: <span class=\"zhpron-monospace\">" .. ct_formatted .. '</span>'
display = display .. '\n*** <small>Sinological ]</small>: <span class="IPA">/' .. export.ct_to_ipa(value) .. "/</span>"
else
show = ct_formatted
end
end,
}
-- Dispatch each ';'-separated item to its handler after validating its shape.
for i = 1,#decomp,1 do
local key, val = match(decomp, '^(%a+)%=(.*)$')
if not key then
error("Malformed input: must be in the form 'tag=value'.")
end
if val == nil or mw.text.trim(val) == "" then
error("Missing value for tag: " .. key)
end
local handler = handlers
if not handler then
error("Unsupported tag: '" .. key .. "'")
end
handler(val)
end
-- Compact output: build one labelled span per system. When the first four
-- branches match, that single span is appended inline; otherwise each
-- non-empty span is appended on its own '\n*:' line.
if convtype ~= '' then
local pfs = " <small>(''], ]'')</small>: <span class=\"zhpron-monospace\">" .. show .. '</span>'
local gd = " <small>(''], ]'')</small>: <span class=\"zhpron-monospace\">" .. show .. '</span>'
local hrs = " <small>(''], ]'')</small>: <span class=\"zhpron-monospace\">" .. show .. '</span>'
local ct = " <small>(''], Changting Pinyin'')</small>: <span class=\"zhpron-monospace\">" .. show .. '</span>'
if show ~= '' and show == '' and show == '' and show == '' then
display = display .. pfs
elseif show ~= '' and show == '' and show == '' and show == '' then
display = display .. gd
elseif show ~= '' and show == '' and show == '' and show == '' then
display = display .. hrs
elseif show ~= '' and show == '' and show == '' and show == '' then
display = display .. ct
else
display = display .. (show ~= '' and '\n*:' .. pfs or '')
display = display .. (show ~= '' and '\n*:' .. hrs or '')
display = display .. (show ~= '' and '\n*:' .. gd or '')
display = display .. (show ~= '' and '\n*:' .. ct or '')
end
end
return display
end
-- Returns a tone number (1-6) for a romanised syllable, based on which
-- combining diacritic it carries after NFD decomposition. The checks run in
-- this order: circumflex -> 1, grave -> 2, acute -> 3, vertical line above
-- -> 6; a syllable with none of these falls through to the last two branches.
-- NOTE(review): the pattern '$' in the fifth branch looks truncated
-- (presumably a character class before the '$' was lost). As written it
-- matches the end of any string, so 5 is always returned there and the final
-- `return 4` is never reached - confirm against the canonical source.
local function find_tone(text)
-- Decompose so that tone marks are separate combining code points.
text = toNFD(text)
if find(text, '̂') then
return 1
elseif find(text, '̀') then
return 2
elseif find(text, '́') then
return 3
elseif find(text, '̍') then
return 6
elseif find(text, '$') then
return 5
else
return 4
end
end
-- Converts romanised text (as handled by the `pfs` tag) to an IPA string.
--
-- text:    string, or a table whose `args` field replaces it.
-- dialect: compared against 's' and 'n' below to select dialect-specific
--          output.
-- Returns the per-syllable IPA joined with spaces, with commas removed.
--
-- NOTE(review): table keys, index expressions (e.g. `initial_ipa] or
-- initial`, `tone_ipa]`) and bracketed pattern classes appear to be missing
-- from this function, presumably lost in extraction. Code left as found.
function export.ipa(text, dialect)
local syllables, initial, final, tone, tone_conv = {}, {}, {}, {}, {}
local ipa = {}
if type(text) == 'table' then text = text.args end
-- Lowercase, drop '.', trim trailing whitespace and turn whitespace runs into
-- '-' so that syllables can be split on '-'.
text = gsub(gsub(gsub(lower(text), '%.', ''), '%s+$', ''), '%s+', '-')
syllables = mw.text.split(text, "-")
-- First pass: split each syllable into initial and final, determine its tone
-- and convert both parts.
for i, syllable in ipairs(syllables) do
-- Only the loop-local copy loses its comma; the entries of `syllables` are
-- unchanged, and the second pass still tests them for ','.
syllable = gsub(syllable, ",", "")
syllable = gsub(syllable,'o̍',{='ua̍',='ue̍'})
syllable = gsub(syllable,'',{='ua',='uá',='uà',='uâ',='uā',='ue',='ué',='uè',='uê',='uē'})
initial = match(syllable, '^??h?')
final = sub(syllable, len(initial) + 1, -1)
local initial_ipa = {
= 'ŋ',
= 'pʰ',
= 'tʰ',
= 'kʰ',
= 't͡s',
= 't͡sʰ',
= 'i'
}
initial = initial_ipa] or initial
-- The tone is read from the final before its diacritics are replaced below.
tone = find_tone(final)
local final_conv = {
= 'a', = 'e', = 'i', = 'o', = 'u', = '',
= 'a', = 'e', = 'i', = 'o', = 'u', = '',
= 'a', = 'e', = 'i', = 'o', = 'u', = '',
= 'n', = 'n',
= '',
= 'ɨ',
}
final = gsub(final, '', final_conv)
-- An initial converted to 'i' is folded into the final: 'i' is prefixed
-- unless find(final, '^i?$') succeeds, and the initial becomes '(j)' for
-- dialect 's' or empty otherwise.
if initial == 'i' then
final = (find(final, '^i?$') and '' or 'i') .. final
initial = dialect == 's' and '(j)' or ''
end
-- Mark the captured final segment with '̚', turn final 'ng' into 'ŋ' (a bare
-- 'ŋ' gets the syllabic mark) and final 'er' into 'ə'.
final = gsub(final, '()$', '%1̚')
final = gsub(final, 'ng$', 'ŋ')
final = final == 'ŋ' and 'ŋ̍' or final
final = gsub(final, 'er$', 'ə')
-- Add the combining mark '̯': to the first and third captures when a third
-- capture is present; otherwise to the first if it is 'i' or 'u', else to
-- the second if it is 'i' or 'u'.
final = gsub(final, '()()(?)', function(first, second, third)
if third ~= '' then
first = first .. '̯'
third = third .. '̯'
elseif first == 'i' or first == 'u' then
first = first .. '̯'
elseif second == 'i' or second == 'u' then
second = second .. '̯'
end
return first .. second .. third end)
end
-- Second pass: look up the tone string and assemble the IPA output.
for i, syllable in ipairs(syllables) do
local tone_ipa = {
= '²⁴',
= '¹¹',
= '³¹',
= '⁵⁵',
= '²',
= '⁵',
}
tone_conv = tone_ipa]
-- Append '⁻¹¹' in two cases; the second applies only to the syllable 'é' in
-- dialect 'n' when the current page title contains '仔' and the text
-- contains '-é'.
if (tone == 1 and find(tostring(tone), '') and not find(syllable, ",")) or (syllable == 'é' and dialect == 'n' and find(mw.title.getCurrentTitle().text, '仔') and find(text, '-é') and find(tostring(tone), '')) then
tone_conv = tone_conv .. '⁻¹¹'
end
ipa = initial .. final .. tone_conv
end
return gsub(table.concat(ipa, " "), ",", "")
end
-- Converts romanised text (as handled by the `pfs` tag) to a romanisation
-- with the tone mark in <sup> tags.
--
-- text:    string, or a table whose `args` field replaces it.
-- dialect: when 's', one consonant replacement yields '(r)i' instead of 'i'.
-- Returns the converted syllables joined with single spaces.
--
-- NOTE(review): table keys, index expressions (e.g. `marks or ''`,
-- `palatal..b`) and bracketed pattern classes appear to be missing from this
-- function, presumably lost in extraction. Code left as found.
function export.hrs(text, dialect)
if type(text) == 'table' then text = text.args end
-- Lowercase, treat spaces like hyphens, then split into syllables on '-'.
local syllables = mw.text.split(gsub(lower(text), ' ', '-'), "-")
for i, syllable in ipairs(syllables) do
-- check for commas
-- (a comma is assumed to be the last character: sub(.., 1, -2) drops it and
-- it is re-attached after the tone mark)
local comma = ''
if find(syllable, ',') then
comma = ','
syllable = sub(syllable, 1, -2)
end
-- change consonants
syllable = gsub(syllable,'',{='b',='d',='g',=dialect == 's' and '(r)i' or 'i'})
syllable = gsub(syllable,'h',{='p',='t',='k',='z'})
syllable = gsub(syllable,'zh','c')
local palatal = {='j',='q',='x',=''}
syllable = gsub(syllable,'()()', function(a,b) return palatal..b end)
-- find tones
local marks = { = 'ˊ', = 'ˇ', = 'ˋ', = 'ˋ' }
local tone = marks or ''
-- remove tone marks and fix vowels
syllable = gsub(syllable, 'ṳ', 'ii')
syllable = gsub(toNFD(syllable), '', '')
syllable = gsub(syllable, 'o()', 'u%1')
-- add new tone marks
syllables = syllable .. "<sup>" .. tone .. "</sup>" .. comma
end
return table.concat(syllables, " ")
end
-- Converts romanised text (as handled by the `pfs` tag) to a romanisation
-- with the tone number from find_tone in <sup> tags.
--
-- text: string, or a table whose `args` field replaces it.
-- Returns the converted syllables joined with single spaces.
--
-- NOTE(review): table keys, index expressions (e.g. `syllables = ...` inside
-- the loop, `palatal..b`) and bracketed pattern classes appear to be missing
-- from this function, presumably lost in extraction. Code left as found.
function export.pfs_to_hpy(text)
if type(text) == 'table' then text = text.args end
-- Lowercase, treat spaces like hyphens, then split into syllables on '-'.
local syllables = mw.text.split(gsub(lower(text), ' ', '-'), "-")
for i, syllable in ipairs(syllables) do
-- A literal '...' syllable is passed through without conversion.
if syllable == '...' then
syllables = syllable
else
-- check for commas
-- (a comma is assumed to be the last character: sub(.., 1, -2) drops it and
-- it is re-attached after the tone number)
local comma = ''
if find(syllable, ',') then
comma = ','
syllable = sub(syllable, 1, -2)
end
-- change consonants
syllable = gsub(syllable,'',{='b',='d',='g'})
syllable = gsub(syllable,'h',{='p',='t',='k',='z'})
syllable = gsub(syllable,'zh','c')
local palatal = {='j',='q',='x'}
syllable = gsub(syllable,'()()', function(a,b) return palatal..b end)
-- find tones
-- (must run before the tone diacritics are stripped below)
local tone = find_tone(syllable)
-- remove tone marks and fix vowels
syllable = gsub(syllable, 'ṳ', 'i')
syllable = gsub(toNFD(syllable), '', '')
syllable = gsub(syllable, 'o()', 'u%1')
syllable = syllable == 'yu' and 'yiu' or syllable
syllable = gsub(syllable, '()e()', '%1a%2')
-- put everything together
syllables = syllable .. '<sup>' .. tone .. '</sup>' .. comma
end
end
return table.concat(syllables, " ")
end
-- Converts romanised text for the `gd` tag to IPA.
--
-- text: string of readings separated by ' / ' (syllables within a reading are
--       separated by whitespace), or a table whose `args` field replaces it.
-- Returns the IPA of each reading, joined with '/, /'.
-- Raises an error for a syllable that has no initial and a final starting
-- with 'i', for the two initial/final combinations checked in the second
-- loop, and for initials or finals missing from the lookup tables.
--
-- NOTE(review): the keys of all lookup tables, most index expressions (e.g.
-- `initial_conv] or error(...)`, `tone_conv]`) and bracketed pattern classes
-- appear to be missing from this function, presumably lost in extraction.
-- Code left as found.
function export.gd_to_ipa(text)
-- Lookup tables whose values are the IPA for initials, finals and tones.
local initial_conv = {
= "p", = "pʰ", = "m", = "f", = "ʋ",
= "t", = "tʰ", = "n", = "l",
= "k", = "kʰ", = "ŋ", = "h",
= "t͡s", = "t͡sʰ", = "s",
= "t͡ɕ", = "t͡ɕʰ", = "ɕ",
= "",
}
local final_conv = {
= "z̩", = "i", = "u",
= "a", = "ia", = "ua",
= "e", = "ie", = "ue",
= "o", = "io", = "uo",
= "m̩", = "n̩",
= "aɪ", = "iaɪ", = "uaɪ",
= "oɪ",
= "uɪ", = "iuɪ",
= "au", = "iau",
= "eu",
= "iu",
= "əm", = "im",
= "am", = "iam",
= "ɛm",
= "ən", = "in",
= "an", = "ian", = "uan",
= "ɛn", = "iɛn", = "uɛn",
= "ɔn", = "iɔn", = "uɔn",
= "un", = "iun",
= "aŋ", = "iaŋ", = "uaŋ",
= "ɔŋ", = "iɔŋ", = "uɔŋ",
= "ʊŋ", = "iʊŋ",
= "əp̚", = "ip̚",
= "ap̚", = "iap̚",
= "ɛp̚",
= "ət̚", = "it̚",
= "at̚", = "iat̚", = "uat̚",
= "ɛt̚", = "iɛt̚", = "uɛt̚",
= "ɔt̚",
= "ut̚", = "iut̚",
= "ak̚", = "iak̚", = "uak̚",
= "ɔk̚", = "iɔk̚", = "uɔk̚",
= "ʊk̚", = "iʊk̚",
}
local tone_conv = {
= "⁴⁴", = "¹¹",
= "³¹",
= "⁵³",
= "¹", = "⁵",
= "⁴⁴⁻³⁵",
= "⁵³⁻⁵⁵",
}
if type(text) == 'table' then text = text.args end
local words = mw.text.split(text, " / ")
local result = {}
for _, word in ipairs(words) do
-- Remove any 'gd=' text and trailing whitespace before splitting into
-- syllables on whitespace.
word = gsub(gsub(gsub(word, 'gd=', ''), '', ''), '%s+$', '')
local syllables = mw.text.split(word, '%s+')
local initial, final, tone, ipa = {}, {}, {}, {}
-- First pass: parse initial, final and tone out of each syllable.
for i, syllable in ipairs(syllables) do
initial = match(syllable, "^?g?")
final = match(sub(syllable, len(initial) + 1, -1), "^*")
if initial == "" and find(final, "^i") then
error('Syllables starting with "i" need a "y" in front.')
end
-- A leading 'yi' or 'y' in the final is rewritten to 'i'.
final = gsub(gsub(final, "^yi", "i"), "^y", "i")
if find(initial, "") and final == "i" then
final = "ii"
end
-- No final: the parsed initial is treated as the final instead.
if final == "" then
final = initial
initial = ""
end
tone = match(syllable, "$")
end
-- Second pass: validate the orthography, then convert each part to IPA.
for i, syllable in ipairs(syllables) do
local ortho_pal = { --orthographic palatalization
= "j",
= "q",
= "x",
}
local ortho_alv = { --orthographic alveolars
= "z",
= "c",
= "s",
}
if find(initial, "^$") and find(final, "^i") then
error("Initial should be " .. ortho_pal] .. "?")
end
if find(initial, "^$") and find(final, "^") then
error("Initial should be " .. ortho_alv] .. "?")
end
initial = initial_conv] or error(("Unrecognised initial: \"%s\""):format(initial))
final = final_conv] or error(("Unrecognised final: \"%s\""):format(final))
-- Mark the tone with '*' when both tone conditions match.
if match(tone, "") and match(tone or "", "") then
tone = tone .. "*"
end
-- Special case, only when the page title contains '仔': a syllable with no
-- initial, final 'e' and tone '3' gets its initial from the `final` value
-- tested by the patterns below.
if initial == "" and final == "e" and tone == "3" and find(mw.title.getCurrentTitle().text, '仔') then
initial = match(final or '', '()̚?$') or initial
initial = find(final or '', 'u$') and 'ʋ' or initial
initial = find(final or '', '$') and '(ʋ)' or initial
initial = find(final or '', 'e$') and '(i)' or initial
end
tone = tone_conv]
ipa = initial .. final .. tone
end
table.insert(result, table.concat(ipa, " "))
end
return table.concat(result, "/, /")
end
-- Formats a list of romanised readings for the `hrs` tag either as
-- romanisation markup or as IPA.
--
-- rom_list: table of readings.
-- dialect:  dialect code.
-- process:  "rom" joins the readings with ' / ', wraps the captured pattern
--           matches in <sup> tags and removes '#'; "ipa" converts each
--           reading with export.hrs_to_ipa and joins the results with '/, /'.
-- Returns the formatted string; nothing is returned for any other `process`.
--
-- NOTE(review): the keys of `dialect_names`, the index expressions on
-- `rom_list`, `ipa_readings` and `dialect_names`, and the pattern class in
-- "()" appear to be missing, presumably lost in extraction. Code left as
-- found.
function export.hrs_process(rom_list, dialect, process)
-- Values are the dialect names that export.hrs_to_ipa works with.
local dialect_names = {
= "hailu",
= "dabu",
= "raoping",
= "zhaoan",
}
if process == "rom" then
return gsub(gsub(table.concat(rom_list, " / "), "()", "<sup>%1</sup>"), "#", "")
elseif process == "ipa" then
local ipa_readings = {}
for i, reading in ipairs(rom_list) do
ipa_readings = export.hrs_to_ipa(reading, dialect_names)
end
return table.concat(ipa_readings, "/, /")
end
end
-- Converts one romanised reading for the `hrs` tag to IPA.
--
-- text:    string of syllables separated by single spaces.
-- dialect: dialect name; "hailu" is the only value for which get_sandhi
--          returns a sandhi tone.
-- Returns the per-syllable IPA joined with spaces, with commas removed.
-- Raises an error (via hrs_check_invalid) when the input contains characters
-- listed as common mistakes, suggesting the corrected text.
--
-- NOTE(review): the keys of all lookup tables, most index expressions (e.g.
-- `initial_conv] or ""`, `mark_to_value or ""`) and bracketed pattern classes
-- appear to be missing from this function, presumably lost in extraction.
-- Code left as found.
function export.hrs_to_ipa(text, dialect)
-- Lookup tables whose values are the IPA for initials and finals.
local initial_conv = {
= "p", = "pʰ", = "m", = "f", = "v", = "b",
= "t", = "tʰ", = "n", = "l",
= "k", = "kʰ", = "ŋ", = "h",
= "t͡s", = "t͡sʰ", = "s",
= "t͡ɕ", = "t͡ɕʰ", = "ɕ",
= "t͡ʃ", = "t͡ʃʰ", = "ʃ", = "ʒ",
= "",
}
local final_conv = {
= "ɨ",
= "i", = "e", = "a", = "o", = "u",
= "ie", = "eu", = "ieu",
= "ia", = "ua",
= "ai", = "iai", = "uai",
= "au", = "iau",
= "io", = "oi", = "ioi",
= "iu", = "ui", = "iui",
= "ue",
= "ɨm", = "im",
= "em", = "iem",
= "am", = "iam",
= "ɨn", = "in",
= "en", = "ien", = "uen",
= "an", = "ian", = "uan",
= "on", = "ion",
= "un", = "iun",
= "aŋ", = "iaŋ", = "uaŋ",
= "oŋ", = "ioŋ",
= "uŋ", = "iuŋ",
= "ə",
= "ɨp", = "ip",
= "ep", = "iep",
= "ap", = "iap",
= "ɨt", = "it",
= "et", = "iet", = "uet",
= "at", = "iat", = "uat",
= "ot", = "iot",
= "ut", = "iut",
= "ak", = "iak", = "uak",
= "ok", = "iok",
= "uk", = "iuk",
= "m̩", = "n̩", = "ŋ̍",
}
-- Maps a tone mark to a tone value string using the tables below. The lookup
-- key is the tone mark, prefixed with "d" when the final matches the
-- end-anchored pattern. Returns "" when nothing is found.
local function get_tone(final, tone_mark, dialect)
local mark_to_value = {
= {
= "53",
= "55",
= "24",
= "11",
= "33",
= "5",
= "2",
},
= {
= "33",
= "35",
= "113",
= "31",
= "53",
= "21",
= "54",
},
= {
= "11",
= "53",
= "31",
= "55",
= "24",
= "43",
},
}
local mark = (find(final, "$") and "d" or "") .. tone_mark
return mark_to_value or ""
end
-- Returns the sandhi tone for syllable i of syl_count, or "" for none.
-- Only "hailu" has rules here: before another syllable, "24" becomes "33"
-- and "5" becomes "2".
local function get_sandhi(syl_count, i, tone, dialect)
if dialect == "hailu" then
if i < syl_count then
if tone == "24" then
return "33"
elseif tone == "5" then
return "2"
end
end
end
return ""
end
-- Values are the superscript forms used when rendering tone strings.
local sup = {
= "¹", = "²", = "³", = "⁴", = "⁵", = "⁻",
}
-- Raises an error if replacing the characters matched by `common_errors`
-- through `error_correction` would change the text; returns nil for nil
-- input.
local function hrs_check_invalid(text)
if not text then
return nil
end
local common_errors = ""
local error_correction = {
= "ˊ",
= "ˋ",
= "˖",
= "˖",
= "ˆ",
}
local correct = gsub(text, common_errors, error_correction)
if text ~= correct then
error("Invalid Hakka Romanization \"" .. text .. "\": please change it to \"" .. correct .. "\"")
end
end
--check for common errors in input
hrs_check_invalid(text)
local syllables, initial, final, tone, sandhi, no_sandhi, ipa = {}, {}, {}, {}, {}, {}, {}
syllables = mw.text.split(text, " ")
for i, syllable in ipairs(syllables) do
no_sandhi = false
--find commas and #s
-- (a match disables sandhi for this syllable and is removed from it)
if find(syllable, "") then
no_sandhi = true
syllable = gsub(syllable, "", "")
end
--find initial, final, tone
-- (the final is whatever lies between the matched initial and tone)
initial = match(syllable, "^(?)") or ""
tone = match(syllable, "()$") or ""
final = sub(syllable, len(initial) + 1, -1 - len(tone))
--convert initial, final, tone
initial = initial_conv] or ""
final = final_conv] or ""
tone = get_tone(final, tone, dialect)
sandhi = no_sandhi and "" or get_sandhi(#syllables, i, tone, dialect)
-- Tone and sandhi tone are joined by '-' when a sandhi tone exists, then
-- rendered through the `sup` table.
ipa = initial .. final ..
gsub(tone .. (sandhi ~= "" and "-" or "") .. sandhi, "", sup)
end
return gsub(table.concat(ipa, " "), ",", "")
end
-- Converts romanised text for the `ct` tag (labelled "Changting Pinyin" in
-- rom_display) to IPA.
--
-- text: string of readings separated by ' / ' (syllables within a reading are
--       separated by whitespace), or a table whose `args` field replaces it.
-- Returns the IPA of each reading, joined with '/, /'.
-- Raises an error for the spelling "wung" with no initial, for syllables with
-- no initial whose final starts with 'i' or 'u', and for initials or finals
-- missing from the lookup tables.
--
-- NOTE(review): the keys of all lookup tables, most index expressions (e.g.
-- `initial_conv] or error(...)`, `tone_conv]`) and bracketed pattern classes
-- appear to be missing from this function, presumably lost in extraction.
-- Code left as found.
function export.ct_to_ipa(text)
-- Lookup tables whose values are the IPA for initials, finals and tones.
local initial_conv = {
= "p", = "pʰ", = "m", = "f", = "v",
= "t", = "tʰ", = "n", = "l",
= "k", = "kʰ", = "ŋ", = "h",
= "t͡s", = "t͡sʰ", = "s",
= "t͡ɕ", = "t͡ɕʰ", = "ɕ",
= "t͡ʃ", = "t͡ʃʰ", = "ʃ",
= "",
}
local final_conv = {
= "ʐ̩", = "i", = "u",
= "a", = "ia", = "ua",
= "o", = "io",
= "e", = "ie", = "ue",
= "ai", = "ui",
= "ɔ", = "iɔ",
= "əɯ", = "iəɯ", = "iəɯ",
= "aŋ", = "iaŋ", = "uaŋ",
= "eŋ", = "ieŋ", = "ueŋ",
= "iŋ", = "uŋ",
= "ɔŋ", = "iɔŋ",
= "oŋ", = "ioŋ",
= "ŋ̍",
}
local tone_conv = {
= "³³", = "²⁴",
= "⁴²",
= "⁵⁴", = "²¹",
= "⁴²⁻³³",
}
if type(text) == 'table' then text = text.args end
local words = mw.text.split(text, " / ")
local result = {}
for _, word in ipairs(words) do
-- Remove any 'ct=' text and trailing whitespace before splitting into
-- syllables on whitespace.
word = gsub(gsub(gsub(word, 'ct=', ''), '', ''), '%s+$', '')
local syllables = mw.text.split(word, '%s+')
local initial, final, tone, ipa = {}, {}, {}, {}
-- First pass: parse initial, final and tone out of each syllable.
for i, syllable in ipairs(syllables) do
initial = match(syllable, "^??")
final = match(sub(syllable, len(initial) + 1, -1), "^*")
if initial == "" and final == "wung" then
error('Please change "wung" to "ng".')
end
if initial == "" and find(final, "^i") then
error('Syllables starting with "i" need a "y" in front.')
end
-- A leading 'yi' or 'y' in the final is rewritten to 'i'.
final = gsub(gsub(final, "^yi", "i"), "^y", "i")
if initial == "" and find(final, "^u") then
error('Syllables starting with "u" need a "w" in front.')
end
-- A leading 'wu' or 'w' in the final is rewritten to 'u'.
final = gsub(gsub(final, "^wu", "u"), "^w", "u")
-- No final: the parsed initial is treated as the final instead.
if final == "" then
final = initial
initial = ""
end
tone = match(syllable, "$")
end
-- Second pass: convert each part to IPA and assemble the syllable.
for i, syllable in ipairs(syllables) do
initial = initial_conv] or error(("Unrecognised initial: \"%s\""):format(initial))
final = final_conv] or error(("Unrecognised final: \"%s\""):format(final))
-- After the initials matched below, 'ʐ' in the final is replaced by 'z'.
if initial:find("") then
final = final:gsub("ʐ", "z")
end
-- Mark the tone with '*' when both tone comparisons hold.
if (tone == "3") and (tone == "3") then
tone = tone .. "*"
end
-- TODO: tone sandhi?
tone = tone_conv]
ipa = initial .. final .. tone
end
table.insert(result, table.concat(ipa, " "))
end
return table.concat(result, "/, /")
end
-- Hand the table of public functions back to whoever loads this module.
return export