-- (Aiming to be) a Jyutping-to-anything converter. Currently supported: Jyutping-to-IPA, Jyutping-to-Yale, Jyutping-to-Cantonese-Pinyin and Jyutping-to-Guangdong-Romanization.
local export = {}
local m_string_utils = require("Module:string utilities")
local gsplit = m_string_utils.gsplit
local gsub = m_string_utils.gsub
local len = m_string_utils.len
local lower = m_string_utils.lower
local split = m_string_utils.split
-- Membership set of the stop codas p, t and k, consulted for entering-tone
-- handling. Only key presence matters; every member maps to the truthy value 1.
local entering = { ["p"] = 1, ["t"] = 1, ["k"] = 1 }
-- Maps the tone digit written on a stop-final syllable to its entering-tone
-- digit (1 -> 7, 3 -> 8, 6 -> 9). Keys and values are strings because tones are
-- captured from text by string patterns. Any other tone has no entry, so
-- callers fall back to the original digit with `entering_tones[t] or t`.
local entering_tones = {
	["1"] = "7", ["3"] = "8", ["6"] = "9"
}
-- Finals whose vowel is realised as an allophone. Each is rewritten to an
-- internal spelling in which "r" marks the allophonic vowel (and "eoi" gets
-- the coda "y"), so that nucleus and coda can then be looked up separately.
local ipa_allophones = {
	["ei"] = "eri",
	["eoi"] = "eoy",
	["ing"] = "irng",
	["ik"] = "irk",
	["ou"] = "oru",
	["ung"] = "urng",
	["uk"] = "urk",
}
-- Jyutping initial -> IPA. The empty-string key covers syllables that have
-- no initial consonant. (The keys had been lost, leaving a constructor that
-- does not parse; they are restored here from the Jyutping initials that
-- correspond to each IPA value.)
local ipa_initial = {
	["b"] = "p", ["p"] = "pʰ", ["m"] = "m", ["f"] = "f",
	["d"] = "t", ["t"] = "tʰ", ["n"] = "n", ["l"] = "l",
	["g"] = "k", ["k"] = "kʰ", ["ng"] = "ŋ", ["gw"] = "kʷ", ["kw"] = "kʷʰ",
--	["zh"] = "t͡ʃ", ["ch"] = "t͡ʃʰ", ["sh"] = "ʃ",
	["z"] = "t͡s", ["c"] = "t͡sʰ", ["s"] = "s",
	["h"] = "h", ["w"] = "w", ["j"] = "j",
	[""] = ""
}
-- Jyutping nucleus -> IPA.
-- vowels with "r" only appear as allophones and should not appear in the input
-- (they are produced internally when a final is rewritten to its allophone
-- spelling, e.g. "ei" -> "eri", "ou" -> "oru").
-- The keys had been lost, leaving a constructor that does not parse; each
-- long/short pair below is restored as plain vowel / allophonic vowel.
local ipa_nucleus = {
	["aa"] = "aː", ["a"] = "ɐ",
	["e"] = "ɛː", ["er"] = "e",
	["i"] = "iː", ["ir"] = "ɪ",
	["o"] = "ɔː", ["or"] = "o",
	["oe"] = "œː", ["eo"] = "ɵ",
	["u"] = "uː", ["ur"] = "ʊ",
	["yu"] = "yː"
}
-- Jyutping coda -> IPA: glides, nasals and unreleased stops. The "y" coda only
-- arises from the internal allophone spelling "eoy"; the empty-string key
-- covers open syllables. (Keys restored; the constructor had lost them.)
local ipa_coda = {
	["i"] = "i̯", ["u"] = "u̯", ["y"] = "y̯",
	["m"] = "m", ["n"] = "n", ["ng"] = "ŋ",
	["p"] = "p̚", ["t"] = "t̚", ["k"] = "k̚",
	[""] = ""
}
-- Tone digit -> superscript tone contour. "7", "8" and "9" are the
-- entering-tone digits used for stop-final syllables; the empty-string key lets
-- an absent changed tone contribute nothing. Tone 1 carries a tooltip offering
-- 53 as an alternative contour. (Keys restored; the constructor had lost them.)
local ipa_tone = {
	["1"] = "<span style=\"cursor:help\" title=\"or 53\">⁵⁵</span>",
	["2"] = "³⁵",
	["3"] = "³³",
	["4"] = "²¹",
	["5"] = "¹³",
	["6"] = "²²",
	["7"] = "⁵",
	["8"] = "³",
	["9"] = "²",
	[""] = ""
}
-- Separator between an original tone and a changed tone: the hyphen captured
-- from the Jyutping becomes a superscript minus, and the empty string (no
-- changed tone) stays empty. (Keys restored; the constructor had lost them.)
local ipa_tone_sandhi = {
	["-"] = "⁻", [""] = ""
}
-- Syllabic nasals (syllables with no vowel) -> IPA.
-- (Keys restored; the constructor had lost them.)
local ipa_syllabic = {
	["m"] = "m̩", ["ng"] = "ŋ̍"
}
-- Render `main` inside a help-cursor span whose tooltip reads "or <option>",
-- so the alternative form is revealed on hover.
local function alt(main,option)
	local opening = '<span style="cursor:help" title="or ' .. option .. '">'
	return opening .. main .. '</span>'
end
-- Accented forms of the letters that can carry a Yale tone mark: the five
-- vowels plus the syllabic nasals "m" and "ng". One table per diacritic.
local acute = {
	["a"] = "á", ["e"] = "é", ["i"] = "í", ["o"] = "ó", ["u"] = "ú",
	["m"] = "ḿ", ["ng"] = "ńg",
}
local grave = {
	["a"] = "à", ["e"] = "è", ["i"] = "ì", ["o"] = "ò", ["u"] = "ù",
	["m"] = "m̀", ["ng"] = "ǹg",
}
local macron = {
	["a"] = "ā", ["e"] = "ē", ["i"] = "ī", ["o"] = "ō", ["u"] = "ū",
	["m"] = "m̄", ["ng"] = "n̄g",
}
-- "?" indicates finals that are not supported by Yale
-- Lookup from a final to its Yale spelling, or to "?" when Yale has no
-- spelling for it.
-- NOTE(review): every key of this constructor is missing (each entry reads
-- `= value`), so the table does not parse as written. The keys were presumably
-- bracketed string keys naming finals; they cannot be recovered from the values
-- alone and must be restored before this table is usable — TODO confirm
-- against the upstream module.
local yale_final = {
= "?", = "a",
= "?", = "?", = "?", = "?", = "?",
= "?", = "?",
= "?", = "?",
= "eu", = "?", = "?", = "eung", = "?", = "?", = "euk",
= "eui", = "eun", = "eut",
}
-- The core function to handle conversion to Yale.
-- For non-syllabics, there is exactly one vowel cluster in the syllable:
-- the first vowel is passed as `a`, the remaining vowels as `b`, and `t` is the tone.
--- (e.g. "keui" -> a="e",b="ui"; "keung" -> a="e",b="u")
-- For syllabics, the whole syllabic ("m" or "ng") is `a`, and `b` is empty.
-- Conversion rule:
--- if `t` is 4, 5, or 6, then "h" is appended after `b`.
--- if `t` is 1, 2, 4, or 5, then `a` is replaced by its accented form
---   (1: macron, with grave offered as a hover alternative; 4: grave; 2, 5: acute).
--- tones 3 and 6 leave `a` unaccented.
-- Returns the string a..b..h.
-- The accent tables are indexed by `a`; assigning a whole table to `a` (as the
-- code previously did) made the final concatenation fail.
local function yale_tone(a,b,t)
	local h = ""
	if t == "4" or t == "5" or t == "6" then
		h = "h"
	end
	if t == "1" then
		a = alt(macron[a], grave[a])
	elseif t == "4" then
		a = grave[a]
	elseif t == "2" or t == "5" then
		a = acute[a]
	end
	return a..b..h
end
-- Convert Jyutping to Sinological IPA.
-- @param text  a Jyutping string, or a frame-like table whose first positional
--              argument (`text.args[1]`) holds the string.
-- @return the text with every `letters+tone[-tone]` syllable replaced by IPA.
-- Raises an error for a syllable that splits neither into initial + final nor
-- into initial + syllabic nasal, or whose parts are absent from the IPA tables.
--
-- The table subscripts and pattern character classes had been lost from this
-- function. The classes below are chosen so that every key of the initial,
-- nucleus, coda and syllabic tables can be split off; anything else still
-- matches loosely and is then rejected by the table lookups.
function export.jyutping_to_ipa(text)
	if type(text) == "table" then text = text.args[1] end
	-- normalise the separators (", ", "...", " / ") before converting syllables
	text = text:gsub(", "," "):gsub("%.%.%.", " "):gsub(" $",""):gsub(" / ","/, /")
-- :gsub("()yu", "%1hyu")
-- :gsub("()oe", "%1hoe")
-- :gsub("()eo", "%1heo")
		:gsub("(%l+)(%d)(%-?)(%d?)", function(main,tone,symbol,tone2)
			-- try initial+final: at most one consonant plus an optional g/w
			-- (for ng, gw, kw), then a final starting with a vowel letter or y
			local initial, final = main:match("^([bpmfdtnlgkzcshwj]?[gw]?)([aeiouy]%l*)$")
			if not initial then
				-- otherwise try initial+syllabic
				local syllabic
				initial, syllabic = main:match("^(h?)([mn]g?)$")
				if not initial then
					error("Invalid Jyutping syllable: " .. main)
				end
				main = ipa_initial[initial]
					.. (ipa_syllabic[syllabic] or error("Unrecognised syllabic: " .. syllabic)) -- really?
			else
				-- e.g. convert <ei> (which would be */ɛːi̯/) to <eri> (/ei̯/)
				final = ipa_allophones[final] or final
				-- nucleus: optional y (for "yu"), a vowel, and an optional second
				-- letter forming aa/oe/eo or an "r"-marked allophone;
				-- coda: one glide/nasal/stop letter and/or g (for "ng")
				local nucleus, coda = final:match("^(y?[aeiou][aeor]?)([iuymnptk]?g?)$")
				if not nucleus then
					error("Invalid Jyutping final: " .. final)
				end
				-- stop-final syllables use the entering-tone digits
				if entering[coda] then
					tone = entering_tones[tone] or tone
					tone2 = entering_tones[tone2] or tone2
				end
				main = (ipa_initial[initial] or error(("Unrecognised initial: \"%s\""):format(initial)))
					.. (ipa_nucleus[nucleus] or error(("Unrecognised nucleus: \"%s\""):format(nucleus)))
					.. (ipa_coda[coda] or error(("Unrecognised coda: \"%s\""):format(coda)))
			end
			-- tone, then (if present) the sandhi mark and the changed tone
			return main .. ipa_tone[tone] .. ipa_tone_sandhi[symbol] .. ipa_tone[tone2]
		end)
	return text
end
-- Convert Jyutping to Yale romanisation.
-- @param text  a Jyutping string, or a frame-like table whose first positional
--              argument (`text.args[1]`) holds the string.
-- @return the converted text, or false if any syllable has a final that Yale
--         cannot spell (marked "?" in `yale_final`).
--
-- The table subscripts and pattern character classes had been lost from this
-- function; they are restored to match the a/b split documented on `yale_tone`
-- ("keui" -> "e","ui"; "keung" -> "e","u" with coda "ng").
function export.jyutping_to_yale(text)
	if type(text) == "table" then text = text.args[1] end
	-- consonant respelling: j/jy -> y first, then z -> j and c -> ch
	text = text:gsub("jy?","y")
		:gsub("[zc]",{z="j",c="ch"})
		--:gsub("%-","")
		:gsub("(%l+)(%d)(%-?)(%d?)", function(main,tone,symbol,tone2)
			-- a changed tone, when present, is the one that gets written
			if tone2 ~= "" then
				tone = tone2
			end
			-- find the first vowel letter
			local initial,final = main:match("^([^aeiou]*)([aeiou].*)$")
			local a,b,c
			if initial then
				final = yale_final[final] or final
				if final == "?" then
					return "?"
				end
				-- first vowel, remaining vowels, consonant coda
				a,b,c = final:match("^([aeiou])([aeiou]*)([mnptk]?g?)$")
			else -- otherwise it is a syllabic
				initial,a = main:match("(h?)([mn]g?)")
				b,c = "",""
			end
			return initial..yale_tone(a,b,tone)..c
		end)
	-- plain (non-pattern) search for the unsupported-final marker
	if text:find("?", 1, true) then
		return false
	end
	return text
end
-- Convert Jyutping to Cantonese Pinyin.
-- @param text  a Jyutping string, or a frame-like table whose first positional
--              argument (`text.args[1]`) holds the string.
-- @return the converted text, or false when the unsupported-finals check matches.
--
-- The table subscripts and pattern character classes had been lost from this
-- function; they are restored from the keys of the replacement tables.
function export.jyutping_to_cantonese_pinyin(text)
	if type(text) == "table" then text = text.args[1] end
	-- NOTE(review): as written this rejects every syllable containing "oe".
	-- The pattern may originally have been narrower (a trailing character class
	-- could have been lost) — confirm against the upstream module.
	if text:find("oe") then -- unsupported finals
		return false
	end
	text = text:gsub("yu","y")
		-- three letters are matched so that the table keys can be looked up
		:gsub("eo[int]",{eoi="oey",eon="oen",eot="oet"})
		:gsub("[zc]",{z="dz",c="ts"})
		-- stop coda + tone (+ optional changed tone): use entering-tone digits;
		-- tones with no entering-tone counterpart are kept unchanged
		:gsub("([ptk])([1-6])(%-?)([1-6]?)",function(a,b,c,d)
			return a .. (entering_tones[b] or b) .. c .. (entering_tones[d] or d)
		end)
	return text
end
-- Convert Jyutping to Guangdong Romanization.
-- `text` may be a string or a table; for a table, `text.args` is used.
-- Returns false when one of the "unsupported finals" checks matches, otherwise
-- the text after the chain of substitutions below.
-- NOTE(review): several patterns here look incomplete — character classes (and
-- probably a subscript on `text.args`) appear to have been lost:
--   * "%f" is not followed by a "[set]" in three places; Lua raises a
--     malformed-pattern error when matching reaches such a "%f".
--   * find("") matches any string, so that check alone is always true.
--   * "()ü", "()u", "()w" capture a position, not a letter, so "%1" inserts a number.
--   * "?" on its own matches a literal question mark.
-- The intended classes cannot be determined from this file — TODO restore from
-- the upstream module.
function export.jyutping_to_guangdong(text)
if type(text) == 'table' then text = text.args end
-- unsupported finals
if text:find("%fa%d") or text:find("oe")
or text:find("") or text:find("e") then
return false
end
text = text:gsub("yu","ü")
:gsub("j","y")
:gsub("%f",{z="j",c="q",s="x"}) -- ü=\xC3\xBC
:gsub("()ü","%1u")
:gsub("eoi","êu")
:gsub("?",{aa="a",a="e",e="é",oe="ê",eo="ê"})
:gsub("()u","%1o")
:gsub("()w","%1u")
:gsub("%f",{p="b",t="d",k="g"})
return text
end
-- substitute changed tones for finding homophones
-- Each "<tone>-<changed tone>" pair is collapsed to the changed tone alone
-- (e.g. "jan4-2" -> "jan2"), then the alternative readings are split on " / ".
-- @param text  Jyutping string with readings separated by " / "
-- @return the list of readings produced by `split`
-- The previous pattern "%-()" captured a string position rather than the
-- changed tone, so the hyphen was replaced by a number and the original tone
-- was kept.
function export.jyutping_format(text)
	text = text:gsub("%d%-(%d)", "%1")
	return split(text," / ")
end
-- Wrap `text` in a span that requests the Consolas font, falling back to the
-- generic monospace family.
local function Consolas(text)
	local open_tag = '<span style="font-family: Consolas, monospace;">'
	local close_tag = "</span>"
	return open_tag .. text .. close_tag
end
-- Wrap `text` in a span carrying the "IPA" CSS class.
local function format_IPA(text)
	local open_tag = '<span class="IPA">'
	local close_tag = "</span>"
	return open_tag .. text .. close_tag
end
-- Wrap every tone marking in <sup>…</sup>. A tone marking is a digit followed
-- by any further digits or hyphens, so a changed tone such as "6-2" is wrapped
-- as one unit.
-- @param text  romanised string
-- @return the marked-up string only (the outer parentheses drop gsub's count)
-- The previous pattern ended in a bare "%f", which Lua rejects as malformed
-- because a frontier must be followed by a "[set]".
local function make_superscript(text)
	return (text:gsub("%d[%d%-]*", "<sup>%0</sup>"))
end
-- the only allowed punctuations are:
--- ", ": represents a comma (or a break of any sort)
--- "...": represents a slot where a text can go to (e.g. ])
--- ",": separates alternate readings
-- Checks the Jyutping string `c_rom` and raises an error describing the first
-- problem found; returns nothing when all checks pass.
-- NOTE(review): several patterns below appear to have lost their character
-- classes. An empty pattern matches at position 1 of any string, so as written
-- the first check raises for every input. The intended classes can only be
-- guessed from the error messages — TODO restore from the upstream module.
local function validate(c_rom)
-- NOTE(review): empty pattern; always matches (presumably a class of invalid tone digits)
if c_rom:find("") then error("Invalid tone in Jyutping.") end
-- NOTE(review): empty pattern (presumably a class of capital letters)
if c_rom:find("") then error("Please do not capitalize the Jyutping.") end
-- NOTE(review): as written this rejects every hyphen, including the changed-tone
-- hyphen that the syllable shape checked at the end allows; presumably narrower originally
if c_rom:find("%-") then error("Please do not hyphenate the Jyutping.") end
-- NOTE(review): empty pattern (presumably a tone digit directly followed by a letter)
if c_rom:find("") then error("Error in Jyutping: please use spaces to separate syllables.") end
-- NOTE(review): as written any "h" is rejected; presumably a class for z/c/s preceded it
if c_rom:find("h") then error("'zh'/'ch'/'sh' are non-valid Jyutping, use 'z'/'c'/'s' instead.") end
-- NOTE(review): as written any "y" is rejected; presumably only certain contexts were meant
if c_rom:find("y") then error("Wrong usage of 'y' in Jyutping.") end
if c_rom:find("oei") then error("Invalid rime oei in Jyutping. Did you mean eoi?") end
if c_rom:find("eong") then error("Invalid rime eong in Jyutping. Did you mean oeng?") end
if c_rom:find("eok") then error("Invalid rime eok in Jyutping. Did you mean oek?") end
if c_rom:find("r") then error("Invalid letter \"r\" in Jyutping.") end
if c_rom:find("%d%d") then error("Invalid Jyutping: please use a hyphen to indicate a changed tone.") end
-- strip the "..." slots and comma separators so that only syllables and spaces remain
-- NOTE(review): "%f" here is not followed by a "[set]"; Lua raises a
-- malformed-pattern error if matching reaches it (i.e. when "..." occurs)
c_rom = c_rom:gsub("^%.%.%.",""):gsub("%.%.%.%f",""):gsub("%.%.%."," "):gsub(", ?"," ")
-- NOTE(review): find(" ") matches any single space; presumably two consecutive spaces were meant
if c_rom:find("^ ") or c_rom:find(" ") or c_rom:find(" $") then
error("Empty syllable detected.")
end
-- NOTE(review): empty pattern; always matches (presumably a negated class of allowed characters)
if c_rom:find("") then
error("Invalid character found.")
end
-- ensure that each syllable matches `^%l+%d%-?%d?$`
for syllable in c_rom:gmatch("%S+") do
if not syllable:match("^%l+%d%-?%d?$") then
error("Invalid Jyutping syllable: " .. syllable)
end
end
end
-- generate the shown text of Standard Cantonese
-- if the pagename is one character long, then generate links to all the readings
-- `c_rom` is first passed through `validate`, which raises on malformed input.
-- When `is_single_hanzi` is truthy, each reading is recorded via
-- Module:debug's `track` under "yue-pron/<reading>" and every syllable is
-- rewritten by the gsub below; otherwise the tones are superscripted.
-- Returns the resulting string.
-- NOTE(review): three spots look incomplete (bracketed text appears lost):
--   * ",%f" has no "[set]" after "%f"; Lua raises a malformed-pattern error
--     if matching reaches it, i.e. when `c_rom` contains a comma.
--   * gmatch("+") matches literal plus signs; presumably a character class
--     describing one reading preceded the "+".
--   * the replacement "]" discards both captures; presumably it was a wiki
--     link built from %1 and %2.
function export.show_standard(c_rom, is_single_hanzi)
validate(c_rom)
c_rom = c_rom:gsub(",%f"," / ")
if is_single_hanzi then
for reading in c_rom:gmatch("+") do
require('Module:debug').track('yue-pron/'..reading)
end
c_rom = c_rom:gsub("(%l+)(%d%-?%d?)","]")
else
c_rom = make_superscript(c_rom)
end
return c_rom
end
-- generate the collapsed text of Standard Cantonese
-- generate all the different romanisations, as well as homophones
-- Builds and returns a wikitext string: a heading line, then one sub-item per
-- romanisation (Jyutping always; Yale, Cantonese Pinyin and Guangdong only when
-- their converter returned a non-false value), the IPA line, and — when
-- homophone data was found — a collapsible homophone table.
-- NOTE(review): bracketed text appears to have been lost throughout this
-- function (wiki links inside the string literals, table subscripts, table
-- keys). The individual spots are flagged below; none can be restored from
-- this file alone.
function export.hide_standard(c_rom, is_single_hanzi)
local res = ""
-- NOTE(review): "%f" without a "[set]"; malformed-pattern error if a comma is present
c_rom = c_rom:gsub(",%f"," / ")
-- generate IPA first because the error-catching is located there
local c_ipa = export.jyutping_to_ipa(c_rom)
local c_yale = export.jyutping_to_yale(c_rom)
local c_cp = export.jyutping_to_cantonese_pinyin(c_rom)
local c_gd = export.jyutping_to_guangdong(c_rom)
local c_hom = mw.loadData("Module:yue-pron/hom")
local c_hom_exists = false
-- look for the first reading that has homophone data
for _,c_first in ipairs(export.jyutping_format(c_rom)) do
-- NOTE(review): this tests the data table itself, not an entry of it;
-- presumably a subscript by `c_first` was lost
if c_hom then
c_hom_exists = c_first
break
end
end
-- NOTE(review): the "]" fragments in the literals below are presumably remnants of wiki links
res = res .. "\n** <small>(<i>], ]–]</i>)</small>"
-- no homophones yet: offer an edit link to the homophone data module
if not c_hom_exists and not is_single_hanzi then
res = res .. '<sup><small><abbr title="Add Cantonese homophones"><span class="plainlinks">['
-- NOTE(review): `{="edit"}` has no key and does not parse; presumably a query-parameter name was lost
res = res .. tostring(mw.uri.fullUrl("Module:yue-pron/hom",{="edit"}))
res = res .. " +]</span></abbr></small></sup>"
end
res = res .. "\n*** <small><i>]</i></small>: "
res = res .. Consolas(make_superscript(c_rom))
if c_yale then
res = res .. "\n*** <small><i>]</i></small>: "
res = res .. Consolas(c_yale)
end
if c_cp then
res = res .. "\n*** <small><i>]</i></small>: "
res = res .. Consolas(make_superscript(c_cp))
end
if c_gd then
res = res .. "\n*** <small><i>]</i></small>: "
res = res .. Consolas(make_superscript(c_gd))
end
res = res .. "\n*** <small>Sinological ] <sup>(])</sup></small>: "
res = res .. format_IPA("/" .. c_ipa .. "/")
if c_hom_exists then
res = res .. '\n*** <small>Homophones</small>: <table class="wikitable mw-collapsible mw-collapsed" style="width:15em;margin:0;'
res = res .. 'position:left; text-align:center"><tr><th></th></tr><tr><td><div style="float: right; clear: right;"><sup>'
res = res .. '<span class="plainlinks">['
-- NOTE(review): same keyless `{="edit"}` as above
res = res .. tostring(mw.uri.fullUrl("Module:yue-pron/hom",{="edit"}))
res = res .. ' edit]</span></sup></div><div style="visibility:hidden; float:left"><sup><span style="color:#FFF">edit</span></sup></div>'
local hom_text = {}
local yue = require("Module:languages").getByCode("yue")
-- NOTE(review): iterates the whole data table; presumably the list stored
-- for `c_hom_exists` was meant
for _,hom in ipairs(c_hom) do
table.insert(hom_text, require("Module:links").full_link( { term = hom, lang = yue, tr = "-" } ))
end
res = res .. table.concat(hom_text, "<br>")
res = res .. "</td></tr></table>"
res = res .. "]"
end
return res
end
-- Headword line for Jyutping entries.
-- @param frame  invocation frame; the parameters are read from the parent
--               (template) frame.
-- Accepts one optional parameter, `head`, which overrides the displayed
-- headword; otherwise the page name from Module:headword/data is used. Tone
-- digits in the headword are superscripted.
-- @return the result of Module:headword's `full_headword`.
-- The parameter spec had lost its key (`= {}`), which does not parse; it is
-- restored as `head` because that is the only argument read below.
function export.jyutping_headword(frame)
	local args = require("Module:parameters").process(frame:getParent().args, {
		["head"] = {},
	})
	return require("Module:headword").full_headword{
		lang = require("Module:languages").getByCode("yue"),
		sc = require("Module:scripts").getByCode("Latn"),
		heads = {make_superscript(args.head or mw.loadData("Module:headword/data").pagename)},
		pos_category = "jyutping"
	}
end
return export