Module:vi-pron

Hello, you have come here looking for the meaning of the word Module:vi-pron. In DICTIOUS you will not only get to know all the dictionary meanings for the word Module:vi-pron, but we will also tell you about its etymology, its characteristics and you will know how to say Module:vi-pron in singular and plural. Everything you need to know about the word Module:vi-pron you have here. The definition of the word Module:vi-pron will help you to be more precise and correct when speaking or writing your texts. Knowing the definition of Module:vi-pron, as well as those of other words, enriches your vocabulary and provides you with more and better linguistic resources.

Pronunciation module for Vietnamese.


local export = {}

local m_str_utils = require("Module:string utilities")
local IPA_module = "Module:IPA"
local parse_utilities_module = "Module:parse utilities"

local lang = require("Module:languages").getByCode("vi")

local ugcodepoint = m_str_utils.gcodepoint
local rsubn = m_str_utils.gsub
local rsplit = m_str_utils.split
local ulen = m_str_utils.len
local ulower = m_str_utils.lower
local rmatch = m_str_utils.match
local usub = m_str_utils.sub
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
local u = m_str_utils.char

-- Substitution helper: same arguments as rsubn(), but hands back only the
-- resulting string and drops any further return values (such as a count).
local function rsub(str, from, to)
	local replaced = rsubn(str, from, to)
	return replaced
end

-- Records a tracking entry named "vi-pron/<page>" through Module:debug/track.
-- Always returns true, so the call can be embedded in a boolean expression.
local function track(page)
	local record = require("Module:debug/track")
	record("vi-pron/" .. page)
	return true
end

-- Splits a comma-separated argument into a list of terms.
--   term: string or nil.
-- Returns nil when `term` is nil/false, a one-element list when it has no
-- comma, the result of the parse-utilities splitter when some comma is
-- followed by whitespace, and a plain split on "," otherwise.
local function split_on_comma(term)
	if not term then
		return nil
	end
	-- No comma at all: nothing to split.
	if not term:find(",") then
		return {term}
	end
	-- A comma followed by whitespace is delegated to the shared splitter;
	-- the module is only loaded when this case actually occurs.
	if term:find(",%s") then
		return require(parse_utilities_module).split_on_comma(term)
	end
	return rsplit(term, ",")
end

--àằầèềìòồờùừỳ áắấéếíóốớúứý ảẳẩẻểỉỏổởủửỷ ãẵẫẽễĩõỗỡũữỹ ạặậẹệịọộợụựỵ
-- Maps each combining tone mark to the tone number used by this module.
-- export.ipa decomposes a syllable with toNFD and then looks for one of these
-- marks; a syllable with none of them keeps the default tone 1, which is why
-- tone 1 has no entry here.
-- The keys were missing in this copy of the file (each line began with a bare
-- `=`, which is not valid Lua). They are restored from the per-line comments
-- and written as UTF-8 byte escapes so the combining characters cannot be lost
-- again by tools that mangle them.
local tone_diacritics = {
	["\204\128"] = 2, -- grave accent = ◌̀ (U+0300)
	["\204\129"] = 3, -- acute accent = ◌́ (U+0301)
	["\204\137"] = 4, -- hook above = ◌̉ (U+0309)
	["\204\131"] = 5, -- tilde = ◌̃ (U+0303)
	["\204\163"] = 6, -- dot under = ◌̣ (U+0323)
}

-- Tone letters appended to each syllable, looked up by variety and then by
-- tone value. The tone values are the numbers produced from tone_diacritics
-- (2-6), the default 1, and the string "3a", which export.ipa substitutes for
-- tone 3 when the rhyme matches a pattern.
-- NOTE(review): the keys were missing in this copy of the file (bare `=`
-- entries, not valid Lua). The outer keys are the variety codes compared
-- against in export.ipa; the inner key order 1, 2, 3, "3a", 4, 5, 6 is
-- reconstructed from the seven values per row — confirm against the upstream
-- module.
local tone_contour = {
	["hn"] = { [1] = "˧˧", [2] = "˨˩", [3] = "˧˦", ["3a"] = "˧˦", [4] = "˧˩", [5] = "˦ˀ˥", [6] = "˧˨ʔ" },
	["hue"] = { [1] = "˧˧", [2] = "˦˩", [3] = "˦˧˥", ["3a"] = "˨˩˦", [4] = "˧˨", [5] = "˧˨", [6] = "˨˩ʔ" },
	["hcmc"] = { [1] = "˧˧", [2] = "˨˩", [3] = "˦˥", ["3a"] = "˦˥", [4] = "˨˩˦", [5] = "˨˩˦", [6] = "˨˩˨" },
}

-- IPA for syllable-initial consonants. Each value holds three strings;
-- presumably they are selected by the numeric column stored in `varieties`
-- (1 = Hà Nội, 2 = Huế, 3 = Hồ Chí Minh City) — TODO confirm.
-- NOTE(review): every entry below starts with a bare `=`; the key (apparently
-- the spelling of the initial) is missing from this copy of the file, so the
-- table does not compile as shown. Several rows carry identical values, so the
-- keys cannot be recovered from the values alone; restore them from the
-- upstream module.
local initial_ipa = {
	 = { "ʔɓ", "ʔɓ", "ʔɓ" },
	 = { "k", "k", "k" },
	 = { "t͡ɕ", "t͡ɕ", "c" },
	 = { "z", "j", "j" },
	 = { "ʔɗ", "ʔɗ", "ʔɗ" },
	 = { "ɣ", "ɣ", "ɣ" },
	 = { "ɣ", "ɣ", "ɣ" },
	 = { "z", "j", "j" },
	 = { "z", "j", "j" },
	 = { "h", "h", "h" },
	 = { "k", "k", "k" },
	 = { "x", "kʰ", "kʰ" },
	 = { "l", "l", "l" },
	 = { "m", "m", "m" },
	 = { "n", "n", "n" },
	 = { "ŋ", "ŋ", "ŋ" },
	 = { "ŋ", "ŋ", "ŋ" },
	 = { "ɲ", "ɲ", "ɲ" },
	 = { "p", "p", "p" }, -- foreign
	 = { "f", "f", "f" },
	 = { "k", "k", "k" },
	 = { "kw", "kw", "w" },
	 = { "z", "ʐ", "ɹ" },
	 = { "ɹ", "ɹ", "ɹ" },
	 = { "s", "ʂ", "ʂ" },
	 = { "t", "t", "t" },
	 = { "tʰ", "tʰ", "tʰ" },
	 = { "t͡ɕ", "ʈ", "ʈ" },
	 = { "v", "v", "v" },
	 = { "s", "s", "s" },
	 = { "z", "z", "z" },
	 = { "ʔ", "ʔ", "ʔ" },
	 = { "", "", "" },
}

-- IPA for syllable-initial consonants, consulted by export.ipa instead of
-- `initial_ipa` when the `mvi` invocation argument is set. Unlike
-- `initial_ipa`, each entry is a single string rather than one per variety.
-- NOTE(review): `mvi` presumably stands for Middle Vietnamese — confirm.
-- NOTE(review): every entry below is a bare `= "..."`; the keys are missing
-- from this copy of the file, so the table does not compile as shown. Restore
-- them from the upstream module.
local mvi_initial_ipa = {
	 = "β",  = "β",
	 = "ɓ",
	 = "k",  = "c",
	 = "ð",  = "ɗ",
	 = "ɣ",  = "ɣ",  = "ʝ",
	 = "h",
	 = "k",  = "kʰ",
	 = "l",
	 = "m",
	 = "n",  = "ŋ",  = "ŋ",  = "ɲ",
	 = "p",  = "pʰ",
	 = "ɹ",
	 = "ʂ",
	 = "t",  = "tʰ",  = "ʈ",
	 = "w",
	 = "ɕ",
}

-- IPA for the rhyme of a syllable (what export.ipa calls `final`: everything
-- after the initial consonant, with the tone mark already removed). Each value
-- holds three strings; presumably they are selected by the numeric column
-- stored in `varieties` (1 = Hà Nội, 2 = Huế, 3 = Hồ Chí Minh City) — TODO
-- confirm. export.ipa raises "Unrecognised final" when a rhyme is not found.
-- NOTE(review): every entry below starts with a bare `=`; the key (apparently
-- the spelling of the rhyme) is missing from this copy of the file, so the
-- table does not compile as shown. Many rows carry identical values, so the
-- keys cannot be recovered from the values alone; restore them from the
-- upstream module.
local final_ipa = {
	 = { "aː", "aː", "aː" },
	 = { "aːk̚", "aːk̚", "aːk̚" },
	 = { "ajk̟̚", "at̚", "at̚" },
	 = { "aːj", "aːj", "aːj" },
	 = { "aːm", "aːm", "aːm" },
	 = { "aːn", "aːŋ", "aːŋ" },
	 = { "aːn", "aːn", "aːn" },
	 = { "aːŋ", "aːŋ", "aːŋ" },
	 = { "ajŋ̟", "ɛɲ", "an" },
	 = { "aːw", "aːw", "aːw" },
	 = { "awŋ͡m", "", "" },
	 = { "aːp̚", "aːp̚", "aːp̚" },
	 = { "aːt̚", "aːk̚", "aːk̚" },
	 = { "aw", "aw", "a(ː)w" },
	 = { "aj", "aj", "a(ː)j" },
	 = { "ak̚", "ak̚", "ak̚" },
	 = { "am", "am", "am" },
	 = { "an", "aŋ", "aŋ" },
	 = { "aŋ", "aŋ", "aŋ" },
	 = { "ap̚", "ap̚", "ap̚" },
	 = { "at̚", "ak̚", "ak̚" },
	 = { "ə", "ə", "ə" },
	 = { "ək̚", "ək̚", "ək̚" },
	 = { "əm", "əm", "əm" },
	 = { "ən", "əŋ", "əŋ" },
	 = { "əŋ", "əŋ", "əŋ" },
	 = { "əp̚", "əp̚", "əp̚" },
	 = { "ət̚", "ək̚", "ək̚" },
	 = { "əw", "əw", "əw" },
	 = { "əj", "əj", "əj" },
	 = { "ɛ", "ɛ", "ɛ" },
	 = { "ɛk̚", "ɛk̚", "ɛk̚" },
	 = { "ɛm", "ɛm", "ɛm" },
	 = { "ɛn", "ɛŋ", "ɛŋ" },
	 = { "ɛn", "ɛn", "ɛn" },
	 = { "ɛŋ", "ɛŋ", "ɛŋ" },
	 = { "ɛw", "ɛw", "ɛw" },
	 = { "ɛp̚", "ɛp̚", "ɛp̚" },
	 = { "ɛt̚", "ɛt̚", "ɛk̚" },
	 = { "e", "ej", "ej" },
	 = { "ek̚", "ek̚", "ek̚" },
	 = { "əjk̟̚", "et̚", "əːt̚" },
	 = { "em", "em", "em" },
	 = { "en", "en", "əːn" },
	 = { "eŋ", "eŋ", "eŋ" },
	 = { "əjŋ̟", "en", "əːn" },
	 = { "ep̚", "ep̚", "ep̚" },
	 = { "et̚", "et̚", "əːt̚" },
	 = { "ew", "ew", "ew" },
	 = { "i", "ɪj", "ɪj" },
	 = { "iə", "iə", "iə" },
	 = { "ïk̟̚", "ïk̟̚", "ïk̟̚" },
	 = { "ïk̟̚", "ɨt̚", "ɨt̚" },
	 = { "iək̚", "iək̚", "iək̚" },
	 = { "iəm", "iəm", "im" },
	 = { "iən", "iəŋ", "iəŋ" },
	 = { "iəŋ", "iəŋ", "iəŋ" },
	 = { "iəp̚", "iəp̚", "ip̚" },
	 = { "iət̚", "iək̚", "iək̚" },
	 = { "iəw", "iw", "iw" },
	 = { "im", "im", "im" },
	 = { "in", "in", "ɨn" },
	 = { "ïŋ", "ɨn", "ɨn" },
	 = { "ip̚", "ip̚", "ip̚" },
	 = { "it̚", "it̚", "ɨt̚" },
	 = { "iw", "iw", "iw" },
	 = { "ɔ", "ɔ", "ɔ" },
	 = { "waː", "waː", "waː" },
	 = { "waːk̚", "waːk̚", "waːk̚" },
	 = { "wajk̟̚", "wat̚", "wat̚" },
	 = { "waːj", "waːj", "waːj" },
	 = { "waːm", "waːm", "waːm" },
	 = { "waːn", "waːŋ", "waːŋ" },
	 = { "waːŋ", "waːŋ", "waːŋ" },
	 = { "wajŋ̟", "wɛɲ", "wan" },
	 = { "waːw", "waːw", "waːw" },
	 = { "waːp̚", "waːp̚", "waːp̚" },
	 = { "waːt̚", "waːk̚", "waːk̚" },
	 = { "waj", "waj", "waj" },
	 = { "wa", "wa", "wa" },
	 = { "wak̚", "wak̚", "wak̚" },
	 = { "wam", "wam", "wam" },
	 = { "wan", "waŋ", "waŋ" },
	 = { "waŋ", "waŋ", "waŋ" },
	 = { "wat̚", "wak̚", "wak̚" },
	 = { "awk͡p̚", "awk͡p̚", "awk͡p̚" },
	 = { "wɛ", "wɛ", "wɛ" },
	 = { "wɛm", "wɛm", "wɛm" },
	 = { "wɛn", "wɛŋ", "wɛŋ" },
	 = { "wɛw", "wɛw", "wɛw" },
	 = { "wɛt̚", "wɛk̚", "wɛk̚" },
	 = { "ɔj", "ɔj", "ɔj" },
	 = { "ɔm", "ɔm", "ɔm" },
	 = { "ɔn", "ɔŋ", "ɔŋ" },
	 = { "awŋ͡m", "awŋ͡m", "awŋ͡m" },
	 = { "ɔk̚", "ɔk̚", "ɔk̚" },
	 = { "ɔŋ", "ɔŋ͡m", "ɔŋ" },
	 = { "ɔp̚", "ɔp̚", "ɔp̚" },
	 = { "ɔt̚", "ɔk̚", "ɔk̚" },
	 = { "o", "ow", "ow" },
	 = { "əwk͡p̚", "əwk͡p̚", "əwk͡p̚" },
	 = { "oj", "oj", "oj" },
	 = { "om", "om", "om" },
	 = { "on", "oŋ", "oŋ" },
	 = { "on", "on", "on" },
	 = { "əwŋ͡m", "əwŋ͡m", "əwŋ͡m" },
	 = { "ok̚", "ok̚", "ok̚" },
	 = { "oŋ", "oŋ", "oŋ" },
	 = { "op̚", "op̚", "op̚" },
	 = { "ot̚", "ok̚", "ok̚" },
	 = { "əː", "əː", "əː" },
	 = { "əːj", "əːj", "əːj" },
	 = { "əːm", "əːm", "əːm" },
	 = { "əːn", "əːŋ", "əːŋ" },
	 = { "əːŋ", "əːŋ", "əːŋ" },
	 = { "əːp̚", "əːp̚", "əːp̚" },
	 = { "əːt̚", "əːk̚", "əːk̚" },
	 = { "u", "ʊw", "ʊw" },
	 = { "uə", "uə", "uə" },
	 = { "waːk̚", "waːk̚", "waːk̚" },
	 = { "wajk̟̚", "wat̚", "wat̚" },
	 = { "waːj", "waːj", "waːj" },
	 = { "waːn", "waːŋ", "waːŋ" },
	 = { "waːŋ", "waːŋ", "waːŋ" },
	 = { "wajŋ̟", "wɛɲ", "wan" },
	 = { "waːw", "waːw", "waːw" },
	 = { "waːp̚", "waːp̚", "waːp̚" },
	 = { "waːt̚", "waːk̚", "waːk̚" },
	 = { "waw", "waw", "wa(ː)w" },
	 = { "waj", "waj", "waj" },
	 = { "wa", "wa", "wa" },
	 = { "wak̚", "wak̚", "wak̚" },
	 = { "wam", "wam", "wam" },
	 = { "wan", "waŋ", "waŋ" },
	 = { "waŋ", "waŋ", "waŋ" },
	 = { "wap̚", "wap̚", "wap̚" },
	 = { "wat̚", "wak̚", "wak̚" },
	 = { "wə", "wə", "wə" },
	 = { "wək̚", "wək̚", "wək̚" },
	 = { "wən", "wəŋ", "wəŋ" },
	 = { "wəŋ", "wəŋ", "wəŋ" },
	 = { "wət̚", "wək̚", "wək̚" },
	 = { "wəj", "wəj", "wəj" },
	 = { "ʊwk͡p̚", "ʊwk͡p̚", "ʊwk͡p̚" },
	 = { "wɛ", "wɛ", "wɛ" },
	 = { "wɛn", "wɛŋ", "wɛŋ" },
	 = { "wɛw", "wɛw", "wɛw" },
	 = { "wɛp̚", "wɛp̚", "wɛp̚" },
	 = { "wɛt̚", "wɛt̚", "wɛt̚" },
	 = { "we", "wej", "wej" },
	 = { "wəjk̟̚", "wet̚", "wəːt̚" },
	 = { "wen", "wen", "wəːn" },
	 = { "wəjŋ̟", "wen", "wəːn" },
	 = { "wet̚", "wet̚", "wəːt̚" },
	 = { "weu", "weu", "wew" },
	 = { "uj", "uj", "uj" },
	 = { "win", "win", "wɨn" },
	 = { "wit̚", "wit̚", "wit̚" },
	 = { "um", "um", "ʊm" },
	 = { "un", "un", "ʊwŋ͡m" },
	 = { "ʊwŋ͡m", "ʊwŋ͡m", "ʊwŋ͡m" },
	 = { "ujŋ̟", "un", "uwn" },
	 = { "wɔ", "wɔ", "wɔ" }, 
	 = { "uə", "uə", "uə" }, 
	 = { "uək̚", "uək̚", "uək̚" },
	 = { "uəj", "uj", "uj" },
	 = { "uəm", "uəm", "uəm" },
	 = { "uən", "uəŋ", "uəŋ" },
	 = { "uəŋ", "uəŋ", "uəŋ" },
	 = { "uət̚", "uək̚", "uək̚" },
	 = { "wəː", "wəː", "wəː" },
	 = { "wəːj", "wəːj", "wəːj" },
	 = { "wəːn", "wəːŋ", "wəːŋ" },
	 = { "wəːt̚", "wəːk̚", "wəːk̚" },
	 = { "up̚", "up̚", "ʊp̚" },
	 = { "ut̚", "ʊk̚", "ʊk͡p̚" },
	 = { "wi", "wɪj", "wɪj" },
	 = { "wiə", "wiə", "wiə" },
	 = { "wïk̟̚", "wɨk̟̚", "wɨt̚" },
	 = { "win", "win", "wɨn" },
	 = { "wïk̟̚", "wɨk̟̚", "wɨt̚" },
	 = { "wiə", "wiə", "wiə" },
	 = { "wiən", "wiəŋ", "wiəŋ" },
	 = { "wiəŋ̟", "wiən", "wən" },
	 = { "wiət̚", "wiək̚", "wiək̚" },
	 = { "wïŋ̟", "wɨn", "wɨn" },
	 = { "wip̚", "wip̚", "wip̚" },
	 = { "wit̚", "wɨt̚", "wɨt̚" },
	 = { "wiw", "wiw", "wiw" },
	 = { "ɨ", "ɨ", "ɨ" },
	 = { "ɨə", "ɨə", "ɨə" },
	 = { "ɨk̚", "ɨk̚", "ɨk̚" },
	 = { "ɨj", "ɨj", "ɨj" },
	 = { "ɨm", "ɨm", "ɨm" },
	 = { "ɨn", "ɨŋ", "ɨŋ" },
	 = { "ɨŋ", "ɨŋ", "ɨŋ" },
	 = { "ɨək̚", "ɨək̚", "ɨək̚" },
	 = { "ɨəj", "ɨj", "ɨj" },
	 = { "ɨəm", "ɨəm", "ɨəm" },
	 = { "ɨən", "ɨəŋ", "ɨəŋ" },
	 = { "ɨəŋ", "ɨəŋ", "ɨəŋ" },
	 = { "ɨəp̚", "ɨəp̚", "ɨəp̚" },
	 = { "ɨət̚", "ɨək̚", "ɨək̚" },
	 = { "iəw", "ɨəw", "ɨəw" },
	 = { "ɨt̚", "ɨk̚", "ɨk̚" },
	 = { "iw", "ɨw", "ɨw" },
	 = { "i", "ɪj", "ɪj" },
	 = { "iək̚", "iək̚", "iək̚" },
	 = { "iəm", "iəm", "iəm" },
	 = { "iən", "iəŋ", "iəŋ" },
	 = { "iəŋ", "iəŋ", "iəŋ" },
	 = { "iəp̚", "iəp̚", "iəp̚" },
	 = { "iət̚", "iək̚", "iək̚" },
	 = { "iəw", "iw", "iw" },
}

-- Supported varieties, keyed by the short code that export.ipa compares
-- against ("hn", "hue", "hcmc"). Each value is { display name, number }; the
-- numbers 1-3 match the three strings held by every `initial_ipa` and
-- `final_ipa` entry.
-- The keys were missing in this copy of the file (bare `=` entries, which is
-- not valid Lua); they are restored from the literal comparisons
-- `variety == "hn"`, `variety == "hue"` and `variety == "hcmc"` in export.ipa.
local varieties = {
	["hn"] = { "Hà Nội", 1 },
	["hue"] = { "Huế", 2 },
	["hcmc"] = { "Hồ Chí Minh City", 3 },
}

-- Set (key -> true) consulted by export.ipa for the "hcmc" variety only: when
-- the lookup succeeds, a leading "w" of the rhyme's IPA is rewritten as "⁽ʷ⁾".
-- NOTE(review): the keys are missing from this copy of the file (every entry
-- is a bare `= true`), and the index expression at the lookup site is missing
-- too, so what the set is keyed on cannot be determined here — presumably the
-- initial consonant; confirm against the upstream module.
local hcmc_opt_w = {
	 = true,
	 = true,
	 = true,
	 = true,
	 = true,  = true,  = true,
	 = true,
}

-- Per-variety substitutions used to derive an alternative pronunciation.
-- export.ipa applies each { pattern, replacement } pair in order to a finished
-- transcription and, if the result differs, lists it after " ~ ".
-- NOTE(review): the keys were missing in this copy of the file (bare `=`
-- entries, not valid Lua). They are restored as the variety codes used in
-- export.ipa, in the same row order as `varieties` (hn, hue, hcmc) — confirm
-- against the upstream module.
local variations = {
	["hn"] = { { "^ɹ", "z" }, { " ɹ", " z" } },
	["hue"] = { { "z", "j" }, { "ʂ", "s" }, { "kʰ", "x" }, { "awŋ͡m", "ɔŋ" } },
	["hcmc"] = { { "ʂ", "s" }, { "v", "j" }, { "kʰ", "x" }, { "z", "j" } },
}

-- Entry point: builds the wikitext pronunciation section for a Vietnamese
-- term — one bulleted IPA line per variety, plus an optional
-- "Phonetic spelling" line.
--
-- frame: the frame object. `frame.args` supplies invocation-level options
--   (parsed into `iargs`, from which `mvi` is read); `frame:getParent().args`
--   supplies the caller's arguments (parsed into `args`, from which `pagename`
--   is read).
-- Returns: one wikitext string, with a single leading newline removed.
-- Raises: "Unrecognised final: ..." when a syllable's rhyme has no entry in
--   `final_ipa`.
--
-- NOTE(review): in this copy of the file every bracketed expression appears to
-- have been stripped — table keys (`= {...}` with nothing before the `=`),
-- index expressions (e.g. `output = {}` inside a per-variety loop, or
-- `table.insert(ipa, initial_ipa)`), and pattern character classes (e.g.
-- `"^g+"`, `rmatch(final, "")`). The function does not compile or behave
-- correctly as shown; restore those expressions from the upstream module. The
-- comments below describe only what is still visible and mark guesses as such.
function export.ipa(frame)
	local respellings = {}
	local variety_respellings = {}
	local output = {}
	-- Presumably meant to give each variety its own result list
	-- (`output[variety] = {}`); the index is missing here — TODO confirm.
	for variety, _ in pairs(varieties) do
		output = {}
	end
	local output_text = {}
	-- Invocation-level parameters: one boolean option (key missing; `iargs.mvi`
	-- is the only field read below).
	local iparams = {
		 = {type = "boolean"},
	}
	local iargs = require("Module:parameters").process(frame.args, iparams)

	-- Caller-level parameters: one list parameter and four plain ones (keys
	-- missing; only `args.pagename` is read by name below).
	local params = {
		 = {list = true},
		 = {},
		 = {},
		 = {},
		 = {}, -- for testing or documentation
	}
	local args = require("Module:parameters").process(frame:getParent().args, params)

	-- An explicit pagename argument overrides the current page's name.
	local pagename = args.pagename or mw.loadData("Module:headword/data").pagename

	-- Converts one raw respelling into a spec table:
	--   "raw:<text>" -> {raw = "<text>"} (text passed through untouched);
	--   anything else -> {respelling = ...}, where "+" stands for the page
	--   name, and the text is lowercased, hyphens become spaces and commas are
	--   deleted.
	local function canonicalize_respelling(respelling)
		if respelling:find("^raw:") then
			return {raw = (respelling:gsub("^raw:", ""))}
		else
			if respelling == "+" then
				respelling = pagename
			end
			return {respelling = rsub(rsub(ulower(respelling), "%-", " "), ",", "")}
		end
	end

	local mvi = iargs.mvi
	-- NOTE(review): the index on `args` is missing; presumably this reads the
	-- list parameter declared above.
	local raw_respellings = args
	-- Fall back to the page name ("+") when nothing is available.
	if not raw_respellings then
		raw_respellings = {"+"}
	end
	for _, raw_respelling in ipairs(raw_respellings) do
		table.insert(respellings, canonicalize_respelling(raw_respelling))
	end
	-- Per-variety overrides: a value of "-" suppresses the variety, and `mvi`
	-- suppresses "hue" and "hcmc"; otherwise a supplied value is split on commas
	-- and canonicalized. NOTE(review): the indices on `args` and
	-- `variety_respellings` are missing (presumably keyed by `variety`).
	for variety, _ in pairs(varieties) do
		if args == "-" or (mvi and (variety == "hue" or variety == "hcmc")) then
			variety_respellings = false
		elseif args then
			local variety_raw_respellings = split_on_comma(args)
			variety_respellings = {}
			for _, raw_respelling in ipairs(variety_raw_respellings) do
				table.insert(variety_respellings, canonicalize_respelling(raw_respelling))
			end
		end
	end

	-- Compute the transcriptions for every variety that is not suppressed.
	for variety, location in pairs(varieties) do
		if variety_respellings == false then
			-- skip variety
			output = false
		else
			-- Variety-specific respellings take precedence over the general ones.
			local var_respellings = variety_respellings or respellings
			for i, spec in ipairs(var_respellings) do
				local pronunciation = {}
				if spec.raw then
					-- Raw specs are emitted as given, with no conversion.
					table.insert(output, spec.raw)
				else
					-- Syllables are separated by literal spaces (plain split).
					for syllable in mw.text.gsplit(spec.respelling, " ", true) do
						local ipa = {}
						local initial, final, tone = nil, nil, nil
						-- Tone 1 is the default when no tone mark is found.
						tone = 1
						-- Decompose so that tone marks become separate combining
						-- characters; the next line recomposes whatever its
						-- (partly missing) pattern matches before an acute accent.
						syllable = toNFD(syllable)
						syllable = rsub(syllable, "(́)", toNFC)

						-- Take the tone number of the first mark that matches.
						for diac_pattern, tone_num in pairs(tone_diacritics) do
							if rmatch(syllable, diac_pattern) then
								tone = tone_num
								break
							end
						end
						-- NOTE(review): the pattern is empty in this copy; presumably
						-- it removed the tone marks before recomposing.
						syllable = toNFC(rsub(syllable, "", ""))
						-- "gi"/"gin" get a doubled "i" — presumably so that a vowel
						-- is left for the rhyme once "gi" is taken as the initial.
						if syllable == "gi" or syllable == "gin" then
							syllable = rsub(syllable, "gi", "gii")
						end
						-- Find the initial consonant (patterns are partly missing),
						-- with special cases for "giê…" (except "giên") and for
						-- syllables ending in "qu".
						initial = rmatch(syllable, "^g+") or rmatch(syllable, "^(g)")
							or rmatch(syllable, "^g") or rmatch(syllable, "^+") or ""
						initial = (rmatch(syllable, "^giê.") and syllable ~= "giên") and "d" or initial
						initial = rmatch(syllable, "qu$") and "qu" or initial
						-- The rhyme is whatever follows the initial.
						final = usub(syllable, ulen(initial) + 1, -1)
						local ipa, seq, detoned = {}, location, ""
						-- Initial consonant: with `mvi` the single-valued table is
						-- used, otherwise the per-variety one. When there is no
						-- direct entry, the IPA is assembled character by character.
						-- NOTE(review): all table indices in this section are missing.
						if mvi then
							if mvi_initial_ipa then
								table.insert(ipa, mvi_initial_ipa)
							else
								local initial_cluster = ""
								for cc in ugcodepoint(initial) do
									local ch = u(cc)
									initial_cluster = initial_cluster .. mvi_initial_ipa
								end
								table.insert(ipa, initial_cluster)
							end
						elseif initial_ipa then
							table.insert(ipa, initial_ipa)
						else
							local initial_cluster = ""
							-- A cluster-final "r" is looked up as "ŕ" instead.
							initial = rsub(initial, "r$", "ŕ")
							for cc in ugcodepoint(initial) do
								local ch = u(cc)
								initial_cluster = initial_cluster .. initial_ipa
							end
							initial_cluster = rsub(initial_cluster, "(h)", function(digraph)
								return initial_ipa end)
							table.insert(ipa, initial_cluster)
						end
						-- Rhyme: look it up, then adjust it inside the callback.
						if final_ipa then
							detoned = rsub(final_ipa, "^(.+)", function(nucleus)
								-- "qua" and "qui" take their rhyme from another entry
								-- (index missing).
								if initial .. final == "qua" then
									nucleus = final_ipa
								elseif initial .. final == "qui" then
									nucleus = final_ipa
								end
								if variety == "hcmc" then
									-- After "q", a leading "u" becomes the glide "w".
									if initial == "q" then
										nucleus = rsub(nucleus, "^u", "w")	
									end
									-- A leading "w" is shown as "⁽ʷ⁾" when the (missing)
									-- lookup in `hcmc_opt_w` succeeds.
									if hcmc_opt_w then
										nucleus = rsub(nucleus, "^w", "⁽ʷ⁾")
									end
								end
								return nucleus end)
							table.insert(ipa, detoned)
						else
							error(("Unrecognised final: \"%s\""):format(final))
						end
						-- Tone 3 becomes "3a" when the rhyme matches a pattern
						-- (pattern missing here — TODO confirm which rhymes).
						if tone == 3 and rmatch(final, "") then
							tone = "3a"
						end
						table.insert(ipa, tone_contour)
						-- One finished syllable: initial + rhyme + tone letters.
						table.insert(pronunciation, table.concat(ipa, ""))
					end
					-- NOTE(review): an empty string is inserted here; presumably the
					-- original joined `pronunciation` inside brackets — TODO confirm.
					table.insert(output, "")
				end
			end
		end
	end

	-- Format one bulleted line per variety.
	for variety, location in pairs(varieties) do
		-- With `mvi`, the "hn" variety is labelled "Đông Kinh" instead.
		if mvi and variety == "hn" then
			location = "Đông Kinh"
		end
		if output ~= false then
			local ipa_items = {}
			for i, out in ipairs(output) do
				if variety == "hcmc" then
					-- FIXME: Move this up to the pronunciation-computing loop.
					out = rsub(out, "w", "w")
					out = rsub(out, "ʔw", "(ʔ)w")
				end
				-- Items after the first are separated by ", ".
				table.insert(ipa_items, {pron = out, separator = i == 1 and "" or ", "})
				-- Derive an alternative by applying the substitutions from
				-- `variations`; it is listed after " ~ " only if it differs.
				local alternative = out
				for _, variation in ipairs(variations) do
					alternative = rsub(alternative, variation, variation)
				end
				if alternative ~= out then
					table.insert(ipa_items, {pron = alternative, separator = " ~ "})
				end
			end
			-- The line is inserted at a position taken from `location`
			-- (index missing).
			table.insert(output_text, location, "\n* " .. require("Module:IPA").format_IPA_full {
				lang = lang,
				a = {location},
				items = ipa_items,
				separator = "",
			})
		else
			-- Suppressed varieties still occupy their slot, with empty text.
			table.insert(output_text, location, "")
		end
	end
	-- Collect the general respellings; stop at the first raw one.
	local saw_raw = false
	local actual_respellings = {}
	for _, spec in ipairs(respellings) do
		if spec.raw then
			saw_raw = true
			break
		else
			table.insert(actual_respellings, spec.respelling)
		end
	end
	-- Add a "Phonetic spelling" line only when no raw respelling was seen and
	-- the respellings, concatenated, differ from the lowercased page name. The
	-- internal "ŕ" is displayed as "R".
	if not saw_raw and table.concat(actual_respellings, "") ~= ulower(pagename) then
		table.insert(output_text, #output_text + 1, "\n* ''Phonetic spelling'': " .. rsub(table.concat(actual_respellings, ", "), "ŕ", "R"))
	end
	-- Drop the newline that precedes the first line of output.
	return rsub(table.concat(output_text, ""), "^\n(.)", "%1")
end

return export