Module:zh-translit

Hello, you have come here looking for the meaning of the word Module:zh-translit. In DICTIOUS you will not only get to know all the dictionary meanings for the word Module:zh-translit, but we will also tell you about its etymology, its characteristics and you will know how to say Module:zh-translit in singular and plural. Everything you need to know about the word Module:zh-translit you have here. The definition of the word Module:zh-translit will help you to be more precise and correct when speaking or writing your texts. Knowing the definition of Module:zh-translit, as well as those of other words, enriches your vocabulary and provides you with more and better linguistic resources.

This module will transliterate Chinese language text. It is also used to transliterate Eastern Min, Jin, Mandarin, Southern Pinghua, Gan, Xiang, Middle Chinese, Literary Chinese, Northern Min, Teochew, Old Chinese, Wu, Cantonese, Sichuanese, and Taishanese. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:zh-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the transliteration fails, returns nil.

local m_str_utils = require("Module:string utilities")

local find_templates = require("Module:template parser").find_templates
local get_section = require("Module:pages").get_section
local gsub = string.gsub
local insert = table.insert
local safe_require = require("Module:load").safe_require
local split = m_str_utils.split
local toNFD = mw.ustring.toNFD
local trim = m_str_utils.trim
local ugsub = m_str_utils.gsub
local ulen = m_str_utils.len
local ulower = m_str_utils.lower
local usub = m_str_utils.sub
local uupper = m_str_utils.upper

-- Cache for the MT field of Module:zh/data/cmn-tag. Stays nil until the "cmn"
-- branch of export.tr needs it for the first time.
local tag

-- The `langcode_to_abbr` table from Module:zh/data/lect codes.
-- NOTE(review): presumably maps a language code to the abbreviation used as a
-- {{zh-pron}} parameter name -- confirm against the data module.
local lect_code = mw.loadData("Module:zh/data/lect codes").langcode_to_abbr

-- Table of public functions; returned at the end of the module.
local export = {}

--- Records that `lang` needed a manual transliteration and signals failure.
-- @param lang    language code; appended to the tracking key
-- @param request accepted but not used
-- @return always nil
local function fail(lang, request)
	local track = require("Module:debug/track")
	local key = "zh-translit/needs manual translit/" .. lang
	track(key)
	return nil
end

--- Fetches the level-2 "Chinese" section of the page named `title`.
-- @param title page title to look up
-- @return whatever get_section returns for that page's wikitext, or false
--         when the title is invalid or the page has no content
local function get_content(title)
	local page = mw.title.new(title)
	if not page then
		return false
	end
	-- getContent() yields nil when the page does not exist; stop here so that
	-- get_section is never handed nil.
	local wikitext = page:getContent()
	if not wikitext then
		return false
	end
	return get_section(wikitext, "Chinese", 2)
end

-- Separator matcher equivalent to the regex ",(?! )": locates the next comma
-- at or after `start` that is not immediately followed by a space.
-- Returns the comma's position twice (match start and end), or nothing when
-- no such comma exists.
local function split_on_comma_without_space(str, start)
	local pos = start
	while true do
		local comma = str:find(",", pos, true)
		if comma == nil then
			return
		end
		pos = comma + 1
		if str:sub(pos, pos) ~= " " then
			return comma, comma
		end
	end
end

--- Folds the readings given in one pronunciation argument into the running
-- transliteration `tr`.
-- @param readings raw argument value (string)
-- @param lang     language code; selects how `readings` is split
-- @param tr       transliteration found so far, or nil if none yet
-- @return the (possibly updated) transliteration, `readings` itself for
--         "ltc"/"och", or false when a reading conflicts with `tr`
local function handle_readings(readings, lang, tr)
	if lang == "ltc" or lang == "och" then
		-- These lects use the argument verbatim; it only has to agree with
		-- anything found earlier.
		if tr and readings ~= tr then
			return false
		end
		return readings
	elseif (
		lang == "cmn" or
		lang == "csp" or
		lang == "wuu" or
		lang == "yue" or
		lang == "zhx-tai"
	) then
		-- Comma-separated; a comma followed by a space is not a separator.
		readings = split(readings, split_on_comma_without_space, true)
	else
		readings = split(readings, "/", true, true)
	end
	local tr_orig = tr
	for _, reading in ipairs(readings) do
		reading = trim(reading)
		if not reading:find("=") then
			if (
				not tr or
				tr == reading or
				gsub(ulower(tr), "%^", "") == reading
			) then
				tr = reading
			elseif ulower(reading) ~= tr then
				return false
			end
		-- `tr` must already hold a reading: a "cap=y" that precedes every
		-- reading would otherwise raise a nil-concatenation error.
		elseif lang == "cmn" and reading == "cap=y" and tr then
			local tr_cap = "^" .. tr
			if not tr_orig or tr_orig == tr_cap then
				tr = tr_cap
			end
		end
	end
	return tr
end

--- Scans one page's wikitext for pronunciation data and redirect targets.
-- For every {{zh-pron}} template, the argument named `lect_code[lang]` (if
-- non-empty) is folded into `tr` via handle_readings. For every {{zh-see}}
-- template, its first positional argument is appended to `see` unless that
-- title is already marked in `seen`.
-- @param content wikitext to scan
-- @param lang    language code
-- @param see     list of page titles still to visit (appended to)
-- @param seen    set of titles already handled (read only here)
-- @param tr      transliteration found so far, or nil
-- @return the transliteration, nil if none was found, or false on conflict
local function iterate_content(content, lang, see, seen, tr)
	-- Lazy match so that each <ref>...</ref> pair is removed on its own; a
	-- greedy match would also delete everything between the first and the
	-- last reference on the page.
	content = content:gsub("<ref>.-</ref>", "")
	local arg_name = lect_code[lang]
	for template in find_templates(content) do
		local name = template:get_name()
		if name == "zh-pron" then
			for k, v in pairs(template:get_arguments()) do
				if (
					#v > 0 and
					type(k) == "string" and
					k == arg_name
				) then
					tr = handle_readings(v, lang, tr)
					break
				end
			end
			if tr == false then
				return tr
			end
		elseif name == "zh-see" then
			local target = template:get_arguments()[1]
			-- A {{zh-see}} without a first argument has nothing to follow.
			if target then
				target = trim(target)
				if not seen[target] then
					insert(see, target)
				end
			end
		end
	end
	return tr
end

--- Transliterates `text` by looking it up as a page title and reading the
-- pronunciation recorded in that page's Chinese section.
--
-- If the page itself yields nothing, the titles collected from its
-- {{zh-see}} templates are visited in turn. The reading found is then
-- formatted according to `lang`.
--
-- Subscripts restored in this block, each fixed by the surrounding code:
-- `lect_code[lang]`, `[text]`, `see[i]`, `seen[title]`, `tag[m]`, `index[i]`,
-- `data_module[...]`, and the `initialConv`/`finalConv` lookups.
--
-- NOTE(review): several Lua patterns below also look truncated (a bare `%f`,
-- an empty pattern, `()` captures). They are left byte-for-byte as found
-- because their lost character sets cannot be recovered from this file.
-- Restore them from the upstream module before deploying.
--
-- @param text text to transliterate; nil or "" is returned unchanged
-- @param lang language code ("zh" and "lzh" are handled as "cmn")
-- @param sc   script code; not used by this function
-- @return the transliteration followed by a single space, or nil on failure
function export.tr(text, lang, sc)
	if (not text) or text == "" then
		return text
	end
	
	if lang == "zh" or lang == "lzh" then
		lang = "cmn"
	end
	
	-- Codes that have no entry in the lect table are replaced by their full
	-- language code.
	if not lect_code[lang] then
		lang = require("Module:languages").getByCode(lang, nil, true):getFullCode()
	end
	
	local content = get_content(text)
	if not content then
		return fail(lang)
	end
	
	local see = {}
	-- The starting page counts as already visited.
	local seen = {
		[text] = true
	}
	local tr = iterate_content(content, lang, see, seen)
	
	if tr == nil then
		-- Nothing on the page itself: follow the queued {{zh-see}} targets.
		-- `see` may grow while it is being walked.
		local i, title = 1
		while i <= #see do
			title = see[i]
			content = get_content(title)
			if content then
				tr = iterate_content(content, lang, see, seen, tr)
				if tr == false then
					return fail(lang)
				end
				seen[title] = true
			end
			i = i + 1
		end
	end
	
	if not tr then
		return fail(lang)
	end
	
	if lang == "cmn" then
		tr = tr:gsub("#", "")
		-- NOTE(review): an empty pattern always matches, and ".*" hands the
		-- callback the whole string; both look like patterns whose character
		-- sets were lost.
		if tr:match("") then
			tag = tag or mw.loadData("Module:zh/data/cmn-tag").MT
			tr = tr:gsub(".*", function(m)
				if m == "一" then
					return "yī"
				elseif m == "不" then
					return "bù"
				else
					m = tag and tag[m]
					if m then
						-- NOTE(review): the set that should follow "^" here
						-- appears to be missing.
						return toNFD(m):gsub("^", "\1%0") -- temporarily use \1 for apostrophes, as it's not in %p
					end
				end
			end)
			-- NOTE(review): "%f" must be followed by a set; as written this
			-- pattern is malformed.
			tr = ugsub(tr, "%f(^?)\1", "%1") -- remove any initial apostrophes inserted by the previous function
				:gsub("\1", "'")
		end
		-- A caret marks capitalisation: uppercase what follows it.
		tr = ugsub(tr, "%^('?.)", uupper)
	elseif lang == "csp" or lang == "yue" or lang == "zhx-tai" then
		-- NOTE(review): frontier set missing after "%f"; pattern is malformed.
		tr = tr:gsub("%d*%f", "<sup>%0</sup>")
	elseif lang == "hak" then
		-- TODO
	elseif lang == "ltc" or lang == "och" then
		if tr == "n" then
			return fail(lang)
		end
		-- One selector per character of `text`: "," separated for ltc,
		-- ";" separated for och.
		local index = tr and split(tr, lang == "ltc" and "," or ";", true, true) or {}
		local module_type = lang .. "-pron"
		if lang == "och" then
			module_type = module_type .. "-ZS"
		end
		for i = 1, ulen(text) do
			local data_module = safe_require("Module:zh/data/" .. module_type .. "/" .. usub(text, i, i))
			
			-- Give up when there is no data for the character, or when there
			-- are several entries and no explicit choice among them.
			if not data_module or (((not index[i]) or index[i] == "y") and #data_module > 1) then
				return fail(lang)
			end
			
			if index[i] == "y" then
				index[i] = 1
			elseif index[i] then
				index[i] = tonumber(index[i])
			end
			
			index[i] = index[i] and data_module[index[i]] or data_module[1]
			
			if lang == "ltc" then
				local data = mw.loadData("Module:ltc-pron/data")
				local initial, final, tone = require("Module:ltc-pron").infer_categories(index[i])
				tone = tone ~= "" and ("<sup>" .. tone .. "</sup>") or tone
				index[i] = data.initialConv[initial] .. data.finalConv[final] .. tone
			else
				-- NOTE(review): the original line here read `index = index`;
				-- a field selector on the och entry seems to have been lost.
				-- The entry is left as loaded -- confirm which field holds
				-- the reconstruction.
			end
		end
		tr = table.concat(index, " ")
		if lang == "och" then
			tr = "*" .. tr
		end
	elseif lang == "nan" then
		-- TODO
	elseif lang == "nan-tws" then
		tr = require("Module:nan-pron").pengim_display(tr)
	elseif lang == "wuu" then
		local w_pron = require("Module:wuu-pron")
		if tr:match(';') then
			--TODO
			return fail(lang)
		elseif tr:match(':') then
			tr = w_pron.wugniu_format(tr:sub(4))
		else
			tr = w_pron.wugniu_format(w_pron.wikt_to_wugniu(tr))
		end
	elseif lang == "zhx-sic" then
		-- NOTE(review): "()" is a position capture and "%f" lacks its set;
		-- both patterns look truncated.
		tr = ugsub(tr, "()(%a)", "%1 %2")
			:gsub("%d*%f", "<sup>%0</sup>")
	else
		tr = require("Module:" .. lang .. "-pron").rom(tr)
	end
	
	-- End with a space so that concurrent parts of running text that need to be transliterated separately (e.g. due to links) are still properly separated.
	return tr .. " "
end

return export