This module will transliterate Chinese language text. It is also used to transliterate Eastern Min, Jin, Mandarin, Southern Pinghua, Gan, Xiang, Middle Chinese, Literary Chinese, Northern Min, Teochew, Old Chinese, Wu, Cantonese, Sichuanese, and Taishanese.
The module should preferably not be called directly from templates or other modules.
To use it from a template, use {{xlit}}.
Within a module, use Module:languages#Language:transliterate.
For testcases, see Module:zh-translit/testcases.
tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the transliteration fails, returns nil.
-- Module-level dependencies and cached library functions.
-- (The extracted text had a stray "." fused to the front of the first
-- statement, which is not valid Lua; it has been removed.)
local m_str_utils = require("Module:string utilities")
local find_templates = require("Module:template parser").find_templates
local get_section = require("Module:pages").get_section
local gsub = string.gsub
local insert = table.insert
local safe_require = require("Module:load").safe_require
local split = m_str_utils.split
local toNFD = mw.ustring.toNFD
local trim = m_str_utils.trim
local ugsub = m_str_utils.gsub
local ulen = m_str_utils.len
local ulower = m_str_utils.lower
local usub = m_str_utils.sub
local uupper = m_str_utils.upper
-- Filled in by export.tr with the MT table of Module:zh/data/cmn-tag the first
-- time the Mandarin branch needs it; nil until then.
local tag
-- The langcode_to_abbr table of the lect-code data module.
local lect_code = mw.loadData("Module:zh/data/lect codes").langcode_to_abbr
local export = {}
-- Registers a tracking hit under "zh-translit/needs manual translit/<lang>"
-- and signals failure to the caller by returning nil.
-- `request` is accepted but never read.
local function fail(lang, request)
	local track = require("Module:debug/track")
	track("zh-translit/needs manual translit/" .. lang)
	return nil
end
-- Loads the page named `title` and hands its wikitext to get_section, asking
-- for the "Chinese" section (third argument 2; presumably the heading level).
-- Returns false when no title object can be created for `title` or when the
-- page yields no wikitext; otherwise returns whatever get_section returns.
local function get_content(title)
	local page = mw.title.new(title)
	if not page then
		return false
	end
	-- getContent() gives nil for a page that has no content (e.g. a page that
	-- does not exist), so stop here instead of passing nil to get_section.
	local wikitext = page:getContent()
	if not wikitext then
		return false
	end
	return get_section(wikitext, "Chinese", 2)
end
-- Separator finder handed to split(): behaves like the regex ",(?! )".
-- Scans `str` from position `start` and returns the start and end index
-- (the same number twice) of the first comma that is not immediately followed
-- by a space. Returns nothing when no such comma exists.
local function split_on_comma_without_space(str, start)
	local pos = start
	while true do
		local comma = str:find(",", pos)
		if not comma then
			return
		end
		-- Continue the search just past this comma if it turns out to be ", ".
		pos = comma + 1
		if str:sub(pos, pos) ~= " " then
			return comma, comma
		end
	end
end
-- Reconciles the raw value of one pronunciation parameter with the
-- transliteration found so far.
--   readings: raw parameter value (a string).
--   lang:     language code; selects how `readings` is split.
--   tr:       transliteration found so far, or nil if none yet.
-- Returns the transliteration to carry forward (may still be nil if the value
-- contained no reading), or false when a reading conflicts with `tr`.
local function handle_readings(readings, lang, tr)
	-- For ltc/och the value is passed through whole; it only has to agree with
	-- anything found earlier.
	if lang == "ltc" or lang == "och" then
		if tr and readings ~= tr then
			return false
		end
		return readings
	end

	local comma_separated = (
		lang == "cmn" or
		lang == "csp" or
		lang == "wuu" or
		lang == "yue" or
		lang == "zhx-tai"
	)
	if comma_separated then
		-- Split on commas that are not followed by a space.
		readings = split(readings, split_on_comma_without_space, true)
	else
		readings = split(readings, "/", true, true)
	end

	-- Remember what the caller passed in: the cap=y handling below depends on
	-- the incoming value, not on what this loop has assigned since.
	local tr_orig = tr
	for _, reading in ipairs(readings) do
		reading = trim(reading)
		if not reading:find("=") then
			-- An item without "=" is a reading. Adopt it if nothing is set yet or
			-- if it equals the current value (also after lowercasing the current
			-- value and dropping "^"); otherwise its lowercase form must equal
			-- the current value.
			if (
				not tr or
				tr == reading or
				gsub(ulower(tr), "%^", "") == reading
			) then
				tr = reading
			elseif ulower(reading) ~= tr then
				return false
			end
		elseif lang == "cmn" and reading == "cap=y" and tr then
			-- The `tr` check guards the concatenation: if "cap=y" is met before
			-- any reading has been set there is nothing to prefix, and
			-- concatenating nil would raise an error.
			local tr_cap = "^" .. tr
			if not tr_orig or tr_orig == tr_cap then
				tr = tr_cap
			end
		end
	end
	return tr
end
-- Walks the templates in `content` looking for a transliteration.
--   content: wikitext to scan.
--   lang:    language code; lect_code[lang] names the zh-pron parameter to read.
--   see:     list that receives the targets of zh-see templates.
--   seen:    set (target -> true) of targets that must not be queued again.
--   tr:      transliteration found so far, or nil.
-- Returns the (possibly updated) transliteration, or false as soon as
-- handle_readings reports a conflict.
-- NOTE(review): the extracted text had lost every bracketed subscript; the
-- `[lang]`, `[1]` and `[target]` indexes below are restored from context
-- (comparing a key with the lect_code table, trimming an argument table and
-- testing `not seen` on a table cannot work) -- confirm against upstream.
local function iterate_content(content, lang, see, seen, tr)
	-- Lazy ".-" so that each <ref>...</ref> pair is removed on its own; a greedy
	-- match would delete everything between the first <ref> and the last
	-- </ref>, including any templates in between.
	content = content:gsub("<ref>.-</ref>", "")
	local param = lect_code[lang]
	for template in find_templates(content) do
		local name = template:get_name()
		if name == "zh-pron" then
			for k, v in pairs(template:get_arguments()) do
				if (
					#v > 0 and
					type(k) == "string" and
					k == param
				) then
					tr = handle_readings(v, lang, tr)
					break
				end
			end
			if tr == false then
				return tr
			end
		elseif name == "zh-see" then
			-- Queue the first argument unless it is already marked in `seen`.
			-- A zh-see without a first argument is skipped.
			local target = template:get_arguments()[1]
			target = target and trim(target)
			if target and not seen[target] then
				insert(see, target)
			end
		end
	end
	return tr
end
-- Transliterates `text` by reading the pronunciation recorded on the page
-- named `text` (following zh-see redirects when that page has none) and
-- formatting it for `lang`.
--   text: the term to transliterate; also used as the page name to read.
--   lang: language code. "zh" and "lzh" are handled as "cmn"; a code without
--         an entry in lect_code is replaced by its full code.
--   sc:   script code; accepted but not read by this function.
-- Returns `text` unchanged when it is nil or empty, nil (via fail) when no
-- usable transliteration is found, otherwise the transliteration followed by
-- a single space.
--
-- NOTE(review): the extracted text of this function had lost every "[...]"
-- span. Table subscripts that the surrounding code determines have been
-- restored. Pattern character classes and two subscripts that cannot be
-- determined from context are left exactly as extracted and marked
-- NOTE(review) below; those branches must be checked against the upstream
-- module before they can work.
function export.tr(text, lang, sc)
	if (not text) or text == "" then
		return text
	end
	if lang == "zh" or lang == "lzh" then
		lang = "cmn"
	end
	if not lect_code[lang] then
		lang = require("Module:languages").getByCode(lang, nil, true):getFullCode()
	end
	local content = get_content(text)
	if not content then
		return fail(lang)
	end
	-- `see` is a work queue of zh-see targets; `seen` marks pages that must not
	-- be queued again, starting with the page itself.
	local see = {}
	local seen = {
		[text] = true
	}
	local tr = iterate_content(content, lang, see, seen)
	if tr == nil then
		-- Nothing on the page itself: visit the queued pages. iterate_content
		-- may append to `see` while this runs, so #see is re-read every pass.
		local i = 1
		while i <= #see do
			local title = see[i]
			content = get_content(title)
			if content then
				tr = iterate_content(content, lang, see, seen, tr)
				if tr == false then
					return fail(lang)
				end
				seen[title] = true
			end
			i = i + 1
		end
	end
	if not tr then
		return fail(lang)
	end
	if lang == "cmn" then
		tr = tr:gsub("#", "")
		-- NOTE(review): the character class inside this pattern was lost; as
		-- extracted, the empty pattern always matches. Restore from upstream.
		if tr:match("") then
			tag = tag or mw.loadData("Module:zh/data/cmn-tag").MT
			-- NOTE(review): part of this pattern was lost. The surviving ".*" is
			-- consistent with the one-UTF-8-character idiom ".[\128-\191]*", but
			-- that is a guess -- confirm against upstream.
			tr = tr:gsub(".*", function(m)
				if m == "一" then
					return "yī"
				elseif m == "不" then
					return "bù"
				else
					-- NOTE(review): subscripts lost here; presumably a lookup of
					-- `m` in `tag` followed by a further index -- confirm.
					m = tag and tag
					if m then
						-- NOTE(review): a character class after "^" appears to be missing.
						return toNFD(m):gsub("^", "\1%0") -- temporarily use \1 for apostrophes, as it's not in %p
					end
				end
			end)
			-- NOTE(review): the set after %f was lost; as extracted this pattern
			-- is malformed. Restore from upstream.
			tr = ugsub(tr, "%f(^?)\1", "%1") -- remove any initial apostrophes inserted by the previous function
				:gsub("\1", "'")
		end
		tr = ugsub(tr, "%^('?.)", uupper)
	elseif lang == "csp" or lang == "yue" or lang == "zhx-tai" then
		-- NOTE(review): character classes lost (the trailing %f has no set);
		-- restore from upstream.
		tr = tr:gsub("%d*%f", "<sup>%0</sup>")
	elseif lang == "hak" then
		-- TODO
	elseif lang == "ltc" or lang == "och" then
		if tr == "n" then
			return fail(lang)
		end
		-- One selector per character of `text`: "y", a number, or absent.
		local index = tr and split(tr, lang == "ltc" and "," or ";", true, true) or {}
		-- The data module family does not depend on the character, so build its
		-- name once instead of on every iteration.
		local module_type = lang .. "-pron"
		if lang == "och" then
			module_type = module_type .. "-ZS"
		end
		for i = 1, ulen(text) do
			local data_module = safe_require("Module:zh/data/" .. module_type .. "/" .. usub(text, i, i))
			-- Give up if the character has no data module, or if it has more
			-- than one entry and the page does not say which one to use.
			if not data_module or (((not index[i]) or index[i] == "y") and #data_module > 1) then
				return fail(lang)
			end
			if index[i] == "y" then
				index[i] = 1
			elseif index[i] then
				index[i] = tonumber(index[i])
			end
			-- Replace the selector with the selected entry, falling back to the
			-- first entry when there is no selector or it selects nothing.
			index[i] = index[i] and data_module[index[i]] or data_module[1]
			if lang == "ltc" then
				local data = mw.loadData("Module:ltc-pron/data")
				local initial, final, tone = require("Module:ltc-pron").infer_categories(index[i])
				tone = tone ~= "" and ("<sup>" .. tone .. "</sup>") or tone
				index[i] = data.initialConv[initial] .. data.finalConv[final] .. tone
			else
				-- NOTE(review): as extracted this line was a self-assignment;
				-- presumably a field selector on the entry was lost -- confirm
				-- against upstream before relying on the och output.
				index[i] = index[i]
			end
		end
		tr = table.concat(index, " ")
		if lang == "och" then
			tr = "*" .. tr
		end
	elseif lang == "nan" then
		-- TODO
	elseif lang == "nan-tws" then
		tr = require("Module:nan-pron").pengim_display(tr)
	elseif lang == "wuu" then
		local w_pron = require("Module:wuu-pron")
		if tr:match(';') then
			--TODO
			return fail(lang)
		elseif tr:match(':') then
			tr = w_pron.wugniu_format(tr:sub(4))
		else
			tr = w_pron.wugniu_format(w_pron.wikt_to_wugniu(tr))
		end
	elseif lang == "zhx-sic" then
		-- NOTE(review): character classes lost in both patterns (the first
		-- capture is empty, the trailing %f has no set); restore from upstream.
		tr = ugsub(tr, "()(%a)", "%1 %2")
			:gsub("%d*%f", "<sup>%0</sup>")
	else
		tr = require("Module:" .. lang .. "-pron").rom(tr)
	end
	-- End with a space so that concurrent parts of running text that need to be transliterated separately (e.g. due to links) are still properly separated.
	return tr .. " "
end
return export