-- This is a fork of Module:zh. Functions in this module should not be used on any page by transclusion (see Special:WhatLinksHere/Module:zh-new); they should only be used when substituted.
local M = require('Module:zh')
local len = mw.ustring.len
local sub = mw.ustring.sub
local gsub = mw.ustring.gsub
local match = mw.ustring.match
local find = mw.ustring.find
-- Part-of-speech section headings (capitalised), one entry per accepted alias.
-- Read by M.postitle, and searched by value in checkpos.
-- NOTE(review): every entry below begins with a bare `=`; the table keys appear to
-- have been lost from this copy, so the constructor is not valid Lua as written.
-- Restore the keys from the upstream module before use.
local pos_aliases_title = {
= "Noun",
= "Proper noun",
= "Proper noun",
= "Pronoun",
= "Verb",
= "Adjective",
= "Adjective",
= "Adverb",
= "Preposition",
= "Postposition",
= "Conjunction",
= "Particle",
= "Suffix",
= "Proverb",
= "Idiom",
= "Idiom",
= "Idiom",
= "Phrase",
= "Interjection",
= "Classifier",
= "Numeral",
= "Abbreviation",
= "Determiner",
}
-- This is now used only as an inverse alias table.
-- checkpos searches it by value to map a headword-style name back to its key.
-- NOTE(review): the keys of every entry are missing in this copy (each line starts
-- with `=`), so the constructor is not valid Lua as written; restore them from the
-- upstream module.
local pos_aliases_head = {
= "noun",
= "proper noun",
= "proper noun",
= "verb",
= "adj",
= "post",
= "con",
= "particle",
= "pronoun",
= "proverb",
= "idiom",
= "idiom",
= "idiom",
= "phrase",
= "interj",
= "abbr",
= "cls",
= "numeral",
= "det",
}
-- Name fragments used by M.poshead_vi; M.create appends the result to "vi-" to form
-- the Vietnamese headword template name.
-- NOTE(review): the keys of every entry are missing in this copy (each line starts
-- with `=`), so the constructor is not valid Lua as written; restore them from the
-- upstream module.
local pos_aliases_head_vi = {
= "noun",
= "proper noun",
= "proper noun",
= "verb",
= "adj",
= "post",
= "con",
= "part",
= "pronoun",
= "proverb",
= "idiom",
= "idiom",
= "idiom",
= "phrase",
= "interj",
= "abbr",
= "classifier",
= "num",
= "determ",
}
-- Looks up the 'nan-hbl' reading of `title` through M.check_pron and normalises it.
-- Returns the normalised string, or whatever falsy value M.check_pron returned.
local function check_pron_nan(title)
local result = M.check_pron(title, 'nan-hbl')
if result then
-- Replace the syllable "á" with the character 仔 when it follows a hyphen and is
-- followed by a hyphen, a slash, or the end of the string.
result = gsub(result, "%-á%-", "-仔-")
result = gsub(result, "%-á/", "-仔/")
result = gsub(result, "%-á$", "-仔")
-- Collapse a segment repeated three times with hyphens ("X-X-X") into "(X)" when
-- the triplet spans the whole string.
result = gsub(result, "^(.+)%-%1%-%1$", "(%1)")
-- NOTE(review): the three patterns below contain empty `()` groups, which Lua
-- patterns treat as position captures, and the last two use `%1` to refer back to
-- such a capture. Presumably a character class inside the parentheses was lost and
-- the back-reference numbers need checking - confirm against the upstream module.
result = gsub(result, "^(.+)%-%1%-%1()", "(%1)%2")
result = gsub(result, "()(.+)%-%1%-%1$", "%1(%2)")
result = gsub(result, "()(.+)%-%1%-%1()", "%1(%2)%3")
end
return result
end
-- Builds the string placed after "|m=" in the generated {{zh-pron}} call (see M.create).
-- text:     the term, or a frame-like table whose `args` are read instead
-- comp:     string of digits (defaults to ''); used to decide where spaces are inserted
-- pos:      part-of-speech code; 'pn'/'propn' capitalise each word, 'cy' suppresses spaces
-- p:        caller-supplied pronunciation(s); defaults to an empty table
-- is_erhua: when true, the final character of the term is dropped before processing
-- Returns the assembled string.
function M.pytemp(text,comp,pos,p,is_erhua)
local m_cmn_pron = require("Module:zh/data/cmn-pron")
local wordlist_1, wordlist_2, wordlist_3 = require("Module:zh/data/wordlist/1"), require("Module:zh/data/wordlist/2"), require("Module:zh/data/wordlist/3")
if not is_erhua then is_erhua = false end
-- NOTE(review): all three variables receive the same `args` table here; presumably
-- individual positional arguments were intended - confirm against upstream.
if type(text) == 'table' then text,comp,pos = text.args,text.args,text.args or 'n' end
comp = comp or ''
local q = {}
local sum = 0
-- NOTE(review): as written this selects a whole wordlist module rather than an entry
-- for `text`; a lookup key seems to be missing.
local wordlist_result = wordlist_1 or wordlist_2 or wordlist_3 or nil
local moe_pron = wordlist_result and mw.text.split(wordlist_result, " ") or {}
local textconv = M.ts(text)
local length = len(text)
-- Erhua forms: ignore the trailing character.
if is_erhua == true then
length = length - 1
textconv = sub(textconv, 1, length)
end
-- `text` is reused from here on as the output accumulator.
text = ''
-- Sum the digits of `comp` (skipped for '', '12', '21' and erhua forms).
if comp ~= '' and comp ~= '12' and comp ~= '21' and not is_erhua then
for i = 1, len(comp) do
sum = sum + tonumber(sub(comp,i,i))
-- NOTE(review): this replaces the table `q` with the string 'y'; presumably a
-- position in `q` was meant to be marked - confirm.
q = 'y'
end
end
if not p then p={} end
for i = 1, length do
if p and p ~= '' then --pronunciation supplied
-- NOTE(review): `p` is concatenated whole on every iteration; an index appears lost.
text = text .. p
else
local char = sub(textconv,i,i)
-- Characters in this list are copied into the output unchanged.
if ('一不期績绩蹟跡迹嵌框微突帆藩擊击夾夹鞠拈夕汐矽昔惜息危椰濤涛叔寂馴驯築筑質质播究菌矻識识穴膜餾馏企辱署偽伪蹈諷讽斂敛坊樸朴儲储剖檔档髮轍辙賜赐堤壑酵括懾慑蝸蜗淆攜携崖癌暫暂蟄蛰驟骤液血酪嘌覲幀蕁曳室癬癣亞亚穹褐貯贮淑場场踮鱒跌擁綏胺翕煦伐髮眶湮櫛栉萎閩闽銨铵鑿凿鈸钹謅诌雌綜综摑掴癖梵'):find(char, 1, true) then
text = text .. char
else
-- NOTE(review): `moe_pron` is a table and is always truthy, so the fallbacks are
-- never reached as written; per-character indexes seem to be missing.
char = moe_pron or m_cmn_pron.py or char
-- NOTE(review): the pattern '^' matches any string; presumably a character class
-- followed the anchor - confirm against upstream.
if i ~= 1 and find(char,'^') then
char = "'" .. char
end
text = text .. char
end
end
if q == 'y' and i ~= length and not is_erhua and pos ~= 'cy' then text = text .. ' ' end
end
-- Drop an apostrophe that directly follows a space.
text = gsub(text," '"," ")
-- Proper nouns: split on spaces, upper-case the first letter, re-join.
if pos == 'pn' or pos == 'propn' then
local characters = mw.text.split(text,' ')
for i = 1, #characters do
-- NOTE(review): this passes and overwrites the whole list; presumably each element
-- was meant to be processed individually - confirm.
characters = mw.language.getContentLanguage():ucfirst(characters)
end
text = table.concat(characters,' ')
end
return text
end
--- Erhua variant of M.pytemp.
-- Forwards all arguments unchanged and switches the erhua flag on.
function M.pytemp_er(text, comp, pos, p)
	local is_erhua = true
	return M.pytemp(text, comp, pos, p, is_erhua)
end
--- Builds the `{{zh-forms}}` wikitext for an entry.
-- @param title  the entry title, or a frame-like table; in the latter case the
--               title and `comp` are taken from its positional arguments 1 and 2
-- @param comp   value emitted as `|type=`
-- @param e      raw text inserted directly after the template name, behind a pipe
-- @param alt    value emitted as `|alt=`
-- @param gloss  value emitted as `|gloss=`
-- @param lit    value emitted as `|lit=`
-- @param t2     value emitted as `|t2=`
-- @param t3     value emitted as `|t3=`
-- @param delink value emitted as `|delink=`
-- @return the template call as a string; nil or empty parameters are omitted
function M.hzbox(title,comp,e,alt,gloss,lit,t2,t3,delink)
	if type(title) == 'table' then
		-- Previously both variables were set to the whole `args` table, which cannot
		-- be concatenated below; read the individual positional arguments instead.
		local frame_args = title.args
		title, comp = frame_args[1], frame_args[2]
	end
	local id = M.ts_determ(title)
	local parts = { '{{zh-forms' }
	if e and e ~= "" then parts[#parts + 1] = '|' .. e end
	-- M.ts's result is emitted as `|s=` when the title is classified as 'trad'.
	if id == 'trad' then
		parts[#parts + 1] = '|s=' .. M.ts(title)
	end
	-- Append `|name=value` only when a non-empty value was given.
	local function add_named(name, value)
		if value and value ~= "" then
			parts[#parts + 1] = '|' .. name .. '=' .. value
		end
	end
	add_named('t2', t2)
	add_named('t3', t3)
	add_named('type', comp)
	add_named('alt', alt)
	add_named('gloss', gloss)
	add_named('lit', lit)
	add_named('delink', delink)
	parts[#parts + 1] = '}}'
	return table.concat(parts)
end
-- Builds the {{zh-hanzi-box}} call used for erhua entries (see M.create).
-- title: the entry title, or a frame-like table whose `args` are read instead.
function M.hzbox_er(title)
-- NOTE(review): this assigns the whole `args` table; presumably the first positional
-- argument was intended - confirm against upstream.
if type(title) == 'table' then title = title.args end
local length = len(title)
local id
-- The final character decides the variant: '兒' gives 'trad', anything else 'simp'.
if sub(title, length, length) == '兒' then id = 'trad' else id = 'simp' end
-- Strip the final character.
title = sub(title, 1, length-1)
local text = '{{zh-hanzi-box|'
-- NOTE(review): both string literals below have unbalanced brackets and the trimmed
-- `title` is never used; the link text inside them appears to have been lost from
-- this copy. Restore from the upstream module.
if id == 'simp' then
text = (text .. ']]|]}}')
else
text = (text .. ']|]]}}')
end
return text
end
--- Entry point for erhua entries: runs M.create with the erhua flag set.
function M.create_er(f)
	local is_erhua = true
	return M.create(f, is_erhua)
end
-- Appends a level-4 section (e.g. "Synonyms", "Derived terms") to `text`.
-- text: wikitext accumulated so far
-- name: section heading
-- sem:  the section's content; nothing is appended when it is nil or ''
-- Returns the extended text, or the original text when the automatic derived-terms
-- expansion produced an empty list.
function M.semantics(text,name,sem)
local orig_text = text
if sem and sem ~= '' then
text = (text .. '\n\n====' .. name .. '====')
if name == 'Derived terms' or name == 'Compounds' then
-- A value of 'a', or one starting with 'a,', requests automatic generation through
-- the zh-new/der template; the remaining comma-separated parts become its arguments.
if sem == 'a' or find(sem, '^a,') then
local zh_der = mw.getCurrentFrame():preprocess('{{subst:zh-new/der' .. gsub(gsub(sem, '^a', ''), ',', "|") .. '}}')
-- Nothing found: return the text without the heading added above.
if zh_der == '{{col3|zh|}}' then return orig_text end
text = text .. '\n' .. zh_der
else
text = text .. '\n{{col3|zh|'
-- NOTE(review): `sem` is concatenated whole on every iteration here and in the
-- loop further down; presumably the i-th element was intended - confirm.
for i = 1, #sem do
text = text .. '|' .. sem
end
text = text .. '}}'
end
else
-- Other sections: one bulleted {{zh-l}} line per iteration.
for i = 1, #sem do
text = text .. '\n* {{zh-l|' .. sem .. '}}'
end
end
end
return text
end
--- Maps a part-of-speech name back to the key under which it is stored.
-- The headword alias table is searched first, then the section-title table; the key
-- of the first entry whose value equals `pos` is returned. If no entry matches,
-- `pos` is returned unchanged.
local function checkpos(pos)
	for _, aliases in ipairs({ pos_aliases_head, pos_aliases_title }) do
		for code, name in pairs(aliases) do
			if name == pos then
				return code
			end
		end
	end
	return pos
end
--- Returns the section heading for a part-of-speech code.
-- @param pos part-of-speech code; nil and '' are treated as 'n'
-- @return the heading stored in pos_aliases_title for `pos`, or `pos` itself when
--         the table has no entry for it
function M.postitle(pos)
	if not pos or pos == '' then pos = 'n' end
	-- Look the code up; the table itself was returned before, which is always truthy
	-- and made the `or pos` fallback unreachable.
	return pos_aliases_title[pos] or pos
end
--- Lower-cased form of the section heading that M.postitle yields for `pos`.
function M.poshead(pos)
	local heading = M.postitle(pos)
	return mw.ustring.lower(heading)
end
--- Returns the name fragment used to build the Vietnamese headword template name.
-- @param pos part-of-speech code; nil and '' are treated as 'n'
-- @return the value stored in pos_aliases_head_vi for `pos`, or `pos` itself when
--         the table has no entry for it
function M.poshead_vi(pos)
	if not pos or pos == '' then pos = 'n' end
	-- Look the code up; the table itself was returned before, which is always truthy
	-- and made the `or pos` fallback unreachable.
	return pos_aliases_head_vi[pos] or pos
end
-- Builds a {{col3|zh|...}} list of words containing the current page's title, drawn
-- from the wordlist data modules plus any positional arguments given by the caller.
-- frame: the invoking frame; arguments are read from its parent frame.
function M.newDer(frame)
local title = mw.title.getCurrentTitle().subpageText
local prefix = "Module:zh/data/wordlist/"
local args = frame:getParent().args
-- NOTE(review): the five options below all read the whole `args` table; the names of
-- the individual arguments appear to have been lost from this copy.
local limit = args and tonumber(args) or false
local char_pronunciation = args or false
local fold = args or false
local hide_pron = args or false
local big = args or false
local result = {}
-- Start with the caller's positional arguments.
for _, arg in ipairs(args) do
table.insert(result, arg)
end
local i = 1
if big then
-- "big" lists: modules big1 and big2, each exposing a `list` array.
while i < 3 do
local wordlist = require(prefix .. 'big' .. tostring(i)).list
for _, word in ipairs(wordlist) do
-- Keep words that match the title (the title is used as a pattern), are not the
-- title itself and, for single-character titles, are at most `limit` (default 4)
-- characters long.
if match(word, title) and word ~= title and not (len(title) == 1 and len(word) > (limit or 4)) then
table.insert(result, word)
end
end
i = i + 1
end
else
-- Regular lists: modules 1 to 3, iterated as word/pronunciation pairs.
while i < 4 do
local wordlist = require(prefix .. tostring(i))
for word, pronunciation in pairs(wordlist) do
if match(word, title) and word ~= title and not (len(title) == 1 and len(word) > (limit or 4)) then
if char_pronunciation then
-- NOTE(review): this compares the table returned by mw.text.split with
-- `char_pronunciation`; presumably one element of the split was meant.
if mw.text.split(pronunciation, " ") == char_pronunciation then
table.insert(result, word)
end
else
table.insert(result, word)
end
end
end
i = i + 1
end
end
-- NOTE(review): looks like a de-duplication pass, but `hash` and `res` are tested and
-- overwritten as whole values and `section` is unused; the table indexes appear to
-- be missing - confirm against upstream.
local hash, res = {}, {}
for _, element in ipairs(result) do
local section = mw.text.split(element, ":")
if not hash then
res = element
hash = true
end
end
return "{{col3|zh|" .. table.concat(res, "|") .. "}}"
end
-- Checks a Cantonese pronunciation string against the entries of the title's
-- individual characters and returns {{attention}} notices for readings not found.
-- title: the entry title
-- c:     the Cantonese pronunciation string
-- Returns the notices joined by newlines and followed by a blank line, or '' when
-- there are none.
function M.check_yue(title, c)
local ret = {}
if mw.ustring.len(title) > 1 then -- do not do anything on hanzi pages. cf. 宑&diff=49855439
-- Turn ", " and "..." (with optional surrounding spaces) into single spaces, then
-- treat each remaining comma-separated part as one phrase.
c = gsub(c, ", ", " ")
c = gsub(c, " *%.%.%. *", " ")
for phrase in mw.text.gsplit(c, ",") do
local c_set = mw.text.split(phrase, " ")
-- NOTE(review): `i` is assigned without `local`, so it is a global here.
i = 0
-- NOTE(review): the gsub pattern is empty in this copy; presumably it removed some
-- class of characters from the title before splitting - confirm.
for ch in mw.text.gsplit(mw.ustring.gsub(title, "", ""), "") do
i = i + 1
if mw.title.new(ch).exists then
local content = mw.title.new(ch):getContent()
-- NOTE(review): the capture "(+)" has lost its character class.
local templates = mw.ustring.gmatch(content, "|c=(+)")
local prons = {}
for template in templates do
for indiv_pron in mw.text.gsplit(template, ",") do
-- NOTE(review): presumably meant to record `indiv_pron` as a key of `prons`.
prons = true
end
end
-- NOTE(review): the stray `]` makes this line a syntax error; the index expression
-- after `prons` is missing, and `c_set` below likewise seems to need an index.
if not prons] then
table.insert(ret, "{{attention|yue|Cantonese pronunciation '" .. c_set .. "' not found in the entry " .. ch .. ".}}")
end
end
end
end
end
return #ret > 0 and (table.concat(ret, '\n') .. '\n\n') or ''
end
-- Generates the full wikitext of a new Chinese entry for the current page.
-- f:        the invoking frame; template arguments are read from its parent frame
-- is_erhua: when true, the erhua layout is produced (see M.create_er)
-- Returns the entry wikitext as a string.
-- NOTE(review): throughout this function the argument names in `params` and in the
-- `args` lookups are missing from this copy (entries start with `=`, and every local
-- is assigned the whole `args` table). The comments below describe only what the
-- remaining code shows; restore the names from the upstream module.
function M.create(f,is_erhua)
if not is_erhua then is_erhua = false end
local title = mw.title.getCurrentTitle().text
-- Parameter specification handed to Module:parameters (names missing, see above).
local params = {
= {}, = {alias_of = "type"},
= {list = true, allow_holes = true},
= {list = true, allow_holes = true},
= {list = true, allow_holes = true},
= {list = "e", allow_holes=true}, = {list = "etym", allow_holes=true}, = {list = "etymology", allow_holes=true}, = {list = true, allow_holes=true},
= {}, = {alias_of = "k"}, = {alias_of = "k"},
= {}, = {alias_of = "kt"}, = {alias_of = "kt"}, = {alias_of = "kt"}, = {alias_of = "kt"}, = {alias_of = "kt"}, = {alias_of = "kt"}, = {alias_of = "kt"},
= {}, = {alias_of = "ke"}, = {alias_of = "ke"}, = {alias_of = "ke"},
= {}, = {alias_of = "v"}, = {alias_of = "v"},
= {}, = {alias_of = "ve"}, = {alias_of = "ve"}, = {alias_of = "ve"}, = {alias_of = "ve"},
= {list = true, allow_holes=true}, = {list = true, allow_holes=true}, = {list = true, allow_holes=true},
= {},
= {}, = {alias_of = "e"}, = {alias_of = "e"}, = {alias_of = "e"}, = {alias_of = "e"}, = {alias_of = "e"},
= {list = true}, = {list = true},
= {list = true}, = {list = true},
= {list = true}, = {list = true},
= {list = true}, = {list = true},
= {list = true}, = {list = true}, = {list = true},
= {list = true}, = {list = true}, = {list = true}, = {list = true},
= {list = true}, = {list = true},
= {list = true}, = {list = true}, = {list = true}, = {list = true},
= {}, = {alias_of = "wp"}, = {alias_of = "wp"},
= {list = true},
= {list = true},
= {list = true},
= {}, = {alias_of = "pic"}, = {alias_of = "pic"}, = {alias_of = "pic"},
= {}, = {alias_of = "piccap"}, = {alias_of = "piccap"}, = {alias_of = "piccap"},
= {}, = {alias_of = "er"},
= {}, = {alias_of = "tl"}, = {alias_of = "tl"}, = {alias_of = "tl"}, = {alias_of = "tl"}, = {alias_of = "tl"},
= {}, = {alias_of = "a"}, = {alias_of = "a"}, = {alias_of = "a"}, = {alias_of = "a"},
= {}, = {alias_of = "alt"}, = {alias_of = "alt"}, = {alias_of = "alt"}, = {alias_of = "alt"},
= {}, = {alias_of = "c"}, = {alias_of = "c"},
= {}, = {alias_of = "mn"}, = {alias_of = "mn"},
= {}, = {alias_of = "w"}, = {alias_of = "w"},
= {},
= {},
= {},
= {},
= {},
= {},
= {},
= {},
= {},
= {},
= {},
= {},
= {},
= {},
= {},
= {},
= {},
= {},
= {},
= {},
= {},
= {},
= {},
= {},
= {},
= {},
= {},
= {},
= {},
= {},
}
local args = require("Module:parameters").process(f:getParent().args, params)
local comp = args or ""
-- Parallel lists of part-of-speech codes and definitions.
local pos = {}
local def = {}
for i=1,math.max(args.maxindex/2, args.maxindex, args.maxindex) do
table.insert(pos, args or args or "")
table.insert(def, args or args or "")
end
-- Size of a list argument: `maxindex` when present, else the `#` length.
local function length(array)
return array.maxindex or #array
end
-- Merges several alias lists into one list as long as the longest of them, using ""
-- for positions no alias supplies.
-- NOTE(review): `length(arg)` and `current = arg` operate on the outer list itself;
-- presumably per-alias, per-position indexes were intended.
local function expand(arg)
local result = {}
local maximum = 0
for i=1,#arg do if length(arg) > maximum then maximum = length(arg) end end
local current = nil
for i=1,maximum do
current = nil
for j=1,#arg do
if current then
break
else
current = arg
end
end
current = current or ""
table.insert(result, current)
end
return result
end
-- Unpack the processed arguments into locals; scalar values default to "".
local etyms = expand({args, args, args, args})
local ko = args or ""
local kotrans = args or "" -- currently unused
local kodef = args or def or ""
local vi = args or ""
local videf = args or def or ""
local p = expand({args, args, args})
local glyph_origin = args or ""
local etym = args or ""
local syn = expand({args, args})
local ant = expand({args, args})
local hyper = expand({args, args})
local hypo = expand({args, args})
local coo = expand({args, args, args})
local der = expand({args, args, args, args})
local rel = expand({args, args})
local also = expand({args, args, args, args})
local wp = args or ""
local cat = args
local poscat = args
local rawcat = args
local pic = args or ""
local piccap = args or ""
local er = args or ""
local tl = args or ""
local audio = args or ""
local alt = args or ""
local m = args or ""
local m_s = args or ""
local m_x = args or ""
local m_nj = args or ""
local dg = args or ""
local c = args or ""
local c_dg = args or ""
local c_t = args or ""
local c_yj = args or ""
local g = args or ""
local h = args or ""
local j = args or ""
local mb = args or ""
local md = args or ""
local mn = args or ""
local mn_t = args or ""
local mn_l = args or ""
local px = args or ""
local sp = args or ""
local w = args or ""
local x = args or ""
local x_h = args or ""
local x_l = args or ""
local mc = args or ""
local oc = args or ""
local ts = args or ""
local gloss = args or ""
local lit = args or ""
local t2 = args or ""
local t3 = args or ""
local delink = args or ""
local vtype = args or ""
local tlb = args or ""
local text = ''
-- NOTE(review): the two lines below assign to the whole `pos` list; presumably
-- individual elements were intended.
if not pos or pos == '' then pos = 'n' end
for i=1,#pos do pos = checkpos(pos) end
text = (text .. '==Chinese==\n')
-- Titles classified as 'simp' get only a {{zh-see}} redirect, unless overridden by
-- `ts` being "trad". The `:format` call has no format specifiers, so its argument
-- is ignored.
if M.ts_determ(title) == 'simp' and ts ~= "trad" then
return text .. '{{zh-see|' .. M.st(title) .. ('}}'):format(mw.title.getCurrentTitle().text)
end
-- From here on `length` is the title's character count; this overwrites the local
-- helper function of the same name defined above.
length = len(title)
local noerhua = sub(title,1,length-1) -- currently unused
local erhua = sub(title,length,length) -- currently unused
-- Forms box, then optional Wikipedia box and picture.
text = text .. (is_erhua and M.hzbox_er(title) or M.hzbox(title,comp,table.concat(etyms,'|'),alt,gloss,lit,t2,t3,delink))
if wp ~= '' then text = (text .. '\n{{zh-wp' .. (wp ~= 'y' and '|' .. wp or '') .. '}}') end
-- NOTE(review): the picture markup in this literal appears to have been lost.
if pic ~= '' then text = (text .. '\n]') end
if is_erhua then
-- Erhua entries: Mandarin only, with "r" appended to the generated reading.
text = (text .. '===Pronunciation===\n{{zh-pron\n|m=' .. M.pytemp_er(title,comp,pos,p) .. 'r\n|cat=' .. table.concat(pos,',') .. '\n}}\n\n')
else
text = (text .. '\n\n')
if glyph_origin ~= '' then text = (text .. '===Glyph origin===\n' .. glyph_origin .. '\n\n') end
if etym ~= '' then text = (text .. '===Etymology===\n' .. etym .. '\n\n') end
text = (text .. '===Pronunciation===\n{{zh-pron')
-- Mandarin: '-' suppresses the line, an explicit value is used as given, otherwise
-- the reading is generated by M.pytemp.
if m ~= '-' then
if m ~= '' then
local m_pron = m
text = (text .. '\n|m=' .. m_pron)
else
local m_pron = gsub(M.pytemp(title,comp,pos,p), ',', ', ')
text = (text .. '\n|m=' .. m_pron)
if er ~= '' then text = (text .. ',er=' .. er) end
if tl ~= '' then text = (text .. ',tl=y') end
end
end
-- For each remaining parameter: always emit the line on single-character titles,
-- otherwise only when a value was given. `c`, `h` and `mn` fall back to an
-- automatic lookup when empty, and '-' suppresses them on multi-character titles.
if length == 1 or m_s ~= '' then text = (text .. '\n|m-s=' .. (m_s or "")) end
if length == 1 or m_x ~= '' then text = (text .. '\n|m-x=' .. (m_x or "")) end
if length == 1 or m_nj ~= '' then text = (text .. '\n|m-nj=' .. (m_nj or "")) end
if length == 1 or dg ~= '' then text = (text .. '\n|dg=' .. (dg or "")) end
if c == '' then c = M.check_pron(title, 'yue', length) or '' end
if length == 1 or (c ~= '' and c ~= '-') then text = (text .. '\n|c=' .. (c or "")) end
if length == 1 or c_dg ~= '' then text = (text .. '\n|c-dg=' .. (c_dg or "")) end
if length == 1 or c_t ~= '' then text = (text .. '\n|c-t=' .. (c_t or "")) end
if length == 1 or c_yj ~= '' then text = (text .. '\n|c-yj=' .. (c_yj or "")) end
if length == 1 or g ~= '' then text = (text .. '\n|g=' .. (g or "")) end
if h == '' then h = M.check_pron(title, 'hak') or '' end
if length == 1 or (h ~= '' and h ~= '-') then text = (text .. '\n|h=' .. (h and ("pfs=" .. h) or "")) end
if length == 1 or j ~= '' then text = (text .. '\n|j=' .. (j or "")) end
if length == 1 or mb ~= '' then text = (text .. '\n|mb=' .. (mb or "")) end
if length == 1 or md ~= '' then text = (text .. '\n|md=' .. (md or "")) end
if mn == '' then mn = check_pron_nan(title) or '' end
if length == 1 or (mn ~= '' and mn ~= '-') then text = (text .. '\n|mn=' .. (mn or "")) end
if length == 1 or mn_t ~= '' then text = (text .. '\n|mn-t=' .. (mn_t or "")) end
if length == 1 or mn_l ~= '' then text = (text .. '\n|mn-l=' .. (mn_l or "")) end
if length == 1 or sp ~= '' then text = (text .. '\n|sp=' .. (sp or "")) end
if length == 1 or px ~= '' then text = (text .. '\n|px=' .. (px or "")) end
if length == 1 or w ~= '' then text = (text .. '\n|w=' .. (w or "")) end
if length == 1 or x ~= '' then text = (text .. '\n|x=' .. (x or "")) end
if length == 1 or x_h ~= '' then text = (text .. '\n|x-h=' .. (x_h or "")) end
if length == 1 or x_l ~= '' then text = (text .. '\n|x-l=' .. (x_l or "")) end
if audio ~= '' then text = (text .. '\n|ma=') if audio ~= 'y' then text = (text .. audio) else text = (text .. 'y') end end
if length == 1 or mc ~= '' then text = (text .. '\n|mc=' .. (mc ~= "" and mc or 'y')) end
if length == 1 or oc ~= '' then text = (text .. '\n|oc=' .. (oc ~= "" and oc or 'y')) end
text = (text .. '\n|cat=' .. table.concat(require('Module:table').removeDuplicates(pos),',') .. '\n}}\n\n')
-- The Cantonese cross-check is best-effort: a failure inside it is ignored.
local pcall_success, yue_check = pcall(M.check_yue, title, c)
if pcall_success and c and c ~= '' and c ~= '-' then text = text .. yue_check end
end
-- First part-of-speech section: heading, headword line, optional label, definition.
text = (text .. '===' .. (length == 1 and "Definitions" or M.postitle(pos)) .. '===\n')
if length == 1 then
text = (text .. '{{head|zh|hanzi}}') -- an alias for 'Han character', see ]
else
if M.poshead(pos) == "verb" then
text = (text .. '{{zh-verb' .. (vtype ~= '' and '|type=' .. vtype or '') .. '}}')
else
text = (text .. '{{head|zh|' .. M.poshead(pos) .. '}}')
end
end
if tlb ~= "" then
text = (text .. ' {{tlb|zh|' .. tlb ..'}}')
end
text = (text.. '\n\n')
if is_erhua then
text = text .. '# {{lb|zh|Mandarin}} {{erhua form'
if def and def ~= '' then text = text .. '|' .. def end
text = text .. '}}'
else
text = (text .. '# ' .. ((def and def ~= "") and def or "{{rfdef|zh}}"))
end
-- Semantic-relation subsections. A synonyms value beginning with "dial" is emitted
-- as a "zh-..." template call instead of a list.
if syn then
if match(syn, "^dial") then
text = text .. "\n\n====Synonyms====\n{{zh-" .. syn:gsub('=', '|') .. "}}"
else
text = M.semantics(text,'Synonyms',syn)
end
end
text = M.semantics(text,'Antonyms',ant)
text = M.semantics(text,'Hypernyms',hyper)
text = M.semantics(text,'Hyponyms',hypo)
text = M.semantics(text,'Coordinate terms',coo)
text = M.semantics(text,length == 1 and 'Compounds' or 'Derived terms',der)
text = M.semantics(text,'Related terms',rel)
-- Additional part-of-speech sections (second entry of `pos` onwards).
for i=2,#pos do
text = text .. '\n\n===' .. M.postitle(pos) .. '===\n'
text = text .. '{{head|zh|' .. M.poshead(pos) .. '}}\n\n'
if is_erhua then
text = text .. '# {{lb|zh|Mandarin}} {{erhua form'
if def ~= '' then text = text .. '|' .. def end
text = text .. '}}'
else
text = text .. '# ' .. def
end
end
if #also > 0 then
text = (text .. '\n\n====See also====')
for i=1,#also do
text = (text .. '\n* {{zh-l|' .. also .. '}}')
end
end
-- Categories.
if #cat > 0 or #poscat > 0 or #rawcat > 0 then
text = text .. "\n"
end
local chinese_rawcats = {}
local mandarin_rawcats = {}
local mandarin_poscats = {}
-- Sort raw category names by prefix: "Mandarin X" and "Chinese X" become
-- part-of-speech categories for cmn and zh respectively; other names containing
-- "Mandarin" become raw cmn categories; the rest are raw zh categories.
if #rawcat > 0 then
for _, rc in ipairs(rawcat) do
local c = rc:match("^Mandarin (.*)$")
if c then
table.insert(mandarin_poscats, c)
end
if not c then
c = rc:match("^Chinese (.*)$")
if c then
table.insert(poscat, c)
end
end
if not c then
c = rc:match("^(.*Mandarin.*)$")
if c then
table.insert(mandarin_rawcats, c)
end
end
if not c then
table.insert(chinese_rawcats, rc)
end
end
end
if #cat > 0 then text = (text .. "\n{{C|zh|" .. table.concat(cat, "|") .. "}}") end
if #poscat > 0 then text = (text .. "\n{{cln|zh|" .. table.concat(poscat, "|") .. "}}") end
if #mandarin_poscats > 0 then text = (text .. "\n{{cln|cmn|" .. table.concat(mandarin_poscats, "|") .. "}}") end
if #chinese_rawcats > 0 then text = (text .. "\n{{cat|zh|" .. table.concat(chinese_rawcats, "|") .. "}}") end
if #mandarin_rawcats > 0 then text = (text .. "\n{{cat|cmn|" .. table.concat(mandarin_rawcats, "|") .. "}}") end
-- Optional Korean and Vietnamese sections.
if ko ~= '' then
text = text .. '\n\n==Korean==\n{{ko-hanjatab}}\n\n===Noun===\n{{ko-noun|hj|hangeul=' .. ko .. '}}\n\n# {{hanja form of|' .. ko .. '|' .. kodef .. '}}'
end
if vi ~= '' then
text = text .. '\n\n==Vietnamese==\n{{vi-hantutab}}\n\n===' .. M.postitle(pos) .. '===\n{{vi-' .. M.poshead_vi(pos) .. '|sc=Hani}}\n\n# {{han tu form of|' .. vi .. '|' .. videf .. '}}'
end
return text
end
return M