-- This module can be documented on the Modul:zh/doc page.
-- Module table; the public entry points are attached to it as M.<name>.
local M = {}
-- Local shortcuts for the Unicode-aware string functions from mw.ustring.
local len, sub, gsub, match, find =
	mw.ustring.len, mw.ustring.sub, mw.ustring.gsub, mw.ustring.match, mw.ustring.find
-- Lazily-required Module:cmn-pron (loaded by M.extract_pron on first use).
local cmn_pron
--- Wrap text in a span tagged as Han script, language "zh".
-- @param text string to wrap (concatenated as-is, no escaping)
-- @return the resulting HTML string
local function format_Chinese_text(text)
	local open_tag, close_tag = '<span class="Hani" lang="zh">', '</span>'
	return open_tag .. text .. close_tag
end
--- Wrap a romanisation in italic "tr Latn" markup.
-- @param text romanisation string, or nil/false when there is none
-- @return the HTML string, or nil when `text` is nil/false
local function format_rom(text)
	if not text then
		return nil
	end
	return '<i><span class="tr Latn">' .. text .. '</span></i>'
end
--- Surround a gloss with curly double quotation marks.
-- @param text gloss string, or nil/false when there is none
-- @return the quoted string, or nil when `text` is nil/false
local function format_gloss(text)
	if not text then
		return nil
	end
	return '“' .. text .. '”'
end
--- Convert a UTF-8 string character by character through a lookup table.
-- Every UTF-8 encoded character of `s` is looked up in `tab`; characters with
-- no entry are kept unchanged (string.gsub keeps the match when the table
-- value is nil or false).
-- @param s   UTF-8 string to convert
-- @param tab table mapping single characters to their replacement strings
-- @return the converted string, followed by string.gsub's match count
local function replace_chars(s, tab)
	-- Byte-level UTF-8 character pattern (one ASCII/lead byte plus any
	-- continuation bytes); plain string.gsub is used for speed.
	-- The previous pattern "*" only ever matched a literal asterisk.
	return string.gsub(s, "[%z\1-\127\194-\244][\128-\191]*", tab)
end
--- Classify a string by which character-conversion table its characters hit.
-- Walks the string one code point at a time:
--   * returns 'trad' as soon as a character is a key of the `ts` table
--     (Module:zh/data/ts);
--   * returns 'simp' once a third character is found in the `st` table
--     (Module:zh/data/st), or at the end if at least one was found;
--   * returns 'both' when no character is in either table.
-- @param f string to inspect, or a frame-like table whose first positional
--          argument (`f.args[1]`) is the string
-- @return 'trad', 'simp' or 'both'
function M.ts_determ(f)
	local m_ts_data = mw.loadData("Module:zh/data/ts")
	local m_st_data = mw.loadData("Module:zh/data/st")
	local text = type(f) == 'table' and f.args[1] or f
	local i = 0 -- number of characters seen so far that are keys of the st table
	for cp in mw.ustring.gcodepoint(text) do
		local ch = mw.ustring.char(cp)
		-- Look up the individual character; testing the table itself would
		-- be truthy for every input.
		if m_ts_data.ts[ch] then return 'trad' end
		if m_st_data.st[ch] then
			if i > 1 then return 'simp' else i = i + 1 end
		end
	end
	return (i > 0 and 'simp' or 'both')
end
--- Convert a string through the `ts` character table of Module:zh/data/ts.
-- @param f string to convert, or a frame-like table whose first positional
--          argument (`f.args[1]`) is the string
-- @return the converted string
function M.ts(f)
	local m_ts_data = mw.loadData("Module:zh/data/ts")
	-- Take the first positional argument when called with a frame; passing
	-- the whole args table on to string.gsub would raise an error.
	local text = type(f) == 'table' and f.args[1] or f
	text = replace_chars(text, m_ts_data.ts)
	return text
end
--- Convert a string through the `st` character table of Module:zh/data/st.
-- @param f string to convert, or a frame-like table whose first positional
--          argument (`f.args[1]`) is the string
-- @return the converted string
function M.st(f)
	local m_st_data = mw.loadData("Module:zh/data/st")
	-- Take the first positional argument when called with a frame; passing
	-- the whole args table on to string.gsub would raise an error.
	local text = type(f) == 'table' and f.args[1] or f
	text = replace_chars(text, m_st_data.st)
	return text
end
-- Build a romanisation string for `text`, one character at a time.
-- Loads Module:zh/data/cmn-pron and consults its `py` field for each character
-- unless a pronunciation is supplied through `p`.
-- Parameters:
--   text     - string to convert, or a table with an `args` field from which
--              all five parameters are taken
--   comp     - digit string whose digits are summed to decide where spaces go;
--              ignored for '', '12', '21', four-character idioms and erhua
--   pos      - part of speech: 'cy'/'Idiom'/'idiom' affect spacing, 'pn'/'propn'
--              trigger ucfirst on the space-separated pieces
--   p        - supplied pronunciation(s); defaults to an empty table
--   is_erhua - when truthy, the loop stops one character early and 'r' is
--              appended to the result
-- Returns the assembled string.
-- NOTE(review): several expressions below (`text.args` repeated five times,
-- `q = 'y'` / `q == 'y'`, `text .. p`, `m_cmn_pron.py or m_cmn_pron.py`, the
-- bare '^' pattern, `characters = ...ucfirst(characters)`) read as if an index
-- or character class is missing from this copy -- confirm against the upstream
-- module before relying on them.
function M.py(text, comp, pos, p, is_erhua)
local m_cmn_pron = mw.loadData("Module:zh/data/cmn-pron")
if not is_erhua then is_erhua = false end
-- Called with a frame-like table: pull the parameters out of its args.
if type(text) == 'table' then
text, comp, pos, p, is_erhua = text.args, text.args, text.args, text.args, text.args
end
comp = comp or ''
local q = {}
local sum = 0
local length = len(text)
if is_erhua then length = length - 1 end
-- Keep the input in `textconv`; `text` is reused as the output accumulator.
local textconv = text
text = ''
if comp ~= '' and comp ~= '12' and comp ~= '21' and not ((pos == 'cy' or pos == 'Idiom' or pos == 'idiom') and length == 4) and not is_erhua then
for i = 1, len(comp) do
sum = sum + tonumber(sub(comp,i,i))
q = 'y'
end
end
if not p then p={} end
-- `initial` tracks whether the next syllable starts a word, in which case no
-- apostrophe is inserted before it.
local initial = true
for i = 1, length do
if p and p ~= '' then --pronunciation supplied
text = text .. p
else
local char = sub(textconv,i,i)
char = m_cmn_pron.py or m_cmn_pron.py or char
if not is_erhua and not initial and find(char,'^') then
text = text .. "'"
end
text = text .. char
initial = char == sub(textconv,i,i)
and sub(textconv,i-3,i) ~= "</b>" --checks for closing bold tag
and (i-2 == 1 or sub(textconv,i-2,i) ~= "<b>" or sub(textconv,i-3,i) == "^<b>") --checks for opening bold tag
and (i-3 == 1 or sub(textconv,i-3,i) ~= "^<b>") --checks for opening bold tag with capitalization
end
if q == 'y' and i ~= length and not is_erhua then text = text .. ' ' end
end
text = gsub(text, "<b>'", "'<b>") --fix bolding of apostrophe
if is_erhua then text = text .. 'r' end
-- Proper nouns: apply ucfirst to the space-separated pieces.
if pos == 'pn' or pos == 'propn' then
local characters = mw.text.split(text,' ')
for i=1,#characters do
characters = mw.language.getContentLanguage():ucfirst(characters)
end
text = table.concat(characters,' ')
end
return text
end
--- Erhua variant of M.py: same arguments, with the erhua flag forced on.
-- @return whatever M.py returns for these arguments with is_erhua = true
function M.py_er(text, comp, pos, p)
	local is_erhua = true
	return M.py(text, comp, pos, p, is_erhua)
end
-- Extract a transcription for `variety` from the zh-pron template on a page.
-- Parameters:
--   title   - page name to read
--   variety - parameter name to look for inside the zh-pron template
--   cap     - when truthy, the first character of the result is upper-cased;
--             also set when the page content matches the ";cap=y" pattern
-- Returns the result of Module:cmn-pron's str_analysis(template, 'link'), or
-- nil when nothing was extracted.
-- NOTE(review): the three patterns below (",()", "zh%-pron*|", "=(+)") look
-- truncated in this copy -- confirm against the upstream module.
-- NOTE(review): when `cap` is truthy but `tr` is still nil, the final gsub
-- receives nil; likewise `title` is not checked before getContent() -- verify.
function M.extract_pron(title, variety, cap)
local tr = nil
-- The local `title` shadows the parameter with the title object.
local title = mw.title.new(title)
local content = title:getContent()
if content then
content = gsub(content, ",()", ";%1")
local template = match(content, "{{zh%-pron*| ?" .. variety .. "=(+)")
cap = cap or find(content, "{{zh%-pron*| ?" .. variety .. "=(+);cap%=y")
if template and template ~= "" then
-- Module:cmn-pron is required only once and cached in the file-level local.
if cmn_pron == nil then
cmn_pron = require("Module:cmn-pron")
end
tr = cmn_pron.str_analysis(template, 'link')
end
end
if cap then
tr = gsub(tr, '^(.)', mw.ustring.upper)
end
return tr
end
-- Format a link to one or more Chinese terms, followed by optional
-- annotations (transliteration, gloss, notes, literal meaning) in parentheses.
-- Parameters:
--   frame         - frame object; its parent's args are used when `args` is nil
--   mention       - when truthy, the term is wrapped in italic "Hani mention"
--                   markup instead of the plain Hani span
--   args          - argument table when called from another module; supplying
--                   it sets `moduleCalled`
--   pagename      - defaults to the current title's text
--   no_transcript - when truthy, no transliteration is looked up or shown
-- Returns the formatted wikitext string ("" when called from a module and
-- parameter processing yields nothing).
-- NOTE(review): the keys of `params`, every `args...`/`words...` subscript, the
-- match patterns and the link markup inside the string literals appear to be
-- missing from this copy; the code is kept exactly as found -- confirm
-- against the upstream module.
-- NOTE(review): `cap` is assigned below without `local`, so it is a global.
function M.link(frame, mention, args, pagename, no_transcript)
local params = {
= {},
= {},
= {},
= {},
= {},
= {},
= {},
= { alias_of = 'gloss' },
}
if mention then
params = {}
end
-- `moduleCalled` records whether the caller passed its own args table.
local moduleCalled
if args then
moduleCalled = true
end
args = args or frame:getParent().args
if not moduleCalled then
params.required = true
end
args = require("Module:parameters").process(args, params)
if moduleCalled then
if not args then
return ""
end
end
pagename = pagename or mw.title.getCurrentTitle().text
-- Work out which arguments hold the term, the transliteration and the gloss.
local text, tr, gloss
if args and match(args, '') then
gloss = args
tr = args
text = args .. '/' .. args
else
text = args
if args then
tr = args
gloss = args
else
if args or (args and (match(args, '') or match(args, 'h?y???g?'))) then
tr = args
gloss = args
else
gloss = args
end
end
end
if args then
tr = args
gloss = gloss or args
end
if text then
if not text:match'%%]' then
-- Split slash-separated forms; for a single form judged 'trad' by
-- M.ts_determ, M.ts output is appended as a second form.
local words = mw.text.split(text, "/", true)
if #words == 1 and M.ts_determ(words) == 'trad' and not match(words, '%*') then
table.insert(words, M.ts(words))
end
-- No transliteration given: strip a leading "^" marker and look the
-- pronunciation up with M.extract_pron.
if not tr and not no_transcript and words then
cap = find(words, "^%^")
words = gsub(words, "^%^", "")
if words then
words = gsub(words, "^%^", "")
end
tr = M.extract_pron(words, "m", cap)
end
for i, word in ipairs(words) do
word = gsub(word, '%*', '')
if mention then
words = '<i class="Hani mention" lang="zh">]</i>'
--[[ (disabled to allow links to, for example, a link to 冥王星#Chinese from 冥王星#Japanese. 18 May, 2016)
elseif word == pagename then
word = format_Chinese_text('<b>' .. word .. '</b>')
]]
else
words = format_Chinese_text(']')
end
end
text = table.concat(words, "/")
else
-- Otherwise hand the text to Module:links and strip asterisks afterwards.
text = require("Module:links").language_link{
term = text,
lang = require("Module:languages").getByCode("zh"),
}
if mention then
text = '<i class="Hani mention" lang="zh">' .. gsub(text, "%*", "") .. '</i>'
else
text = format_Chinese_text(gsub(text, "%*", ""))
end
end
end
if tr == '-' or no_transcript then
tr = nil -- allow translit to be disabled: remove translit if it is "-", just like normal {{l}}
end
local notes = args
local lit = args
-- Collect the annotations and append them in parentheses after the term.
if tr or gloss or notes or lit then
local annotations = {}
if tr then
tr = format_rom(tr)
tr = gsub(tr, "'", "'")
tr = gsub(tr, "#", "")
table.insert(annotations, tr)
end
if gloss then
table.insert(annotations, format_gloss(gloss))
end
table.insert(annotations, notes)
if lit then
table.insert(annotations, "literally " .. format_gloss(lit))
end
annotations = table.concat(annotations, ", ")
text = text .. " (" .. annotations .. ")"
end
return text
end
--- Mention-style variant of M.link: same frame, with the mention flag set.
-- @param frame frame object passed through to M.link
-- @return whatever M.link returns for (frame, true)
function M.mention(frame)
	local mention = true
	return M.link(frame, mention)
end
-- Look up pronunciation data for `text` in a variety-specific data module.
-- Parameters:
--   text           - string to look up, or a table with an `args` field
--   variety        - variety code; "yue" and "nan" get special handling below
--   length         - when 1 together with variety "yue", the
--                    'Module:zh/data/Jyutping character' page is loaded
--   entry_creation - when truthy and variety is "nan", the result is rewritten
--                    by the gsub calls at the end
-- Returns the looked-up value, false when loading fails or nothing is found,
-- or nothing at all when `text` is nil/false.
-- NOTE(review): the keys of `startpoint`/`address`, their subscripts, the
-- `text.args` indices and the `data` subscript appear to be missing from this
-- copy -- confirm against the upstream module.
function M.check_pron(text, variety, length, entry_creation)
if type(text) == 'table' then text, variety = text.args, text.args end
if not text then
return
end
local startpoint, address = { = 51, = 19968, = 19968 }, { = 'yue-word/%03d', = 'hak-pron/%02d', = 'nan-pron/%03d' }
local unit = 1000
local first_char = sub(text, 1, 1)
local result, success, data
if length == 1 and variety == "yue" then
success, data = pcall(mw.loadData, 'Module:zh/data/Jyutping character')
else
-- The data page number is derived from the first character's code point,
-- offset by `startpoint` and divided by `unit`.
local page_index = math.floor((mw.ustring.codepoint(first_char) - startpoint) / unit)
success, data = pcall(mw.loadData,
('Module:zh/data/' .. address):format(page_index)
)
end
-- pcall keeps a failed data-page load from raising; it becomes `false`.
if success then
result = data or false
else
result = false
end
if result then
if variety == "nan" and entry_creation then
-- Replace "á" between hyphens / before a slash / at the end with "仔".
result = gsub(result, "%-á%-", "-仔-")
result = gsub(result, "%-á/", "-仔/")
result = gsub(result, "%-á$", "-仔")
-- Collapse a syllable repeated three times (x-x-x) into "(x)".
result = gsub(result, "^(.+)%-%1%-%1$", "(%1)")
result = gsub(result, "^(.+)%-%1%-%1()", "(%1)%2")
result = gsub(result, "()(.+)%-%1%-%1$", "%1(%2)")
result = gsub(result, "()(.+)%-%1%-%1()", "%1(%2)%3")
end
end
return result
end
-- Render a column table of terms related to the current page.
-- Arguments are read from frame:getParent().args through Module:parameters.
-- One branch builds plain Hani spans (adding a second form when
-- replace_chars with the `ts` table changes the word); the other branch
-- formats each entry with M.link, optionally followed by an italic note.
-- The finished list is passed to Module:columns/old's create_table.
-- NOTE(review): the keys of `params`, the `args...` subscripts, the
-- `split_word...` subscripts, the "^+" pattern and the link markup inside the
-- "]" literals appear to be missing from this copy -- confirm against the
-- upstream module.
-- NOTE(review): `note` is assigned below without `local`, so it is a global.
function M.der(frame)
local params = {
= { list = true },
= { type = "boolean" },
= {},
= {},
= { type = "boolean" },
}
local args = require("Module:parameters").process(frame:getParent().args, params)
local pagename = mw.title.getCurrentTitle().text
local result = {}
local fold = args
local name = args or "Derived terms from"
local title = args and " (<i>" .. args .. "</i>)" or ""
-- Thesaurus pages never get a collapsed table (see the create_table call).
local saurus = (mw.title.getCurrentTitle().nsText == "Thesaurus")
local no_transcript = args
if args then
local m_ts_data = mw.loadData("Module:zh/data/ts")
for _, word in ipairs(args) do
if word then
local formatted_word = {}
if match(word, "/") then
-- Slash-separated forms are listed as given.
for w in mw.text.gsplit(word, "/") do
table.insert(formatted_word, format_Chinese_text("]"))
end
else
table.insert(formatted_word, format_Chinese_text("]"))
-- Add the converted form only when it differs from the input.
local word_s = replace_chars(word, m_ts_data.ts)
if word_s ~= word then
table.insert(formatted_word, format_Chinese_text("]"))
end
end
table.insert(result, table.concat(formatted_word, "/"))
end
end
else
for _, word in ipairs(args) do
-- Text after ";" is a note shown in italics after the link.
note, word = match(word, ";(.+)") or nil, match(word, "^+")
if word then
-- Split at the first ":" into at most two parts.
local colon_pos = word:find(":")
local split_word = colon_pos and { word:sub(1, colon_pos - 1), word:sub(colon_pos + 1) }
or { word }
local slash_pos = split_word:find("/")
local split_word_main = slash_pos and split_word:sub(1, slash_pos - 1) or split_word
if split_word_main ~= pagename then -- synonyms should not be the same as the title
table.insert(result,
M.link(frame, nil, split_word, pagename, no_transcript) ..
(note and " (''" .. note .. "'')" or ""))
end
end
end
end
return
require("Module:columns/old").create_table(
(len(pagename) > 1 and 2 or 3), -- column number
result, -- terms
true, -- alphabetize
"#F5F5FF", -- background
(((#result > 72 or fold) and not saurus) and true or false), -- whether to collapse
"derivedterms", -- class applied to table containing terms
name .. " " .. format_Chinese_text(pagename) .. title, -- title
nil, -- column width
nil, -- line start (??)
nil -- lang: not needed
)
end
-- Lookup table of variety names, used by M.cls for the hover annotations.
-- NOTE(review): the keys of this table appear to be missing from this copy
-- (presumably short variety codes) -- confirm against the upstream module.
local lang_abbrev = {
= 'Mandarin',
= 'Cantonese', = 'Gan', = 'Hakka', = 'Jin',
= 'Min Dong', = 'Min Nan', = 'Teochew',
= 'Wu', = 'Xiang',
}
-- Render the "(Classifier: ...)" annotation for the positional arguments of
-- the parent frame.
-- Each argument is split on ":"; when it has two parts, variety annotations
-- (dotted-underline spans with a title tooltip) are appended after the link.
-- Each entry is linked with M.link (transliteration disabled via tr = "-")
-- and categorised through Module:zh-cat. Categories are emitted only when the
-- current title's namespace text is "".
-- Returns the assembled wikitext string.
-- NOTE(review): the `part...` subscripts and the `lang_abbrev` subscript
-- appear to be missing from this copy -- confirm against the upstream module.
function M.cls(frame)
local args = frame:getParent().args
local result, categories = {}, {}
local m_zh_cat = require("Module:zh-cat")
local space = '<span style="padding-left:4px; padding-right:4px"> </span>'
for _, combination in ipairs(args) do
local part = mw.text.split(combination, ":")
local note
if #part == 2 then
-- Starts with an empty string so the concat below yields a leading space.
local dialect = { "" }
-- Wrap `main_text` in an italic span whose tooltip is `annotation`.
local function annotate(main_text, annotation)
return "<span style=\"border-bottom: 1px dotted #000; cursor:help\" " ..
"title=\"" .. annotation .. "\"><i>" .. main_text .. "</i></span>"
end
for variety in mw.text.gsplit(part, ",") do
table.insert(dialect, annotate(variety, lang_abbrev))
end
note = table.concat(dialect, " ")
else
note = false
end
table.insert(result, M.link(frame, nil, { part or part, tr = "-" }, pagename) .. -- pagename is undefined!
(note or ""))
table.insert(categories, m_zh_cat.categorize("Classifier:" .. (part or part)))
end
return "<span style=\"padding-left:15px; font-size:80%\"><span style=\"background:#EDFFFF\">(''Classifier'': " ..
table.concat(result, ";" .. space) .. ")</span></span>" ..
(mw.title.getCurrentTitle().nsText == "" and table.concat(categories) or "")
end
-- Render a floating sister-project box listing Wikipedia articles.
-- Each positional argument of the parent frame is either "lang" or
-- "lang:pagename"; without a page name the current title's text is used.
-- Raises an error for the codes zh-classical, zh-yue and zh-min-nan (asking
-- for lzh, yue and nan instead), and the lookup line ends in
-- `or error(... is not a recognized language.)`.
-- Returns the assembled HTML/wikitext string.
-- NOTE(review): the keys of `wp_data`, the `args...`/`lang_data...`
-- subscripts, the script-detection pattern and the link markup inside the
-- string literals appear to be missing from this copy -- confirm against the
-- upstream module.
function M.wikipedia(frame)
local args = frame:getParent().args
local title = mw.title.getCurrentTitle().text
-- Each entry holds a display label followed by a language code.
local wp_data = {
= { "Written Standard Chinese<sup>]</sup>", "zh" },
= { "Min Dong", "cdo" },
= { "Gan", "zh" },
= { "Hakka", "hak" },
= { "Classical", "zh" },
= { "Min Nan", "nan" },
= { "Wu", "zh" },
= { "Cantonese", "zh" },
= { "English", "en" },
}
args = args and args or { "zh" }
local result = { '<div class="sister-wikipedia sister-project noprint floatright" style="border: 1px solid #aaa; font-size: 90%; background: #f9f9f9; width: 250px; padding: 4px; text-align: left;"><div style="float: left;">]</div><div style="margin-left: 40px;">] has ' ..
(args and "articles" or "an article") .. ' on:<ul>' }
for _, arg in ipairs(args) do
local lang, pagename = mw.ustring.match(arg, "(.+):(.+)")
if not pagename then lang, pagename = arg, title end
-- Reject the legacy codes with a pointer to the replacement.
if lang == "zh-classical" then error("Please use lzh instead of zh-classical.")
elseif lang == "zh-yue" then error("Please use yue instead of zh-yue.")
elseif lang == "zh-min-nan" then error("Please use nan instead of zh-min-nan.") end
local lang_data = wp_data or error("" .. lang .. " is not a recognized language.")
local annotation = lang_data or false
if lang == "zh" and not args then
annotation = false
elseif annotation then
annotation = " <span style=\"font-size:80%\">(" .. annotation .. ")</span>"
end
-- The script class is "Hani" when the page name matches the pattern, else "Latn".
local script = match(pagename, "") and "Hani" or "Latn"
table.insert(result, '<li><b class="' .. script .. '" lang="' .. lang_data .. '">]</b>' .. (annotation or "") .. '</li>')
end
table.insert(result, '</ul></div></div>')
return table.concat(result)
end
-- Pull a term list out of a "Thesaurus:" page and render it with
-- Template:zh-list, preceded by a small "edit" link to that page.
-- Parameters:
--   frame - frame object; arguments come from frame:getParent().args
--   kind  - template infix to search for ("zh-<kind>-list"); defaults to "syn"
-- Returns the rendered string, or "" when no matching template is found.
-- NOTE(review): the `args...` subscripts, the `set...` keys, the capture
-- pattern "(+)" and the key of the fullUrl query table appear to be missing
-- from this copy -- confirm against the upstream module.
-- NOTE(review): unlike M.extract_pron, `content` is not checked before
-- matching -- confirm getContent() cannot return nil here.
function M.syn_saurus(frame, kind)
local args = frame:getParent().args
local title = mw.title.getCurrentTitle().text
local word = args or title
local content = mw.title.new("Thesaurus:" .. word):getContent()
local temp = kind or "syn"
local template = match(content, "{{zh%-" .. temp .. "%-list|(+)}}")
if template and template ~= "" then
local set = {}
-- Collect the template's items, leaving out the current page title.
for item in mw.text.gsplit(template, "|") do
table.insert(set, item ~= title and item or nil)
end
set = ((args or kind) == "ant") and "Antonyms of" or "Synonyms of"
set = args or nil
if args or #set > 10 then set = 1 end
return '<div style="float: right; clear: right; font-size:60%"><span class="plainlinks">[' ..
tostring(mw.uri.fullUrl("Thesaurus:" .. word, { = "edit" })) ..
' edit]</span></div>' .. frame:expandTemplate{ title = "Template:zh-list", args = set }
else
return ""
end
end
--- Antonym variant of M.syn_saurus: same frame, with kind fixed to "ant".
-- @param frame frame object passed through to M.syn_saurus
-- @return whatever M.syn_saurus returns for (frame, "ant")
function M.ant_saurus(frame)
	local kind = "ant"
	return M.syn_saurus(frame, kind)
end
-- Render a parenthesised list of "~suffix" links built from the current page
-- name and the parent frame's arguments.
-- Entries are first joined with a literal "separator" placeholder, which is
-- replaced at the end by ";" when the text contains "formerly" and by ","
-- otherwise.
-- Returns the list wrapped in Hani-formatted parentheses.
-- NOTE(review): the `args...` subscripts appear to be missing from this copy;
-- as written, `while args do` never changes its condition -- confirm against
-- the upstream module.
function M.div(frame)
local args = frame:getParent().args
local m_links = require("Module:links")
local lang = require("Module:languages").getByCode("zh")
local pagename = mw.title.getCurrentTitle().text
local i, result = 1, ""
-- Link to pagename..description when that page exists and is not a redirect;
-- otherwise link to the description alone. Both are shown with a leading "~".
local function add_link(pagename, description)
local target_page = mw.title.new(pagename .. description)
if target_page.exists and not target_page.isRedirect then
return format_Chinese_text(m_links.language_link({ term = pagename .. description, alt = "~" .. description, lang = lang }))
else
return format_Chinese_text("~" .. m_links.language_link({ term = description, lang = lang }))
end
end
while args do
if i ~= 1 then result = result .. "''separator ''" end
result = result .. add_link(pagename, args)
-- Only the first entry may be followed by a "formerly" list.
if i == 1 and args then
local j = 2
result = result .. "'', formerly ''" .. add_link(pagename, args)
while args do
result = result .. "'', ''" .. add_link(pagename, args)
j = j + 1
end
end
i = i + 1
end
result = gsub(result, "separator", match(result, "formerly") and ";" or ",")
return format_Chinese_text("(") .. result .. format_Chinese_text(")")
end
-- Render a "Short for ..." line for the parent frame's positional arguments.
-- Options come from frame:getParent().args, except `ital`, which is read from
-- the invoking frame's own args. The result always ends with the category
-- returned by Module:zh-cat's categorize("short").
-- With `comb` set, each argument is linked with M.link and the links are
-- joined by " + ". Otherwise the arguments are concatenated into a `t` form
-- and an `s` form (the second shown only when it differs), followed by a
-- transliteration and gloss in parentheses.
-- NOTE(review): the `args...` subscripts and the link markup inside the "]"
-- literals appear to be missing from this copy -- confirm against the
-- upstream module.
-- NOTE(review): `cap` is assigned below without `local`, so it is a global.
function M.short(frame)
local args = frame:getParent().args
local pinyin = args or false
local gloss = args or false
local nocap = args or false
local notext = args or false
local comb = args or false
local nodot = args or false
local ital = frame.args or false
local noterm = not args or false
local t, s, tr, anno, word = {}, {}, {}, {}, {}
local start = (ital and "<i>" or "") .. (nocap and "s" or "S") .. "hort for " .. (ital and "</i>" or "")
local cat = require("Module:zh-cat").categorize("short")
if comb then
for _, arg in ipairs(args) do
table.insert(word, M.link(frame, nil, { arg }))
end
return start .. table.concat(word, " + ") ..
(gloss and ": " .. format_gloss(gloss) or "") ..
((ital and not nodot) and "<i>.</i>" or "") .. cat
end
for _, arg in ipairs(args) do
-- A leading "^" marks the piece for capitalisation; strip it before use.
cap = find(arg, "^%^")
arg = gsub(arg, "^%^", "")
table.insert(t, "]")
table.insert(s, "]")
if not pinyin then table.insert(tr, M.extract_pron(arg, "m", cap)) end
end
local trad = format_Chinese_text(table.concat(t))
local simp = format_Chinese_text(table.concat(s))
-- The extracted transliterations are used only when every piece yielded one.
pinyin = pinyin ~= "-" and pinyin or (#tr == #t and table.concat(tr, " ") or false)
table.insert(anno, format_rom(pinyin))
table.insert(anno, format_gloss(gloss))
return (notext and "" or start) .. (noterm and "" or trad .. (trad ~= simp and "/" .. simp or "") ..
((pinyin or gloss) and " (" .. table.concat(anno, ", ") .. ")" or "") ..
((ital and not nodot) and "<i>.</i>" or "")) .. cat
end
-- Extract a short gloss from the wikitext of an entry page.
-- Parameters:
--   content - page wikitext to scan
--   useetc  - when truthy, "; etc." is appended if senses were left out
-- Scans the language sections for "Chinese" (falling back to "Translingual"),
-- removes Etymology sections, collects "# " definition lines that contain
-- neither 'rfdef' nor 'defn', and cleans template markup out of the result.
-- Returns the gloss string; "" when no usable section exists or when "{{"
-- is still present in the cleaned text.
-- NOTE(review): many patterns below ("(+)", "*", "%f==", '\n# (+)',
-- '( ?)(+++)') and the `senses...` subscripts look truncated in this copy;
-- the code is kept exactly as found -- confirm against the upstream module.
-- NOTE(review): `_`, `i`, `language_name` and `gloss_text` are assigned
-- without `local`, so they are globals.
function M.extract_gloss(content, useetc)
local senses = {}
-- Shadows the file-level `len` alias with the same function.
local len = mw.ustring.len
local literally = match(content, 'zh%-forms*|lit=(+)')
local sense_id = 0
local etc = false
local translingual_section, zh_section, j, pos, section
while true do
-- Find language sections beginning with ==...== and ending with the same
-- or an empty string. Grab the Chinese and Translingual ones.
_, j, language_name, section = content:find("%f==%s*(+)%s*==(\n.-)\n==%f", pos)
if j == nil then
i, j, language_name, section = content:find("%f==%s*(+)%s*==(\n.+)", pos)
end
if j == nil then
break
else
-- Move to the beginning of "==" at the end of the current match.
pos = j - 1
end
if language_name == 'Translingual' then
translingual_section = section
elseif language_name == 'Chinese' then
zh_section = section
break
end
end
if not zh_section then
zh_section = translingual_section
if not zh_section then
return ""
end
elseif translingual_section then -- also use translingual section if Chinese section contains only rfdef
zh_section = zh_section..translingual_section
end
-- Delete etymology sections, because they sometimes contain ordered lists,
-- which would then be interpreted as definitions.
zh_section = zh_section:gsub("\n===+Etymology.-(\n==)", "%1")
-- Keep at most two usable senses; a third sets `etc` and stops the scan.
for sense in zh_section:gmatch('\n# (+)') do
if not sense:match('rfdef') and not sense:match('defn') then
sense_id = sense_id + 1
if sense_id > 2 then
etc = true
break
end
table.insert(senses, sense)
end
end
gloss_text = (literally and literally .. "; " or "") .. (senses or "")
local gloss_text_extend = gloss_text .. (senses and "; " .. senses or "")
-- Use the extended text only while both versions stay under the length limits.
gloss_text = (len(gloss_text) < 80 and len(gloss_text_extend) < 160) and gloss_text_extend or gloss_text
if gloss_text ~= gloss_text_extend then etc = true end
-- Strip or simplify template markup in a gloss and tidy its ";"-separated parts.
local function replace_gloss(text)
-- Rewrite {{w|...}} templates through the replacement function below.
local function replace_wp(text)
return text:gsub('{{w|(+)|?(*)}}',
function(w_link, w_display)
return ']'
end)
end
if text:find("{{") then
text = replace_wp(text)
text = text:gsub(' %({{taxlink+}}%)', '')
:gsub('{{zh%-l|%*(*)}}', '%1')
:gsub('{{lb|zh|*}}', '')
:gsub('{{zh%-erhua form of|word=+}}', '')
:gsub('{{zh%-erhua form of|(+)}}', '%1')
:gsub('{{zh%-alt%-name|+|(+)}}', '%1')
:gsub('{{zh%-short%-comp|+|t=(+)*}}', '%1')
:gsub('{{zh%-short%-comp|+}}', '')
:gsub('{{zh%-classifier|+|t=(+)*}}', '%1')
:gsub('{{zh%-classifier|+}}', '')
:gsub('{{zh%-alt%-form|+}}', '')
:gsub('{{zh%-+|+|(+)}}', '%1')
:gsub('{{place|zh|*t=(+)*}}', '%1')
:gsub('{{vern', '{{w')
:gsub('|', "|")
end
text = text:gsub('( ?)(+++)', function(space, captured)
local taxlink = captured:match("{{taxlink|(+)")
local wiki_link =
taxlink and "''" .. taxlink .. "''" or
(match(captured, "({{w|.+}})") or false)
return wiki_link and space..wiki_link or "" end)
-- Split on ";", drop parts without any alphanumeric character, trim the rest.
text = mw.text.split(text, ';')
local text_sec = {}
for _, s in ipairs(text) do
if s:find'%w' then
table.insert(text_sec, (s:gsub('^%s+',''):gsub('%s+$','')))
end
end
return table.concat(text_sec, '; ')
end
-- The cleanup is applied twice in a row.
gloss_text = replace_gloss(gloss_text)
gloss_text = replace_gloss(gloss_text)
if etc and useetc and gloss_text ~= "" then
gloss_text = gloss_text .. "; etc."
elseif gloss_text:find("{{") then --temporary solution to suppress wikitext issues
gloss_text = ""
end
return gloss_text
end
-- Export the module table.
return M