local export = {}
local gsub = mw.ustring.gsub
local find = mw.ustring.find
local gsplit = mw.text.gsplit
--- Build a short gloss for an entry from its raw wikitext.
--
-- Visible flow:
--   1. look for a `lit=` value inside a zh-forms template;
--   2. walk the `==Language==` sections, remembering "Translingual" and
--      stopping at "Chinese";
--   3. strip Etymology subsections, then collect at most two "\n# " definition
--      lines that mention neither "rfdef" nor "defn";
--   4. join the literal meaning and the senses, subject to the 80/160
--      character limits below;
--   5. run the result through replace_gloss to remove or unwrap templates.
--
-- @param content  page wikitext (a string; `content:find` is called on it)
-- @param useetc   if truthy, "; etc." is appended when senses were left out
-- @return the gloss string; "" when neither section exists, or when `{{` is
--         still present and the "; etc." suffix was not added
--
-- NOTE(review): many patterns and index expressions below look truncated in
-- this copy (e.g. `(+)`, `(*)`, `senses or ""`), as if bracketed text had been
-- stripped. The code is left untouched; restore it from the upstream module
-- before relying on it.
-- NOTE(review): `match` is not one of the locals declared at the top of the
-- visible file, and `language_name`, `i`, `_` and `gloss_text` are assigned
-- without `local`, so they are globals as written.
function export.extract_gloss(content, useetc)
local senses = {}
local len = mw.ustring.len
-- Optional literal meaning read from the zh-forms template (nil if absent).
local literally = match(content, 'zh%-forms*|lit=(+)')
local sense_id = 0
-- Set when at least one sense had to be dropped from the output.
local etc = false
local translingual_section, zh_section, j, pos, section
while true do
-- Find language sections beginning with ==...== and ending with the same
-- or an empty string. Grab the Chinese and Translingual ones.
_, j, language_name, section = content:find("%f==%s*(+)%s*==(\n.-)\n==%f", pos)
if j == nil then
-- Fallback for the last section of the page, which has no following header.
i, j, language_name, section = content:find("%f==%s*(+)%s*==(\n.+)", pos)
end
if j == nil then
break
else
-- Move to the beginning of "==" at the end of the current match.
pos = j - 1
end
if language_name == 'Translingual' then
translingual_section = section
elseif language_name == 'Chinese' then
zh_section = section
break
end
end
if not zh_section then
-- No Chinese section: fall back to Translingual, or give up with "".
zh_section = translingual_section
if not zh_section then
return ""
end
elseif translingual_section then -- also use translingual section if Chinese section contains only rfdef
-- NOTE(review): as written the sections are concatenated whenever both
-- exist; no rfdef check is visible here.
zh_section = zh_section..translingual_section
end
-- Delete etymology sections, because they sometimes contain ordered lists,
-- which would then be interpreted as definitions.
zh_section = zh_section:gsub("\n===+Etymology.-(\n==)", "%1")
for sense in zh_section:gmatch('\n# (+)') do
if not sense:match('rfdef') and not sense:match('defn') then
sense_id = sense_id + 1
-- Only the first two usable senses are kept; a third one sets `etc`.
if sense_id > 2 then
etc = true
break
end
table.insert(senses, sense)
end
end
-- NOTE(review): `senses` is a table, so concatenating it directly would
-- raise; an index was presumably lost on these two lines.
gloss_text = (literally and literally .. "; " or "") .. (senses or "")
local gloss_text_extend = gloss_text .. (senses and "; " .. senses or "")
-- Prefer the extended text only while both length limits are respected.
gloss_text = (len(gloss_text) < 80 and len(gloss_text_extend) < 160) and gloss_text_extend or gloss_text
if gloss_text ~= gloss_text_extend then etc = true end
-- Removes or unwraps known templates in `text`, then normalises its
-- ';'-separated pieces.
local function replace_gloss(text)
-- Rewrites {{w|...}} templates via a replacement callback.
local function replace_wp(text)
return text:gsub('{{w|(+)|?(*)}}',
function(w_link, w_display)
return ']'
end)
end
-- Template handling is skipped entirely when no "{{" is present.
if text:find("{{") then
text = replace_wp(text)
text = text:gsub(' %({{taxlink+}}%)', '')
:gsub('{{zh%-l|%*(*)}}', '%1')
:gsub('{{lb|zh|*}}', '')
:gsub('{{zh%-erhua form of|word=+}}', '')
:gsub('{{zh%-erhua form of|(+)}}', '%1')
:gsub('{{zh%-alt%-name|+|(+)}}', '%1')
:gsub('{{zh%-short%-comp|+|t=(+)*}}', '%1')
:gsub('{{zh%-short%-comp|+}}', '')
:gsub('{{zh%-classifier|+|t=(+)*}}', '%1')
:gsub('{{zh%-classifier|+}}', '')
:gsub('{{zh%-alt%-form|+}}', '')
:gsub('{{zh%-+|+|(+)}}', '%1')
:gsub('{{place|zh|*t=(+)*}}', '%1')
:gsub('{{vern', '{{w')
:gsub('|', "|")
end
-- For each match, keep a taxlink name (wrapped in '' quotes) or a {{w|...}}
-- template; anything else matched is replaced by the empty string.
text = text:gsub('( ?)(+++)', function(space, captured)
local taxlink = captured:match("{{taxlink|(+)")
local wiki_link =
taxlink and "''" .. taxlink .. "''" or
(match(captured, "({{w|.+}})") or false)
return wiki_link and space..wiki_link or "" end)
-- Split on ';', trim each piece, and drop pieces with no alphanumeric
-- character before joining again with "; ".
text = mw.text.split(text, ';')
local text_sec = {}
for _, s in ipairs(text) do
if s:find'%w' then
table.insert(text_sec, (s:gsub('^%s+',''):gsub('%s+$','')))
end
end
return table.concat(text_sec, '; ')
end
-- replace_gloss is applied twice.
-- NOTE(review): presumably so that templates exposed by the first pass are
-- handled by the second; confirm.
gloss_text = replace_gloss(gloss_text)
gloss_text = replace_gloss(gloss_text)
if etc and useetc and gloss_text ~= "" then
gloss_text = gloss_text .. "; etc."
elseif gloss_text:find("{{") then --temporary solution to suppress wikitext issues
gloss_text = ""
end
return gloss_text
end
--- Report whether the page named by the first argument is a redirect.
-- `mw.title.new` takes a page name (string) or a page ID (number), so the
-- first positional argument is passed rather than the whole `frame.args` table.
-- @param frame  frame object; `frame.args[1]` is used as the page title
-- @return 1 if a title object could be created and it is a redirect,
--         otherwise 0 (including when `mw.title.new` returns nil)
function export.is_redirect(frame)
	local title = mw.title.new(frame.args[1])
	-- Guard against a nil title so an invalid page name yields 0 instead of
	-- an "attempt to index a nil value" error.
	if title and title.isRedirect then
		return 1
	end
	return 0
end
--- Format `text` through Module:links using the language object for "zh".
-- @param text  value forwarded unchanged as the first argument of
--              `language_link`
-- @return whatever `language_link` returns
function export.link(text)
	local links_module = require("Module:links")
	local zh_lang = require("Module:languages").getByCode("zh")
	return links_module.language_link(text, nil, zh_lang)
end
--- "If not empty": map the empty string to nil and pass anything else through.
-- @param var  any value
-- @return nil when `var` is exactly "", otherwise `var` unchanged
local function ine(var)
	if var ~= "" then
		return var
	end
	return nil
end
-- `tones` is spliced into the Lua patterns built by export.py_transform, and
-- `py_tone` is the replacement table it passes to gsub when detoning; the
-- values are the tone numbers '1' to '4'.
-- NOTE(review): in this copy `tones` is the empty string and the keys of
-- `py_tone` are missing (the constructor is not valid Lua as shown). They
-- were presumably a character class of combining tone marks and the
-- individual marks; restore them from the upstream module.
local tones = ''
local py_tone = {
= '1',
= '2',
= '3',
= '4'
}
--- Validate and normalise a Pinyin string, optionally converting tone marks
-- to tone numbers and/or removing spaces.
--
-- @param text        the Pinyin string, or a table whose `args` field supplies
--                    the arguments (see the first line of the body)
-- @param detone      if non-empty/truthy, tone marks are replaced using the
--                    `py_tone` table and moved, and "5" is appended in the
--                    cases handled at the end of the function
-- @param not_spaced  if non-empty/truthy, all spaces are removed from the
--                    result and the missing-apostrophe check is skipped
-- @return the transformed string in NFC form (or, on the early-return path,
--         the lower-cased text with a space inserted between a digit and a
--         following lower-case letter)
-- Raises an error for a hidden character, a missing apostrophe, or a
-- misplaced diacritic (messages below).
--
-- NOTE(review): most patterns in this function look truncated in this copy
-- (empty strings, `()` captures, tables without keys), as if bracketed text
-- had been stripped. The code is left untouched; restore from upstream.
-- NOTE(review): `original_text` is assigned without `local`, so it is a
-- global as written.
function export.py_transform(text, detone, not_spaced)
-- Table input: as written all three values are set to the same `text.args`;
-- NOTE(review): per-argument indexes were presumably lost here.
if type(text) == 'table' then text, detone, not_spaced = text.args, text.args, text.args end
if find(text, '') then
error("Pinyin contains the hidden character: (U+200B). Please remove that character from the text.")
end
-- Treat empty-string flags as absent.
detone = ine(detone)
not_spaced = ine(not_spaced)
-- Decompose to NFD, then replace the decomposed forms of ê and ü with the
-- literals written here (presumably the precomposed letters; confirm).
text = gsub(gsub(mw.ustring.toNFD(text), mw.ustring.toNFD('ê'), 'ê'), mw.ustring.toNFD('ü'), 'ü')
if find(mw.ustring.lower(text), '' .. tones .. '?' .. tones .. '') and not not_spaced then
error(("Missing apostrophe before null-initial syllable - should be \"%s\" instead."):format(gsub(text, '(' .. tones .. '?)(' .. tones .. ')', "%1'%2"))) end
-- Move tone marks to their expected position; if that changes the text, the
-- input had a misplaced diacritic and is rejected.
original_text = text
text = gsub(text,'()()(' .. tones .. ')', '%1%3%2')
text = gsub(text,'()(' .. tones .. ')()', '%1%3%2')
if text ~= original_text then
error("Incorrect diacritic placement in Pinyin - should be \"".. text .. "\" instead.") end
text = mw.ustring.lower(text)
-- Early return when no tone mark is found and the second (truncated) test
-- matches: only insert a space between a digit and a lower-case letter.
if not mw.ustring.find(text, tones) and text:find('') then
return gsub(text, '(%d)(%l)', '%1 %2')
end
-- Insert a space before every '#'.
text = gsub(text, "#", " #")
if find(text, '') then
text = gsub(text, '()$', { = ' yī', = ' bù'})
text = gsub(text, '()', ' %1 ')
text = gsub(text, '()', ' %1 ')
-- Whitespace clean-up: collapse runs of spaces, trim both ends, and rejoin
-- a spaced-out ellipsis.
text = gsub(text, ' +', ' ')
text = gsub(text, '^ ', '')
text = gsub(text, ' $', '')
text = gsub(text, '%. %. %.', '...')
end
text = gsub(text, "", ' ')
text = gsub(text, '(' .. tones .. '?n?g?r?)(h?)', '%1 %2')
text = gsub(text, ' ()$', '%1')
text = gsub(text, ' () ', '%1 ')
if detone then
-- Replace each tone mark with the value looked up in py_tone, then
-- reorder and append "5" as the following substitutions specify.
text = gsub(text, tones, py_tone)
text = gsub(text, '()(*)', '%2%1')
text = gsub(text, '() ', '%15 ')
text = gsub(text, '()$', '%15')
end
if not_spaced then
text = gsub(text, ' ', '')
end
return mw.ustring.toNFC(text)
end
--- Convert Pinyin text word by word into another romanisation.
-- NOTE(review): the function name suggests Tongyong Pinyin; confirm.
--
-- Each space-separated word is detoned with export.py_transform(word, true),
-- each resulting syllable goes through the substitutions below and
-- num_to_mark, the syllables are joined without a separator, and the first
-- character is upper-cased again when `cap` was set for the word.
--
-- @param text  the input string, or a table whose `args` field is used instead
-- @return the converted words joined by single spaces, in NFC form
--
-- NOTE(review): several patterns and the keys of `ty_tone` look truncated in
-- this copy (e.g. `find("^")`, `"()u"`, `tone = ty_tone`), as if bracketed
-- text had been stripped. The code is left untouched; restore from upstream.
function export.py_tongyong(text)
if type(text) == 'table' then text = text.args end
-- Replacement strings are combining diacritics written as UTF-8 byte
-- escapes, plus the empty string; the keys are missing in this copy.
local ty_tone = {
= "", = "\204\129", = "\204\140", = "\204\128", = "\204\138"
}
-- Used as the gsub replacement callback below: appends the tone diacritic
-- after the first matching vowel pattern, trying the three patterns in
-- the order shown. Returns the syllable unchanged when the tone is "".
local function num_to_mark(syllable, tone)
tone = ty_tone
if tone ~= "" then
if syllable:find('') then
syllable = syllable:gsub("()", "%1" .. tone)
elseif syllable:find('o') then
syllable = syllable:gsub("(o)", "%1" .. tone)
elseif syllable:find('') then
syllable = syllable:gsub("()", "%1" .. tone)
end
end
return syllable
end
local words = {}
for word in gsplit(text, " ") do
-- Remember whether the word matched the (truncated) capitalisation test
-- before py_transform lower-cases it.
local cap = word:find("^")
word = export.py_transform(word, true)
local syllables = {}
for syllable in gsplit(word, " ") do
-- Spelling substitutions, applied in this fixed order.
syllable = syllable:gsub("(h?)i", "%1ih")
syllable = syllable:gsub("ü", "yu")
syllable = syllable:gsub("()u", "%1yu")
syllable = syllable:gsub("iu", "iou")
syllable = syllable:gsub("ui", "uei")
syllable = syllable:gsub("()eng", "%1ong")
syllable = syllable:gsub("wen", "wun")
syllable = syllable:gsub("iong", "yong")
syllable = syllable:gsub("^zh", "jh")
syllable = syllable:gsub("^q", "c")
syllable = syllable:gsub("^x", "s")
-- Non-initial syllables may get a leading apostrophe.
syllable = #syllables ~= 0 and syllable:gsub("^()", "'%1") or syllable
syllable = syllable:gsub("^(+)()$", num_to_mark)
table.insert(syllables, syllable)
end
word = table.concat(syllables, "")
word = cap and word:gsub("^.", string.upper) or word
table.insert(words, word)
end
return mw.ustring.toNFC(table.concat(words, " "))
end
--- Return a corrected form of `text` with diacritics repositioned.
-- The input is decomposed to NFD with a "-" sentinel appended, one reordering
-- substitution is applied (further ones are commented out), the trailing "-"
-- is removed, and the result is recomposed to NFC.
-- The comparison that would raise an "invalid poj" error is commented out, so
-- despite the name this function never raises; it only returns the string.
--
-- @param text  string to check
-- @return the corrected string in NFC form
--
-- NOTE(review): `accent` is the empty string and the active pattern contains
-- empty `()` captures in this copy, as if bracketed character classes had
-- been stripped. The code is left untouched; restore from upstream.
function export.pfs_check_invalid(text)
-- The appended "-" acts as an end marker and is stripped again below.
local correct = mw.ustring.toNFD(text) .. "-"
local accent = ""
-- Replacement that swaps the second and third captures.
local switch = "%1%3%2%4"
correct = gsub(correct, "(o)()(" .. accent .. ")()", switch)
--correct = gsub(correct, "(o)(" .. accent .. ")()(?ⁿ?)", switch)
--correct = gsub(correct, "(oa)(i)(" .. accent .. ")(h?ⁿ?)", switch)
--correct = gsub(correct, "(a)()(" .. accent .. ")(h?ⁿ?)", switch)
--correct = gsub(correct, "(i)(" .. accent .. ")()(u??g?ⁿ?)", switch)
--correct = gsub(correct, "(ia)(u)(" .. accent .. ")(h?ⁿ?)", switch)
--correct = gsub(correct, "(u)(i)(" .. accent .. ")(?)", switch)
--correct = gsub(correct, "(e)(e)(" .. accent .. ")(h?ⁿ?)", switch)
--correct = gsub(correct, "(o" .. accent .. ")", "%1͘")
correct = mw.ustring.toNFC(gsub(correct, "-$", ""))
--if text ~= correct then
--error("invalid poj \"" .. gsub(text, "-$", "") .. "\": correct poj is \"" .. correct .. "\"")
--end
return correct
end
--- Convert a space-separated romanised string into an IPA transcription.
-- NOTE(review): "gd" presumably names the source romanisation; confirm.
--
-- The input has any "gd=" removed and is split on spaces. A first loop takes
-- an initial, a final and a tone from each syllable; a second loop maps them
-- through `initial_conv` / `palatal`, `final_conv` and `tone_conv` and
-- concatenates the three parts.
--
-- @param text  the input string, or a table whose `args` field is used instead
-- @return the result of table.concat(ipa, " ")
-- Raises "Unrecognised initial" / "Unrecognised final" when a lookup fails.
--
-- NOTE(review): every table key and several index expressions and patterns
-- are missing in this copy (e.g. `= "p"`, `initial_conv]`, `"^?g?"`), so the
-- block is not valid Lua as shown. The code is left untouched; restore the
-- missing bracketed text from the upstream module.
function export.gd_to_ipa(text)
-- Romanised initial -> IPA consonant (keys missing in this copy).
local initial_conv = {
= "p", = "pʰ", = "m", = "f", = "ʋ",
= "t", = "tʰ", = "n", = "l",
= "k", = "kʰ", = "ŋ", = "h", = "",
= "t͡s", = "t͡sʰ", = "s",
= "t͡ɕ", = "t͡ɕʰ", = "ɕ"
}
-- Romanised final -> IPA rime (keys missing in this copy).
local final_conv = {
= "z̩", = "i", = "u",
= "a", = "ia", = "ua",
= "e", = "ie", = "ue",
= "o", = "io", = "uo",
= "m̩", = "n̩",
= "aɪ", = "iaɪ", = "uaɪ",
= "oɪ",
= "uɪ", = "iuɪ",
= "au", = "iau",
= "eu",
= "iu",
= "əm", = "im",
= "am", = "iam",
= "ɛm",
= "ən", = "in",
= "an", = "ian", = "uan",
= "ɛn", = "iɛn", = "uɛn",
= "ɔn", = "iɔn", = "uɔn",
= "un", = "iun",
= "aŋ", = "iaŋ", = "uaŋ",
= "ɔŋ", = "iɔŋ", = "uɔŋ",
= "ʊŋ", = "iʊŋ",
= "əp̚", = "ip̚",
= "ap̚", = "iap̚",
= "ɛp̚",
= "ət̚", = "it̚",
= "at̚", = "iat̚", = "uat̚",
= "ɛt̚", = "iɛt̚", = "uɛt̚",
= "ɔt̚",
= "ut̚", = "iut̚",
= "ak̚", = "iak̚", = "uak̚",
= "ɔk̚", = "iɔk̚", = "uɔk̚",
= "ʊk̚", = "iʊk̚"
}
-- Tone key -> superscript tone letters, including two entries written with
-- a "⁻" between two values (keys missing in this copy).
local tone_conv = {
= "⁴⁴", = "¹¹",
= "³¹",
= "⁵³",
= "¹", = "⁵",
= "⁴⁴⁻³⁵",
= "⁵³⁻⁵⁵"
}
-- Alternative initials tried first when the final starts with "i" (see the
-- second loop); keys missing in this copy.
local palatal = {
= 'c',
= 'cʰ',
= 'ɲ',
= 'ç'
}
if type(text) == 'table' then text = text.args end
local syllables = mw.text.split(mw.ustring.gsub(text, 'gd=', ''), ' ')
local initial, final, tone, ipa, result = {}, {}, {}, {}, {}
-- Pass 1: split each syllable into initial, final and tone.
for i, syllable in ipairs(syllables) do
initial = mw.ustring.match(syllable, "^?g?")
final = mw.ustring.match(mw.ustring.sub(syllable, mw.ustring.len(initial) + 1, -1), "^*")
-- Drop a leading "y": "yi..." becomes "i...", any other "y..." loses it.
final = mw.ustring.gsub(mw.ustring.gsub(final, "^yi", "i"), "^y", "")
if mw.ustring.find(initial, "") and final == "i" then
final = "ii"
end
-- A syllable with no final: what was matched as the initial is used as
-- the final, and the initial becomes empty.
if final == "" then
final = initial
initial = ""
end
tone = mw.ustring.match(syllable, "$")
end
-- Pass 2: look each part up and build the IPA syllable.
for i, syllable in ipairs(syllables) do
initial = (mw.ustring.find(final, "^i") and palatal] or initial_conv]) or error(("Unrecognised initial: \"%s\""):format(initial))
final = final_conv] or error(("Unrecognised final: \"%s\""):format(final))
-- Under the (truncated) condition below, "*" is appended to the tone before
-- the tone_conv lookup.
if mw.ustring.match(tone, "") and mw.ustring.match(tone or "", "") then
tone = tone .. "*"
end
tone = tone_conv]
ipa = initial .. final .. tone
end
return table.concat(ipa, " ")
end
--- Convert a romanised string from one scheme ("pfs") to another ("hrs").
-- NOTE(review): the scheme names are taken from the function name only;
-- confirm what they stand for.
--
-- The input is lower-cased, any "pfs=" is removed, spaces are turned into
-- "-", and the text is split on "-". Each syllable then has its consonants
-- rewritten, a tone mark chosen, its marked vowels replaced through
-- `final_conv`, and the tone mark appended.
--
-- @param text  the input string, or a table whose `args` field is used instead
-- @return the result of table.concat(syllables, " ")
--
-- NOTE(review): table keys, patterns and at least one index expression are
-- missing in this copy (e.g. `{='b',...}`, `find(syllable, '')`,
-- `syllables = syllable .. tone`), so the block is not valid Lua as shown.
-- The code is left untouched; restore from the upstream module.
function export.pfs_to_hrs(text)
if type(text) == 'table' then text = text.args end
local syllables = mw.text.split(mw.ustring.gsub(mw.ustring.gsub(mw.ustring.lower(text), 'pfs=', ''), ' ', '-'), "-")
for i, syllable in ipairs(syllables) do
-- change consonants
syllable = mw.ustring.gsub(syllable,'',{='b',='d',='g',='i'})
syllable = mw.ustring.gsub(syllable,'h',{='p',='t',='k',='z'})
syllable = mw.ustring.gsub(syllable,'zh','c')
local palatal = {='j',='q',='x',=''}
syllable = mw.ustring.gsub(syllable,'()()', function(a,b) return palatal..b end)
-- find tones
-- The branches are tested in order; the first matching one decides the
-- mark, and `tone` stays '' when none matches.
local tone = ''
if mw.ustring.find(syllable, '') then
tone = '´'
elseif mw.ustring.find(syllable, '') then
tone = 'ˇ'
elseif mw.ustring.find(syllable, '') or
(mw.ustring.find(syllable, '$') and not mw.ustring.find(syllable, '̍')) then
tone = '`'
end
-- remove tone marks and fix vowels
local final_conv = {
= 'a', = 'e', = 'i', = 'o', = 'u', = '',
= 'a', = 'e', = 'i', = 'o', = 'u', = '',
= 'a', = 'e', = 'i', = 'o', = 'u', = '',
= 'n', = 'n',
= '',
= 'ii',
}
syllable = mw.ustring.gsub(syllable, '', final_conv)
syllable = mw.ustring.gsub(syllable, 'o()', 'u%1')
-- add new tone marks
-- NOTE(review): as written this overwrites the `syllables` table itself;
-- an index was presumably lost.
syllables = syllable .. tone
end
return table.concat(syllables, " ")
end
--- Sanity check that two equal strings address the same table slot.
-- A value is stored under key `a` and read back through the separate local
-- `b`, which holds an equal string.
-- @return true when the lookup through `b` finds the value stored through `a`
function export.test()
	local a = "abc"
	local b = "abc"
	local c = {}
	-- Index the table rather than overwriting it, so `a` and `b` are
	-- actually exercised.
	c[a] = 5
	return c[a] == c[b]
end
return export