<span class="Hani" lang="cmn">],]。](])]</span> <span style="color:darkgreen; font-size:x-small;">[], ] and ]]</span> ◆◆◆◆ <dl><dd><i>vē, vē. vē (vēvē) vēvēvēvēvē</i> <span style="color:darkgreen; font-size:x-small;">[]]</span></dd><dd>roman test: with punctuation</dd></dl> ◆◆◆◆ roman test: with punctuation ◆◆◆◆ 14
<span class="Hani" lang="cmn">]]]]]]?</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span><br /><span class="Hani" lang="cmn">]]]]]]?</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span> ◆◆◆◆ <dl><dd><i>vēvē vēvēvē vē vēvē vē vēvē?</i> <span style="color:darkgreen; font-size:x-small;">[]]</span></dd><dd>roman test: pinyin</dd></dl> ◆◆◆◆ roman test: pinyin ◆◆◆◆ 12
<span class="Hani" lang="yue">]]]]]。]</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span><br /><span class="Hani" lang="yue">]]]]]。]</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span> ◆◆◆◆ <dl><dd><i>ve1 ve1 ve1 ve1 ve1 ve1 ve1 ve1. paat1 taai1</i> <span style="color:darkgreen; font-size:x-small;">[]]</span></dd><dd>roman test: jyutping</dd></dl> ◆◆◆◆ roman test: jyutping ◆◆◆◆ 13
<span class="Hani" lang="nan">]]]]],]] ]]]]] ],] ]]]。</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span><br /><span class="Hani" lang="nan">]]]]],]] ]]]]] ],] ]]]。</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span> ◆◆◆◆ <dl><dd><i>ve ve-ve ve ve ve-ve, in ve-ve-kiáⁿ teh ve ve-ve-ve 2 ve-á niā-niā, iah tòe ve ve.</i> <span style="color:darkgreen; font-size:x-small;">[]]</span></dd><dd>roman test: peh-oe-ji</dd></dl> ◆◆◆◆ roman test: peh-oe-ji ◆◆◆◆ 32
<span class="Hani" lang="nan">]]]]]],]]]]]]</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span><br /><span class="Hani" lang="nan">]]]]]],]]]]]]</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span> ◆◆◆◆ <dl><dd><i>ma e ka hoe hip hoa--khi, soo-i au--lai kai ho cho chek-so</i> <span style="color:darkgreen; font-size:x-small;">[]]</span></dd><dd>roman test: peh-oe-ji</dd></dl> ◆◆◆◆ roman test: peh-oe-ji ◆◆◆◆ 17
<span class="Hani" lang="cmn">],]</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span>/<span class="Hani" lang="cmn">],]</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span> ◆◆◆◆ ― <i>vēvē, '''vē'''</i> <span style="color:darkgreen; font-size:x-small;">[]]</span> ― syntax test: auto bolding and change simplified ◆◆◆◆ syntax test: auto bolding and change simplified ◆◆◆◆ 10
<span class="Hani" lang="cmn">],]</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span>/<span class="Hani" lang="cmn">],]</span><span class="Zsym mention" style="font-size:100%;">/</span><span class="Hans" lang="cmn">],]</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span> ◆◆◆◆ ― <i>vēer, '''vē'''</i> <span style="color:darkgreen; font-size:x-small;">[]]</span> ― syntax test: auto bolding, change simplified, and change roman ◆◆◆◆ syntax test: auto bolding, change simplified, and change roman ◆◆◆◆ 7
<span class="Hani" lang="cmn">],]</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span>/<span class="Hani" lang="cmn">],]</span><span class="Zsym mention" style="font-size:100%;">/</span><span class="Hans" lang="cmn">],]</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span> ◆◆◆◆ ― <i>vēao, '''vē'''</i> <span style="color:darkgreen; font-size:x-small;">[]]</span> ― syntax test: auto bolding, change simplified, and change roman ◆◆◆◆ syntax test: auto bolding, change simplified, and change roman ◆◆◆◆ 7
<span class="Hani" lang="yue">]。]。]</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span>/<span class="Hani" lang="yue">]。]。]</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span> ◆◆◆◆ ― <i>Gaa1 si1. Saai1 si2. fen1</i> <span style="color:darkgreen; font-size:x-small;">[]]</span> ― syntax and roman test: change roman and capitalize roman, as jyutping ◆◆◆◆ syntax and roman test: change roman and capitalize roman, as jyutping ◆◆◆◆ 9
<span class="Hani" lang="cmn">]]]]]]?</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span><br /><span class="Hani" lang="cmn">]]]]]]?</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span> ◆◆◆◆ <dl><dd><i>vē-vē vē-vē-'''vē''' vē '''vē'''-vē vē vēvē?</i> <span style="color:darkgreen; font-size:x-small;">[]]</span></dd><dd>hyphen</dd></dl> ◆◆◆◆ hyphen ◆◆◆◆ 22
<span class="Hani" lang="cmn">]冇]冇]</span> <span style="color:darkgreen; font-size:x-small;">[], ] and ]]</span> ◆◆◆◆ ― <i>yes vē yes vē vē</i> <span style="color:darkgreen; font-size:x-small;">[]]</span> ― syntax test: do not link ◆◆◆◆ syntax test: do not link ◆◆◆◆ 7
<span class="Hani" lang="cmn">]]no冇]</span> <span style="color:darkgreen; font-size:x-small;">[], ] and ]]</span> ◆◆◆◆ ― <i>yes vē no vē vē</i> <span style="color:darkgreen; font-size:x-small;">[]]</span> ― the wikitext is ok but renderer is being too smart for its own good ◆◆◆◆ the wikitext is ok but renderer is being too smart for its own good ◆◆◆◆ 7
<span class="Hani" lang="cmn">]</span> <span class="mention-gloss-paren annotation-paren">(</span><span lang="cmn-Latn" class="tr Latn">yǎnlèi</span><span class="mention-gloss-paren annotation-paren">)</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span>/<span class="Hani" lang="cmn">]</span> <span class="mention-gloss-paren annotation-paren">(</span><span lang="cmn-Latn" class="tr Latn">yǎnlèi</span><span class="mention-gloss-paren annotation-paren">)</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span> ◆◆◆◆ ― <i>vēvē</i> <span style="color:darkgreen; font-size:x-small;">[]]</span> ― syntax test: change link ◆◆◆◆ syntax test: change link ◆◆◆◆ 5
<span class="Hani" lang="cmn">]</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span>/<span class="Hani" lang="cmn">]</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span> ◆◆◆◆ ― <i>vēvē</i> <span style="color:darkgreen; font-size:x-small;">[]]</span> ― syntax test: change link ◆◆◆◆ syntax test: change link ◆◆◆◆ 5
<span class="Hani" lang="cmn">]]]]</span> <span style="color:darkgreen; font-size:x-small;">[], ] and ]]</span> ◆◆◆◆ <dl><dd><i>vēvē '''vēvē''' '''vēvē''' vēvē</i> <span style="color:darkgreen; font-size:x-small;">[]]</span></dd><dd>syntax test: bolding</dd></dl> ◆◆◆◆ syntax test: bolding ◆◆◆◆ 15
<span class="Hani" lang="cmn">]]]]</span> <span style="color:darkgreen; font-size:x-small;">[], ] and ]]</span> ◆◆◆◆ <dl><dd><i>vēvē '''vēvē''' '''vēvē''' vēvē</i> <span style="color:darkgreen; font-size:x-small;">[]]</span></dd><dd>syntax test: bolding</dd></dl> ◆◆◆◆ syntax test: bolding ◆◆◆◆ 12
<span class="Hani" lang="cmn">]]]]]</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span><br /><span class="Hani" lang="cmn">]]]]]</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span> ◆◆◆◆ <dl><dd><i>vēvē '''vēvē''' '''vēvē''' '''vēvēvē''' vēvē</i> <span style="color:darkgreen; font-size:x-small;">[]]</span></dd><dd>syntax test: bolding</dd></dl> ◆◆◆◆ syntax test: bolding ◆◆◆◆ 18
<span class="Hani" lang="yue">]。</span> <span style="color:darkgreen; font-size:x-small;">[], ] and ]]</span> ◆◆◆◆ ― <i>'''zi6'''.</i> <span style="color:darkgreen; font-size:x-small;">[]]</span> ― syntax test: capitalize roman, bolding ◆◆◆◆ syntax test: capitalize roman, bolding ◆◆◆◆ 6
<span class="Hani" lang="yue">]。</span> <span style="color:darkgreen; font-size:x-small;">[], ] and ]]</span> ◆◆◆◆ ― <i>'''ve1'''.</i> <span style="color:darkgreen; font-size:x-small;">[]]</span> ― syntax test: capitalize roman, bolding ◆◆◆◆ syntax test: capitalize roman, bolding ◆◆◆◆ 6
<span class="Hani" lang="nan">]]]],]]]]] ]]]]]]]。</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span><br /><span class="Hani" lang="nan">]]]],]]]]] ]]]]]]]。</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span> ◆◆◆◆ <dl><dd><i>ve-ve ve-ve ê ve, ve-ve ve the̍h ve ê '''phiat''' ve hō͘ ve-ve ve ngeh nn̄g--ve.</i> <span style="color:darkgreen; font-size:x-small;">[]]</span></dd><dd>?</dd></dl> ◆◆◆◆ ? ◆◆◆◆ 28
<span class="Hani" lang="cmn">]]],]]]]]]16]]]]]]]]]]]。</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span><br /><span class="Hani" lang="cmn">]]],]]]]]]16]]]]]]]]]]]。</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span> ◆◆◆◆ <dl><dd><i>vē 「 vēvēvēvē, vēvēvēvē 」 vē vēvē vēvē vēvēvē 16 vē vē vēvē vēvē vē vē vēvē vē vēvē vē vē.</i> <span style="color:darkgreen; font-size:x-small;">[]]</span></dd><dd>test</dd></dl> ◆◆◆◆ test ◆◆◆◆ 39
<span class="Hani" lang="cmn">]]]]]]]]16]]]]]]]]]]]。</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span><br /><span class="Hani" lang="cmn">]]]]]]]]16]]]]]]]]]]]。</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span> ◆◆◆◆ <dl><dd><i>vē 「 '''vēvēvēvē,vēvēvēvē''' 」 vē vēvē vēvē vēvēvē 16 vē vē vēvē vēvē vē vē vēvē vē vēvē vē vē.</i> <span style="color:darkgreen; font-size:x-small;">[]]</span></dd><dd>test: auto bolding</dd></dl> ◆◆◆◆ test: auto bolding ◆◆◆◆ 39
<span class="Hani" lang="cmn">]]</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span>/<span class="Hani" lang="cmn">]]</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span> ◆◆◆◆ ― <i>vēvē</i> <span style="color:darkgreen; font-size:x-small;">[]]</span> ― split link, join roman ◆◆◆◆ split link, join roman ◆◆◆◆ 2
<span class="Hani" lang="cmn">]</span> <span class="mention-gloss-paren annotation-paren">(</span><span lang="cmn-Latn" class="tr Latn">yǎnlèi</span><span class="mention-gloss-paren annotation-paren">)</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span>/<span class="Hani" lang="cmn">]</span> <span class="mention-gloss-paren annotation-paren">(</span><span lang="cmn-Latn" class="tr Latn">yǎnlèi</span><span class="mention-gloss-paren annotation-paren">)</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span> ◆◆◆◆ ― <i>vē vē</i> <span style="color:darkgreen; font-size:x-small;">[]]</span> ― join link, split roman ◆◆◆◆ join link, split roman ◆◆◆◆ 2
<span class="Hani" lang="cmn">]]]</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span>/<span class="Hani" lang="cmn">]]]</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span> ◆◆◆◆ ― <i>'''vē'''vē</i> <span style="color:darkgreen; font-size:x-small;">[]]</span> ― split link, join roman ◆◆◆◆ split link, join roman ◆◆◆◆ 2
<span class="Hani" lang="cmn">]</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span>/<span class="Hani" lang="cmn">]</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span> ◆◆◆◆ ― <i>'''vē''' vē</i> <span style="color:darkgreen; font-size:x-small;">[]]</span> ― join link, split roman ◆◆◆◆ join link, split roman ◆◆◆◆ 2
<span class="Hani" lang="yue">C:]]]]?<br>A:]]。]]]]]]]。</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span><br /><span class="Hani" lang="yue">C:]]]]?<br>A:]]。]]]]]]]。</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span> ◆◆◆◆ <dl><dd><i>C: Ve1 ve1 '''ben1''' ve1? <br> A: Ve1 ve1. ve1 ve1 ve1 ve1 ve1 ve1 '''ben1'''.</i> <span style="color:darkgreen; font-size:x-small;">[]]</span></dd><dd><small><i>From:</i> '''1998''', , </small></dd><dd>newline, ref, and various syntax</dd></dl> ◆◆◆◆ newline, ref, and various syntax ◆◆◆◆ 99
<span class="Hani" lang="cmn">]</span> <span style="color:darkgreen; font-size:x-small;">[], ] and ]]</span> ◆◆◆◆ <dl><dd><i>vē</i> <span style="color:darkgreen; font-size:x-small;">[]]</span></dd><dd><small><i>From:</i> The '']'', circa 11th – 7th centuries BCE</small></dd><dd>ref</dd></dl> ◆◆◆◆ ref ◆◆◆◆ 99
<span class="Hani" lang="nan">]]]]?</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span>/<span class="Hani" lang="nan">]]]]?</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span> ◆◆◆◆ ― <i>i1 tso3-ni5 bho5 lai5?</i> <span style="color:darkgreen; font-size:x-small;">[]]</span> ― x ◆◆◆◆ x ◆◆◆◆ 6
<span class="Hani" lang="hak">]!]]]]]]]。</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span><br /><span class="Hani" lang="hak">]!]]]]]]]。</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span> ◆◆◆◆ <dl><dd><i>phai-se! m-tu-ho lau ng ke chha-e chhong-fai le.</i> <span style="color:darkgreen; font-size:x-small;">[]]</span></dd><dd>x</dd></dl> ◆◆◆◆ x ◆◆◆◆ 15
<span class="Hani" lang="cdo">]]]?</span> <span style="color:darkgreen; font-size:x-small;">[], ] and ]]</span> ◆◆◆◆ ― <i>nu so̤i die-ne̤?</i> <span style="color:darkgreen; font-size:x-small;">[] / IPA]</span> ― x ◆◆◆◆ x ◆◆◆◆ 5
<span class="Hani" lang="wuu">] ] ] ]</span> <span style="color:darkgreen; font-size:x-small;">[], ] and ]]</span> ◆◆◆◆ ― <i>i have no translit</i> <span style="color:darkgreen; font-size:x-small;"></span> ― x ◆◆◆◆ x ◆◆◆◆ 8
<span class="Hani" lang="nan">]]]</span> <span style="color:darkgreen; font-size:x-small;">[], ] and ]]</span> ◆◆◆◆ ― <i>ve-ve-ve</i> <span style="color:darkgreen; font-size:x-small;">[]]</span> ― x ◆◆◆◆ x ◆◆◆◆ 3
<span class="Hani" lang="nan">]</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span>/<span class="Hani" lang="nan">]</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span> ◆◆◆◆ ― <i>khah</i> <span style="color:darkgreen; font-size:x-small;">[]]</span> ― x ◆◆◆◆ x ◆◆◆◆ 3
<span class="Hani" lang="nan">]</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span>/<span class="Hani" lang="nan">]</span> <span style="color:darkgreen; font-size:x-small;">[], ]]</span> ◆◆◆◆ ― <i>dalao-ve</i> <span style="color:darkgreen; font-size:x-small;">[]]</span> ― hyphen after roman before hani ◆◆◆◆ hyphen after roman before hani ◆◆◆◆ 4
local export = {}
local M = require("Module:zh")
local m_links = require("Module:links")
local varinfo = mw.loadData("Module:User:Suzukaze-c/zh/data/info").data
local gsub = mw.ustring.gsub
local split = mw.text.split
local match = mw.ustring.match
local find = mw.ustring.find
local trim = mw.text.trim
-- Lua pattern fragments that the code below splices into larger patterns
-- (judging by the names: one matching Han text, one matching non-Han text,
-- plus variants that also admit markup characters).
-- NOTE(review): every fragment is an empty string in this copy, which would make
-- the composed patterns meaningless; the character classes look like they were
-- lost -- confirm against the canonical source before relying on them.
local match_Han = ''
local match_nonHan = ''
local match_Han_mod1 = "" -- picks up hanzi + bold
local match_nonHan_mod1 = "" -- takes special syntax into account
local match_nonHan_mod2 = "" -- include capitalization syntax too
-- Builds the three-element record stored in variety_list for one variety.
-- Parameters:
--   abbr           variety abbreviation (callers below pass e.g. 'm', 'c', 'mn-t')
--   var_page       optional; callers below pass false or omit it
--   var_link_name  optional text for the variety link; falls back to varinfo
--   rom_page       optional; never passed by the callers visible in this file
--   rom_link_name  optional text for the romanization link; falls back to varinfo
-- Returns: the list { link_var, iso, link_rom }.
-- NOTE(review): in this copy both link expressions begin mid-string and varinfo
-- is used without a subscript, so abbr / var_page / rom_page are never read.
-- Part of each expression appears to be missing -- confirm against the
-- canonical source.
local function var_gen(abbr, var_page, var_link_name, rom_page, rom_link_name)
local link_var = ') .. '|' .. (var_link_name or varinfo) .. ']]'
local iso = varinfo
local link_rom = ') .. '|' .. (rom_link_name or varinfo) .. ']]'
return { link_var, iso, link_rom }
end
-- Per-variety records. Each value is a three-element list in the shape var_gen
-- returns: { variety link text, language code, romanization link text }.
-- Entries are either generated by var_gen or written out literally.
-- NOTE(review): the table keys are missing in this copy (each entry starts with
-- a bare "="), so which key maps to which record cannot be read from here;
-- export.show falls back to 'm' as its default variety -- confirm the keys
-- against the canonical source.
local variety_list = {
= var_gen('m'),
= var_gen('c'),
= var_gen('c', false, 'Guangzhou Cantonese'),
= var_gen('c', false, 'Literary Cantonese'),
= { "]", "cdo", "] / IPA" },
= var_gen('mn'),
= { "]", "nan", "]" },
= var_gen('mn-t'),
= { "]", "wuu", "IPA" },
= { "]", "wuu", "IPA" },
= { "]", "hak", "]" },
= var_gen('cl'),
}
-- Punctuation helper module; its space(), convChar() and main() functions are
-- called by export.process and export.show below.
local m_punctuation = require('Module:User:Suzukaze-c/punctuation')
-- Result of langRegexRange('zh').
-- NOTE(review): not referenced anywhere else in the visible code.
local punctuationZhRegexRange = m_punctuation.langRegexRange('zh')
-- Known source references. Each value is a two-element list:
-- { variety abbreviation, citation text }. Every entry here uses "cl", the same
-- abbreviation passed to var_gen('cl') in variety_list. export.show consults
-- this table both to pick a default variety and to replace the ref argument
-- with a citation.
-- NOTE(review): the table keys and the linked work titles inside the citation
-- strings are missing in this copy (only "]" remnants remain) -- confirm
-- against the canonical source.
local ref_list = {
= { "cl", "The ''] of Confucius'', circa 475 – 221 BCE" },
= { "cl", "'']'', circa 2nd century BCE" },
= { "cl", "The '']'', circa 1st century CE" },
= { "cl", "The '']'', circa 4th – 2nd century BCE" },
= { "cl", "''] (Mencius)'', circa 4th century BCE" },
= { "cl", "''] (])'', circa 4th century BCE" },
= { "cl", "The '']'', circa 4th – 3rd century BCE" },
= { "cl", "The '']'', by ], circa 91 BCE" },
= { "cl", "The '']'', circa 11th – 7th centuries BCE" },
= { "cl", "The '']'', circa 7th – 4th centuries BCE" },
= { "cl", "'']'', circa 2nd century CE" },
= { "cl", "The '']'', circa 5th century CE" },
= { "cl", "'']'', 3rd – 2nd millennia BCE" },
= { "cl", "'']'', circa 5th – 3rd centuries BCE" },
= { "cl", "'']'', circa 3rd – 2nd centuries BCE" },
}
-- TODO: finish this, for use within export.show() and within testcases
-- NOTE: don't concat d.ex or d.tr into a string (remember the idea for tidying up pinyin using an external function that takes a table, or the possibility of sending d.ex to an external function for conversion into romanization?)
-- IDEAS:
-- combining with zh-l?
-- allowing manual wikilinks?
-- Work-in-progress pre-processor for a usage example (see the TODO above).
-- Rewrites d.ex in place through the steps below and returns the same table d.
-- Only d.ex and d.pagename are read here; d.tr and d.variety are listed as
-- expected fields but are not touched by the visible code.
function export.process(d)
-- $d
-- $p
-- =require('Module:debug').dump(p.process({ex='lorem ipsum 牛腩飯',pagename='飯'}))
-- d.ex
-- d.tr
-- d.variety
-- d.pagename
-- Placeholder markers substituted for bold markup (b_esc) and for spaces that
-- must be preserved (sp_esc).
local b_esc = '㊟⒝㊟'
local sp_esc = '㊟⒮㊟'
-- 「'''美國 華盛頓州'''」→「㊟⒝㊟美國㊟⒝㊟ ㊟⒝㊟華盛頓州㊟⒝㊟」
-- If the example already contains ''' markup, each bold span is re-marked with
-- b_esc and split at spaces so every word is wrapped separately; otherwise
-- every occurrence of the page name is wrapped in b_esc.
-- NOTE(review): the capture "(+)" has no character class in this copy --
-- confirm the pattern against the canonical source.
-- NOTE(review): d.pagename is passed to gsub as a pattern without escaping, so
-- a page name containing pattern magic characters would not match literally.
if find(d.ex, "'''") then
d.ex = gsub(d.ex, "'''(+)'''", function(text) return b_esc .. gsub(text, ' ', b_esc .. ' ' .. b_esc) .. b_esc end)
else
d.ex = gsub(d.ex, d.pagename, b_esc .. d.pagename .. b_esc)
end
-- space cleanup
d.ex = gsub(d.ex, ' +', ' ')
-- preserve spaces between Latn words
-- 「lorem ipsum」→「lorem㊟⒮㊟ipsum」
d.ex = gsub(d.ex, "(" .. match_nonHan .. ") (" .. match_nonHan .. ")", "%1" .. sp_esc .. "%2")
-- pad punctuation with spaces
d.ex = m_punctuation.space(d.ex, 'zh')
-- pad '\n' with spaces (never part of a word)
-- The pattern is the literal two-character sequence backslash + n, not a
-- newline character.
d.ex = gsub(d.ex, '\\n', ' \\n ')
return d
end
-- Template entry point. Reads its inputs from frame.args, splits the example
-- into space-separated words, derives a traditional form, a simplified form and
-- a romanization for each word, and returns the assembled HTML followed by a
-- <pre> debug dump of the pieces.
-- Raises an error ('Example unspecified.') when the example argument is absent.
-- NOTE(review): in this copy frame.args, variety_list, ref_list and tag_text
-- are all used without subscripts, and several gsub patterns/replacements have
-- lost their bracketed parts. The comments below describe only what the
-- remaining text shows -- confirm details against the canonical source.
function export.show(frame)
local example = frame.args or error('Example unspecified.')
local manual_tr = frame.args or false
local translation = frame.args or '<span class="error">Lacking translation.</span>]'
local ref = frame.args or frame.args or false
-- Variety defaults to 'm' when neither an argument nor a ref_list entry supplies one.
local variety = frame.args or (ref_list and ref_list or false) or 'm'
-- Falls back to the current page title when no page name is supplied.
local pagename = frame.args or mw.title.getCurrentTitle().text
local variety_name = variety_list
local iso = variety_list
local variety_rom_name = variety_list
-- Per-word output accumulators, joined with table.concat after the loop.
local trad_example, simp_example, tr_example = {}, {}, {}
if ref_list then
ref = ref_list
end
-- save approximate usex length for later
-- (the parameter shadows the outer `example`; the outer value is not modified)
-- NOTE(review): two of the patterns below are empty strings in this copy.
local function length(example)
example = gsub(example, ' ', '') -- syntax
example = gsub(example, '{+}', '') -- syntax
example = gsub(example, '', '') -- syntax
example = gsub(example, '', '兩') -- more-or-less account for half-width characters
return mw.ustring.len(example)
end
-- len later selects between the inline layout and the block (<dl>) layout.
local len = length(example)
-- 「'''美國 華盛頓州'''」→「'''美國''' '''華盛頓州'''」
-- With explicit ''' markup, bold spans are split at spaces; without it, every
-- occurrence of pagename is bolded automatically.
-- NOTE(review): pagename is used as an unescaped gsub pattern here and again in
-- the xiehouyu step below.
if find(example, "'''") then
example = gsub(example, "'''(+)'''", function(text) return "'''" .. gsub(text, " ", "''' '''") .. "'''" end)
else
example = gsub(example, pagename, "'''" .. pagename .. "'''")
end
-- space cleanup
example = gsub(example, ' +', ' ')
-- preserve spaces between Latn words
-- (㍊㍖ is a temporary stand-in for a space; it is turned back into ' ' inside the loop)
example = gsub(example, "(" .. match_nonHan_mod1 .. ") (" .. match_nonHan_mod1 .. ")", "%1㍊㍖%2") -- 「lorem ipsum」→「lorem㍊㍖ipsum」
example = gsub(example, "(" .. match_nonHan_mod1 .. "'*) ('*" .. match_nonHan_mod1 .. ")", "%1㍊㍖%2") -- 「lorem '''ipsum'''」→「lorem㍊㍖'''ipsum'''」
-- pad punctuation with spaces
example = m_punctuation.space(example, 'zh')
-- pad '\n' with spaces (never part of a word)
-- (matches the literal two characters backslash + n, not a newline character)
example = gsub(example, '\\n', ' \\n ')
-- un-split xiehouyu that has been split (only works on the page of the xiehouyu itself...)
example = gsub(example, gsub(pagename, ',', ' , '), pagename)
-- internal POJ double hyphen markup
-- ('--' becomes ¥ here and is restored to '--' in the romanization inside the loop)
example = gsub(example, '%-%-', '¥')
-- space cleanup
example = gsub(example, ' +', ' ')
example = trim(example)
-- From here on `example` is a list of words rather than a string.
example = split(example, ' ')
for i, word in pairs(example) do
local trad_word, simp_word, tr_word = word, word, word
if word == '\\n' then
-- An explicit line-break token becomes <br> in all three outputs.
trad_word, simp_word, tr_word = '<br>', '<br>', '<br>'
-- NOTE(review): `lang` is not a local at this point (the local of that name is
-- declared only after the loop), so this reads a global -- verify which value
-- convChar is meant to receive.
elseif m_punctuation.convChar(word, lang) then
-- Punctuation: only the romanization is converted; trad/simp keep the word.
tr_word = m_punctuation.convChar(word, lang)
else
-- { } change roman, part 1
if find(trad_word, '%{') then
trad_word = gsub(trad_word, '{'..'(+)'..'}', '')
simp_word = gsub(simp_word, '{'..'(+)'..'}', '')
end
-- change simplified
-- A '[' in the word signals a manual simplified form; without one the
-- simplified form comes from M.ts.
if find(trad_word, '%[') then
trad_word = gsub(trad_word, '%', '')
simp_word = gsub(simp_word, '('..match_Han..')' .. '%', '%2')
tr_word = gsub(tr_word, '%', '')
else
simp_word = M.ts(simp_word)
end
-- { } change roman, part 2
-- The manual romanization replaces the text it is attached to. For non-cmn
-- languages a joiner is appended: ❖ for nan/hak/cdo, ❧ otherwise. Both joiners
-- are resolved after the loop.
if find(tr_word, '%{') then
if iso == 'cmn' then
tr_word = gsub(tr_word, '('..match_Han..')' .. '{'..'(+)'..'}', '%2') -- 「要{jiu1}」→「jiu1」
tr_word = gsub(tr_word, '('..match_nonHan_mod2..'+)' .. '{'..'(+)'..'}', '%2') -- 「size{saai1 si2}」→「saai1 si2」
elseif iso == 'nan' or iso == 'hak' or iso == 'cdo' then
tr_word = gsub(tr_word, '('..match_Han..')' .. '{'..'(+)'..'}', '%2❖')
tr_word = gsub(tr_word, '('..match_nonHan_mod2..'+)' .. '{'..'(+)'..'}', '%2❖')
else
tr_word = gsub(tr_word, '('..match_Han..')' .. '{'..'(+)'..'}', '%2❧')
tr_word = gsub(tr_word, '('..match_nonHan_mod2..'+)' .. '{'..'(+)'..'}', '%2❧')
end
end
-- auto roman
-- TODO: replace with actual auto roman lol
-- Placeholder romanization: each match_Han match becomes a dummy syllable
-- ("vē", "ve", "ve1") with the language's joiner. Other language codes get no
-- automatic romanization here.
if iso == 'cmn' then
tr_word = gsub(tr_word, match_Han, "vē") -- TODO: process with Module:cmn-pron (see current Module:zh-usex)
elseif iso == 'nan' or iso == 'hak' then
tr_word = gsub(tr_word, match_Han, 've❖')
elseif iso == 'yue' then
tr_word = gsub(tr_word, match_Han, 've1❧')
end
-- \ change link text
-- (in the romanization everything up to and including the last backslash is dropped)
trad_word = gsub(trad_word, '\\', '|')
simp_word = gsub(simp_word, '\\', '|')
tr_word = gsub(tr_word, '.+\\', '')
-- ^ capitalize roman
trad_word = gsub(trad_word, '%^', '')
simp_word = gsub(simp_word, '%^', '')
tr_word = gsub(tr_word, '%^(.)', mw.ustring.upper)
-- _ split link, join roman
trad_word = gsub(trad_word, '_', ']][[') -- if I replace "_" with " " the space remains after processing
simp_word = gsub(simp_word, '_', ']][[')
tr_word = gsub(tr_word, '_', '')
-- . join link, split roman
trad_word = gsub(trad_word, '%.', '')
simp_word = gsub(simp_word, '%.', '')
tr_word = gsub(tr_word, '%.', ' ')
-- ¥ min nan poj double hyphen
trad_word = gsub(trad_word, '¥', '')
simp_word = gsub(simp_word, '¥', '')
tr_word = gsub(tr_word, '¥', '--')
-- return spaces
trad_word = gsub(trad_word, '㍊㍖', ' ')
simp_word = gsub(simp_word, '㍊㍖', ' ')
tr_word = gsub(tr_word, '㍊㍖', ' ')
-- linking
-- '@' suppresses linking: the marker is simply removed. Otherwise the word is
-- rewritten by one of two gsub calls, depending on whether it carries '''.
-- NOTE(review): the "(+)" captures and the ']' replacements below look
-- truncated in this copy.
if find(trad_word, "@") or find(simp_word, "@") then
trad_word = gsub(trad_word, '@', '')
simp_word = gsub(simp_word, '@', '')
tr_word = gsub(tr_word, '@', '')
elseif find(trad_word, "'''") or find(simp_word, "'''") then
-- ]
trad_word = gsub(trad_word, '(+)', function(text) return ']' end)
simp_word = gsub(simp_word, '(+)', function(text) return ']' end)
else
trad_word = gsub(trad_word, '(+)', function(text) return ']' end)
simp_word = gsub(simp_word, '(+)', function(text) return ']' end)
end
end
-- NOTE(review): as written this overwrites the three accumulator variables on
-- every iteration instead of storing into them (the loop index i is unused),
-- which would leave table.concat below with a string argument; presumably a
-- subscript was lost -- confirm against the canonical source.
trad_example, simp_example, tr_example = trad_word, simp_word, tr_word
end
-- Han text is joined without separators; romanized words are joined with spaces.
trad_example = table.concat(trad_example, '')
simp_example = table.concat(simp_example, '')
tr_example = table.concat(tr_example, ' ')
-- romanization hyphen substitute
tr_example = gsub(tr_example, '❖()', '%1') -- dispose of before a non-letter
tr_example = gsub(tr_example, '❖$', '') -- dispose of at very end
tr_example = gsub(tr_example, '❖', '-')
-- romanization space substitute
tr_example = gsub(tr_example, "❧'''", "''' ") -- 「'''一'''。」→「'''jat1❧'''◆.◇」→「'''jat1''' ◆.◇」
tr_example = gsub(tr_example, "❧", ' ')
-- punctuation spacing
tr_example = m_punctuation.main(tr_example)
tr_example = trim(tr_example)
-- A manually supplied romanization replaces the generated one entirely.
if manual_tr then
tr_example = manual_tr
end
-- roman beautifying
-- (the branches below are placeholders: they contain only TODO comments)
if iso == 'cmn' then
-- TODO: format? cmn-pron
end
if iso == 'yue' then
-- TODO: super
end
if variety == 'mn-t' then
-- TODO: super
end
if iso == 'cdo' then
-- TODO: rom + ipa
end
if iso == 'wuu' then
-- TODO: rom > ipa
end
-- trad/simp on different lines
-- Setting len to 99 forces the block layout chosen by `len > 10` below.
if ref or find(trad_example, '<br>') then
len = 99
end
-- fancy links and language tagging stuff
local lang, sc = require("Module:languages").getByCode(iso), require("Module:scripts").getByCode('Hani')
trad_example = m_links.full_link({ lang = lang, term = trad_example .. '//', sc = sc })
simp_example = m_links.full_link({ lang = lang, term = simp_example, sc = sc })
-- Identical trad and simp renderings are shown only once.
if trad_example == simp_example then
simp_example = false
end
-- tags
-- Wraps text in the small dark-green bracketed label that follows each example.
local function tag(text)
return ' <span style="color:darkgreen; font-size:x-small;">[' .. text .. ']</span>' -- HTML entity since "]" is interpreted poorly
end
-- NOTE(review): the keys of this table are missing in this copy, and it is
-- concatenated below without a subscript.
local tag_text = {
= '] and ]',
= ']',
= ']',
}
-- add structure
tr_example = '<i>' .. tr_example .. '</i>' .. tag(variety_rom_name)
tr_example = gsub(tr_example, ' +', ' ')
-- Block layout (len > 10): trad and simp separated by <br />, and the
-- romanization, optional "From:" reference and translation in a <dl>.
-- Inline layout otherwise: trad/simp separated by '/', then romanization and
-- translation joined with ' ― '.
if len > 10 then
if simp_example then
example = trad_example .. tag(variety_name..', '..tag_text) .. '<br />' .. simp_example .. tag(variety_name..', '..tag_text)
else
example = trad_example .. tag(variety_name..', '..tag_text)
end
tr_example = '<dl><dd>' .. tr_example .. '</dd>' .. (ref and '<dd><small><i>From:</i> ' .. ref .. '</small></dd>' or '') .. '<dd>' .. translation .. '</dd></dl>'
else
if simp_example then
example = trad_example .. tag(variety_name..', '..tag_text) .. '/' .. simp_example .. tag(variety_name..', '..tag_text)
else
example = trad_example .. tag(variety_name..', '..tag_text)
end
tr_example = ' ― ' .. tr_example .. ' ― ' .. translation
end
-- Debug dump appended to the output: the example, romanization block,
-- translation and len, separated by ◆◆◆◆ lines inside <pre><nowiki>.
local testing = frame:preprocess('<pre><nowiki>' .. example .. '\n◆◆◆◆\n' .. tr_example .. '\n◆◆◆◆\n' .. translation .. '\n◆◆◆◆\n' .. len .. '</nowiki></pre>') .. '<br />'
return example .. tr_example .. testing
end
return export