Module:User:kc kennylau/zh-usex

Hello, you have come here looking for the meaning of the word Module:User:kc kennylau/zh-usex. In DICTIOUS you will not only get to know all the dictionary meanings for the word Module:User:kc kennylau/zh-usex, but we will also tell you about its etymology, its characteristics and you will know how to say Module:User:kc kennylau/zh-usex in singular and plural. Everything you need to know about the word Module:User:kc kennylau/zh-usex you have here. The definition of the word Module:User:kc kennylau/zh-usex will help you to be more precise and correct when speaking or writing your texts. Knowing the definition of Module:User:kc kennylau/zh-usex, as well as those of other words, enriches your vocabulary and provides you with more and better linguistic resources.
-- sandbox of [[Module:zh-usex]]

-- Module table; the functions attached to it are returned at the end of the
-- file.
local export = {}

local m_zh = require("Module:zh")
local m_languages = require("Module:languages")

-- Local shortcuts for the mw.ustring / mw.text library functions.
local find = mw.ustring.find
local gsub = mw.ustring.gsub
local match = mw.ustring.match
local sub = mw.ustring.sub
local split = mw.text.split

-- Use this when the actual title needs to be known.
local actual_title = mw.title.getCurrentTitle()

-- Use this when testcases need to be able to override the title (for bolding,
-- for instance).
local title = actual_title
-- Picks up an already-defined global PAGENAME if there is one; otherwise falls
-- back to the text of the current title. export.show may overwrite it when a
-- pagename argument is supplied.
local PAGENAME = PAGENAME or title.text

-- Shared lookup tables, loaded through mw.loadData.
local data = mw.loadData("Module:zh-usex/data")
local punctuation = data.punctuation
local ref_list = data.ref_list
local pron_correction = data.pron_correction
local polysyllable_pron_correction = data.polysyllable_pron_correction

-- Closing tag appended after the Chinese text spans built in export.show.
local zh_format_end = "</span>"

-- NOTE(review): the pattern strings below look truncated (their bracketed
-- character classes appear to have been lost when this text was extracted);
-- confirm the real values against the upstream module before relying on them.
--local Han_pattern = ""
local Han_pattern = ""
local UTF8_char = '*'
local UTF8_char2 = '*' -- not ""

-- Per-variety settings used when splitting an example into segments and when
-- formatting its transcription. Fields, as read by export.show and get_tr:
--   segment_c      the characters that delimit links
--   separator_conv table mapping each separator found in the input to the
--                  separator emitted in the transcription
--   link_ignore    the characters that do not affect links
--   tr_cap         whether the transliteration can be capitalised
--   combine        function applied by get_tr to each looked-up or given
--                  reading before it is inserted into the transcription
-- `default` has no `combine` field.
-- NOTE(review): the keys inside every separator_conv table, the key of the
-- third entry (line starting with " = {"), and possibly parts of the pattern
-- strings appear to have been stripped during extraction; export.show tests
-- for the code "nan-hbl", which is presumably the missing entry key — confirm
-- against upstream.
local tr_data = {
	cmn = {
		segment_c = " %-",
		separator_conv = {="",=" ",="",="-"},
		link_ignore = "\1.^",
		tr_cap = true,
		combine = function(t)
			return t:gsub("^%f","\3") -- temporary substitute of the apostrophe
		end,
	},
	yue = {
		segment_c = " ",
		separator_conv = {="",=" "},
		link_ignore = "\1",
		tr_cap = false,
		-- readings are joined with a leading space
		combine = function(t) return " "..t end,
	},
	 = {
		segment_c = " ~",
		separator_conv = {="",=" ",="-"},
		link_ignore = "\1%%.^",
		tr_cap = true,
		-- readings are joined with a leading hyphen (tidied later in export.show)
		combine = function(t) return "-"..t end,
	},
	hak = {
		segment_c = " ~",
		separator_conv = {="",=" ",="-"},
		link_ignore = "\1.^",
		tr_cap = true,
		-- readings are joined with a leading hyphen (tidied later in export.show)
		combine = function(t) return "-"..t end,
	},
	default = {
		segment_c = " ",
		separator_conv = {="",=" "},
		link_ignore = "\1",
		tr_cap = false,
	},
}

-- Builds the transcription text for one segment.
--   display    the segment's display text (may contain \1 placeholders,
--              <b>/</b> tags and "{...}" manual readings)
--   norm_code  variety code passed on to m_zh.check_pron; also selects the
--              "cmn" / "yue" specific dot handling below
-- Returns the transcription string for the segment.
--
-- The work is done in one gsub chain: manual readings are stashed and replaced
-- by "{", punctuation is stashed and replaced by "}", bold tags become \1/\2,
-- each remaining word is looked up, and finally the stashed values are put
-- back in order.
-- NOTE(review): several subscripts and character classes in this function
-- (e.g. "tr_datapoint = tr_data", "given = ...", "punc = punctuation",
-- "return given", the word_regex and the "%f" patterns) look like they lost a
-- bracketed part during extraction; confirm against the upstream module.
local function get_tr(display, norm_code)
	local given, given_pos = {}, 1 -- record the characters with given transcription
	local punc, punc_pos = {}, 1  -- record the punctuations with given transcription
	local tr_datapoint = tr_data
	local word_regex = "+" -- regex that matches words
	local tr_word = display:gsub("\1", " ")
		:gsub("%", "")
		:gsub("("..UTF8_char.."){(*)}", function(a,b) -- record given tr and replace with "{"
			-- a single ASCII alphanumeric keeps the given reading as-is;
			-- anything else goes through the variety's combine()
			given = a:find("^%w$") and b or tr_datapoint.combine(b)
			given_pos = given_pos + 1
			return "{"
		end)
		:gsub("%f+%f", function(a) -- record punctuation and replace with "}"
			if punctuation then
				punc = punctuation
				punc_pos = punc_pos + 1
				return "}"
			end
			return a
		end)
		:gsub("<b>","\1"):gsub("</b>","\2") -- substitute bold tags for further processing
		:gsub(word_regex,function(word)
			-- first attempt to get the pronunciation of the whole word
			local res = polysyllable_pron_correction
				or pron_correction
			if res then return res end
			local length = 0 -- for check_pron (a bit hacky because check_pron only checks if length == 1)
			if word:find("^"..UTF8_char.."$") then length = 1 end
			res = m_zh.check_pron(word, norm_code, length)
			-- keep only the part of the result before the first "/"
			if res then return tr_datapoint.combine(res:gsub("/.+","")) end
			-- if it fails, get pronunciation of each character
			return word:gsub(UTF8_char, function(ch)
				local ch_res = pron_correction
				if ch_res then return ch_res end
				ch_res = m_zh.check_pron(ch, norm_code, 1)
				-- characters with no reading are left in place unchanged
				return ch_res and tr_datapoint.combine(ch_res:gsub("/.+","")) or ch
			end)
		end)
	-- ".." in the input stands for a hyphen in cmn transcriptions
	if norm_code == "cmn" then
		tr_word = tr_word:gsub("%.%.","-")
	end
	-- a remaining "." becomes a space, except for yue
	if norm_code ~= "yue" then
		tr_word = tr_word:gsub("%."," ")
	end
	-- reset the counters so the stashed values are replayed from the start
	given_pos, punc_pos = 0,0
	tr_word = tr_word:gsub("{",function() -- substitute back the stored results
			given_pos = given_pos + 1
			return given
		end)
		:gsub("}",function() -- substitute back the punctuations
			punc_pos = punc_pos + 1
			return punc
		end)
	return tr_word
end

-- Builds the link markup for one segment.
--   target   page to link to; an empty string means "same as display"
--   display  text to show
-- Returns the string assigned to `result`.
-- NOTE(review): the `result` literal below looks truncated (the wiki-link
-- markup that should use `target` and `display` appears to have been stripped
-- during extraction); confirm against the upstream module.
local function make_link(target, display)
	target = target == "" and display or target
	-- Remove bold tags from target
	target = target:gsub("</?b>","")
	-- Generate link to Chinese section
	local result = "]"
	-- For debugging purposes
	--if actual_title.nsText == "Module" then mw.log(display, target, "->", result) end
	return result
end

-- Applies `conv_fun` to selected parts of `text`.
--   conv_fun  conversion function; export.show passes m_zh.ts or m_zh.st
--   text      string to convert
-- A sentinel "A" is appended before matching and removed again by sub(1,-2).
-- For every match, the first capture is passed through conv_fun and the second
-- capture is re-attached unchanged.
-- NOTE(review): the pattern below looks truncated (character classes appear to
-- have been stripped during extraction), so exactly which parts are exempt
-- from conversion cannot be told from this copy — confirm against upstream.
local function convert(conv_fun, text)
	return (text .. "A")
		:gsub("(]*)"..UTF8_char2.."%",
			function(a,b) return conv_fun(a)..b end)
		:sub(1,-2)
end

-- Entry point called from the template. Reads the category from `frame.args`
-- and the template arguments from `frame:getParent().args`, then returns the
-- markup for one Chinese usage example or quotation: the example text (with a
-- converted simplified/traditional form where it differs), an automatically
-- generated or manual transcription, and the translation.
--   frame  the Scribunto frame object
-- Returns a string of wikitext/HTML. Raises an error for a missing example or
-- category, an unrecognised variety, an invalid separator, or a disallowed
-- use of the `simp` parameter.
--
-- NOTE(review): in this copy of the file many bracketed expressions (table
-- subscripts such as the indices on `args`, `frame.args`, `cache`,
-- `punctuation`, `tr_data`, `variety_data`, numeric parameter keys, pattern
-- character classes and wiki links inside string literals) appear to have
-- been stripped during extraction. Those tokens are deliberately left exactly
-- as found; restore them from the upstream module before deploying.
function export.show(frame)
	local params = {
		 = { required = true },	-- example
		 = {},					-- translation
		 = {},					-- variety
		lit = {},
		tr = {},
		ref = {}, r = { alias_of = "ref" },
		inline = {},
		audio = {}, a = { alias_of = "audio" },
		collapsed = { type = "boolean" },
		-- Allow specifying pagename in testcases on documentation page.
		pagename = actual_title.nsText == "Template" and {} or nil,
		nocat = { type = "boolean" },
		tr_nocap = { type = "boolean" },
		simp = { type = "boolean" }
	}

	local category = frame.args or error("Please specify the category.")

	local args, unrecognized_args = require("Module:parameters").process(frame:getParent().args, params, true)

	if args.pagename then
		-- Override title in Module namespace.
		title = mw.title.new(args.pagename)
		PAGENAME = title.text
	end

	local example = args or error("Example unspecified.")
	local translation = args
	local literal = args
	local reference = args
	local manual_tr = args
	local display = args
	local inline = args
	local audio_file = args
	local collapsed = args
	local simp = args
	local phonetic = ""
	local original_length = example:gsub("+",""):len()
	local variety = args or frame.args or (ref_list and ref_list or false) or "cmn"
	local variety_data = data.varieties_by_code or data.varieties_by_old_code or error("Variety " .. variety .. " not recognized.")
	-- unpack() doesn't work here because the data was loaded using mw.loadData()
	local std_code, norm_code, desc, tr_desc = variety_data, variety_data, variety_data, variety_data
	norm_code = norm_code or std_code
	variety = std_code

	-- NOTE(review): the result is not used below; presumably kept because the
	-- lookup itself validates the code — confirm before removing.
	local lang_obj_wikt = m_languages.getByCode(variety, 3, "allow etym")

	if next(unrecognized_args) then
		--]
		require("Module:debug").track_unrecognized_args(unrecognized_args, "zh-usex")
	end

	if reference then
		require("Module:debug").track("zh-usex/ref")
	end

	if example:find("") then
		require("Module:debug").track("zh-usex/parentheses")
	end

	if example:find("&#") then
		require("Module:debug").track("zh-usex/html")
	end

	-- future escape character?
	if example:find("`") then
		require("Module:debug").track("zh-usex/backtick")
	end
	if example:find("  ") then
		require("Module:debug").track("zh-usex/double-space")
	end

	if (norm_code == "nan-hbl" or norm_code:find("^hak")) and example:find("%-") then
		require("Module:debug").track("zh-usex/hyphen")
	end

	if example:find("%w%{") then
		require("Module:debug").track("zh-usex/rom-text")
	end

	if not translation or translation == '' then -- per standard ]
		translation = '<small>(please add an English translation of this ' .. (category == "quotations" and "quotation" or "usage example") .. ')</small> ]'
	end

	-- should we generate the other (simp/trad) form
	-- (in the end, only actually display if the converted text is different)
	local do_conv = true
	if norm_code == "vi" or norm_code == "ko" then
		do_conv = false
	end
	local conv_fun = m_zh.ts
	if simp then
		if category ~= "quotations" then error("parameter simp cannot be true in ] or ].") end
		if norm_code == "vi" or norm_code == "ko" or norm_code == "lzh" or variety == "yue-HK" or variety == "cmn-TW" or
				variety == "nan-hbl-TW" or variety == "lzh-cmn-TW" or variety == "hak-hai" or variety == "hak-dab" or
				variety == "hak-zha" then
			error(("Parameter simp= cannot be specified for variety '%s'"):format(variety))
		end
		conv_fun = m_zh.st
	end

	-- should we generate the transcription
	local generate_tr = false
	if tr_data then
		if manual_tr then
			require("Module:debug").track("zh-usex/manual-tr")
		else
			generate_tr = true
		end
	end

	local boldify = false
	-- automatically boldify pagetitle if nothing is in bold
	if not example:find("'''") and not punctuation then
		boldify = true
	end

	-- tidying up the example, making it ready for transcription
	example = gsub(example, "", " %0 ")
	example = example:gsub("—  —", "——") -- double em-dash (to be converted to single em-dash later)
		:gsub("<br */?>"," <br> ") -- process linebreaks
		:gsub("^ *",""):gsub(" *$",""):gsub("  +"," ") -- process spaces
		:gsub("%%]%f]",function(a) -- process ]
			return a:gsub(" ","\1")
		end)
		:gsub("'''(+)'''", "<b>%1</b>") -- normalise bold syntax
		:gsub("%^<b>","<b>^")
		:gsub("</b>(%)","%1</b>")
		:gsub("</b>({*})","%1</b>")

	-- parsing: convert "-", "--", "---" to "-", "..", "--" respectively
	-- so that "-" is the character that delimits links
	-- further explanation will use the replacement result to refer to the commands
	if norm_code == "cmn" then
		example = example:gsub("%-+",{="..",="--"})
		if example:find("%-+\\") then
			require("Module:debug").track("zh-usex/extra-pinyin")
		end
	end

	local regex_data = tr_data or tr_data.default
	local segment_c = regex_data.segment_c -- the characters that delimit links
	local separator_conv = regex_data.separator_conv -- the table for separator mapping
	local link_ignore = regex_data.link_ignore -- the characters that do not affect links
	local tr_cap = regex_data.tr_cap -- transliteration can be capitalised
	local segment_regex = "(*)(+)" -- the regex that matches each segment and the separator before it

	-- PAGENAME with every punctuation character escaped, so that it is matched
	-- literally when used as a Lua pattern (titles may contain "-", ".", "%"...).
	local pagename_pattern = PAGENAME:gsub("%p", "%%%0")

	local cache = {} -- store the result of each segment
	local trad_text = ""
	local simp_text = ""
	-- generate the transliteration
	-- but store the results in the cache
	-- and also build up trad_text and simp_text
	local tr_text = example:gsub(segment_regex, function(separator,seg)
		separator = separator_conv or error('Invalid separator: "'..separator..'"')
		if cache then
			trad_text = trad_text .. cache.trad
			simp_text = simp_text .. cache.simp
			return separator..cache.tr
		end

		if punctuation then
			cache = {
				trad = seg,
				simp = seg,
				tr = punctuation
			}
			trad_text = trad_text .. seg
			simp_text = simp_text .. seg
			return separator..punctuation
		end

		-- an "@" anywhere in the segment suppresses linking
		local generate_link = 0
		seg, generate_link = seg:gsub("@","")
		generate_link = (generate_link == 0)

		local target, display = "", seg
		local pos = seg:find("\\",1,true)
		if generate_link and pos then
			-- move formatting from start of target to display
			-- e.g. <b>^甲\乙 --> 甲\<b>^乙
			local bold = ""
			local caret = ""
			local start = 1
			if seg:sub(1,3) == "<b>" then
				bold,start = "<b>",4
			end
			-- Compare only the single character at `start`: the rest of the
			-- segment always contains at least the "\" found above, so a
			-- comparison against the whole remainder could never equal "^".
			if tr_cap and seg:sub(start,start) == "^" then
				caret,start = "^",start+1
			end
			target, display = seg:sub(start,pos-1), bold..caret..seg:sub(pos+1,-1)
			if target:find("</?b>") then -- Check for bold tags in target.
				require("Module:debug").track("zh-usex/bold-target")
			end
		end

		local target_trad = target:gsub("%","")
		local target_simp = do_conv and convert(conv_fun, target)

		local occurrences = 0
		if boldify then
			-- literal match of the page name; "%0" re-inserts the matched text
			display, occurrences = display:gsub(pagename_pattern,"<b>%0</b>")
		end
		if occurrences > 0 then
			display = display:gsub("%","%")
				:gsub("%^<b>","<b>^")
				:gsub("</b>(%)","%1</b>")
				:gsub("</b>({*})","%1</b>")
		end

		local display_derom = display:gsub("{*}","")
			:gsub("+","")
		local display_trad = display_derom:gsub("%","")
		local display_simp = do_conv and convert(conv_fun, display_derom) or ""
		local seg_tr = generate_tr and get_tr(display, norm_code) or ""

		-- a segment that is the page itself is never linked, and is bolded
		-- as a whole when automatic bolding found nothing inside it
		if display_trad:gsub("</?b>","") == PAGENAME or target_trad == PAGENAME then
			generate_link = false
			if boldify and occurrences == 0 then
				display_trad = "<b>" .. display_trad .. "</b>"
				display_simp = "<b>" .. display_simp .. "</b>"
				seg_tr = "<b>" .. seg_tr .. "</b>"
			end
		end

		local seg_trad = generate_link and make_link(target_trad, display_trad) or display_trad
		local seg_simp = generate_link and do_conv and make_link(target_simp, display_simp) or display_simp
		cache = {
			trad = seg_trad,
			simp = seg_simp,
			tr = seg_tr
		}
		trad_text = trad_text .. seg_trad
		simp_text = simp_text .. seg_simp
		return separator..seg_tr
	end)

	-- nothing to show separately when conversion changed nothing
	if trad_text == simp_text then
		do_conv = false
		simp_text = nil
	end

	if not trad_text:find("</?b>") then
		require("Module:debug").track("zh-usex/no-bold")
	end

	-- format generated tr
	-- at this point we have three temporary substitutions:
	-- <b>:\1, </b>:\2, ':\3
	if generate_tr then
		if norm_code == "cmn" then -- format apostrophe
			tr_text = tr_text
				:gsub("%f(*)\3", "%1")
				:gsub("\1\3","\3\1") -- <b>' → '<b>
				:gsub("^\3","\3^")   -- ^'   → '^ (shouldn't occur)
		elseif norm_code == "nan-hbl" or norm_code == "hak" then -- format hyphens
			mw.log(tr_text) -- debug trace: text before hyphen clean-up
			tr_text = tr_text
				:gsub("%^%-","-^")
				:gsub("\1%-","-\1") -- <b>-  → -<b>
				:gsub("%-\2","\2-") -- -</b> → </b>-
				:gsub("%f%-%f","") -- "-chhek" at beginning -> "chhek"
				:gsub("%f%-%f","") -- "shi-" at the end -> "shi"
				:gsub("%-+","-")
				:gsub("%-?%%%-?", "--")
			mw.log(tr_text) -- debug trace: text after hyphen clean-up
		end
		tr_text = tr_text:gsub("",{="<b>",="</b>",="&#39;"})

		if tr_text:find(Han_pattern) then
			require("Module:debug").track("zh-usex/character without transliteration")
		end
	end

	local tag_start = " <span style=\"color:darkgreen; font-size:x-small;\">&#91;" -- HTML entity since "]" is interpreted poorly
	local tag_end = "&#93;</span>"

	local simp_link = "<i>]</i>"
	local trad_link = "<i>]</i>"
	if simp then
		simp_link, trad_link = trad_link, simp_link
	end

	-- declared local so the replacement count does not leak into the global
	-- environment
	local auto_spaces
	trad_text, auto_spaces = trad_text:gsub("(%]%])(%)", "%1 %2")
	simp_text = do_conv and simp_text:gsub("(%]%])(%)", "%1 %2") or false
	phonetic = manual_tr or (generate_tr and tr_text)

	if auto_spaces > 0 then
		require("Module:debug").track("zh-usex/auto-spaces")
	end

	-- overall transcription formatting
	if phonetic then
		phonetic = gsub(phonetic, " </b>", "</b> ")
		phonetic = gsub(phonetic, "  ", " ")
		if norm_code == "yue" or norm_code == "zhx-tai" or norm_code == "nan-tws" or norm_code == "nan-hnm" or
			norm_code == "zhx-sic" or norm_code == "cjy" or norm_code == "hsn" or norm_code == "gan" or
			variety == "hak-mei" then
			phonetic = gsub(phonetic, "(+)(+)", "%1<sup>%2</sup>") -- superscript tones
		end
		phonetic = gsub(phonetic, " ()", "%1") -- remove excess spaces from punctiation
		phonetic = gsub(phonetic, "() ", "%1")
		phonetic = phonetic:gsub(" <br> ", "<br>")
		if not manual_tr then
			if norm_code == "nan-hbl" then
				phonetic = gsub(phonetic, " +%-%-", "--")
			end
		end

		-- capitalisation
		if not manual_tr then
			-- these varieties never capitalise the generated transcription
			if norm_code == "yue" or norm_code == "zhx-tai" or norm_code == "cjy" or norm_code == "hsn" or
				norm_code == "cmn-wuh" or norm_code == "nan-tws" or norm_code == "wxa" or norm_code == "wuu" or
				variety == "hak-mei" then
				args.tr_nocap = true
			end
			if not args.tr_nocap and match(example, "") then
				phonetic = "^" .. gsub(phonetic, "() ", "%1 ^")
			end
			if not args.tr_nocap then
				phonetic = gsub(phonetic, "() (.)", "%1 ^%2")
				phonetic = gsub(phonetic, "<br>(.)", "<br>^%1")
				phonetic = gsub(phonetic, ": ()(.)", ": %1^%2")
			end
			-- "^" marks the next character for upper-casing, then is removed
			phonetic = gsub(phonetic, "%^<b>", "<b>^")
			phonetic = gsub(phonetic, "%^+.", mw.ustring.upper)
			phonetic = gsub(phonetic, "%^", "")
		end

		if norm_code == "wuu" then
			local wuu_pron = require("Module:wuu-pron")
			if phonetic:find(":") then
				phonetic = "''" .. wuu_pron.wugniu_format(phonetic:sub(4)) .. "''"
			else
				phonetic = "''" .. wuu_pron.wugniu_format(wuu_pron.wikt_to_wugniu(phonetic)) .. "''"
			end
		elseif norm_code == "cmn-wuh" or norm_code == "wxa" then
			phonetic = "<span class=\"IPA\"></span>"

		elseif norm_code == "cdo" then
			local cdo_pron = require("Module:cdo-pron")
			phonetic = "<i>" .. phonetic .. "</i>" ..
				(not match(phonetic, "-+-+-+-")
					and " / <span class=\"IPA\"><small></small></span>"
					or "")

		else
			phonetic = "<i>" .. phonetic .. "</i>"
		end
		phonetic = "<span lang=\"zh-Latn\" style=\"color:#404D52\">" .. phonetic .. "</span>"
	end

	local collapse_start, collapse_end, collapse_tag, collapse_border_div, collapse_border_div_end = '', '', '', '', ''
	local simplified_start = '<br>'
	if collapsed then
		collapse_start = '<span class="vsHide">'
		collapse_end = '</span>'
		collapse_tag = '<span class="vsToggleElement" style="color:darkgreen; font-size:x-small;padding-left:10px"></span>'
		collapse_border_div = '<div class="vsSwitcher" data-toggle-category="usage examples" style="border-left: 1px solid #930; border-left-width: 2px; padding-left: 0.8em;">'
		collapse_border_div_end = '</div>'
		simplified_start = '<hr>'
	end

	-- category markup; stays nil outside the main namespace or with nocat
	local cat
	if actual_title.nsText == '' and (not args.nocat) then -- fixme: probably categorize only if text contains the actual word
		if reference then
			cat = "]"
		else
			cat = "]"
		end
	end

	local zh_format_start_simp = "<span lang=\"zh-Hans\" class=\"Hans\">"
	local zh_format_start_trad = "<span lang=\"zh-Hant\" class=\"Hant\">"
	if simp then zh_format_start_simp, zh_format_start_trad = zh_format_start_trad, zh_format_start_simp end

	-- Optional output pieces. They are declared local (and start as nil) so
	-- that a piece not set by this call is rendered as "" below, and nothing
	-- is written to the global environment.
	local tr_tag, ts_tag, trad_tag, simp_tag, audio, divider

	-- indentation, font and identity tags
	-- The last condition is parenthesised explicitly: "~=" binds tighter than
	-- "or", so without the inner parentheses an empty `inline` would count as
	-- set.
	if ((norm_code == "cmn" and original_length > 7)
			or (norm_code ~= "cmn" and original_length > 5)
			or reference
			or collapsed
			or (match(example, "") and norm_code == "wuu")
			or (norm_code == "cdo" and original_length > 3)
			or ((inline or "") ~= "")) then

		-- block layout: a <dl> with the text, transcription and translation
		-- on separate lines
		trad_text = zh_format_start_trad .. trad_text .. zh_format_end

		if not phonetic then
			translation = "<i>" .. translation .. "</i>"
		end

		if phonetic then
			phonetic = "<dd>" .. collapse_start .. phonetic
			translation = "<dd>" .. translation .. "</dd>"
			tr_tag = tag_start .. tr_desc .. tag_end .. collapse_end .. "</dd>"
		else
			translation = "<dd>" .. translation .. "</dd>"
		end

		if audio_file then
			audio = "<dd>]</dd>"
		end

		if do_conv then
			trad_tag = collapse_start .. tag_start .. desc .. ", " .. trad_link .. tag_end .. collapse_end .. collapse_tag
			simp_text = simplified_start .. collapse_start .. zh_format_start_simp .. simp_text .. zh_format_end
			simp_tag = tag_start .. desc .. ", " .. simp_link .. tag_end .. collapse_end
		elseif norm_code == "vi" or norm_code == "ko" then
			trad_tag = collapse_start .. tag_start .. desc ..", " .. trad_link .. tag_end .. collapse_end .. collapse_tag
		else
			trad_tag = collapse_start .. tag_start .. desc ..", " .. trad_link .. " and " .. simp_link .. tag_end .. collapse_end .. collapse_tag
		end

		if reference then
			reference = "<dd>" .. collapse_start .. "<small><i>From:</i> " ..
				(ref_list and ref_list or reference) .. "</small>" .. collapse_end .. "</dd>"
		end

		return collapse_border_div .. "<dl class=\"zhusex\">" .. trad_text .. trad_tag .. (simp_text or "") .. (simp_tag or "") .. (reference or "") ..
			(phonetic and phonetic .. tr_tag or "") .. (audio or "") .. translation .. "</dl>" .. (cat or "") .. collapse_border_div_end

	else
		-- inline layout: everything on one line, separated by dividers
		trad_text = zh_format_start_trad .. trad_text .. zh_format_end
		divider = "&nbsp; ―&nbsp; "

		if variety ~= "cmn" then
			ts_tag = tag_start .. desc .. tag_end
			tr_tag = tag_start .. tr_desc .. tag_end
		end

		if not phonetic then
			translation = "<i>" .. translation .. "</i>"
		end

		if do_conv then
			simp_text = "<span lang=\"zh-Hani\" class=\"Hani\">/</span>" .. zh_format_start_simp .. simp_text .. zh_format_end
		end

		if audio_file then
			audio = " ]"
		end

		return trad_text .. (simp_text or "") .. (ts_tag or "") .. divider ..
			(phonetic and phonetic .. (tr_tag or "") .. (audio or "") .. divider or "") .. translation .. (literal and " (literally, “" .. literal .. "”)" or "") ..
			(cat or "")
	end
end

-- function export.migrate(text, translation, ref)
-- 	if type(text) == "table" then
-- 		if not text.args or not text.args then
-- 			text = text:getParent()
-- 		end
-- 		if text.args and text.args ~= '' then
-- 			ref = text.args
-- 			translation = text.args
-- 			text = text.args
-- 		else
-- 			text = text.args
-- 		end
-- 	end
-- 	text = text:gsub('^+', ''):gsub('+$', ''):gsub(' +', ' '):gsub('\n+', '<br>'):gsub('|', '\\'):gsub('\'\'\'%%]\'\'\'', ' '):gsub('%]%]%%]', ''):gsub('%[%[', '')
-- :gsub('\'\'\'', ''):gsub(',', ','):gsub('!', '!'):gsub('%?', '?')
-- 	if translation then
-- 		if ref and ref ~= '' then
-- 			return '{{zh-x|' .. text .. '|' .. translation .. '|ref=' .. ref .. '}}'
-- 		else
-- 			return '{{zh-x|' .. text .. '|' .. translation .. '}}'
-- 		end
-- 	else
-- 		return text
-- 	end
-- end

-- Hand the module table (carrying export.show) back to whoever loads this module.
return export