Module:zh/extract

Hello, you have come here looking for the meaning of the word Module:zh/extract. In DICTIOUS you will not only get to know all the dictionary meanings for the word Module:zh/extract, but we will also tell you about its etymology, its characteristics and you will know how to say Module:zh/extract in singular and plural. Everything you need to know about the word Module:zh/extract you have here. The definition of the word Module:zh/extract will help you to be more precise and correct when speaking or writing your texts. Knowing the definition of Module:zh/extract, as well as those of other words, enriches your vocabulary and provides you with more and better linguistic resources.


-- Table of public functions; it is what this module returns to its callers.
local export = {}

-- Template parser module; `parse` and `class_else_type` below are taken from it.
local m_template_parser = require("Module:template parser")

-- Local aliases for the library functions used by the functions in this file.
local class_else_type = m_template_parser.class_else_type
local find = mw.ustring.find
local gsub = mw.ustring.gsub
local match = mw.ustring.match
local parse = m_template_parser.parse
-- Left nil here; export.extract_pron fills it with require("Module:cmn-pron")
-- the first time a transcription actually has to be analysed.
local cmn_pron

--- Read the `zh-pron` template of a page and return a linked transcription.
--
-- NOTE(review): several string literals in this function look truncated
-- (e.g. `(+)`, `",()"`, `"]"`): bracketed character classes / wikilinks
-- appear to be missing. They are kept exactly as found; confirm them against
-- the upstream module before relying on this code.
--
-- @param title   Page title (string) whose wikitext is inspected.
-- @param variety Name of the `zh-pron` parameter to read; it is spliced
--                into the search pattern as `variety=`.
-- @param cap     If truthy, the first character of the result is upper-cased.
--                Also switched on when the page itself carries `;cap=y`.
-- @return tr  Result of `cmn_pron.str_analysis(value, 'link')`, or nil when
--             no usable value was found.
-- @return cat Category string when the page has no content, otherwise nil.
function export.extract_pron(title, variety, cap)
	-- A title containing "*" is deliberately left alone: return nothing
	-- rather than trying to fetch page content for it. Plain find, so the
	-- asterisk is never interpreted as a pattern item.
	if string.find(title, "*", 1, true) then
		return
	end

	-- mw.title.new returns nil for an invalid title; treat that like the
	-- early exit above instead of indexing a nil value.
	local title_obj = mw.title.new(title)
	if not title_obj then
		return
	end

	local tr, cat
	local content = title_obj:getContent()
	if content then
		content = gsub(content, ",()", ";%1")
		local template = match(content, "{{zh%-pron*| ?" .. variety .. "=(+)")
		cap = cap or find(content, "{{zh%-pron*| ?" .. variety .. "=(+);cap%=y")
		if template and template ~= "" then
			-- Load the pronunciation module lazily, only when it is needed.
			if cmn_pron == nil then
				cmn_pron = require("Module:cmn-pron")
			end
			tr = cmn_pron.str_analysis(template, 'link')
		end
	else
		-- The page does not exist (getContent() returned nil).
		cat = "]"
	end

	-- Only capitalise when a transcription was actually produced; calling
	-- gsub on nil would raise an error.
	if cap and tr then
		tr = gsub(tr, '^(.)', mw.ustring.upper)
	end
	return tr, cat
end

--- Build a short gloss from the wikitext of an entry.
--
-- The Chinese language section (with the Translingual section as fallback)
-- is scanned for `# ` definition lines. Up to two usable definitions are
-- combined with the optional `lit=` value of `zh-forms`, and the result is
-- stripped of templates.
--
-- NOTE(review): many pattern strings in this function look truncated
-- (e.g. `(+)`, `+++`, a `%f` that is not followed by a `[set]`, the `']'`
-- replacement, the no-op `'|' -> "|"` substitution): bracketed character
-- classes, wikilinks and entities appear to be missing. They are kept
-- exactly as found; confirm them against the upstream module.
--
-- @param content Wikitext of the page (string).
-- @param useetc  If truthy, "; etc." is appended when definitions were left out.
-- @return The gloss text. It is the empty string when no Chinese or
--         Translingual section was found, or when the cleaned-up text still
--         contains "{{", "}}" or "=".
function export.extract_gloss(content, useetc)
	local senses = {}
	local len = mw.ustring.len
	local literally = match(content, 'zh%-forms*|lit=(+)')
	local sense_id = 0
	local etc = false
	local translingual_section, zh_section, pos
	while true do
		-- Find language sections beginning with ==...== and ending with the same
		-- or an empty string. Grab the Chinese and Translingual ones.
		-- The captures are loop-local so that nothing leaks into the globals.
		local _, j, language_name, section = content:find("%f==%s*(+)%s*==(\n.-)\n==%f", pos)

		if j == nil then
			-- No following header: take everything up to the end of the text.
			_, j, language_name, section = content:find("%f==%s*(+)%s*==(\n.+)", pos)
		end

		if j == nil then
			break
		end
		-- Move to the beginning of "==" at the end of the current match.
		pos = j - 1

		if language_name == 'Translingual' then
			translingual_section = section
		elseif language_name == 'Chinese' then
			zh_section = section
			break
		end
	end

	if not zh_section then
		zh_section = translingual_section
		if not zh_section then
			return ""
		end
	elseif translingual_section then
		-- Both sections exist: append the Translingual text after the Chinese
		-- text, so its definitions are still available when the Chinese ones
		-- are skipped below (for instance because they only contain rfdef).
		zh_section = zh_section..translingual_section
	end

	-- Delete etymology and glyph origin sections,
	-- because they sometimes contain ordered lists,
	-- which would then be interpreted as definitions.
	zh_section = zh_section:gsub("\n===+Etymology.-(\n==)", "%1")
	zh_section = zh_section:gsub("\n===+Glyph origin.-(\n==)", "%1")

	-- Collect at most two definitions, ignoring request/stub lines. Finding a
	-- third usable one only sets the "etc" flag.
	for sense in zh_section:gmatch('\n# (+)') do
		if not sense:match('rfdef') and not sense:match('defn') then
			sense_id = sense_id + 1
			if sense_id > 2 then
				etc = true
				break
			end
			table.insert(senses, sense)
		end
	end

	-- `senses` is a list, so its elements (not the table itself) have to be
	-- concatenated: first definition always, second one only if short enough.
	local first_sense, second_sense = senses[1], senses[2]
	local gloss_text = (literally and literally .. "; " or "") .. (first_sense or "")
	local gloss_text_extend = gloss_text .. (second_sense and "; " .. second_sense or "")
	-- Use the two-definition form only when the short form is under 80
	-- characters and the long form is under 160 characters.
	gloss_text = (len(gloss_text) < 80 and len(gloss_text_extend) < 160) and gloss_text_extend or gloss_text
	-- The second definition was dropped for length, so something was left out.
	if gloss_text ~= gloss_text_extend then etc = true end

	-- Strip or simplify templates in `text`, then normalise the ";"-separated
	-- parts (trim whitespace, drop parts without any alphanumeric character).
	local function replace_gloss(text)
		local function replace_wp(text)
			return text:gsub('{{w|(+)|?(*)}}',
				function(w_link, w_display)
					return ']'
			end)
		end

		if text:find("{{") then
			text = replace_wp(text)
			text = text:gsub(' %({{taxlink+}}%)', '')
				:gsub('{{zh%-l|%*(*)}}', '%1')
				:gsub('{{lb|zh|*}}', '')
				:gsub('{{zh%-erhua form of|word=+}}', '')
				:gsub('{{zh%-erhua form of|(+)}}', '%1')
				:gsub('{{zh%-alt%-name|+|(+)}}', '%1')
				:gsub('{{zh%-classifier|+|t=(+)*}}', '%1')
				:gsub('{{zh%-classifier|+}}', '')
				:gsub('{{zh%-alt%-form|+}}', '')
				:gsub('{{vern', '{{w')
				:gsub('%b{}', function(matched_braces)
					-- For these two templates the translation argument is used
					-- as replacement. Returning nil (no branch taken, or the
					-- parse result is not a template) keeps the original text.
					if matched_braces:find("^{{place|zh|") then
						local template = parse(matched_braces)
						if class_else_type(template) == "template" then
							local template_args = template:get_arguments()
							return template_args.t or template_args.t1
						end
					elseif matched_braces:find("^{{zh%-short%-comp|") then
						local template = parse(matched_braces)
						if class_else_type(template) == "template" then
							local template_args = template:get_arguments()
							return template_args.t or ""
						end
					end
				end)
				:gsub('{{zh%-+|+|(+)}}', '%1')
				:gsub('|', "|")
		end
		text = text:gsub('( ?)(+++)', function(space, captured)
			local taxlink = captured:match("{{taxlink|(+)")
			local wiki_link =
				 taxlink and "''" .. taxlink .. "''" or
				(match(captured, "({{w|.+}})") or false)
			return wiki_link and space..wiki_link or "" end)
		text = mw.text.split(text, ';')
		local text_sec = {}
		for _, s in ipairs(text) do
			if s:find'%w' then
				table.insert(text_sec, (s:gsub('^%s+',''):gsub('%s+$','')))
			end
		end
		return table.concat(text_sec, '; ')
	end
	-- Two passes: the first pass can leave (or produce, e.g. via the
	-- "{{vern" -> "{{w" rewrite) templates that the second pass handles.
	gloss_text = replace_gloss(gloss_text)
	gloss_text = replace_gloss(gloss_text)
	if etc and useetc and gloss_text ~= "" then
		gloss_text = gloss_text .. "; etc."
	end
	if gloss_text:find("{{") or gloss_text:find("}}") or gloss_text:find("=") then --temporary solution to suppress wikitext issues
		gloss_text = ""
	end
	return gloss_text
end

-- Hand the table of public functions (extract_pron, extract_gloss) to the caller.
return export