-- Hello, you have come here looking for the meaning of this entry. In DICTIOUS you will not only get to know all the dictionary meanings for this entry, but we will also tell you about its etymology and its characteristics, and you will know how to say it in the singular and plural. Everything you need to know about this entry you have here. The definition of this entry will help you to be more precise and correct when speaking or writing your texts. Knowing its definition, as well as those of other words, enriches your vocabulary and provides you with more and better linguistic resources.


-- Table returned by this module. `postprocess_handlers` is a list that handler functions are appended to
-- further down in this file.
local export = {
	postprocess_handlers = {},
}

-- Name of the labels module; it is only passed to require() inside the handler, not at load time.
local labels_module = "Module:User:Benwing2/labels"

-- Remove duplicated labels like 'Taiwanese' in 'Taiwanese Hokkien|and|Taiwanese Hakka'. Also remove duplicated labels
-- in things like
-- * 'Quanzhou|_|Hokkien' (which canonicalizes to 'Quanzhou Hokkien|_|Hokkien');
-- * 'Xiamen|and|Quanzhou|_|Hokkien' (which canonicalizes to 'Xiamen Hokkien|and|Quanzhou Hokkien|_|Hokkien');
-- * 'Xiamen|and|Anxi|_|Hokkien' (which canonicalizes to 'Xiamen Hokkien|and|Anxi|_|Hokkien');
-- * 'Xiamen|Zhangzhou|and|Quanzhou|_|Hokkien' (which canonicalizes to 'Xiamen Hokkien|Zhangzhou Hokkien|and|Quanzhou Hokkien|_|Hokkien');
-- * 'Xiamen|Zhangzhou|and|Anxi|_|Hokkien' (which canonicalizes to 'Xiamen Hokkien|Zhangzhou Hokkien|and|Anxi|_|Hokkien').
-- We do two passes. The first pass fixes cases like 'Quanzhou Hokkien|_|Hokkien', irrespective of whether there's an
-- "and" present. The second pass looks for a stretch of labels where (a) all of the labels have the same prefix or
-- suffix, and (b) in between the labels is at least one occurrence of "and" (which can also start out as "&" but is
-- canonicalized to "and"); but (c) we count two labels separated by "_" (which is canonicalized to a blank label) as a
-- single label.
table.insert(export.postprocess_handlers,
	-- Postprocess handler that removes a prefix or suffix repeated across neighboring labels.
	--
	-- `data.labels` is a list of label objects; only their `label` field (a string) is read and written here.
	-- The list is modified in place and nothing is returned.
	function(data)
		local labels = data.labels
		-- Nothing can be duplicated unless there are at least two labels.
		if #labels < 2 then
			return
		end
		local m_labels = require(labels_module)

		-- First, split the labels into `link` and `display` component parts (done only once). The entry at index
		-- `i` of `split_labels` corresponds to the label at index `i` of `labels`.
		local split_labels = {}
		for i, label in ipairs(labels) do
			local link, display = m_labels.split_display_form(label.label)
			split_labels[i] = {link = link, display = display}
		end

		-- Then compute "label starts" (indices of label sets to consider when looking for runs with the same prefix or
		-- suffix), where a label start is either a single label or a set of two labels separated by an underscore,
		-- and where we take occurrences of "and" into consideration. Each entry has the fields:
		-- * `start`: index into `labels` of the first label of the set;
		-- * `followed_by_and`: true if an "and" label directly follows this set;
		-- * `after_underscore`: index of the label following a blank ("_") label, or nil if there is none.
		local label_starts = {}
		local i = 1
		while i <= #labels do
			local start = i
			local followed_by_and = false
			local after_underscore
			if i <= #labels - 4 and labels[i + 1].label == "" and labels[i + 2].label == "and"
				and labels[i + 3].label == "" then
				-- 'Foo|_|and|_|Bar'; redundant underscores
				followed_by_and = true
				i = i + 3
			elseif i <= #labels - 2 and labels[i + 1].label == "and" then
				-- 'Foo|and|Bar'; skip over the "and" so the next label start is 'Bar'.
				followed_by_and = true
				i = i + 1
			elseif i <= #labels - 2 and labels[i + 1].label == "" then
				-- 'Foo|_|Bar'; remember where 'Bar' is. Only the blank label is skipped, so 'Bar' is also
				-- visited as a label start of its own on the next iteration.
				after_underscore = i + 2
				i = i + 1
			end
			table.insert(label_starts, {
				start = start,
				followed_by_and = followed_by_and,
				after_underscore = after_underscore
			})
			i = i + 1
		end

		-- Now the main loop.

		-- Each spec is {"affix", `at_beginning`}, or {{"affix", "affix"}, `at_beginning`} where "affix" is a prefix or
		-- suffix to remove and `at_beginning` indicates whether "affix" is a prefix or suffix. If more than one affix
		-- is listed, any affix counts, e.g. 'Taiwan Mandarin|and|Taiwanese Hokkien'.
		for _, affix_spec in ipairs {
			{{"Taiwanese", "Taiwan"}, true}, {"Chinese"}, {"Gan"}, {"Hakka"}, {"Hokkien"}, {"Mandarin"},
			{"Southern Min"}, {"Min"}, {"Wu"}, {"Xiang"}
		} do
			local affixes, at_beginning = unpack(affix_spec)
			if type(affixes) == "string" then
				affixes = {affixes}
			end

			-- Build the "affix plus something else" patterns once per spec rather than on every call. The affixes
			-- listed above contain only letters and spaces, so no pattern escaping is needed.
			local space_patterns = {}
			for n, affix in ipairs(affixes) do
				space_patterns[n] = at_beginning and "^" .. affix .. " (.+)$" or "^(.+) " .. affix .. "$"
			end

			-- Does `item` match against the prefix or suffix when both prefix/suffix and something else are
			-- present? If so, return the something else, which is what we need to set the label to if we remove
			-- the prefix/suffix. Otherwise return false.
			local function matches_affix_with_space(item)
				for _, space_pattern in ipairs(space_patterns) do
					local rest = item:match(space_pattern)
					if rest then
						return rest
					end
				end
				return false
			end
			-- Does `item` match against the prefix or suffix exactly? If so, return an empty string, which is what
			-- we need to set the label to if we remove the prefix/suffix. Otherwise return false.
			local function matches_affix_exactly(item)
				for _, affix in ipairs(affixes) do
					if item == affix then
						return ""
					end
				end
				return false
			end
			-- Does the link or display at `label_index` match with `match_function`? If so, return a three-element
			-- list of `label_index`, `component` (either "link" or "display") and the return value of the match.
			-- Otherwise return nil. The display is tried first, then the link.
			local function check_match(label_index, match_function)
				local split = split_labels[label_index]
				local link, display = split.link, split.display
				local rest = display and match_function(display)
				if rest then
					return {label_index, "display", rest}
				else
					-- NOTE(review): the link is always checked with matches_affix_with_space, regardless of
					-- `match_function`; kept as found -- confirm this is intended.
					rest = link and matches_affix_with_space(link)
					if rest then
						return {label_index, "link", rest}
					end
				end
				return nil
			end
			-- Given {`label_index`, `component`, `value`}, set the link or display component (depending on `component`)
			-- of the label at `label_index` to `value`. An empty `value` blanks the whole label.
			local function set_component_value(to_erase)
				local label_index, component, value = unpack(to_erase)
				if value == "" then
					labels[label_index].label = ""
				else
					local split = split_labels[label_index]
					local link, display = split.link, split.display
					if component == "display" then
						display = value
					else
						link = value
					end
					labels[label_index].label = m_labels.combine_display_form_parts(link, display)
				end
			end

			-- First pass: Look for two labels separated by an underscore, with the suffix occurring on both parts.
			-- (This shouldn't happen with prefixes.)
			if not at_beginning then
				for _, label_start in ipairs(label_starts) do
					local to_erase = check_match(label_start.start, matches_affix_with_space)
					if to_erase and label_start.after_underscore and
						check_match(label_start.after_underscore, matches_affix_exactly) then
						set_component_value(to_erase)
					end
				end
			end

			-- Second pass.

			-- Check whether a prefix or suffix matches the given label start index (index of a label set in the
			-- `label_starts` list; see above). If it matches, return value is {`index`, `component`, `value`}, i.e.
			-- the label index to change, the component ("link" or "display") to change and the value to set the
			-- component to. Otherwise, return nil.
			local function affix_matches(label_start_index)
				local label_start = label_starts[label_start_index]
				-- If we're dealing with a suffix, there are two cases: (1) 'Quanzhou Hokkien';
				-- (2) 'Quanzhou|_|Hokkien'. If we're dealing with a prefix, there are similarly (1) 'Taiwanese Hakka';
				-- (2) 'Taiwanese|_|Hakka'. In addition, we have to check both the link and the display.
				local to_erase = check_match(label_start.start, matches_affix_with_space)
				if to_erase then
					return to_erase
				end
				local after_underscore = label_start.after_underscore
				if not after_underscore then
					return nil
				end
				-- With an underscore, a prefix sits on the first label of the pair and a suffix on the second.
				return check_match(at_beginning and label_start.start or after_underscore, matches_affix_exactly)
			end

			-- Now, try to find a run of two or more label sets with the same prefix or suffix, with at least one "and"
			-- in the middle.
			local j = 1
			while j <= #label_starts - 1 do
				local saw_and = false
				local run = {}
				local match = affix_matches(j)
				if match then
					table.insert(run, match)
					local k = j + 1
					while k <= #label_starts do
						match = affix_matches(k)
						if not match then
							break
						end
						table.insert(run, match)
						-- An "and" between the previous run member and this one is recorded on the previous
						-- label start.
						if label_starts[k - 1].followed_by_and then
							saw_and = true
						end
						k = k + 1
					end
					if #run > 1 and saw_and then
						-- We saw a run of two or more with at least one 'and' in the middle. Remove the prefix or
						-- suffix from all but the last (if we're dealing with a suffix) or all but the first (if we're
						-- dealing with a prefix).
						if at_beginning then
							table.remove(run, 1)
						else
							table.remove(run)
						end
						for _, to_erase in ipairs(run) do
							set_component_value(to_erase)
						end
					end
					-- `k` is either past the end or the first label start that did not match; since that one
					-- cannot begin a run, resume just after it.
					j = k + 1
				else
					j = j + 1
				end
			end
		end
	end
)

return export