Module:category tree/poscatboiler/data/characters

Hello, you have come here looking for the meaning of the word Module:category tree/poscatboiler/data/characters. In DICTIOUS you will not only find all the dictionary meanings of the word Module:category tree/poscatboiler/data/characters, but we will also tell you about its etymology and its characteristics, and you will learn how to say Module:category tree/poscatboiler/data/characters in the singular and plural. Everything you need to know about the word Module:category tree/poscatboiler/data/characters is here. The definition of the word Module:category tree/poscatboiler/data/characters will help you to be more precise and correct when speaking or writing your texts. Knowing the definition of Module:category tree/poscatboiler/data/characters, as well as those of other words, enriches your vocabulary and provides you with more and better linguistic resources.

This data submodule defines part of Wiktionary's category structure.

For an introduction to the poscatboiler system and a description of how to add or modify categories, see Module:category tree/poscatboiler/data/documentation.


-- Containers filled in below and exported at the end of the module:
-- keyed raw category specs, label handlers, and raw category-name handlers.
local raw_categories, handlers, raw_handlers = {}, {}, {}

local m_str_utils = require("Module:string utilities")

local insert = table.insert
local ulen = m_str_utils.len
local ulower = m_str_utils.lower
local umatch = m_str_utils.match
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD


-- Records a tracking hit for this module.
-- `page` is appended to the "poscatboiler-characters/" prefix and passed to
-- the function returned by Module:debug/track. Always returns true.
local function track(page)
	local tracker = require("Module:debug/track")
	local key = "poscatboiler-characters/" .. page
	tracker(key)
	return true
end


-----------------------------------------------------------------------------
--                                                                         --
--                              RAW CATEGORIES                             --
--                                                                         --
-----------------------------------------------------------------------------


-- Each raw category is stored under its full category name. Assigning the spec
-- directly to `raw_categories` (as this copy of the file did) replaces the
-- whole table, so the second assignment would discard the first and the
-- exported RAW_CATEGORIES would be a single spec instead of a name -> spec map.
--
-- The key below is the umbrella category name that the "terms spelled with"
-- handler in this module uses as the parent of its umbrella categories.
raw_categories["Terms by their individual characters subcategories by language"] = {
	description = "Umbrella categories covering terms categorized by unusual characters contained in them.",
	additional = "{{{umbrella_meta_msg}}}",
	parents = {
		"Umbrella metacategories",
		{name = "terms by their individual characters", is_label = true, sort = " "},
	},
}

-- FIXME! This should probably be deleted.
-- "Letters" is the parent named by the letter-category raw handler in this module.
-- NOTE(review): this entry uses "{{{umbrella_meta}}}" while the entry above uses
-- "{{{umbrella_meta_msg}}}"; confirm which placeholder is intended.
raw_categories["Letters"] = {
	description = "Categories specifying individual letters, containing the languages that use those letters.",
	additional = "{{{umbrella_meta}}}",
	parents = {
		"Fundamental",
	},
}



-----------------------------------------------------------------------------
--                                                                         --
--                                 HANDLERS                                --
--                                                                         --
-----------------------------------------------------------------------------


-- 	If char is a combining character, returns a version with a dotted circle before it.
local function add_dotted_circle(char, combining)
	-- Anything not flagged as combining is passed through untouched.
	if not combining then
		return char
	end
	-- Give the combining mark a visible base to attach to.
	return "◌" .. char
end

-- Handler for labels of the form "terms spelled with CHAR".
--
-- Reads `data.label`, `data.args` and `data.lang`. Returns nil when the label
-- does not match; otherwise returns a category spec table followed by `true`.
-- (NOTE(review): the second return value presumably tells the caller that this
-- handler processed `data.args` itself -- confirm against the poscatboiler docs.)
-- Raises an error when the title uses a lowercase character where the
-- uppercase form is expected.
--
-- NOTE(review): in the copy of the file this was restored from, every piece of
-- text between square brackets (table keys, index expressions, pattern
-- character classes) was missing. Those pieces have been reconstructed from
-- how the values are used elsewhere in this function; please verify them
-- against the canonical module before deploying.
insert(handlers, function(data)
	-- NOTE: The "character" in the title may actually be a description such as
	-- "gershayim". In that case, char= is specified as a parameter indicating the
	-- actual character.
	local titlechar = data.label:match("^terms spelled with (.+)$")
	if not titlechar then
		return nil
	end
	-- Parameter names are the ones read from `args` below.
	local params = {
		["char"] = {},
		["sort"] = {},
		-- Not sure what used to be done with the following parameters.
		["context"] = {},
		["context2"] = {},
	}
	local args = require("Module:parameters").process(data.args, params)
	if args.context or args.context2 then
		track("terms-spelled-with-context")
	end

	-- Titles that name a class of characters rather than a single character.
	local special_cases = {
		numbers = {
			sort = "#",
			desc = "numeric digits",
		},
		emoji = {
			sort = "⌚", -- the first emoji in our list
		},
		parentheses = {
			sort = "(",
		},
		-- NOTE(review): the two bracket keys are inferred from their sort keys
		-- and from the neighbouring "parentheses"/"braces" entries.
		["square brackets"] = {
			sort = "[",
		},
		["angle brackets"] = {
			sort = "<",
		},
		braces = {
			sort = "{",
		},
	}
	-- Look up the entry for this title; testing the whole table would always
	-- succeed and make everything after this branch unreachable.
	local special_case = special_cases[titlechar]
	if special_case then
		local sortkey = args.sort or special_case.sort
		return {
			description = "{{{langname}}} terms spelled with one or more " .. (special_case.desc or titlechar) .. ".",
			parents = {{name = "terms by their individual characters", sort = sortkey }},
			breadcrumb = titlechar,
			umbrella = {
				breadcrumb = titlechar,
				parents = {{name = "Terms by their individual characters subcategories by language", sort = " " .. sortkey }}
			},
		}, true
	end
	
	local char = args.char or titlechar
	local titlechar_is_desc = args.char and args.char ~= titlechar
	if titlechar_is_desc then
		track("titlechar_is_desc")
	end
	
	local lang = data.lang or require("Module:languages").getByCode("mul")
	
	local combining = ulen(char) == 1 and require("Module:Unicode_data").is_combining(char)
	
	-- Characters whose uppercase form must not come from the generic uppercasing:
	-- "ß" maps to capital "ẞ", and the combining iota subscript maps to itself.
	local specials = {["ß"] = "ẞ", ["ͅ"] = "ͅ"}
	-- Uppercase the decomposed string one UTF-8 character at a time (a lead
	-- byte followed by any continuation bytes), then recompose.
	local upper = toNFD(char)
		:gsub("[%z\1-\127\194-\244][\128-\191]*", function(m)
			return specials[m] or m:uupper()
		end)
	upper = toNFC(upper)
	
	local standard_chars = lang:getStandardCharacters()
	-- FIXME: This should be able to handle non-atomic single characters (e.g. "Q̓").
	if char ~= upper and ulen(char) == 1 then
		-- We want uppercase characters; but unless we're careful, we run into an issue with
		-- categories for dotless "ı" due to the weird behavior of this character,
		-- which has standard "I" as its uppercase equivalent.
		if standard_chars then
			local function err()
				error("Category titles should use uppercase characters: '" .. data.label .. "'", 2)
			end
			local code = lang:getCode()
			if code ~= "hi" and code ~= "lo" then
				if not umatch(standard_chars, upper) then
					err()
				end
			-- NOTE(review): character class reconstructed; an empty pattern
			-- would always match and this branch could never raise.
			elseif not umatch(upper, "[" .. standard_chars .. "]") then
				err()
			end
		end
	end
	
	-- Compute description.
	
	local full_link = require("Module:links").full_link
	local character = full_link(
		{
			term = char,
			alt = combining and add_dotted_circle(char, true) or nil,
			lang = lang,
			tr = combining and "-" or nil,
		},
		"term"
	)
	
	-- If the letter has a lowercase form that's also not in the standard characters,
	-- show it. This time, it's dotted capital "İ" that causes
	-- issues, because the lowercase equivalent is standard "i".
	-- Note that ulower("İ") has a bug where it outputs "i" with a combining dot, instead
	-- of plain "i", so this has to be accounted for.
	-- The extra parentheses drop gsub's second return value (the replacement
	-- count) so that only the string is passed to ulower.
	local lower = ulower((char:gsub("İ", "I")))
	if lower ~= char and not (standard_chars and umatch(lower, "[" .. standard_chars .. "]")) then
		character = "upper case " .. character .. " or lower case " ..
			full_link(
				{
					term = lower,
					lang = lang
				},
				"term"
			)
	end
	
	if titlechar_is_desc then
		character = character .. " (" .. titlechar .. ")"
	end
	
	local description = "{{{langname}}} terms spelled with " .. character .. "."

	-- Set tagged character for displaytitle and breadcrumb.
	local tagged_titlechar = not titlechar_is_desc and
		require("Module:script utilities").tag_text(titlechar, lang, nil, "term") or nil
	local tagged_char = titlechar_is_desc and titlechar or
		require("Module:script utilities").tag_text(add_dotted_circle(char, combining), lang, nil, "term")
	
	-- True when `char` consists solely of Han characters.
	-- NOTE(review): the character class was missing from this copy of the file
	-- (leaving "^+$", which only matches a literal "+"). It is assumed to be the
	-- character set of the "Hani" script object -- confirm against the
	-- canonical module.
	local han = umatch(char, "^[" .. require("Module:scripts").getByCode("Hani"):getCharacters() .. "]+$")
	
	-- Make the sortkey. Always use Hani-sortkey for Han characters, as this circumvents any reading-based sortkey methods.
	local sortkey = args.sort or han and require("Module:Hani-sortkey").makeSortKey(char) or lang:makeSortKey(char)
	-- Use the char as a fallback.
	if sortkey == "" then
		sortkey = char
	end
	
	return {
		description = description,
		-- The following doesn't apply to Sinitic or Japonic, where we categorize all characters.
		additional = not lang:inFamily("zhx", "jpx") and
			"Note that categories of the form '''''LANG terms spelled with CHAR''''' are intended for characters not "
			.. "part of the standard repertoire of a language (e.g. Cyrillic characters in English or Latin characters in Russian)." or nil,
		displaytitle = not titlechar_is_desc and "{{{langname}}} terms spelled with " .. tagged_titlechar or nil,
		parents = {{name = "terms by their individual characters", sort = sortkey }},
		breadcrumb = tagged_char,
		umbrella = {
			displaytitle = not titlechar_is_desc and "Terms spelled with " .. tagged_titlechar .. " by language" or nil,
			breadcrumb = tagged_char,
			parents = {{name = "Terms by their individual characters subcategories by language", sort = " " .. sortkey }}
		},
	}, true
end)



-----------------------------------------------------------------------------
--                                                                         --
--                               RAW HANDLERS                              --
--                                                                         --
-----------------------------------------------------------------------------


-- Special-cased categories that we allow, for Turkish letters.
-- Built by passing a one-element list to Module:table/listToSet; the single
-- entry is the uppercase/lowercase pair "İi".
local letter_cat_allow_list = require("Module:table/listToSet")({ "İi" })

-- Raw handler for letter categories such as "Aa".
--
-- Reads `data.category`. Returns nil when the name is not recognized;
-- otherwise returns a category spec whose parent is "Letters".
insert(raw_handlers, function(data)
	-- Only recognize cases consisting of an uppercase letter followed by the
	-- corresponding lowercase letter, either as the entire category name or
	-- followed by a colon. Cases that don't fit this profile need to call
	-- {{letter cat}} directly. Formerly this handler was much less restrictive
	-- and would fire on unrelated category names.
	--
	-- The frontier %f[:%z] requires the pair to be followed by a colon or by
	-- the end of the name (the end of the subject is treated as "\0").
	-- NOTE(review): the frontier set was missing from this copy of the file (a
	-- bare "%f" is a malformed pattern); it is reconstructed from the comment
	-- above -- confirm against the canonical module.
	local upper, lower = umatch(data.category, "^(%u)(%l)%f[:%z]")
	if not upper then
		return nil
	end
	-- Accept allow-listed pairs outright; otherwise the lowercase letter must
	-- uppercase to the first letter. The allow list has to be indexed by the
	-- pair: negating the table itself is always false, which would skip the
	-- case check entirely.
	if not letter_cat_allow_list[upper .. lower] and lower:uupper() ~= upper then
		return nil
	end
	return {
		description = ('Languages that use the uppercase letter "%s" (lowercase equivalent "%s").'):format(upper, lower),
		parents = {"Letters"},
	}
end)


-- Module export: the three tables populated above.
return {
	RAW_CATEGORIES = raw_categories,
	HANDLERS = handlers,
	RAW_HANDLERS = raw_handlers,
}