Module:User:Benwing2/category tree/topic

Hello, you have come here looking for the meaning of the word Module:User:Benwing2/category tree/topic. In DICTIOUS you will not only get to know all the dictionary meanings for the word Module:User:Benwing2/category tree/topic, but we will also tell you about its etymology, its characteristics and you will know how to say Module:User:Benwing2/category tree/topic in singular and plural. Everything you need to know about the word Module:User:Benwing2/category tree/topic you have here. The definition of the word Module:User:Benwing2/category tree/topic will help you to be more precise and correct when speaking or writing your texts. Knowing the definition of Module:User:Benwing2/category tree/topic, as well as those of other words, enriches your vocabulary and provides you with more and better linguistic resources.


-- List of raw handler functions. Handlers are appended near the end of this file and the list is exported as
-- RAW_HANDLERS.
local raw_handlers = {}
-- Raw category data defined by this module; exported as RAW_CATEGORIES.
local raw_categories = {}


--[=[
This module implements the topic category subsystem. It is currently implemented with a single raw handler that
handles both language-specific and umbrella topic categories. The topmost category [[:Category:All topics]] is special
and potentially could be handled as a separate raw category, but currently it's handled as part of the raw handler.
]=]

local functions_module = "Module:fun"
local labels_utilities_module = "Module:labels/utilities"
local languages_module = "Module:languages"
local patterns_module = "Module:patterns"
local string_utilities_module = "Module:string utilities"
local table_module = "Module:table"

local topic_data_module = "Module:User:Benwing2/category tree/topic/data"
local topic_utilities_module = "Module:category tree/topic/utilities"
local thesaurus_data_module = "Module:User:Benwing2/category tree/topic/thesaurus"

local m_patterns = require(patterns_module)

local concat = table.concat
local insert = table.insert
local dump = mw.dumpObject
local is_callable = require(functions_module).is_callable
local pattern_escape = m_patterns.pattern_escape
local replacement_escape = m_patterns.replacement_escape
local split = require(string_utilities_module).split

-- Per-type data for topic categories, keyed by type name. `desc` is the phrase placed before the topic when a
-- description is generated by format_desc(); `additional` is the explanatory note for categories of that type
-- ({{{topic}}} is replaced by substitute_template_specs()).
local type_data = {
	-- "related-to" is the default type (see split_types()); the key needs the bracketed string form because of the
	-- hyphen.
	["related-to"] = {
		desc = "terms related to",
		additional = "'''NOTE''': This is a \"related-to\" category. It should contain terms directly related to " ..
		"{{{topic}}}. Please do not include terms that merely have a tangential connection to {{{topic}}}. " ..
		"Be aware that terms for types or instances of this topic often go in a separate category.",
	},
	set = {
		desc = "terms for types or instances of",
		additional = "'''NOTE''': This is a set category. It should contain terms for {{{topic}}}, not merely " ..
		"terms related to {{{topic}}}. It may contain more general terms (e.g. types of {{{topic}}}) or more " ..
		"specific terms (e.g. names of specific {{{topic}}}), although there may be related categories "..
		"specifically for these types of terms.",
	},
	name = {
		desc = "names of specific",
		additional = "'''NOTE''': This is a name category. It should contain names of specific {{{topic}}}, not " ..
		"merely terms related to {{{topic}}}, and should also not contain general terms for types of {{{topic}}}.",
	},
	type = {
		desc = "terms for types of",
		additional = "'''NOTE''': This is a type category. It should contain terms for types of {{{topic}}}, not " ..
		"merely terms related to {{{topic}}}, and should also not contain names of specific {{{topic}}}.",
	},
	grouping = {
		desc = "categories concerning more specific variants of",
		additional = "'''NOTE''': This is a grouping category. It should not directly contain any terms, but " ..
		"only subcategories. If there are any terms directly in this category, please move them to a subcategory.",
	},
	toplevel = {
		desc = "UNUSED", -- all categories of this type hardcode their description
		additional = "'''NOTE''': This is a top-level list category. It should not directly contain any terms, but " ..
		"only a {{{topic}}}.",
	},
}


-- Throw an error saying that the type spec `types` is invalid, listing the valid type names (the keys of
-- `type_data`). Never returns normally.
local function invalid_type(types)
	local valid_types = {}
	for typ in pairs(type_data) do
		insert(valid_types, ("'%s'"):format(typ))
	end
	-- pairs() iterates in an unspecified order; sort so that the error message is deterministic.
	table.sort(valid_types)
	error(("Invalid type '%s', should be one or more of %s, comma-separated")
		:format(types, mw.text.listToText(valid_types)))
end


-- Split a comma-separated type spec into a list of validated type names.
--
-- `types` is a string such as "set" or "name,type"; nil defaults to "related-to". The legacy type name "topic" is
-- mapped to "related-to". Throws an error (via invalid_type()) if any component is not a key of `type_data`.
-- Returns the list of type names.
local function split_types(types)
	types = types or "related-to"
	local splitvals = split(types, "%s*,%s*")
	for i, typ in ipairs(splitvals) do
		-- FIXME: Temporary
		if typ == "topic" then
			typ = "related-to"
		end
		-- Validate each individual type against the table of known types.
		if not type_data[typ] then
			invalid_type(types)
		end
		-- Store the (possibly remapped) type back into its slot; the list itself is what gets returned.
		splitvals[i] = typ
	end
	return splitvals
end


-- Replace every literal occurrence of `from` in `str` with the literal text `to`. Both strings are escaped (with
-- pattern_escape() and replacement_escape() respectively) before being handed to gsub. Only the resulting string is
-- returned, not the substitution count.
local function gsub_escaping_replacement(str, from, to)
	local literal_from = pattern_escape(from)
	local replaced = str:gsub(literal_from, replacement_escape(to))
	return replaced
end


-- Uppercase the first character of `txt` using the content language's rules, skipping over any leading
-- apostrophes (wiki italic/bold markup), which are preserved as-is.
-- Declared local so that the helper does not leak into the global environment.
local function ucfirst(txt)
	local italics, raw_txt = txt:match("^('*)(.-)$")
	return italics .. mw.getContentLanguage():ucfirst(raw_txt)
end


-- Lowercase the first character of `txt` using the content language's rules, skipping over any leading
-- apostrophes (wiki italic/bold markup), which are preserved as-is.
-- Declared local so that the helper does not leak into the global environment.
local function lcfirst(txt)
	local italics, raw_txt = txt:match("^('*)(.-)$")
	return italics .. mw.getContentLanguage():lcfirst(raw_txt)
end


-- Resolve a description-like spec to a string.
--
-- `desc` may be nil/false (returned unchanged), a string (returned unchanged), a number (converted with tostring())
-- or a callable. A callable is invoked with a table holding `lang`, `sc`, `label`, `category` and `topic_data`
-- (taken from `data`), and must return a string or a false value. Anything else raises an internal error.
local function convert_spec_to_string(data, desc)
	if not desc then
		return desc
	end

	local spec_type = type(desc)
	if spec_type == "string" then
		return desc
	end
	if spec_type == "number" then
		return tostring(desc)
	end
	if not is_callable(desc) then
		error("Internal error: `desc` must be a string, number, function, callable table or nil; received a " ..
			spec_type)
	end

	-- Callable spec: hand it the context of the category being generated.
	local result = desc({
		lang = data.lang,
		sc = data.sc,
		label = data.label,
		category = data.category,
		topic_data = data.topdata,
	})
	if not result then
		return result
	end

	local result_type = type(result)
	if result_type ~= "string" then
		error("Internal error: the value returned by `desc` must be a string or nil; received a " .. result_type)
	end
	return result
end


-- Resolve the spec stored in `obj[key]` to a string (see convert_spec_to_string()) and write the result back into
-- `obj[key]`, so that a function-valued spec is only evaluated once. Returns the resolved value.
local function get_and_cache(data, obj, key)
	local val = convert_spec_to_string(data, obj[key])
	obj[key] = val
	return val
end


-- Check whether `desc` is the special description "default", optionally followed by any combination of the
-- modifiers " no singularize", " wikify" and " with the" (in any order).
--
-- Returns `true, no_singularize, wikify, add_the` if so, where each flag is true if the corresponding modifier was
-- present and nil otherwise. Returns `false` if `desc` is any other string, or is not a string at all (e.g. nil
-- when a label has no description).
local function process_default(desc)
	if type(desc) ~= "string" then
		return false
	end

	-- The suffixes contain no pattern magic characters, so they can be embedded in a pattern directly.
	local modifiers = {
		{suffix = " no singularize", flag = "no_singularize"},
		{suffix = " wikify", flag = "wikify"},
		{suffix = " with the", flag = "add_the"},
	}
	local flags = {}
	local stripped_desc = desc

	-- Repeatedly strip one trailing modifier until none is left.
	local found
	repeat
		found = false
		for _, modifier in ipairs(modifiers) do
			local rest = stripped_desc:match("^(.+)" .. modifier.suffix .. "$")
			if rest then
				flags[modifier.flag] = true
				stripped_desc = rest
				found = true
				break
			end
		end
	until not found

	if stripped_desc == "default" then
		return true, flags.no_singularize, flags.wikify, flags.add_the
	end
	return false
end


-- Build a full description of the form "{{{langname}}} <type phrase> <desc>[, ... and <type phrase> <desc>]."
-- with one "<type phrase> <desc>" part per type of the category (data.topdata.type), each using the `desc` phrase
-- of that type from `type_data`.
local function format_desc(data, desc)
	local desc_parts = {}
	local types = split_types(data.topdata.type)
	for _, typ in ipairs(types) do
		insert(desc_parts, type_data[typ].desc .. " " .. desc)
	end
	return "{{{langname}}} " .. require(table_module).serialCommaJoin(desc_parts) .. "."
end


local substitute_template_specs

-- Compute the display title of the category from `topdata.displaytitle`, after template-spec substitution.
-- Returns nil if there is no display title. If `upcase` is truthy, the first letter is uppercased; if
-- `include_lang_prefix` is truthy and the category has a language, the result is prefixed with "<langcode>:".
local function format_displaytitle(data, include_lang_prefix, upcase)
	local title = substitute_template_specs(data, data.topdata.displaytitle)
	if not title then
		return nil
	end

	if upcase then
		title = ucfirst(title)
	end

	local lang = data.lang
	if include_lang_prefix and lang then
		title = ("%s:%s"):format(lang:getCode(), title)
	end

	return title
end


-- Compute the breadcrumb for the category as a table `{name = ..., nocap = ...}`.
--
-- The breadcrumb spec is taken, in order of preference, from `topdata.umbrella.breadcrumb` (umbrella categories
-- only), `topdata.breadcrumb`, the uppercased display title, and finally the label. A string or number spec is
-- treated as the name; a table spec supplies `name` and `nocap`. The name undergoes template-spec substitution.
local function get_breadcrumb(data)
	local topdata = data.topdata
	local crumb

	-- Only umbrella (language-less) categories consult the umbrella-specific breadcrumb.
	if not data.lang and topdata.umbrella then
		crumb = topdata.umbrella.breadcrumb
	end
	crumb = crumb or topdata.breadcrumb or format_displaytitle(data, false, "upcase")
	if not crumb then
		crumb = data.label
	end

	local crumb_type = type(crumb)
	if crumb_type == "string" or crumb_type == "number" then
		crumb = {name = crumb}
	end

	return {
		name = substitute_template_specs(data, crumb.name),
		nocap = crumb.nocap,
	}
end


-- Build a topic category name from a label: the label with its first letter uppercased, prefixed with
-- "<langcode>:" when a language object is given.
local function make_category_name(lang, label)
	local name = ucfirst(label)
	if lang then
		name = lang:getCode() .. ":" .. name
	end
	return name
end


-- Expand the special description forms:
-- * "=TEXT" becomes a generated description about TEXT (see format_desc());
-- * "default" (plus optional modifiers, see process_default()) becomes a generated description about the linked
--   label, optionally preceded by "the".
-- Any other description, and a missing one, is returned unchanged.
local function replace_special_descriptions(data, desc)
	if not desc then
		return desc
	end

	if desc:find("^=") then
		-- Drop the leading "=" marker; the remainder is the topic text.
		return format_desc(data, desc:sub(2))
	end

	local is_default, no_singularize, wikify, add_the = process_default(desc)
	if not is_default then
		return desc
	end

	local linked_label = require(topic_utilities_module).link_label(data.label, no_singularize, wikify)
	if add_the then
		linked_label = "the " .. linked_label
	end
	return format_desc(data, linked_label)
end


-- Return the category's display title (without language prefix or uppercasing) if it has one, else its label.
local function get_displaytitle_or_label(data)
	local displaytitle = format_displaytitle(data, false)
	if displaytitle then
		return displaytitle
	end
	return data.label
end


-- If `topic` is the special value "default" (possibly with modifiers, see process_default()), replace it with the
-- display title or label of the category, preceded by "the" if the "with the" modifier was given. Otherwise leave
-- it alone. Returns the resulting topic and whether `topic` was a default spec.
local function process_default_add_the(data, topic)
	local is_default, _, _, add_the = process_default(topic)
	if not is_default then
		return topic, is_default
	end

	local resolved = get_displaytitle_or_label(data)
	if add_the then
		resolved = "the " .. resolved
	end
	return resolved, is_default
end


-- Resolve `desc` to a string (see convert_spec_to_string()) and expand the template specs handled at this level:
-- * {{{umbrella_msg}}}: the standard explanation shown on umbrella categories;
-- * {{{topic}}}: the topic of the category (see the comment below for how it is determined);
-- * {{{thespref}}}: "Thesaurus:" for thesaurus categories, otherwise the empty string.
-- Returns nil if `desc` resolves to a false value.
substitute_template_specs = function(data, desc)
	desc = convert_spec_to_string(data, desc)
	if not desc then
		return nil
	end

	local topdata, lang, label = data.topdata, data.lang, data.label
	if desc:find("{{{umbrella_msg}}}", 1, true) then
		local catname = ucfirst(label)
		desc = gsub_escaping_replacement(desc, "{{{umbrella_msg}}}",
			"This category contains no dictionary entries, only other categories. The subcategories are of two " ..
			"sorts:\n\n* Subcategories named like \"{{{thespref}}}aa:" .. catname ..
			"\" (with a prefixed language code) are categories of terms in specific languages. " ..
			-- NOTE(review): the link target was lost in the copy this was restored from; reconstructed as the
			-- English counterpart of this category -- confirm.
			"You may be interested especially in [[:Category:{{{thespref}}}en:" .. catname ..
			"]], for English terms.\n" ..
			"* Subcategories of this one named without the prefixed language code are further categories just like " ..
			"this one, but devoted to finer topics."
		)
	end
	if desc:find("{{{topic}}}", 1, true) then
		-- Compute the value for {{{topic}}}. If the user specified `topic`, use it. (If we're an umbrella category,
		-- allow a separate value for `umbrella.topic`, falling back to `topic`.) Otherwise, see if the description
		-- was specified as 'default' or a variant; if so, parse it to determine whether to add "the" to the label.
		-- Otherwise, just use the label directly.
		local topic = not lang and topdata.umbrella and topdata.umbrella.topic or topdata.topic
		if topic then
			topic = process_default_add_the(data, topic)
		else
			-- Named differently from the `desc` parameter to avoid shadowing it.
			local cat_desc
			if not lang then
				cat_desc = topdata.umbrella and get_and_cache(data, topdata.umbrella, "description") or
					get_and_cache(data, topdata, "umbrella_description")
			end
			cat_desc = cat_desc or get_and_cache(data, topdata, "description")
			local defaulted_desc, is_default = process_default_add_the(data, cat_desc)
			if is_default then
				topic = defaulted_desc
			else
				topic = get_displaytitle_or_label(data)
			end
		end

		desc = gsub_escaping_replacement(desc, "{{{topic}}}", topic)
	end

	-- Done last so that {{{thespref}}} inside the expanded umbrella message is handled too. The extra parentheses
	-- discard gsub's substitution count.
	desc = (desc:gsub("{{{thespref}}}", data.thesaurus_data and "Thesaurus:" or ""))

	return desc
end


-- Append one formatted box template call to `def_topright_parts` for each value in `val`.
--
-- `val` may be nil/false (nothing is added), a single value, or a list of values. Each value is inserted into
-- `pattern` (a format string with one placeholder); the value `true` stands for the uppercased label.
local function process_box(data, def_topright_parts, val, pattern)
	if not val then
		return
	end

	local values = type(val) == "table" and val or {val}
	local default_value = ucfirst(data.label)
	for _, item in ipairs(values) do
		local arg = item == true and default_value or item
		insert(def_topright_parts, pattern:format(arg))
	end
end


-- Compute the text for the top-right corner of the category page.
--
-- A default is assembled from the `wp`, `wpcat` and `commonscat` fields of the topic data (one template call per
-- value, joined by newlines). Language-specific categories use `topdata.topright` if set, else the default;
-- umbrella categories use `topdata.umbrella.topright` if it yields a value, else the default.
local function get_topright(data)
	local topdata = data.topdata
	local box_parts = {}
	local box_specs = {
		{field = "wp", pattern = "{{wikipedia|%s}}"},
		{field = "wpcat", pattern = "{{wikipedia|category=%s}}"},
		{field = "commonscat", pattern = "{{commonscat|%s}}"},
	}
	for _, spec in ipairs(box_specs) do
		process_box(data, box_parts, topdata[spec.field], spec.pattern)
	end

	local default_topright
	if #box_parts > 0 then
		default_topright = concat(box_parts, "\n")
	end

	if data.lang then
		return substitute_template_specs(data, topdata.topright or default_topright)
	end

	local umbrella = topdata.umbrella
	return umbrella and substitute_template_specs(data, umbrella.topright) or
		substitute_template_specs(data, default_topright)
end


-- Strip language-specific template parameters from `desc` so that the text can be reused for an umbrella category:
-- a leading "{{{langname}}} ", "{{{langcode}}} " or "{{{langcat}}} ", and "{{{langcode}}}:" anywhere.
local function remove_lang_params(desc)
	-- Applied in this order; none of the patterns contains magic characters other than the anchors.
	local patterns = {
		"^{{{langname}}} ",
		"{{{langcode}}}:",
		"^{{{langcode}}} ",
		"^{{{langcat}}} ",
	}
	local result = desc
	for _, pattern in ipairs(patterns) do
		result = (result:gsub(pattern, ""))
	end
	return result
end


-- Return the standard explanatory note for the category, based on its type(s):
-- * for a category with several types, a "mixed category" note listing each type's description along with a
--   warning that such categories are dispreferred;
-- * for the "all topics" label, a note about the topmost topic category;
-- * otherwise, the `additional` note of the category's single type from `type_data`.
local function get_additional_msg(data)
	local types = split_types(data.topdata.type)
	if #types > 1 then
		local parts = {"'''NOTE''': This is a mixed category. It may contain terms of any of the following category types:"}
		for i, typ in ipairs(types) do
			insert(parts, ("* %s {{{topic}}}%s"):format(type_data[typ].desc, i == #types and "." or ";"))
		end
		insert(parts, "'''WARNING''': Such categories are strongly dispreferred and should be split into separate per-type categories.")
		return concat(parts, "\n")
	elseif data.label == "all topics" then
		-- The label lives in `data`; a bare `label` here would be an (always nil) global.
		return "'''NOTE''': This is the topmost topic category for {{{langname}}}. It should not directly contain " ..
		"any terms, but only lists of topic categories organized by type."
	else
		return type_data[types[1]].additional
	end
end


-- Return the formatted message (as produced by the labels utilities module) about the labels that categorize
-- into this topic category.
local function get_labels_categorizing(data)
	local labels_utilities = require(labels_utilities_module)
	local lang = data.lang
	local matching_labels = labels_utilities.find_labels_for_category(data.label, "topic", lang)
	return labels_utilities.format_labels_categorizing(matching_labels, nil, lang)
end


-- Return the description along with the text following and preceding the description. The description and additional
-- (i.e. following) text are returned in the form of closures so the work of calculating the text (which can be
-- expensive, especially in the case of the additional text, where get_labels_categorizing() scans the entire set of
-- labels for any that categorize into this category) is not done when not needed, e.g. in higher levels of the
-- breadcrumb chain, where only the breadcrumb and parents (in fact, really just the first parent) are actually needed.
--
-- Exception: for the umbrella "all topics" category, the description and additional text are fixed strings and are
-- returned directly (with no preceding text).
local function get_description_additional_preceding(data)
	local topdata, lang, label = data.topdata, data.lang, data.label
	local desc, additional, preceding

	-- This is kind of hacky, but it works for now.
	-- In thesaurus categories, reword " terms" as " thesaurus entries".
	local function postprocess_thesaurus(txt)
		if not txt then
			return nil
		end
		if not data.thesaurus_data then
			return txt
		end
		-- Match " terms" only as a whole word (followed by a non-letter or the end of the text). The frontier
		-- pattern consumes nothing, so the following character is left in place.
		-- NOTE(review): the character class of the original pattern was lost; confirm the intended word boundary.
		return (txt:gsub(" terms%f[%A]", " thesaurus entries"))
	end

	if lang then
		desc = function()
			return postprocess_thesaurus(substitute_template_specs(data,
				replace_special_descriptions(data, get_and_cache(data, topdata, "description"))))
		end
		preceding = topdata.preceding
		additional = function()
			local additional_parts = {}
			if topdata.additional then
				insert(additional_parts, topdata.additional)
			end
			if not data.thesaurus_data then
				insert(additional_parts, get_additional_msg(data))
				local labels_msg = get_labels_categorizing(data)
				if labels_msg then
					insert(additional_parts, labels_msg)
				end
			end
			return postprocess_thesaurus(substitute_template_specs(data, concat(additional_parts, "\n\n")))
		end
	else
		if label == "all topics" then
			-- NOTE(review): the three link targets below were lost in the copy this was restored from and have
			-- been reconstructed from the surrounding wording -- confirm.
			desc = "This is the topmost topic category for all languages."
			additional = "It contains no dictionary entries, only other categories. The subcategories are of two " ..
				"sorts:\n\n" ..
				"* Subcategories listed at the beginning, without a prefixed language code, are grouping " ..
				"categories similar to this category, but are devoted to general subject areas. Under them are " ..
				"finer-grained subject areas.\n" ..
				"* Subcategories named like \"aa:All topics\" (with a prefixed language code) are top-level " ..
				"categories like this one, but for specific languages. You may be interested especially in " ..
				"[[:Category:en:All topics]], for English terms.\n" ..
				"Note that categories under this tree categorize terms semantically rather than grammatically. " ..
				"Grammatical categories (such as all French verbs, or all English irregular plural forms) " ..
				"have a different naming structure, with the language name spelled out, such as " ..
				"[[:Category:French verbs]] or [[:Category:English irregular plurals]]."
			return desc, additional
		end

		-- Assume that if the description field contains a function, the function will return non-nil, so we don't
		-- have to call the function at this point (in case it is heavyweight).
		local has_umbrella_desc = topdata.umbrella and topdata.umbrella.description or topdata.umbrella_description

		desc = function()
			-- Prefer an umbrella-specific description; otherwise derive one from the language-specific
			-- description, and as a last resort use a generic sentence.
			local text = topdata.umbrella and get_and_cache(data, topdata.umbrella, "description") or
				get_and_cache(data, topdata, "umbrella_description")
			if not text then
				text = get_and_cache(data, topdata, "description")
				if text then
					text = replace_special_descriptions(data, text)
					text = remove_lang_params(text)
					text = text:gsub("%.$", "")
					text = "This category concerns the topic: " .. text .. "."
				end
			end
			if not text then
				text = "Categories concerning " .. label .. " in various specific languages."
			end
			return postprocess_thesaurus(substitute_template_specs(data, text))
		end

		-- The language-specific preceding/additional text is only reused when there is no umbrella-specific
		-- description.
		preceding = topdata.umbrella and topdata.umbrella.preceding or not has_umbrella_desc and topdata.preceding
		if preceding then
			preceding = remove_lang_params(preceding)
		end

		additional = function()
			local additional_parts = {}
			local topdata_additional = topdata.umbrella and topdata.umbrella.additional or
				not has_umbrella_desc and topdata.additional
			if topdata_additional then
				insert(additional_parts, remove_lang_params(topdata_additional))
			end
			insert(additional_parts, "{{{umbrella_msg}}}")
			if not data.thesaurus_data then
				insert(additional_parts, get_additional_msg(data))
				local labels_msg = get_labels_categorizing(data)
				if labels_msg then
					insert(additional_parts, labels_msg)
				end
			end
			return postprocess_thesaurus(substitute_template_specs(data, concat(additional_parts, "\n\n")))
		end
	end

	preceding = substitute_template_specs(data, preceding)
	return desc, additional, preceding
end


-- Compute the sort key used when placing this category into a parent category.
--
-- If `sort` is not given, it defaults to the label with a leading article stripped (see below). The key undergoes
-- template-spec substitution, and for umbrella categories (no language) a space is prepended.
local function normalize_sort_key(data, sort)
	local lang, label = data.lang, data.label
	if not sort then
		-- When defaulting sort key to label, strip 'The ' (e.g. in 'The Matrix', 'The Hunger Games')
		-- and 'A ' (e.g. in 'A Song of Ice and Fire', 'A Christmas Carol') from label.
		sort = label:match("^[Tt]he (.*)$") or label:match("^[Aa] (.*)$") or label
	end

	sort = substitute_template_specs(data, sort)

	if not lang then
		sort = " " .. sort
	end

	return sort
end


-- Compute the list of parent categories of a (non-thesaurus) topic category.
--
-- The umbrella "all topics" category has the single parent "Category:Fundamental". Otherwise nil is returned when
-- the topic data lists no parents. Each listed parent is copied, given a normalized sort key, and (unless its name
-- starts with "Category:" or it is flagged `nontopic`) turned into a topic category name for the current language.
-- After those come one "list of TYPE categories" parent per type (plus "list of mixed categories" when there are
-- several types), unless the type is "toplevel", and finally, for language-specific categories, the umbrella
-- category sorted by the language's canonical name.
local function get_topic_parents(data)
	local topdata, lang, label = data.topdata, data.lang, data.label

	if not lang and label == "all topics" then
		return {{ name = "Category:Fundamental", sort = "topics" }}
	end

	local raw_parents = topdata.parents
	if not raw_parents or #raw_parents == 0 then
		return nil
	end

	local result = {}

	for _, raw_parent in ipairs(raw_parents) do
		-- Work on a copy so the shared topic data is not modified.
		local parent = mw.clone(raw_parent)
		if type(parent) ~= "table" then
			parent = {name = parent}
		end

		parent.sort = normalize_sort_key(data, parent.sort)

		local name = parent.name
		if type(name) ~= "string" then
			error(("Internal error: parent.name is not a string: parent = %s"):format(dump(parent)))
		end
		if name:find("^Category:") or parent.nontopic then
			-- Name is used as-is; the flag is internal and not passed on.
			parent.nontopic = nil
		else
			name = make_category_name(lang, name)
		end
		parent.name = substitute_template_specs(data, name)

		insert(result, parent)
	end

	if topdata.type ~= "toplevel" then
		local sort_prefix = lang and "" or " "
		local function add_list_parent(typ)
			insert(result, {
				name = make_category_name(lang, ("list of %s categories"):format(typ)),
				sort = sort_prefix .. label,
			})
		end

		local types = split_types(topdata.type)
		for _, typ in ipairs(types) do
			add_list_parent(typ)
		end
		if #types > 1 then
			add_list_parent("mixed")
		end
	end

	-- Add umbrella category.
	if lang then
		insert(result, {
			name = make_category_name(nil, label),
			sort = lang:getCanonicalName(),
		})
	end

	return result
end


-- Compute the list of parent categories of a thesaurus category ("Thesaurus:..."). Returns nil when the topic data
-- lists no parents.
--
-- `data.thesaurus_data.parent_substitutions` maps a parent label to the label that should be used in its place.
-- NOTE(review): the keys are assumed to be parent labels as written in the topic data -- confirm against the
-- thesaurus data module.
local function get_thesaurus_parents(data)
	local topdata, lang, label = data.topdata, data.lang, data.label
	local parent_substitutions = data.thesaurus_data.parent_substitutions
	local parents = topdata.parents

	if not parents or #parents == 0 then
		return nil
	end

	local ret = {}

	for _, parent in ipairs(parents) do
		-- Process parent categories as follows:
		-- 1. skip non-topic cats and meta-categories that start with "List of"
		-- 2. map "en:All topics" to "English thesaurus entries" (and same for other languages), but map "All topics" itself to the root "Thesaurus" category
		-- 3. check if this parent is to be substituted, if so, substitute it
		-- 4. prepend "Thesaurus:" to all other category names
		parent = mw.clone(parent)

		if type(parent) ~= "table" then
			parent = {name = parent}
		end

		parent.sort = normalize_sort_key(data, parent.sort)

		if type(parent.name) ~= "string" then
			error(("Internal error: parent.name is not a string: parent = %s"):format(dump(parent)))
		end

		-- Look up the substitution for this particular parent (nil if it is not to be substituted).
		local substitution = parent_substitutions[parent.name]

		if parent.name:find("^Category:") or parent.nontopic then
			-- skip
		elseif parent.name == "all topics" or substitution == "all topics" then
			if not lang then
				insert(ret, {
					name = "Thesaurus",
					sort = label,
				})
			else
				insert(ret, {
					name = "thesaurus entries",
					sort = parent.sort,
					lang = lang:getCode(),
					is_label = true,
				})
			end
		else
			parent.name = "Thesaurus:" .. make_category_name(lang, substitution or parent.name)
			parent.name = substitute_template_specs(data, parent.name)
			insert(ret, parent)
		end
	end

	-- Add the non-thesaurus version of this category as a parent, unless it is a thesaurus-only category.
	if not topdata.thesaurusonly then
		insert(ret, { name = make_category_name(lang, label), sort = " " })
	end

	-- Add umbrella category.
	if lang then
		insert(ret, {
			name = "Thesaurus:" .. make_category_name(nil, label),
			sort = lang:getCanonicalName(),
		})
	end

	return ret
end


-- Generate the category spec for a topic or thesaurus category, or return nil if the label is not recognized.
--
-- `category` is the full category name, `lang` the language object (nil for umbrella categories), `upcase_label`
-- the label as it appears in the category name (first letter uppercased), and `thesaurus_data` the thesaurus data
-- module's contents for "Thesaurus:" categories (nil otherwise).
--
-- NOTE(review): the topic data module is assumed to export a label table under "LABELS" and a list of
-- `{handler = ..., module = ...}` entries under "HANDLERS"; the key names were lost in the copy this was restored
-- from -- confirm against the data module.
local function generate_spec(category, lang, upcase_label, thesaurus_data)
	local label_data = require(topic_data_module)
	local label

	-- Convert label to lowercase if possible
	local lowercase_label = mw.getContentLanguage():lcfirst(upcase_label)

	-- Check if the label exists
	local labels = label_data["LABELS"]

	-- Prefer the lowercase-initial form of the label if it is a known label; otherwise keep the label as given.
	if labels[lowercase_label] then
		label = lowercase_label
	else
		label = upcase_label
	end

	local topdata = labels[label]

	-- Go through handlers
	if not topdata then
		for _, handler in ipairs(label_data["HANDLERS"]) do
			topdata = handler.handler(label)
			if topdata then
				topdata.module = handler.module
				break
			end
		end
	end

	if not topdata then
		return nil
	end

	local data = {
		category = category,
		lang = lang,
		label = label,
		topdata = topdata,
		thesaurus_data = thesaurus_data,
	}

	local description, additional, preceding = get_description_additional_preceding(data)
	local parents
	if thesaurus_data then
		parents = get_thesaurus_parents(data)
	else
		parents = get_topic_parents(data)
	end

	return {
		lang = lang and lang:getCode() or nil,
		description = description,
		additional = additional,
		preceding = preceding,
		parents = parents,
		breadcrumb = get_breadcrumb(data),
		displaytitle = format_displaytitle(data, "include lang prefix", "upcase"),
		topright = get_topright(data),
		module = topdata.module,
		-- Umbrella categories contain only subcategories, so they are allowed to have no entries.
		can_be_empty = not lang,
		hidden = false,
	}
end


-- Handler for `Thesaurus:...` categories.
-- Accepts "Thesaurus:CODE:Label" (language-specific) and "Thesaurus:Label" (umbrella). Returns nil for categories
-- that are not thesaurus categories or whose language prefix is not a known language code; throws an error for
-- labels that are listed as parent substitutions.
table.insert(raw_handlers, function(data)
	-- A language code is a lowercase letter followed by letters and hyphens, ending in a letter (e.g. "en",
	-- "roa-oit").
	local code, upcase_label = data.category:match("^Thesaurus:(%l[%a-]*%a):(.+)$")
	local lang
	if code then
		lang = require(languages_module).getByCode(code)
		if not lang then
			mw.log(("Category '%s' looks like a language-specific thesaurus category but unable to match language prefix"):
				format(data.category))
			return nil
		end
	else
		upcase_label = data.category:match("^Thesaurus:(.+)$")
	end

	if upcase_label then
		local thesaurus_data = require(thesaurus_data_module)
		-- substituted category names are not allowed
		-- NOTE(review): the substitution table is assumed to be keyed by lowercase-initial labels -- confirm.
		local substitutions = thesaurus_data.parent_substitutions
		if substitutions and substitutions[lcfirst(upcase_label)] then
			error(("Category is not allowed as a Thesaurus category: %s (see the list of parent substitutions at " ..
				"[[%s]])"):format(data.category, thesaurus_data_module))
		end
		return generate_spec(data.category, lang, upcase_label, thesaurus_data)
	end
end)


-- Handler for regular topic categories.
-- Accepts "CODE:Label" (language-specific) and "Label" (umbrella). Returns nil if the language prefix is not a
-- known language code or the label is not recognized (see generate_spec()).
table.insert(raw_handlers, function(data)
	-- A language code is a lowercase letter followed by letters and hyphens, ending in a letter (e.g. "en",
	-- "roa-oit").
	local code, upcase_label = data.category:match("^(%l[%a-]*%a):(.+)$")
	local lang
	if code then
		lang = require(languages_module).getByCode(code)
		if not lang then
			mw.log(("Category '%s' looks like a language-specific topic category but unable to match language prefix"):
				format(data.category))
			return nil
		end
	else
		upcase_label = data.category
	end

	return generate_spec(data.category, lang, upcase_label)
end)


-----------------------------------------------------------------------------
--                                                                         --
--                              RAW CATEGORIES                             --
--                                                                         --
-----------------------------------------------------------------------------


-- Root category of the thesaurus. Registered under its category name so that the table exported as RAW_CATEGORIES
-- stays a map from category name to spec (get_thesaurus_parents() uses "Thesaurus" as the root parent).
-- NOTE(review): the two link targets in `additional` were lost in the copy this was restored from and have been
-- reconstructed -- confirm.
raw_categories["Thesaurus"] = {
	description = "Category for entries of the Wiktionary thesaurus, located in a separate namespace.",
	additional = [=[
There are '''three ways to browse''' the thesaurus:
* Look under '''[[:Category:Thesaurus entries by language]]''' to get started.
* Use the search box below.
* Browse the thesaurus by topic using the links under "Subcategories" below.

The main project page is [[Wiktionary:Thesaurus]].

{{ws header|<nowiki/>|link=}}]=],
	parents = {
		"Category:Fundamental",
		"Category:Wiktionary projects",
	},
}

return {RAW_CATEGORIES = raw_categories, RAW_HANDLERS = raw_handlers}