Module:category tree/lects

Hello, you have come here looking for the meaning of the word Module:category tree/lects. In DICTIOUS you will not only get to know all the dictionary meanings for the word Module:category tree/lects, but we will also tell you about its etymology, its characteristics and you will know how to say Module:category tree/lects in singular and plural. Everything you need to know about the word Module:category tree/lects you have here. The definition of the word Module:category tree/lects will help you to be more precise and correct when speaking or writing your texts. Knowing the definition of Module:category tree/lects, as well as those of other words, enriches your vocabulary and provides you with more and better linguistic resources.

This data submodule defines part of Wiktionary's category structure.

For an introduction to the category tree system and a description of how to add or modify categories, see Module:category tree/documentation.


-- Module table; public functions such as export.register_likely_lect_parent_cat() are attached to it below.
local export = {}

-- Category specifications defined by this submodule (filled in under "RAW CATEGORIES" below).
local raw_categories = {}
-- List of handler functions; each handler below is appended with table.insert() and receives a `data` table whose
-- `category` field is the category name to match.
local raw_handlers = {}

local labels_module = "Module:labels"
local labels_utilities_module = "Module:labels/utilities"
local languages_module = "Module:languages"
local parse_utilities_module = "Module:parse utilities"
local string_pattern_escape_module = "Module:string/patternEscape"
local string_replacement_escape_module = "Module:string/replacementEscape"
local string_utilities_module = "Module:string utilities"
local table_module = "Module:table"

local m_table = require(table_module)

local get_lang = require(languages_module).getByCode
local insert_if_not = m_table.insertIfNot
local pattern_escape = require(string_pattern_escape_module)
local replacement_escape = require(string_replacement_escape_module)
local serial_comma_join = m_table.serialCommaJoin
local split = require(string_utilities_module).split
local split_on_comma = require(parse_utilities_module).split_on_comma

-- Record a tracking hit for `page`, namespaced under "category tree/lects/". The work is delegated to
-- Module:debug/track, and whatever that module returns is passed back to the caller.
local function track(page)
	local tracker = require("Module:debug/track")
	return tracker("category tree/lects/" .. page)
end

-- This module handles lect/variety categories of all sorts: regional lect categories, temporal lect categories and
-- sociolect categories, as well as umbrella categories of the form "Varieties of LANG" and "Regional LANG".

-- FIXME: Eliminate the word "dialect" here and in the {{auto cat}} parameter in favor of "lect" or "variety". 

--[=[
FIXME:

1. Support multiple parents. 
2. Support cat: in parents to indicate a category. 
3. When linking a description without embedded links, use the equivalent of {{wtorw}} to auto-link to Wikipedia. 
4. Support the = true. 
]=]

-----------------------------------------------------------------------------
--                                                                         --
--                              RAW CATEGORIES                             --
--                                                                         --
-----------------------------------------------------------------------------


-- Each spec must be stored under its category name. Assigning to `raw_categories` itself would make the second
-- spec overwrite the first and leave the table without any named entries.

-- Top-level umbrella category; the "Varieties of LANG" handler below names it as a parent.
raw_categories["Language varieties"] = {
	description = "Categories that group terms in varieties of various languages (regional, temporal, sociolectal, etc.).",
	additional = "{{{umbrella_meta_msg}}}",
	parents = {
		"Fundamental",
	},
}

-- Umbrella category for regional varieties; the "Regional LANG" handler below names it as a parent.
raw_categories["Regionalisms"] = {
	description = "Categories that group terms in regional varieties of various languages.",
	additional = "{{{umbrella_meta_msg}}}",
	parents = {
		"Fundamental",
		"Language varieties",
	},
}


-----------------------------------------------------------------------------
--                                                                         --
--                                RAW HANDLERS                             --
--                                                                         --
-----------------------------------------------------------------------------


-- Uppercase the first character of `text` using the wiki's content-language object.
local function ucfirst(text)
	local content_lang = mw.getContentLanguage()
	return content_lang:ucfirst(text)
end

-- Lowercase the first character of `text` using the wiki's content-language object.
local function lcfirst(text)
	local content_lang = mw.getContentLanguage()
	return content_lang:lcfirst(text)
end

-- Report whether the wiki page named `page` exists. If no title object can be built for `page`, the falsy value
-- returned by mw.title.new() is passed back; otherwise the title's `exists` field is returned.
local function page_exists(page)
	local title = mw.title.new(page)
	if not title then
		return title
	end
	return title.exists
end


-- Raw handler for the per-language umbrella categories named "Varieties of LANG", where LANG is a canonical language
-- name recognized by Module:languages. Returns nothing for any other category.
table.insert(raw_handlers, function(data)
	local langname = data.category:match("^Varieties of (.*)$")
	if not langname then
		return
	end
	local lang = require("Module:languages").getByCanonicalName(langname)
	if not lang then
		return
	end
	local langcode = lang:getCode()
	local langlink = lang:makeCategoryLink()
	return {
		lang = langcode,
		description = "Categories containing terms in varieties of " .. langlink .. " (regional, temporal, sociolectal, etc.).",
		parents = {
			"{{{langcat}}}",
			{name = "Language varieties", sort = langname},
		},
		breadcrumb = "Varieties",
	}
end)


-- Raw handler for the per-language categories named "Regional LANG", where LANG is a canonical language name
-- recognized by Module:languages. Returns nothing for any other category.
table.insert(raw_handlers, function(data)
	local langname = data.category:match("^Regional (.*)$")
	if not langname then
		return
	end
	local lang = require("Module:languages").getByCanonicalName(langname)
	if not lang then
		return
	end
	local langcode = lang:getCode()
	local langlink = lang:makeCategoryLink()
	return {
		lang = langcode,
		description = "Categories containing terms in regional varieties of " .. langlink .. ".",
		additional = "This category sometimes also directly contains terms that are uncategorized regionalisms: such terms should be recategorized by the particular regional variety they belong to, or categorized as dialectal.",
		parents = {
			"Varieties of {{{langname}}}",
			{name = "Regionalisms", sort = langname},
		},
		breadcrumb = "Regional",
	}
end)


-- "If not empty": trim surrounding whitespace from `arg` and return the result, or nil when `arg` is missing (nil or
-- false) or nothing is left after trimming.
local function ine(arg)
	if arg then
		local trimmed = mw.text.trim(arg)
		if trimmed ~= "" then
			return trimmed
		end
	end
	return nil
end


-- Return the full language object corresponding to `lang` (e.g. for use in the settings), or nil when `lang` is a
-- family.
local function get_returnable_lang(lang)
	if not lang:hasType("family") then
		return lang:getFull()
	end
	return nil
end


-- Return the code of the full language corresponding to `lang`, for use in the returned settings. Families have no
-- full language, so "und" is returned for them.
local function get_returnable_lang_code(lang)
	if not lang:hasType("family") then
		return lang:getFullCode()
	end
	return "und"
end


-- Forward declaration, so that determine_lect_type() below can refer to the handler as a local. It is called there as
-- memoizing_lect_handler(category, nil, true) and its second return value is read as the lect type. The assignment
-- itself is not in this part of the file.
local memoizing_lect_handler


-- Look up `category` as the canonical name of a language, etymology-only language or family. Despite the function's
-- name, the return value is whatever getByCanonicalName() returns (a language object on success), not a name.
--
-- Some languages have lowercase-initial names, e.g. 'the BMAC substrate', while the category begins with an uppercase
-- letter, so a failed lookup is retried with the first letter lowercased.
local function category_to_lang_name(category)
	local get_by_name = require("Module:languages").getByCanonicalName
	return get_by_name(category, nil, "allow etym", "allow family")
		or get_by_name(lcfirst(category), nil, "allow etym", "allow family")
end


-- Given a category name `cat` (without the "Category:" prefix), read the wikitext of the category page, find the
-- first {{auto cat}} invocation in it and return that template's arguments. Returns nil if no title can be built for
-- the page, the page has no content, or no {{auto cat}} call is found.
local function scrape_category_for_auto_cat_args(cat)
	local cat_page = mw.title.new("Category:" .. cat)
	local contents = cat_page and cat_page:getContent()
	if not contents then
		return nil
	end
	local find_templates = require("Module:template parser").find_templates
	for template in find_templates(contents) do
		-- The template parser automatically handles redirects and canonicalizes them, so uses of {{autocat}} are
		-- found as well.
		if template:get_name() == "auto cat" then
			return template:get_arguments()
		end
	end
	return nil
end


-- Work out the type of a variety when type= was not given. The checks run in this order:
--
-- 1. "reconstructed" if `category` or the language's canonical name begins with "Proto-", or the language has type
--    "reconstructed".
-- 2. "unattested" if the language code begins with "qsb-" (a substrate).
-- 3. "extinct" if `lang` is a full language whose category page calls {{auto cat}} with a true extinct= value.
-- 4. Otherwise the type reported by the lect handler for `default_parent_cat`, if one is given and the handler
--    returns a type. This is correct e.g. for subvarieties of Classical Persian, where the language itself (Persian)
--    isn't extinct but the parent category refers to an extinct variety.
-- 5. Otherwise the result of repeating the whole procedure on the parent of `lang`, without a default parent
--    category.
-- 6. "extant" if all else fails.
local function determine_lect_type(category, lang, default_parent_cat)
	local looks_proto = category:find("^Proto%-") or lang:getCanonicalName():find("^Proto%-")
	if looks_proto or lang:hasType("reconstructed") then
		return "reconstructed"
	end

	if lang:getCode():find("^qsb%-") then
		return "unattested"
	end

	if lang:hasType("full") then
		local auto_cat_args = scrape_category_for_auto_cat_args(lang:getCategoryName())
		if auto_cat_args and ine(auto_cat_args.extinct)
			and require("Module:yesno")(auto_cat_args.extinct, false) then
			return "extinct"
		end
	end

	if default_parent_cat then
		-- Make sure the lect handler is willing to process the parent even if it is nonstandardly named.
		export.register_likely_lect_parent_cat(default_parent_cat)
		local _, parent_type = memoizing_lect_handler(default_parent_cat, nil, true)
		if parent_type then
			return parent_type
		end
	end

	local parent_lang = lang:getParent()
	if not parent_lang then
		return "extant"
	end
	return determine_lect_type(category, parent_lang, nil)
end


-- Infer the region (used as the default breadcrumb and region description) for the category `pagename` from the
-- language object `lang`. Returns the region string, or nil if none can be inferred.
--
-- The page name is matched against "REGION SUFFIX", where SUFFIX is first the full canonical name of a language and
-- then that name with one leading word after another chopped off. The languages tried are `lang` and then each of its
-- ancestors via getParent(). If the page name is identical to the (first-letter-uppercased) name of `lang`, we start
-- with the parent instead, because otherwise a suffix of the language's own name would always match, which is not
-- what we want.
--
-- For example, a page named after 'British English' (en-GB) yields 'British' via the parent language. A page named
-- after 'Walser German' has the ancestor 'Alemannic German'; plain 'German' is not itself a language in the
-- parent-child chain but is a suffix of that ancestor's name, which yields 'Walser'.
local function infer_region_from_lang(pagename, lang)
	local candidate = lang
	if ucfirst(lang:getCanonicalName()) == pagename then
		candidate = candidate:getParent()
	end

	while candidate do
		local suffix = candidate:getCanonicalName()
		repeat
			local region = pagename:match("^(.*) " .. pattern_escape(suffix) .. "$")
			if region then
				return region
			end
			-- Drop the leftmost word; the result is nil once only one word is left.
			suffix = suffix:match("^.- (.*)$")
		until not suffix
		candidate = candidate:getParent()
	end

	return nil
end


-- Split `pagename` into a language and the region that precedes it. Returns two values: the language object (full or
-- etymology-only language, or family) and the region string; either may be nil.
--
-- The longest possible language name on the right is preferred, because of cases like 'English' vs 'Middle English'
-- and 'Chinese Pidgin English': a page name ending in 'Late Middle English' should split as 'Late' and
-- 'Middle English', not as 'Late Middle' and 'English'. To achieve this we try the entire title as a language first
-- and then keep chopping off one word on the left.
--
-- If the whole page name is itself a language name, there is no region on the left, so we try to infer one from the
-- language's ancestors with infer_region_from_lang().
local function split_region_lang(pagename)
	local words = split(pagename, " ")
	local nwords = #words
	local lang, region

	for start = 1, nwords do
		lang = category_to_lang_name(table.concat(words, " ", start, nwords))
		if lang then
			if start > 1 then
				region = table.concat(words, " ", 1, start - 1)
			end
			break
		end
	end

	if lang and not region then
		region = infer_region_from_lang(pagename, lang)
	end

	return lang, region
end


-- Return the name of the default parent category for `category`, whose language object is `lang`.
--
-- * Substrate languages (code beginning with "qsb-") always go under "Substrate languages".
-- * If `category` is named the same as `lang` (after uppercasing the first letter), we're dealing with the overall
--   category for an etymology-only language, so the parent is based on the parent of `lang`; an error is thrown if
--   there is no such parent. Otherwise we're dealing with a subcategory of `lang`, so `lang` itself is used.
-- * If the language so chosen is an etymology-only language or a family, its own category name is returned.
--   Otherwise the result is "Regional LANG", or "Varieties of LANG" when `noreg` is truthy.
local function get_default_parent_cat_from_category(category, lang, noreg)
	if lang:getCode():find("^qsb%-") then
		return "Substrate languages"
	end

	local lang_for_cat = lang
	if ucfirst(lang:getCanonicalName()) == category then
		lang_for_cat = lang:getParent()
		if not lang_for_cat then
			error(("Category '%s' has a name the same as a full language; you probably need to explicitly specify a different language using |lang="):format(category))
		end
	end

	if lang_for_cat:hasType("etymology-only") or lang_for_cat:hasType("family") then
		return lang_for_cat:getCategoryName()
	end

	local prefix = noreg and "Varieties of " or "Regional "
	return prefix .. lang_for_cat:getCanonicalName()
end


-- Find the labels that categorize into `category`. Only categories specified using the `regional_categories` and
-- `plain_categories` fields are considered.
--
-- Parameters:
-- * `category`: the category name to look up.
-- * `lang`: language object used for `regional_categories` lookups, which append the language name onto a category
--   prefix. If `lang` is omitted or does not have type "language" (e.g. it is a family), no labels specified using
--   `regional_categories` are returned.
--
-- The `plain_categories` lookup is made with the "check all langs" flag, so that lang-specific label modules of other
-- languages are consulted as well.
--
-- Returns the table produced by `find_labels_for_category` in the labels utilities module. When both lookups return
-- a table, the plain-category entries are merged into the regional-category table, overriding entries with the same
-- key, and that table is returned. Returns nil if neither lookup returns anything.
--
-- FIXME: It should be possible to check for categories specified using `regional_categories` even when `lang` is nil.
local function find_labels_for_category(category, lang)
	local m_labels_utilities = require(labels_utilities_module)
	local regional_cat_labels
	local full_lang

	if lang and lang:hasType("language") then
		full_lang = lang:getFull()
		-- A regional category is named "PREFIX LANGNAME"; only the prefix is passed to the regional lookup.
		local regional_component = category:match("^(.-) " .. pattern_escape(full_lang:getCanonicalName()) .. "$")
		if regional_component then
			regional_cat_labels = m_labels_utilities.find_labels_for_category(regional_component,
				"regional", full_lang)
		end
	end
	local plain_cat_labels = m_labels_utilities.find_labels_for_category(category, "plain", full_lang,
		"check all langs")

	if not (regional_cat_labels and plain_cat_labels) then
		return regional_cat_labels or plain_cat_labels
	end

	-- Both lookups succeeded: merge entry by entry. Each value must be stored under its own key; assigning it to the
	-- result variable itself would replace the whole table with a single label object.
	for key, labelobj in pairs(plain_cat_labels) do
		regional_cat_labels[key] = labelobj
	end
	return regional_cat_labels
end


-- Find the labels categorizing into `category` for language object `lang` (which may be nil or a family; see
-- find_labels_for_category() for the consequences), keep only those defined in a lang-specific label module, and sort
-- them for use in checking properties such as parent and description. Labels from general modules are dropped because
-- they can't carry proper descriptions or parents, which tend to be language-specific.
--
-- Sort order:
-- 1. Labels whose canonical form or any alias equals the category name.
-- 2. Labels whose canonical form or any alias, followed by a space, is a prefix of the category name. This
--    prioritizes labels whose categories are specified using `regional_categories`; e.g. if both the labels "Alberta"
--    and "Canada" (with alias "Canadian") for lang=en categorize into a category whose name begins with "Canadian ",
--    the label "Canada" is preferred.
-- 3. All other labels.
-- Within a group, labels whose language has the same full code as `lang` come first; remaining ties are broken by
-- comparing the canonical forms.
--
-- Returns two values: the unfiltered table returned by find_labels_for_category(), and the filtered, sorted list of
-- label objects. Returns a single nil if no labels categorize into `category`; the second value is nil if no labels
-- remain after filtering.
local function get_sorted_labels(category, lang)
	local all_labels = find_labels_for_category(category, lang)
	if not all_labels then
		return nil
	end

	local lang_specific_pattern = "^" .. pattern_escape(require(labels_module).lang_specific_data_modules_prefix)
	local candidates = {}
	for _, labelobj in pairs(all_labels) do
		if labelobj.module:find(lang_specific_pattern) then
			candidates[#candidates + 1] = labelobj
		end
	end

	-- True if the canonical form or some alias of `labelobj` is exactly the category name.
	local function matches_exactly(labelobj)
		if labelobj.canonical == category then
			return true
		end
		for _, alias in ipairs(labelobj.aliases) do
			if alias == category then
				return true
			end
		end
		return false
	end

	-- True if the category name starts with the canonical form or some alias of `labelobj`, followed by a space.
	local function matches_as_prefix(labelobj)
		if category:find("^" .. pattern_escape(labelobj.canonical) .. " ") then
			return true
		end
		for _, alias in ipairs(labelobj.aliases) do
			if category:find("^" .. pattern_escape(alias) .. " ") then
				return true
			end
		end
		return false
	end

	-- Prefer the label in the same full language as `lang`; otherwise order by canonical form.
	local function tiebreak(a, b)
		local a_matches_lang = lang and a.lang:getFullCode() == lang:getFullCode()
		local b_matches_lang = lang and b.lang:getFullCode() == lang:getFullCode()
		if a_matches_lang and not b_matches_lang then
			return true
		end
		if b_matches_lang and not a_matches_lang then
			return false
		end
		return a.canonical < b.canonical
	end

	-- Returns true/false when exactly one of the two flags is set, or nil when the flags don't decide the order.
	local function decide(a_hit, b_hit)
		if a_hit and not b_hit then
			return true
		end
		if b_hit and not a_hit then
			return false
		end
		return nil
	end

	local function sort_labelobj(a, b)
		local a_exact = matches_exactly(a)
		local b_exact = matches_exactly(b)
		local verdict = decide(a_exact, b_exact)
		if verdict == nil and not a_exact then
			-- Neither label matches exactly, so fall back to comparing prefix matches.
			verdict = decide(matches_as_prefix(a), matches_as_prefix(b))
		end
		if verdict == nil then
			return tiebreak(a, b)
		end
		return verdict
	end

	table.sort(candidates, sort_labelobj)
	if #candidates == 0 then
		return all_labels, nil
	end
	return all_labels, candidates
end


-- Find the categories (only of type `regional_categories` and `plain_categories`) that label `label` categorizes into.
-- Return value is nil if the label couldn't be located at all, otherwise a list of categories (which may be empty).
--
-- `label` is the label name to look up and `lang` the language object passed through to the labels module.
--
-- NOTE(review): the two table constructors passed as the last argument of fetch_categories() below read `{ = true}`,
-- which is not valid Lua; the key of each table appears to have been lost. Presumably each key selects one of the two
-- category types named above -- restore them from the authoritative copy of this module before using this function.
local function get_categories_for_label(label, lang)
	local m_labels = require(labels_module)
	local labret = m_labels.get_label_info { label = label, lang = lang }
	-- An unrecognized label is reported as nil rather than as an empty list.
	if not labret.recognized then
		return nil
	end
	-- Two separate fetches are made with the same label, data and language, differing only in the final options
	-- table; the results of the second are appended to those of the first.
	local categories = m_labels.fetch_categories(labret.canonical or label, labret.data, lang, nil, nil,
		{ = true})
	local reg_cats = m_labels.fetch_categories(labret.canonical or label, labret.data, lang, nil, nil,
		{ = true})
	if #reg_cats > 0 then
		for _, cat in ipairs(reg_cats) do
			table.insert(categories, cat)
		end
	end
	return categories
end


-- Given the sorted labels that categorize into `category`, return the parent categories taken from the first label
-- that has a `parent` field.
--
-- Parameters:
-- * `sorted_labels`: list of label objects; `labdata.parent`, `canonical`, `module` and `lang` are read from them.
-- * `category`: the category being processed; only used in error messages.
-- * `all_cats`: if truthy, the categories of all parent specs of the chosen label are collected (without
--   duplicates); otherwise only the categories of the first parent spec are returned.
--
-- The `parent` field may be `true`, a comma-separated string or a list. Each parent spec is interpreted as follows:
-- * `true` or "+": the placeholder "+", which the caller replaces with the default parent category;
-- * "cat:NAME": the arbitrary category "Category:NAME";
-- * "Category:NAME": passed through unchanged;
-- * "rawposcat:LABEL": the raw poscat label LABEL;
-- * "poscat:LANGCODE:LABEL": the object {name = LABEL, lang = LANGCODE, is_label = true};
-- * anything else: a label name, looked up with get_categories_for_label() in the language of the chosen label.
--
-- Returns two values: the list of parent categories, in a form that can be used directly as the `parents` field
-- returned by a raw handler, and the label object they were derived from. If the chosen label yields no categories,
-- the list is {"+"}. If no label has a `parent` field, the list is {"+"} and the second value is nil.
--
-- Throws an error if a "poscat:" spec is malformed, if a parent label can't be located, or if the `parent` field is
-- neither `true`, a string nor a table.
local function get_parents_from_sorted_labels(sorted_labels, category, all_cats)
	for _, labobj in ipairs(sorted_labels) do
		local parent = labobj.labdata.parent
		if parent == true then
			parent = {parent}
		elseif type(parent) == "string" then
			parent = split_on_comma(parent)
		end

		-- Convert a single parent spec into a list of parent categories.
		local function get_parent_cats(par)
			if par == true or par == "+" then
				return {"+"}
			end
			if par:find("^cat:") then
				-- Parenthesized so that only the string result of gsub() is used, not the match count.
				return {"Category:" .. (par:gsub("^cat:", ""))}
			end
			if par:find("^Category:") then
				return {par}
			end
			if par:find("^rawposcat:") then
				return {(par:gsub("^rawposcat:", ""))}
			end
			if par:find("^poscat:") then
				-- The language code is everything up to the next colon; the label is the rest.
				local langcode, label = par:match("^poscat:([^:]+):(.*)$")
				if not langcode then
					error(("Parent poscatboiler language label '%s' for label '%s' for category '%s' (defined in module [[%s]]) needs to be of the form 'poscat:LANGCODE:LABEL'"):format(
						par, labobj.canonical, category, labobj.module))
				end
				return {{name = label, lang = langcode, is_label = true}}
			end
			local this_cats = get_categories_for_label(par, labobj.lang)
			if not this_cats then
				error(("Parent label '%s' for label '%s' for category '%s' (defined in module [[%s]]) couldn't be located"):format(
					par, labobj.canonical, category, labobj.module))
			end
			return this_cats
		end

		if parent then
			if type(parent) ~= "table" then
				error(("Internal error: Expected a string, boolean `true` or list for the value of the parent field for label '%s' for category '%s' (defined in module [[%s]]), but saw type '%s': %s"):format(
					labobj.canonical, category, labobj.module, type(parent), mw.dumpObject(parent)))
			end
			local cats
			if all_cats then
				cats = {}
				for _, par in ipairs(parent) do
					for _, this_cat in ipairs(get_parent_cats(par)) do
						insert_if_not(cats, this_cat)
					end
				end
			else
				-- Only the first parent spec counts here. `parent` is always a list at this point, so it must be
				-- indexed; an empty list yields no categories.
				local first_parent = parent[1]
				if first_parent ~= nil then
					cats = get_parent_cats(first_parent)
				else
					cats = {}
				end
			end

			if #cats > 0 then
				return cats, labobj
			end
			-- FIXME: If the parent doesn't specify any categories, should we try the next parent or fall back
			-- to the parent determined through get_default_parent_cat_from_category() (which is what we currently
			-- do)?
			return {"+"}, labobj
		end
	end
	return {"+"}, nil
end

-- Set of category names that are likely to be lect categories, keyed by name.
local likely_lect_parent_cat = {}

-- Register that `cat` is likely to be a lect category, so that the lect handler tries to handle it as such when it
-- is called on that category. This avoids the need for manual allow-lists of nonstandardly-named parent lect
-- categories.
--
-- Only plain category names are recorded: values that are not strings (e.g. parent lists or label objects) and
-- strings beginning with "Category:" are ignored.
function export.register_likely_lect_parent_cat(cat)
	if type(cat) == "string" and not cat:find("^Category:") then
		-- Store under the category's own key; assigning to the variable itself would discard the table and lose
		-- track of which categories were registered.
		likely_lect_parent_cat[cat] = true
	end
end

-- Handle lect categories such as ], ],
-- ], ] or arbitrarily-named categories like
-- ]. We currently require that lect=1 is specified to the call to {{auto cat}} to avoid
-- overfiring. However, if called from inside, we are processing the breadcrumb for the parent (or conceivably the
-- child) of a lect category, and won't have any params set, so we can't rely on lect=1. In that case, only fire
-- if the category is or ends in the name of a full or etymology-only language, and scrape the category's call to
-- {{auto cat}} to get the appropriate params. This means that nonstandardly-named categories like
-- ] can't be parents of other lect categories. To work around this, either we have to
-- relax the code below to operate on all raw categories (not necessarily a good idea), or we rename the
-- nonstandardly-named categories (e.g. in the case above, to ], since Walser German
-- is a recognized etymology-only language).
--
-- NOTE: We are able to handle categories for etymology-only families (currently only ] and
-- ]) and for etymology-only substrate languages (e.g. ]).
-- There is some special "family" code for the former.
local function lect_handler(category, raw_args, called_from_inside)
	if called_from_inside then
		-- Avoid infinite loops from wrongly processing non-lect categories. We have a check around line 344 below
		-- for categories whose {{auto cat}} doesn't say lect=1, but we still need the following in case of
		-- non-existent categories we're being asked to process (e.g. ] ->
		-- ] (nonexistent) -> ] (nonexistent), which
		-- causes an infinite loop without the check below.
		if category:find("^Regional ") or category:find("^Varieties of ") or category:find("^Rhymes:") then
			return nil
		end

		-- If called from inside we won't have any params available. See comment above about this. We scrape the
		-- category page's call to {{auto cat}} to get the appropriate params, and if that fails, we currently fall back
		-- to defaults based on the label(s) that categorize(s) into the category or the name of the category. Since the
		-- call from inside is only to get the parent category and breadcrumb, these defaults actually work in most
		-- cases but not all; e.g. in the chain ] -> ] ->
		-- ] -> ], if we are forced to use default values, we will
		-- produce the right parent for ] but not for ], where the
		-- default parent would be ] instead of the correct ].
		local lang, breadcrumb = split_region_lang(category)
		if lang or likely_lect_parent_cat then
			raw_args = scrape_category_for_auto_cat_args(category)
			if raw_args and not ine(raw_args.lect) then
				-- We are scraping something like ] that ends in a valid language but is not
				-- a lect.
				return nil
			end
			if not raw_args then
				-- If we can't parse the scraped {{auto cat}} spec, return default values. This helps e.g. in converting
				-- from the old {{dialectboiler}} template and generally when adding new varieties.
				local parents, label_with_parent

				local function getprop(prop)
					return -- ine(raw_args) or
						label_with_parent and label_with_parent.labdata
				end

				local sorted_labels = select(2, get_sorted_labels(category, lang))
				if sorted_labels then
					parents, label_with_parent = get_parents_from_sorted_labels(sorted_labels, category)
					if not lang and label_with_parent then
						lang = label_with_parent.lang
					end
				else
					parents = {"+"}
				end

				if not lang then
					-- We were instructed to scrape by virtue of `register_likely_lect_parent_cat`, but couldn't scrape
					-- anything.
					return nil
				end

				local default_parent_cat_from_category = get_default_parent_cat_from_category(category, lang,
					getprop("noreg"))
				for i, parent in ipairs(parents) do
					if parent == "+" then
						parents = default_parent_cat_from_category
					end
				end
				local first_parent_cat = parents
				if type(first_parent_cat) ~= "string" or first_parent_cat:find("^Category:") then
					-- Only keep `first_parent_cat` if it refers to a raw poscat label (which is probably a lect
					-- handler label).
					first_parent_cat = nil
				end

				track("lect")
				export.register_likely_lect_parent_cat(parents)

				-- NOTE: When called from inside, the description doesn't matter; nor do any parents other than the
				-- first. This is because called_from_inside is only set when computing the breadcrumb trail, which
				-- only needs the language, first parent and breadcrumb.
				return {
					-- FIXME, allow etymological codes here
					lang = get_returnable_lang_code(lang),
					description = "Foo",
					parents = parents,
					breadcrumb = breadcrumb or lang:getCanonicalName(),
					umbrella = false,
					can_be_empty = true,
				}, determine_lect_type(category, lang, first_parent_cat)
			end
		else
			return nil
		end
	end

	if not called_from_inside and not ine(raw_args.lect) then
		return nil
	end

	-------------------- 1. Process parameters. -------------------

	local boolean = {type = "boolean"}
	local sublist = {sublist = "comma without whitespace"}
	local args = require("Module:parameters").process(raw_args, {
		 = true,
		lect = boolean,
		lang = true,
		verb = true,
		prep = true,
		the = boolean,
		def = true,
		fulldef = true,
		addl = true,
		nolink = boolean,
		noreg = boolean, -- don't make the default parent be "Regional LANG"; instead, "Varieties of LANG"
		type = {set = {"extinct", "extant", "reconstructed", "unattested", "constructed"}},
		cat = true,
		othercat = sublist,
		country = sublist,
		wp = true,
		wikidata = true,
		breadcrumb = true,
		pagename = true, -- for testing or demonstration
	})

	-------------------- 2. Initialize breadcrumb, regiondesc and language from category. -------------------

	-- They may be overridden later.

	local lang, breadcrumb, regiondesc, langname
	category = args.pagename or category
	if not args.lang then
		lang, breadcrumb = split_region_lang(category)
		if lang then
			langname = lang:getCanonicalName()
		end
		-- The lang and/or breadcrumb may be nil at this point (e.g. we're processing a category like
		-- ] or ] that doesn't have a language in it). We don't throw an error
		-- yet because we may be able to fetch the lang, regiondesc and breadcrumb from a label that categorizes into
		-- the category.
		regiondesc = breadcrumb
	else
		lang = get_lang(args.lang, "lang", "allow etym")
		langname = lang:getCanonicalName()
		if category == ucfirst(langname) then
			-- breadcrumb and regiondesc should stay nil; breadcrumb will get `category` as a default, and the lack of
			-- regiondesc will cause an error to be thrown unless the user gave it explicitly or specified def=.
		else
			breadcrumb = category:match("^(.*) " .. pattern_escape(langname) .. "$")
			if not breadcrumb then
				-- Try to infer the region from the parent. See comment at function.
				breadcrumb = infer_region_from_lang(category, lang)
			end
			regiondesc = breadcrumb
		end
	end

	-------------------- 3. Determine labels categorizing into this category. -------------------

	local all_labels, sorted_labels = get_sorted_labels(category, lang)

	-------------------- 4. Determine parent categories and initialize additional properties. -------------------

	-- The first label with a parent is used to fetch additional properties, such as region= and addl=.

	local parents
	local first_parent_cat = args.cat
	local label_with_parent

	-- Look up property `prop`. An explicitly given argument wins; otherwise fall back to the data of the label
	-- that supplied the parent category (`label_with_parent`), if one was found. Returns a falsy value (nil or
	-- false) when neither source provides the property.
	local function getprop(prop)
		return args[prop] or label_with_parent and label_with_parent.labdata[prop]
	end

	if first_parent_cat then
		parents = {first_parent_cat}
		if not lang then
			error(("lang= not given and unable to parse language from category '%s' (didn't check labels categorizing into the category because cat= explicitly given)"):format(category))
		end
	else
		if sorted_labels then
			parents, label_with_parent = get_parents_from_sorted_labels(sorted_labels, category, "all cats")
			if not lang and label_with_parent then
				lang = label_with_parent.lang
				langname = lang:getCanonicalName()
			end
		else
			parents = {"+"}
		end
		if not lang then
			error(("lang= not given, unable to parse language from category '%s' and can't find a label categorizing into the category"):format(category))
		end
		local default_parent_cat_from_category = get_default_parent_cat_from_category(category, lang, getprop("noreg"))
		for i, parent in ipairs(parents) do
			if parent == "+" then
				parents = default_parent_cat_from_category
			end
		end
		first_parent_cat = parents
	end
	if type(first_parent_cat) ~= "string" or first_parent_cat:find("^Category:") then
		-- Only keep `first_parent_cat` if it refers to a raw poscat label (which is probably a lect handler label).
		-- WARNING: Code below using `first_parent_cat` must handle nil.
		first_parent_cat = nil
	end

	local othercat = getprop("othercat")
	
	if type(othercat) == "string" then
		othercat = split_on_comma(othercat)
	end
	if othercat then
		for _, cat in ipairs(othercat) do
			if not cat:find("^Category:") then
				cat = "Category:" .. cat
			end
			table.insert(parents, cat)
		end
	end

	local countries = getprop("country") or {}
	if type(countries) == "string" then
		countries = split_on_comma(countries)
	end

	-- If no breadcrumb, this often happens when the langname and category are the same (happens only with etym-only
	-- languages), and the parent category is set below to the full parent, so the breadcrumb should show the
	-- language name (or equivalently, the category). If the langname and category are different, we should fall back to
	-- the category. E.g. for Singlish, lang=en is specified and we can't infer a breadcrumb because the lect name
	-- doesn't end in "English"; in this case we want the breadcrumb to show "Singlish".
	breadcrumb = getprop("breadcrumb") or breadcrumb or category

	local the_prefix

	if args then
		regiondesc = args
	else
		local regionprop = getprop("region")
		if regionprop then
			regiondesc = regionprop
		elseif label_with_parent then
			-- It's not clear which of the following two are better. The second one uses the actual label display form,
			-- which might be argued to be better, except that it will often be linked to a Wikipedia article about the
			-- lect rather than the place. The first one just uses the canonical label directly (which will later be
			-- linked to itself if unlinked). A third possibility is to use `label_with_parent.display` if present,
			-- otherwise `label_with_parent.canonical`.
			regiondesc = label_with_parent.canonical
			if label_with_parent.display and regiondesc ~= label_with_parent.display then
				track("display-different-from-canonical")
			end
			-- regiondesc = require(labels_module).get_displayed_label(label_with_parent.canonical, label_with_parent.labdata, lang)
		end
	end
	the_prefix = the_prefix or getprop("the") and "the " or ""

	if #countries == 0 then
		countries = regiondesc and the_prefix .. regiondesc or nil
	end
	for _, country in ipairs(countries) do
		if not country:find("") then
			country = require("Module:links").remove_links(country)
			local cat = "Category:Languages of " .. country
			if page_exists(cat) then
				table.insert(parents, cat)
			end
		end
	end

	-------------------- 5. Refine the language to an etymology-only child if possible. -------------------
	
	-- Now that we've determined the parent, we look up the parent hierarchy until we find a category naming an
	-- etymology-only language. If we find one and it's a child of the language we've determined, use it.

	local ancestral_cat = first_parent_cat

	local refined_lang
	while true do
		refined_lang = category_to_lang_name(ancestral_cat)
		if refined_lang then
			break
		end
		export.register_likely_lect_parent_cat(ancestral_cat)
		local settings, _ = memoizing_lect_handler(ancestral_cat, nil, true)
		if not settings then
			break
		end
		ancestral_cat = settings.parents
	end

	if refined_lang and refined_lang:hasParent(lang) then
		lang = refined_lang
		langname = lang:getCanonicalName()
	end

	-------------------- 6. Initialize `additional` with user-specified text and info about labels. -------------------

	local additional = getprop("addl")

	-- Add `addl_text` to the end of `additional` as a new paragraph (separated by a blank line), or make it the
	-- whole of `additional` if nothing has been accumulated yet. A nil or false `addl_text` is ignored.
	local function append_addl(addl_text)
		if addl_text then
			additional = additional and (additional .. "\n\n" .. addl_text) or addl_text
		end
	end

	if all_labels then
		local m_labels_utilities = require(labels_utilities_module)
		append_addl(m_labels_utilities.format_labels_categorizing(all_labels, nil,
			get_returnable_lang(lang)))
	end

	-------------------- 7. Augment `additional` with information about etymology-only codes. -------------------

	local langname_for_desc
	local etymcodes = {}
	-- Wrap a language code in <code> tags for display.
	local function make_code(code)
		return string.format("<code>%s</code>", code)
	end
	if lang:hasType("etymology-only") and ucfirst(langname) == category then
		langname_for_desc = lang:getParentName()
		local langcode = lang:getCode()
		table.insert(etymcodes, make_code(langcode))
		-- Find all alias codes for the etymology-only language.
		-- FIXME: There should be a better/easier way of doing this.
		local ety_code_to_name = mw.loadData("Module:etymology languages/code to canonical name")
		for code, canon_name in pairs(ety_code_to_name) do
			if canon_name == langname and code ~= langcode then
				table.insert(etymcodes, make_code(code))
			end
		end
		local addl_etym_codes = ("] code: %s."):format(
			serial_comma_join(etymcodes, {conj = "or"}))
		append_addl(addl_etym_codes)
	else
		langname_for_desc = langname
	end

	-------------------- 8. Try to figure out if this variety is extinct or reconstructed. -------------------

	local lect_type = getprop("type")
	if not lect_type then
		lect_type = determine_lect_type(category, lang, first_parent_cat)
	end
	-- Put `addl_text` in front of `additional` as its own paragraph (separated by a blank line), or make it the
	-- whole of `additional` if nothing has been accumulated yet.
	local function prefix_addl(addl_text)
		if not additional then
			additional = addl_text
			return
		end
		additional = addl_text .. "\n\n" .. additional
	end
	if lect_type == "extinct" then
		prefix_addl("This lect is ].")
		table.insert(parents, "Category:All extinct languages")
	elseif lect_type == "reconstructed" then
		prefix_addl("This lect is ].")
		table.insert(parents, "Category:Reconstructed languages")
	elseif lect_type == "unattested" then
		prefix_addl("This lect is {{w|unattested language|unattested}}.")
		table.insert(parents, "Category:Unattested languages")
	elseif lect_type == "constructed" then
		prefix_addl("This lect is ].")
		table.insert(parents, "Category:Constructed languages")
	end

	-------------------- 9. Compute `description`. -------------------

	local description

	local fulldef = getprop("fulldef")
	if fulldef then
		description = fulldef .. "."
	end

	if not description then
		local def = getprop("def")
		if def then
			description = ("Terms or senses in %s."):format(def)
		end
	end

	if not description then
		if not regiondesc then
			-- We need regiondesc for the description unless def= or fulldef= is given, which overrides the part that needs it.
			error(("1= (region) not given and unable to infer region from category '%s' given language name '%s'"):
				format(category, langname))
		end

		local lang_en = get_lang("en", true)

		local linked_regiondesc = regiondesc
		-- Don't try to link if HTML, = sign, template call or embedded link found in text. Embedded links will
		-- automatically be converted to English links by JavaScript.
		-- Returns true if `text` contains none of the characters < = > { } [ ] |, i.e. it is plain text that can
		-- safely be wrapped in a link.
		-- NOTE(review): the character class was reconstructed from the comment above (the bracketed part of the
		-- pattern had been lost, leaving a pattern that only matched the literal text "|]"); confirm the exact
		-- set of characters against the upstream module.
		local function linkable(text)
			return not text:find("[<=>{}%[%]|]")
		end
		if linked_regiondesc:find("<country>") then
			if not countries then
				error(("Can't specify <country> in region description '%s' when country= not given"):format(linked_regiondesc))
			end
			-- Link the countries individually before calling serialCommaJoin(), which inserts HTML.
			local linked_countries = {}
			for _, country in ipairs(countries) do
				if linkable(country) then
					country = require("Module:links").full_link { lang = lang_en, term = country }
				end
				table.insert(linked_countries, country)
			end
			linked_countries = serial_comma_join(linked_countries)
			linked_regiondesc = linked_regiondesc:gsub("<country>", replacement_escape(linked_countries))
		elseif not getprop("nolink") and linkable(linked_regiondesc) then
			-- Even if nolink not given, don't try to link if HTML or = sign found in linked_regiondesc, otherwise
			-- we're likely to get an error.
			if page_exists(linked_regiondesc) then
				-- Only construct a Wiktionary link if the page exists; otherwise construct a Wikipedia link.
				linked_regiondesc = require("Module:links").full_link { lang = lang_en, term = linked_regiondesc }
			else
				linked_regiondesc = ("]"):format(linked_regiondesc, linked_regiondesc)
			end
		end
		linked_regiondesc = the_prefix .. linked_regiondesc
		local verb = getprop("verb") or "spoken"
		local prep = getprop("prep")

		if not langname_for_desc then
			error(category)
		end
		description = ("Terms or senses in %s as %s%s %s."):format(
			langname_for_desc, verb, prep == "-" and "" or " " .. (prep or "in"), linked_regiondesc)
	end

	-------------------- 10. Compute the Wikipedia articles that go into `topright`. -------------------

	local topright_parts = {}
	-- Add a box template for article `article` on Wikimedia project `wmcode` to `topright_parts`, unless an
	-- identical entry is already there. `wmcode` "commons" produces a {{commons}} call; anything else produces a
	-- {{wp}} call, with a lang= parameter for every code other than "en". The article parameter is left out when
	-- the article has the same name as the category.
	local function insert_wikipedia_article(wmcode, article)
		local box
		if wmcode == "commons" then
			local article_param = ""
			if article ~= category then
				article_param = "|" .. article
			end
			box = ("{{commons%s}}"):format(article_param)
		else
			local lang_param = ""
			if wmcode ~= "en" then
				lang_param = "|lang=" .. wmcode
			end
			local article_param = ""
			if article ~= category then
				article_param = "|" .. article
			end
			box = ("{{wp%s%s}}"):format(lang_param, article_param)
		end
		insert_if_not(topright_parts, box)
	end

	-- Insert a Wikipedia box for each article spec in the list `specs`. Each spec is one of:
	-- * `true`, "+" or a string that [[Module:yesno]] treats as true: use the article named `default`;
	-- * "-" or a string that [[Module:yesno]] treats as false: insert nothing for this spec;
	-- * any other string: the article name itself.
	-- A string spec may carry a wiki prefix, e.g. "de:Foo", in which case the article is taken from that wiki
	-- instead of the English Wikipedia; the part after the prefix is interpreted as described above.
	local function insert_wikipedia_articles_for_wikipedia_specs(specs, default)
		for _, article in ipairs(specs) do
			local foreign_wiki
			if article == true then
				article = default
			else
				if article:find(":") then
					-- Only treat the text before the colon as a wiki prefix if it looks like a wiki code (lowercase
					-- letters and hyphens, at least two characters) and the colon is not followed by a space, so
					-- that article titles such as "Star Trek: Voyager" are left intact.
					-- NOTE(review): the character classes of this pattern were reconstructed (they had been lost,
					-- leaving a pattern that only matched a literal "*" prefix); confirm against the upstream module.
					local actual_article
					foreign_wiki, actual_article = article:match("^([a-z][a-z][a-z-]*):([^ ].*)$")
					if actual_article then
						article = actual_article
					end
				end
				if article == "+" then
					article = default
				elseif article == "-" then
					article = nil
				else
					-- yesno() returns its second argument when the first is not a recognized boolean string, so
					-- ordinary article names pass through unchanged.
					article = require("Module:yesno")(article, article)
					if article == true then
						article = default
					end
				end
			end
			if article then
				insert_wikipedia_article(foreign_wiki or "en", article)
			end
		end
	end

	-- Insert Wikipedia boxes for the articles linked from the Wikidata items listed in `specs`. A plain ID is
	-- looked up in every Wikipedia returned by get_langs_to_extract_wikipedia_articles_from_wikidata() for
	-- `lang`; an ID prefixed with a wiki code ("CODE:ID") is looked up only in that Wikipedia. Plain IDs are
	-- processed first, grouped by Wikipedia language, followed by the prefixed IDs in the order given.
	-- Throws an error if `mw.wikibase` is unavailable.
	local function insert_wikipedia_articles_for_wikidata_specs(specs, lang)
		if not mw.wikibase then
			-- `args.wikidata` is not set when the specs come from label data rather than from wikidata=; fall back
			-- to the specs themselves so that formatting the message cannot fail on a nil value.
			error(("Unable to retrieve data from Wikidata ID's '%s'; `mw.wikibase` not defined"):format(
				args.wikidata or table.concat(specs, ",")))
		end
		local wikipedia_langs = require(labels_module).get_langs_to_extract_wikipedia_articles_from_wikidata(lang)
		local ids_without_wmcodes = {}
		local ids_with_wmcodes = {}
		for _, id in ipairs(specs) do
			if id:find(":") then
				table.insert(ids_with_wmcodes, id)
			else
				table.insert(ids_without_wmcodes, id)
			end
		end
		for _, wmcode in ipairs(wikipedia_langs) do
			for _, id in ipairs(ids_without_wmcodes) do
				local article = mw.wikibase.sitelink(id, wmcode .. "wiki")
				if article then
					insert_wikipedia_article(wmcode, article)
				end
			end
		end
		for _, id in ipairs(ids_with_wmcodes) do
			-- Split "CODE:ID" at the first colon.
			local wmcode, wikidata_id = id:match("^(.-):(.*)$")
			local article = mw.wikibase.sitelink(wikidata_id, wmcode .. "wiki")
			if article then
				insert_wikipedia_article(wmcode, article)
			end
		end
	end

	if args.wp or args.wikidata then
		if args.wp then
			insert_wikipedia_articles_for_wikipedia_specs(split_on_comma(args.wp), category)
		end
		if args.wikidata then
			insert_wikipedia_articles_for_wikidata_specs(split(args.wikidata, "%s*,%s*"), lang)
		end
	elseif category == ucfirst(langname) then
		local wikipedia_langs = require(labels_module).get_langs_to_extract_wikipedia_articles_from_wikidata(lang)
		for _, wmcode in ipairs(wikipedia_langs) do
			local article = lang:getWikipediaArticle("no category fallback", wmcode .. "wiki")
			if article then
				insert_wikipedia_article(wmcode, article)
			end
		end
	end
	if #topright_parts == 0 and sorted_labels then
		for _, labobj in pairs(all_labels) do
			local wp_specs = labobj.labdata.Wikipedia
			if wp_specs then
				if type(wp_specs) ~= "table" then
					wp_specs = {wp_specs}
				end
				insert_wikipedia_articles_for_wikipedia_specs(wp_specs, labobj.canonical)
			end
			local wikidata_specs = labobj.labdata.Wikidata
			if wikidata_specs then
				if type(wikidata_specs) ~= "table" then
					wikidata_specs = {wikidata_specs}
				end
				insert_wikipedia_articles_for_wikidata_specs(wikidata_specs, labobj.lang)
			end
		end
	end

	local topright
	if #topright_parts > 0 then
		topright = table.concat(topright_parts)
	end

	-------------------- 11. Return the combined structure of all information. -------------------

	track("lect")
	export.register_likely_lect_parent_cat(parents)

	return {
		-- FIXME, allow etymological codes here
		lang = get_returnable_lang_code(lang),
		topright = topright,
		description = description,
		additional = additional,
		parents = parents,
		breadcrumb = {name = breadcrumb, nocap = true},
		umbrella = false,
		can_be_empty = true,
	}, lect_type
end


-- Cache of lect_handler() results, keyed by category name. Each value is a list holding the values returned by
-- lect_handler() for that category.
local memoized_responses = {}

-- Memoizing wrapper around lect_handler(). The first call for a given `category` runs lect_handler() and stores
-- its return values; later calls for the same category return the stored values without recomputing them. Note
-- that only `category` is used as the cache key; `raw_args` and `called_from_inside` of later calls are ignored
-- once a result is cached.
--
-- Returns the first two values returned by lect_handler(); at its final return statement these are the category
-- settings object and the lect type.
memoizing_lect_handler = function(category, raw_args, called_from_inside)
	mw.log(category)
	if category == nil then
		-- nil cannot be used as a table key, so don't try to cache; just pass the call through.
		local obj, lect_type = lect_handler(category, raw_args, called_from_inside)
		return obj, lect_type
	end
	local retval = memoized_responses[category]
	if not retval then
		retval = {lect_handler(category, raw_args, called_from_inside)}
		memoized_responses[category] = retval
	end
	return retval[1], retval[2]
end

-- Register the raw handler for lect categories. The work is done by lect_handler() above, reached through its
-- memoizing wrapper; the second return value is a boolean saying whether a settings object was produced.
raw_handlers[#raw_handlers + 1] = function(data)
	local settings = memoizing_lect_handler(data.category, data.args, data.called_from_inside)
	if settings then
		return settings, true
	end
	return settings, false
end


-- Module result: the module-level `raw_categories` and `raw_handlers` tables under the keys RAW_CATEGORIES and
-- RAW_HANDLERS, plus the `export` table.
return {RAW_CATEGORIES = raw_categories, RAW_HANDLERS = raw_handlers, export = export}