-- Module:User:Benwing2/la-adj
--
-- This module sandbox lacks a documentation subpage. Please create it.
-- Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox
-- Table of functions exported by this module; it is returned at the end of
-- the file.
local export = {}

-- Language object for Latin ("la"); passed to the link and category
-- formatters further down.
local lang = require("Module:languages").getByCode("la")
local m_links = require("Module:links")
local m_utilities = require("Module:utilities")
local m_para = require("Module:parameters")

-- Globals (no `local`): an already-set value is kept, otherwise the value is
-- read from the current page title.
NAMESPACE = NAMESPACE or mw.title.getCurrentTitle().nsText
PAGENAME = PAGENAME or mw.title.getCurrentTitle().text

-- Declension data module; presumably a table of declension functions keyed by
-- declension-type name -- confirm against the data module.
local decl = require("Module:User:Benwing2/la-adj/data")
-- Provides make_table(), used by export.show to render the result.
local m_table = require("Module:la-adj/table")

-- Short aliases for the mw.ustring pattern functions.
local rmatch = mw.ustring.match
local rfind = mw.ustring.find

-- Every slot of the declension table, named "<case>_<number>_<gender>", in
-- processing order: singular before plural; within each number masculine,
-- feminine, neuter; within each gender nom, gen, dat, acc, abl, voc.
local case_order = {}
do
	local numbers = {"sg", "pl"}
	local genders = {"m", "f", "n"}
	local cases = {"nom", "gen", "dat", "acc", "abl", "voc"}
	for _, number in ipairs(numbers) do
		for _, gender in ipairs(genders) do
			for _, case in ipairs(cases) do
				case_order[#case_order + 1] = case .. "_" .. number .. "_" .. gender
			end
		end
	end
end

-- Return the first form of V when V is a list of forms; otherwise return V
-- unchanged (a single string, or nil).
local function first_form(v)
	if type(v) == "table" then
		return v[1]
	end
	return v
end

-- Return true if VAL (a list of forms) marks its slot as deliberately empty,
-- i.e. the first entry is blank, a hyphen or an em dash.
local function is_missing_form(val)
	local first = val[1]
	return first == "" or first == "-" or first == "—"
end

-- Build the accelerator spec ({form = TAGS, lemma = LEMMA}) for slot KEY.
-- TAGS is derived from the slot name, e.g. "nom_sg_m" -> "nom|m|s".
local function build_accel(data, key, accel_lemma, accel_lemma_f)
	-- "nom_sg_m" -> "nom|s|m". (gsub also returns a count; only the first
	-- result is kept by the single-variable assignment.)
	local accel_form = key:gsub("_([sp])[gl]_", "|%1|")

	if data.noneut then
		-- If noneut=1, we're being asked to do a noun like Aquītānus or
		-- Rōmānus that has masculine and feminine variants, not an
		-- adjective. In that case, make the accelerators correspond to
		-- nominal case/number forms without the gender, and use the
		-- feminine as the lemma for feminine forms.
		if key:find("_f") then
			return {form = (accel_form:gsub("|f$", "")), lemma = accel_lemma_f}
		end
		return {form = (accel_form:gsub("|m$", "")), lemma = accel_lemma}
	end

	-- Use multipart tags if called for: when there are no separate neuter
	-- (or feminine) forms, the masculine form covers those genders too.
	if not data.forms.nom_sg_n and not data.forms.nom_pl_n then
		accel_form = accel_form:gsub("|m$", "|m//f//n")
	elseif not data.forms.nom_sg_f and not data.forms.nom_pl_f then
		accel_form = accel_form:gsub("|m$", "|m//f")
	end

	-- Use the order nom|m|s, which is more standard than nom|s|m.
	accel_form = accel_form:gsub("|(.-)|(.-)$", "|%2|%1")

	return {form = accel_form, lemma = accel_lemma}
end

-- Finalize data.forms: for every slot in case_order, apply the user's override
-- from ARGS (if any), normalize the value to a list of forms, add
-- data.prefix/data.suffix to each form, and record accelerator info in
-- data.accel[slot].
--
-- After this runs, data.forms[slot] is one of:
--   * nil   -- no form was generated or specified;
--   * ""    -- a singular slot of a plural-only adjective;
--   * "—"   -- the slot was explicitly marked as missing;
--   * a list of form strings.
--
-- Side effects: sets data.user_specified[slot] for overridden slots, may add
-- entries to data.categories, and (in the main namespace only) checks whether
-- the form pages exist in order to add a red-link tracking category.
local function process_forms_and_overrides(data, args)
	local redlink = false
	if data.num == "pl" then
		table.insert(data.categories, "Latin plural-only adjectives")
	end

	-- The lemma is the nominative masculine (or feminine) of the adjective's
	-- own number, defaulting to the singular. It must be captured before the
	-- loop below rewrites data.forms.
	local lemma_num = "sg"
	if data.num and data.num ~= "" then
		lemma_num = data.num
	end
	local accel_lemma = first_form(data.forms["nom_" .. lemma_num .. "_m"])
	local accel_lemma_f = first_form(data.forms["nom_" .. lemma_num .. "_f"])

	for _, key in ipairs(case_order) do
		-- If noneut=1 passed, clear out all neuter forms.
		if data.noneut and key:find("_n") then
			data.forms[key] = nil
		end

		-- A user-specified override takes precedence over the generated form.
		local val = args[key]
		if val then
			data.user_specified[key] = true
		else
			val = data.forms[key]
		end

		if val then
			if type(val) == "string" then
				val = mw.text.split(val, "/")
			end
			if data.num == "pl" and key:find("sg") then
				data.forms[key] = ""
			elseif is_missing_form(val) then
				data.forms[key] = "—"
			else
				-- Computed at this point in the slot order on purpose:
				-- build_accel inspects data.forms, which this loop is in the
				-- middle of rewriting.
				if val[1] ~= nil then
					data.accel[key] = build_accel(data, key, accel_lemma, accel_lemma_f)
				end
				for i, form in ipairs(val) do
					local word = data.prefix .. form .. data.suffix
					val[i] = word
					-- One red link is enough to categorize the page, so stop
					-- doing existence checks after the first hit.
					if not redlink and NAMESPACE == '' then
						local title = lang:makeEntryName(word)
						local t = mw.title.new(title)
						if t and not t.exists then
							table.insert(data.categories, 'Latin adjectives with red links in their declension tables')
							redlink = true
						end
					end
				end
				data.forms[key] = val
			end
		end
	end
end

-- Convert every list of forms in data.forms into display text: each form
-- becomes a link (with accelerator info from data.accel[slot]), forms within a
-- slot are joined with ", ", and footnote markers are attached.
--
-- Footnotes come from data.notes, looked up under the slot name followed by
-- the 1-based index of the form within the slot (e.g. "abl_sg_m2"); the value
-- may be a single string or a list of strings.
-- NOTE(review): the "slot .. index" key shape is reconstructed; confirm
-- against how the data module fills data.notes.
-- Notes are skipped for slots the user overrode. Identical note texts share
-- one footnote number. The numbered notes are prepended to data.footnote.
local function show_forms(data)
	local noteindex = 1
	local notes = {}
	-- Maps note text -> footnote number already assigned to it.
	local seen_notes = {}
	for _, key in ipairs(case_order) do
		local val = data.forms[key]
		if val and val ~= "" and val ~= "—" then
			local links = {}
			for i, form in ipairs(val) do
				local link = m_links.full_link({lang = lang, term = form, accel = data.accel[key]})
				local this_notes = data.notes[key .. i]
				if this_notes and not data.user_specified[key] then
					if type(this_notes) == "string" then
						this_notes = {this_notes}
					end
					local link_indices = {}
					-- Footnote numbers already attached to this link, so
					-- that none is listed twice.
					local indices_on_link = {}
					for _, this_note in ipairs(this_notes) do
						local this_noteindex = seen_notes[this_note]
						if not this_noteindex then
							-- Generate a footnote index.
							this_noteindex = noteindex
							noteindex = noteindex + 1
							table.insert(notes, '<sup style="color: red">' .. this_noteindex .. '</sup>' .. this_note)
							seen_notes[this_note] = this_noteindex
						end
						if not indices_on_link[this_noteindex] then
							indices_on_link[this_noteindex] = true
							table.insert(link_indices, this_noteindex)
						end
					end
					link = link .. '<sup style="color: red">' .. table.concat(link_indices, ",") .. '</sup>'
				end
				links[i] = link
			end
			data.forms[key] = table.concat(links, ", ")
		end
	end
	data.footnote = table.concat(notes, "<br />") .. data.footnote
end

-- Build the declension data for the adjective described by FRAME.
--
-- Arguments are read from two places: the #invoke arguments (frame.args) and
-- the arguments of the calling template (frame:getParent().args). The
-- declension function is looked up in the data module by declension-type
-- name, called to fill in data.forms, and the result is then post-processed by
-- process_forms_and_overrides.
--
-- Returns the populated data table.
--
-- NOTE(review): the positional indices used below ([1], [2], [3]) were
-- reconstructed from context; confirm them against the templates that invoke
-- this module.
local function generate_forms(frame)
	local data = {
		title = "",
		footnote = "",
		num = "",
		voc = true,
		forms = {},
		types = {},
		categories = {},
		notes = {},
		user_specified = {},
		accel = {},
	}

	-- Arguments given directly in the #invoke call.
	-- Presumably 1 = declension type and 2 = subtypes -- confirm.
	local iparams = {
		[1] = {},
		[2] = {},
		num = {},
	}

	local iargs = m_para.process(frame.args, iparams)

	local parent_args = frame:getParent().args

	local subtype = iargs[2] or parent_args[3]

	-- Subtypes are hyphen-separated; record each one as a key of data.types.
	if subtype and subtype ~= "" then
		for _, val in ipairs(mw.text.split(subtype, "%-")) do
			data.types[val] = true
		end
	end

	-- Arguments given by the user to the calling template.
	local params = {
		[1] = {required = true, default = "{{{1}}}"},
		[2] = {},
		[3] = {},
		decltype = {},
		noun = {},
		num = {},
		prefix = {},
		suffix = {},
		noneut = {type = "boolean"},
	}
	-- Every slot can be overridden individually, e.g. |gen_sg_m=...
	for _, case in ipairs(case_order) do
		params[case] = {}
	end

	local args = m_para.process(parent_args, params)

	data.num = iargs.num or args.num or ""
	data.prefix = args.prefix or ""
	data.suffix = args.suffix or ""
	data.noneut = args.noneut

	local decl_name = iargs[1] or args.decltype
	local decl_func = decl[decl_name]
	if not decl_func then
		error("Unrecognized declension type: " .. tostring(decl_name))
	end
	decl_func(data, args)

	process_forms_and_overrides(data, args)

	if data.prefix .. data.suffix ~= "" then
		table.insert(data.categories, "Kenny's testing category 6")
	end

	return data
end

-- Entry point for templates: generate the forms, convert them to display
-- text, and return the rendered declension table followed by the formatted
-- categories.
function export.show(frame)
	local data = generate_forms(frame)
	show_forms(data)

	local rendered_table = m_table.make_table(data)
	local category_text = m_utilities.format_categories(data.categories, lang)
	return rendered_table .. category_text
end

-- Entry point that returns the generated forms as plain text instead of a
-- table: one "slot=form1,form2" item per non-empty slot, in case_order, joined
-- with "|". Slots that are absent, blank or marked as missing ("—") are
-- omitted.
function export.generate_forms(frame)
	local data = generate_forms(frame)

	local ins_text = {}
	for _, key in ipairs(case_order) do
		local val = data.forms[key]
		-- Only lists of forms are emitted; the "" and "—" placeholders are
		-- strings and are skipped by the type check.
		if type(val) == "table" and #val > 0 then
			table.insert(ins_text, key .. "=" .. table.concat(val, ","))
		end
	end
	return table.concat(ins_text, "|")
end

-- Strip ENDING from LEMMA and return what is left (the base), or nil when
-- LEMMA does not end in ENDING. If ENDING contains an opening parenthesis it
-- is instead used as-is as a pattern matched against LEMMA, and whatever its
-- capture group(s) match is returned.
local function extract_base(lemma, ending)
	local pattern = ending
	if not ending:find("%(") then
		pattern = "^(.*)" .. ending .. "$"
	end
	return rmatch(lemma, pattern)
end

-- Given ENDINGS_AND_SUBTYPES (a list of entries of the form
-- {ENDING_SPEC, TYPE, SUBTYPES} or {ENDING_SPEC, TYPE, SUBTYPES, STEM2}),
-- check each entry in turn against LEMMA. For the first entry that matches,
-- return four values:
--
--   BASE      the remainder of LEMMA minus the ending;
--   STEM2     the entry's own STEM2 if it has one, else the STEM2 passed in;
--   TYPE      the entry's declension type;
--   SUBTYPES  the entry's subtypes, minus any of the form -SUBTYPE.
--
-- An entry is skipped if any of its subtypes is specifically canceled in
-- SPECIFIED_SUBTYPES (a set, i.e. a table where the keys are strings and the
-- value is always true; nil is treated as the empty set), in either of two
-- ways: the user specified -SUBTYPE for one of the entry's subtypes, or the
-- user specified SUBTYPE and the entry lists -SUBTYPE.
--
-- If no entry matches, throw an error, mentioning DECLTYPE (the
-- user-specified declension) when it is non-empty.
--
-- The ending spec in ENDINGS_AND_SUBTYPES is one of the following:
--
-- 1. A simple string, e.g. "tūdō", specifying an ending.
-- 2. A regex that should match the entire lemma (it should be anchored at
--    the beginning with ^ and at the end with $), and contains a single
--    capturing group to match the base.
-- 3. A pair {SIMPLE_STRING_OR_REGEX, STEM2_ENDING} where
--    SIMPLE_STRING_OR_REGEX is one of the previous two possibilities and
--    STEM2_ENDING is a string specifying the corresponding ending that must
--    be present in STEM2. If this form is used, the combination of
--    base + STEM2_ENDING must exactly match STEM2 in order for this entry
--    to be considered a match. An example is {"is", ""}, which will match
--    lemma == "follis", stem2 == "foll", but not lemma == "lapis",
--    stem2 == "lapid".
local function get_type_and_subtype_by_ending(lemma, stem2, decltype, specified_subtypes,
		endings_and_subtypes)
	specified_subtypes = specified_subtypes or {}
	for _, ending_and_subtypes in ipairs(endings_and_subtypes) do
		local ending = ending_and_subtypes[1]
		local rettype = ending_and_subtypes[2]
		local subtypes = ending_and_subtypes[3]
		local specified_stem2 = ending_and_subtypes[4]
		local not_this_subtype = false
		for _, subtype in ipairs(subtypes) do
			-- A subtype is directly canceled by specifying -SUBTYPE.
			if specified_subtypes["-" .. subtype] then
				not_this_subtype = true
				break
			end
			-- A subtype is canceled if the user specified SUBTYPE and
			-- -SUBTYPE is given in the to-be-returned subtypes.
			local must_not_be_present = rmatch(subtype, "^%-(.*)$")
			if must_not_be_present and specified_subtypes[must_not_be_present] then
				not_this_subtype = true
				break
			end
		end
		if not not_this_subtype then
			local base
			if type(ending) == "table" then
				local lemma_ending = ending[1]
				local stem2_ending = ending[2]
				base = extract_base(lemma, lemma_ending)
				-- The lemma ending matched, but the entry only applies if
				-- stem2 has the corresponding shape as well.
				if base and base .. stem2_ending ~= stem2 then
					base = nil
				end
			else
				base = extract_base(lemma, ending)
			end
			if base then
				-- Remove subtypes of the form -SUBTYPE from the subtypes
				-- to be returned.
				local new_subtypes = {}
				for _, subtype in ipairs(subtypes) do
					if not rfind(subtype, "^%-") then
						table.insert(new_subtypes, subtype)
					end
				end
				return base, specified_stem2 or stem2, rettype, new_subtypes
			end
		end
	end
	if not decltype or decltype == "" then
		error("Unrecognized ending for adjective: " .. lemma)
	else
		error("Unrecognized ending for declension-" .. decltype .. " adjective: " .. lemma)
	end
end

-- Ending tables used for autodetection, keyed by the user-specified declension
-- type ("" means no type was given). Each entry is
-- {ENDING_SPEC, TYPE, SUBTYPES} with an optional fourth element STEM2; see
-- get_type_and_subtype_by_ending for the details. Entries are tried in order,
-- so more specific endings must come before more general ones. The tables are
-- shared between calls and must not be modified.
local endings_by_type = {
	[""] = {
		{"us", "1&2", {}},
		{"a", "1&2", {}},
		{"um", "1&2", {}},
		{"ī", "1&2", {}},
		{"ae", "1&2", {}},
		-- Nearly all -os adjectives are greekA
		{"os", "1&2", {"greekA", "-greekE"}},
		{"ē", "1&2", {"greekE", "-greekA"}},
		{"on", "1&2", {"greekA", "-greekE"}},
		{"er", "1&2", {"er"}},
		{"ur", "1&2", {"er"}},
		{"is", "3-2", {}},
		{"e", "3-2", {}},
		{"ior", "3-C", {}},
		{"jor", "3-C", {}, "j"},
		{"^(mi)nor$", "3-C", {}, "n"},
		{"", "3-1", {"I"}},
	},
	["3"] = {
		{"er", "3-3", {}},
		{"is", "3-2", {}},
		{"e", "3-2", {}},
		{"ior", "3-C", {}},
		{"jor", "3-C", {}, "j"},
		{"^(mi)nor$", "3-C", {}, "n"},
		{"", "3-1", {}},
	},
	["1&2"] = {
		{"us", "1&2", {}},
		{"a", "1&2", {}},
		{"um", "1&2", {}},
		{"ī", "1&2", {}},
		{"ae", "1&2", {}},
		-- Nearly all -os adjectives are greekA
		{"os", "1&2", {"greekA", "-greekE"}},
		{"ē", "1&2", {"greekE", "-greekA"}},
		{"on", "1&2", {"greekA", "-greekE"}},
		{"er", "1&2", {"er"}},
		{"ur", "1&2", {"er"}},
	},
	["1-1"] = {
		{"a", "1-1", {}},
		{"ae", "1-1", {}},
	},
	["2-2"] = {
		{"us", "2-2", {}},
		{"um", "2-2", {}},
		{"ī", "2-2", {}},
		{"a", "2-2", {}},
		{"os", "2-2", {"greek"}},
		{"on", "2-2", {"greek"}},
		{"oe", "2-2", {"greek"}},
	},
	["3-2"] = {
		{"is", "3-2", {}},
		{"e", "3-2", {}},
	},
	["3-C"] = {
		{"ior", "3-C", {}},
		{"jor", "3-C", {}, "j"},
		{"^(mi)nor$", "3-C", {}, "n"},
	},
}

-- Autodetect the type and subtype of an adjective given all the information
-- specified by the user: lemma, stem2, declension type and specified subtypes.
-- Four values are returned: the lemma base (i.e. the stem of the lemma, as
-- required by the declension functions), the value of stem2 to pass to the
-- declension function, the declension type and the autodetected subtypes.
-- Note that this will not detect a given subtype if -SUBTYPE is specified for
-- any subtype that would be returned, or if SUBTYPE is specified and -SUBTYPE
-- is among the subtypes that would be returned (such subtypes are filtered out
-- of the returned subtypes).
--
-- If TYP is non-empty and does not begin with a declension digit (0-3), it is
-- treated as a subtype: it is added to a copy of SUBTYPES (the caller's table
-- is not modified) and detection proceeds as if no type had been given.
-- NOTE(review): the digit class "[0123]" is reconstructed; confirm the set of
-- valid declension prefixes.
-- A type that has no ending table is passed through unchanged together with
-- LEMMA and STEM2 and an empty subtype list.
-- Throws an error if TYP has an ending table but no ending matches LEMMA.
function export.detect_type_and_subtype(lemma, stem2, typ, subtypes)
	if typ ~= "" and not rfind(typ, "^[0123]") then
		subtypes = subtypes and mw.clone(subtypes) or {}
		subtypes[typ] = true
		typ = ""
	end
	local endings = endings_by_type[typ]
	if endings then
		return get_type_and_subtype_by_ending(lemma, stem2, typ, subtypes, endings)
	end
	return lemma, stem2, typ, {}
end

return export

-- For Vim, so we get 4-space tabs
-- vim: set ts=4 sw=4 noet:
-- Module:User:Benwing2/la-adj
--
-- Wikious
--
-- Boobota
--
-- Sagapedia