local export = {}
--[=[
Authorship: Ben Wing <benwing2>, with many ideas and a little code coming from
the old [[Module:la-noun]] by KC Kenny Lau.
]=]
-- TODO:
-- (DONE) Eliminate specification of noteindex from la-adj/data
-- (DONE?) Finish autodetection of adjectives
-- (DONE) Remove old noun code
-- (DONE) Implement <.sufn>
-- (DONE) Look into adj voc=false
-- (DONE) Handle loc in adjectives
-- Error on bad subtypes
-- Make sure Google Books link still works.
-- (DONE) Make sure .sufn triggers insertion of 'with m optionally -> n in compounds' in title.
-- (DONE) Make sure title returned to la-adj lowercases the first letter even with a custom title.
--[=[
TERMINOLOGY:
-- "slot" = A particular case/number combination (for nouns) or
case/number/gender combination (for adjectives). Example slot names are
"abl_sg" (for noun) or "acc_pl_f" (for adjectives). Each slot is filled
with zero or more forms.
-- "form" = The declined Latin form representing the value of a given slot.
For example, rēge is a form, representing the value of the abl_sg slot of
the lemma rēx.
-- "lemma" = The dictionary form of a given Latin term. For nouns, it's
generally the nominative singular, but will be the nominative plural of
plurale tantum nouns (e.g. [[castra]]), and may occasionally be another
form (e.g. the genitive singular) if the nominative singular is missing.
For adjectives, it's generally the masculine nominative singular, but
will be the masculine nominative plural of plurale tantum adjectives
(e.g. [[dēnī]]).
-- "plurale tantum" (plural "pluralia tantum") = A noun or adjective that
exists only in the plural. Examples are castra "army camp", faucēs "throat",
and dēnī "ten each" (used for counting pluralia tantum nouns).
-- "singulare tantum" (plural "singularia tantum") = A noun or adjective that
exists only in the singular. Examples are geōlogia "geology" (and in
general most non-count nouns) and the adjective ūnus "one".
]=]
local debug_track_module = "Module:debug/track"
local en_utilities_module = "Module:en-utilities"
local headword_data_module = "Module:headword/data"
local json_module = "Module:JSON"
local la_adj_data_module = "Module:la-adj/data"
local la_adj_table_module = "Module:la-adj/table"
local la_noun_data_module = "Module:la-noun/data"
local la_noun_table_module = "Module:la-noun/table"
local la_utilities_module = "Module:la-utilities"
local languages_module = "Module:languages"
local links_module = "Module:links"
local load_module = "Module:load"
local parameters_module = "Module:parameters"
local string_utilities_module = "Module:string utilities"
local table_module = "Module:table"
local concat = table.concat
local insert = table.insert
local iter_adj_slots -- defined below
local iter_noun_slots -- defined below
local umatch = mw.ustring.match
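-- Lazy-loading wrappers: each of the following functions requires its target
-- module only on first call and then replaces itself with the real function,
-- so later calls go straight through without another require().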
local function add_indefinite_article(...)
add_indefinite_article = require(en_utilities_module).add_indefinite_article
return add_indefinite_article(...)
end
local function contains(...)
contains = require(table_module).contains
return contains(...)
end
local function debug_track(...)
debug_track = require(debug_track_module)
return debug_track(...)
end
local function deep_copy(...)
deep_copy = require(table_module).deepCopy
return deep_copy(...)
end
local function deep_equals(...)
deep_equals = require(table_module).deepEquals
return deep_equals(...)
end
local function full_link(...)
full_link = require(links_module).full_link
return full_link(...)
end
local function insert_if_not(...)
insert_if_not = require(table_module).insertIfNot
return insert_if_not(...)
end
local function lcfirst(...)
lcfirst = require(string_utilities_module).lcfirst
return lcfirst(...)
end
local function load_data(...)
load_data = require(load_module).load_data
return load_data(...)
end
local function make_adj_table(...)
make_adj_table = require(la_adj_table_module).make_table
return make_adj_table(...)
end
local function make_noun_table(...)
make_noun_table = require(la_noun_table_module).make_table
return make_noun_table(...)
end
local function make_noun_table_sg(...)
make_noun_table_sg = require(la_noun_table_module).make_table_sg
return make_noun_table_sg(...)
end
local function make_noun_table_pl(...)
make_noun_table_pl = require(la_noun_table_module).make_table_pl
return make_noun_table_pl(...)
end
local function make_stem2(...)
make_stem2 = require(la_utilities_module).make_stem2
return make_stem2(...)
end
local function normalize_form(...)
normalize_form = require(la_utilities_module).normalize_form
return normalize_form(...)
end
local function process_params(...)
process_params = require(parameters_module).process
return process_params(...)
end
local function remove_links(...)
remove_links = require(links_module).remove_links
return remove_links(...)
end
local function singularize(...)
singularize = require(en_utilities_module).singularize
return singularize(...)
end
local function split(...)
split = require(string_utilities_module).split
return split(...)
end
local function ucfirst(...)
ucfirst = require(string_utilities_module).ucfirst
return ucfirst(...)
end
local m_adj_decl
local function get_m_adj_decl()
m_adj_decl, get_m_adj_decl = require(la_adj_data_module), nil
return m_adj_decl
end
local m_noun_decl
local function get_m_noun_decl()
m_noun_decl, get_m_noun_decl = require(la_noun_data_module), nil
return m_noun_decl
end
local lang
local function get_lang()
lang, get_lang = require(languages_module).getByCode("la")
return lang
end
local namespace
local function get_namespace()
namespace, get_namespace = load_data(headword_data_module).page.namespace, nil
return namespace
end
local pagename
local function get_pagename()
pagename, get_pagename = load_data(headword_data_module).pagename, nil
return pagename
end
local ligatures = {
['Ae'] = 'Æ',
['ae'] = 'æ',
['Oe'] = 'Œ',
['oe'] = 'œ',
}
local cases = {
"nom", "gen", "acc", "dat", "abl", "voc", "loc"
}
local cases_n = #cases
local nums = {
"sg", "pl"
}
local nums_n = #nums
local genders = {
"m", "f", "n"
}
local genders_n = #genders
local declension_to_english = setmetatable({
= "first",
= "first and second",
= "second",
= "third",
= "fourth",
= "fifth",
}, {
__index = function(t, k)
return rawget(t, k:match("^[0-9&]*"))
end
})
local number_to_english = {
"one", "two", "three", "four", "five"
}
local linked_prefixes = {
"", "linked_"
}
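-- Iterate over the noun slots that can potentially serve as the lemma
-- (nom, gen and acc crossed with sg and pl); these are the slots for which
-- "linked_" variants are generated below.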
function export.iter_potential_noun_lemma_slots()
local num, case = 1, 0
return function()
case = case + 1
if case > 3 then
case = 1
num = num + 1
if num > nums_n then
return nil
end
end
return cases[case] .. "_" .. nums[num]
end
end
local potential_noun_lemma_slots = {}
for slot in export.iter_potential_noun_lemma_slots() do
insert(potential_noun_lemma_slots, slot)
end
local linked_to_non_linked_noun_slots = {}
for _, slot in ipairs(potential_noun_lemma_slots) do
linked_to_non_linked_noun_slots["linked_" .. slot] = slot
end
-- Iterate over all the "slots" associated with a noun declension, where a slot
-- is a particular case/number combination. If overridable_only, don't include the
-- "linked_" variants (linked_nom_sg, linked_nom_pl), which aren't overridable.
function export.iter_noun_slots(overridable_only)
local case, num, linked_variant = 1, 1, 0
return function()
linked_variant = linked_variant + 1
local max_linked_variant = (overridable_only or case > 3) and 1 or 2
if linked_variant > max_linked_variant then
linked_variant = 1
num = num + 1
if num > nums_n then
num = 1
case = case + 1
if case > cases_n then
return nil
end
end
end
return linked_prefixes[linked_variant] .. cases[case] .. "_" .. nums[num]
end
end
iter_noun_slots = export.iter_noun_slots
function export.iter_potential_adj_lemma_slots()
local num, case, gen = 1, 1, 0
return function()
gen = gen + 1
if gen > genders_n then
gen = 1
case = case + 1
if case > 3 then
case = 1
num = num + 1
if num > nums_n then
return nil
end
end
end
return cases[case] .. "_" .. nums[num] .. "_" .. genders[gen]
end
end
-- List of adjective slots for which we generate linked variants. Include
-- feminine and neuter variants because they will be needed if the adjective
-- is part of a multiword feminine or neuter noun.
local potential_adj_lemma_slots = {}
for slot in export.iter_potential_adj_lemma_slots() do
insert(potential_adj_lemma_slots, slot)
end
local linked_to_non_linked_adj_slots = {}
for _, slot in ipairs(potential_adj_lemma_slots) do
linked_to_non_linked_adj_slots["linked_" .. slot] = slot
end
-- Iterate over all the "slots" associated with an adjective declension, where a slot
-- is a particular case/number/gender combination. If overridable_only, don't include the
-- "linked_" variants (linked_nom_sg_m, linked_nom_pl_m, etc.), which aren't overridable.
function export.iter_adj_slots(overridable_only)
local case, num, gen, linked_variant = 1, 1, 1, 0
return function()
linked_variant = linked_variant + 1
local max_linked_variant = (overridable_only or case > 3) and 1 or 2
if linked_variant > max_linked_variant then
linked_variant = 1
gen = gen + 1
if gen > genders_n then
gen = 1
num = num + 1
if num > nums_n then
num = 1
case = case + 1
if case > cases_n then
return nil
end
end
end
end
return linked_prefixes[linked_variant] .. cases[case] .. "_" .. nums[num] .. "_" .. genders[gen]
end
end
iter_adj_slots = export.iter_adj_slots
-- Iterate over all the "slots" associated with a noun or adjective declension (depending on
-- the value of IS_ADJ), where a slot is a particular case/number combination (in the case of
-- nouns) or case/number/gender combination (in the case of adjectives). If OVERRIDABLE_ONLY
-- is specified, only include overridable slots (not including linked_ variants).
local function iter_slots(is_adj, overridable_only)
if is_adj then
return iter_adj_slots(overridable_only)
end
return iter_noun_slots(overridable_only)
end
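-- Join the forms in a single slot into a comma-separated string, with any
-- literal | escaped as <!>; returns nil if the slot has no forms.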
local function concat_forms_in_slot(forms)
if forms and forms ~= "" and forms ~= "—" and #forms > 0 then
local new_vals = {}
for _, v in ipairs(forms) do
insert(new_vals, (v:gsub("|", "<!>")))
end
return concat(new_vals, ",")
end
end
local function glossary_link(anchor, text)
text = text or anchor
return "]"
end
local function track(page)
debug_track("la-nominal/" .. page)
return true
end
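-- Return the union of a list of sets (tables whose keys are the set members).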
local function set_union(sets)
local union = {}
for _, set in ipairs(sets) do
for key, _ in pairs(set) do
union[key] = true
end
end
return union
end
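-- Return the set of keys in SET1 that are not in SET2.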
local function set_difference(set1, set2)
local diff = {}
for key, _ in pairs(set1) do
if not set2[key] then
diff[key] = true
end
end
return diff
end
-- If a form is set as '*', that means it is unattested
-- but should still be generated
-- TODO: handle asterisks in forms stored in the data
local function unattested_forms(data, args, is_adj)
for slot in iter_slots(is_adj) do
local arg = args[slot]
if arg ~= nil then
arg = arg:match("^*(.*)")
if arg then
data.unattested[slot] = true
args[slot] = arg ~= "" and arg or nil
end
end
end
end
-- Make a link only if the form is attested
local function link_if_attested(form, accel, is_unattested)
local data = {lang = lang or get_lang()}
if is_unattested then
data.alt = "*" .. form
else
data.term = form
data.accel = accel
end
return full_link(data)
end
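-- Canonicalize the forms for a single slot and apply any user-specified
-- override from ARGS. An override may contain several forms separated by "/",
-- and "" / "-" / "—" cancels the slot. An override of a plain slot (e.g.
-- nom_sg) is also applied to its linked_ variant (e.g. linked_nom_sg), as
-- mapped by LINKED_TO_NON_LINKED.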
local function process_form(slot, data, args, linked_to_non_linked)
local forms = data.forms
-- If nomf=1 passed, clear out all masculine and feminine forms.
if data.nomf and slot:match("%f[%a][mf]%f[%A]") then
forms[slot] = nil
end
-- If noneut=1 passed, clear out all neuter forms.
if data.noneut and slot:match("%f[%a]n%f[%A]") then
forms[slot] = nil
end
local val
if args[slot] then
val = args[slot]
data.user_specified[slot] = true
else
-- Overriding nom_sg/nom_sg_m etc. should override linked_nom_sg
-- so that the correct value gets displayed in the headword, which
-- uses linked_nom_sg.
local non_linked_equiv_slot = linked_to_non_linked[slot]
if non_linked_equiv_slot and args[non_linked_equiv_slot] then
val = args[non_linked_equiv_slot]
data.user_specified[slot] = true
else
val = forms[slot]
end
end
if val then
if type(val) == "string" then
val = split(val, "/", true, true)
end
local num = data.num
if (
(num == "pl" and slot:find("sg", nil, true)) or
(num == "sg" and slot:find("pl", nil, true))
) then
forms[slot] = nil
elseif val[1] == "" or val[1] == "-" or val[1] == "—" then
forms[slot] = "—"
if val[2] then
error("Cannot specify additional forms for " .. slot .. ' if it has been cancelled with "-"')
end
else
forms[slot] = val
end
end
end
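-- Process user overrides for all noun slots, set up accelerator entries for
-- each form, and add a category if any generated form is a red link in
-- mainspace. If generate_type == "bare", skip the accelerator and red-link
-- handling.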
local function process_noun_forms_and_overrides(data, args, generate_type)
local redlink = false
unattested_forms(data, args);
-- Process overrides and canonicalize forms.
for slot in iter_noun_slots() do
process_form(slot, data, args, linked_to_non_linked_noun_slots)
end
-- No accel forms or red link checking if generate_type == "bare".
if generate_type == "bare" then
return
end
-- Compute the lemma for accelerators. Do this after processing
-- overrides in case we overrode the lemma form(s).
local accel_lemma
if data.num and data.num ~= "" then
accel_lemma = data.forms["nom_" .. data.num]
else
accel_lemma = data.forms["nom_sg"]
end
if type(accel_lemma) == "table" then
accel_lemma = accel_lemma[1]
end
-- Set the accelerators, and determine if there are red links.
for slot in iter_noun_slots() do
local val = data.forms[slot]
if val and val ~= "" and val ~= "—" and #val > 0 then
for _, form in ipairs(val) do
local accel_form = slot
accel_form = accel_form:gsub("_([sp][gl])$", "|%1")
data.accel[slot] = {form = accel_form, lemma = accel_lemma}
if not redlink and (namespace or get_namespace()) == "" then
local title = ((lang or get_lang()):makeEntryName(form))
local t = mw.title.new(title)
if t and not t.exists then
insert(data.categories, "Latin " .. data.pos .. " with red links in their inflection tables")
redlink = true
end
end
end
end
end
end
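-- Adjective counterpart of process_noun_forms_and_overrides(). In addition,
-- if the feminine and/or neuter forms turn out to be identical to the
-- masculine across all slots, they are blanked so that a combined table can
-- be shown.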
local function process_adj_forms_and_overrides(data, args, generate_type)
local redlink = false
unattested_forms(data, args, true)
-- Process overrides and canonicalize forms.
for slot in iter_adj_slots() do
process_form(slot, data, args, linked_to_non_linked_adj_slots)
end
-- See if the masculine and feminine/neuter are the same across all slots.
-- If so, blank out the feminine/neuter so we use a table that combines
-- masculine and feminine, or masculine/feminine/neuter.
for _, gender in ipairs({"f", "n"}) do
local other_is_masc = true
for _, case in ipairs(cases) do
for _, num in ipairs(nums) do
if not deep_equals(data.forms[case .. "_" .. num .. "_" .. gender],
data.forms[case .. "_" .. num .. "_m"]) then
other_is_masc = false
break
end
end
if not other_is_masc then
break
end
end
if other_is_masc then
for _, case in ipairs(cases) do
for _, num in ipairs(nums) do
data.forms[case .. "_" .. num .. "_" .. gender] = nil
end
end
end
end
-- No accel forms or red link checking if generate_type == "bare".
if generate_type == "bare" then
return
end
-- Compute the lemma for accelerators. Do this after processing
-- overrides in case we overrode the lemma form(s).
local accel_lemma, accel_lemma_f
if data.num and data.num ~= "" then
accel_lemma = data.forms["nom_" .. data.num .. "_m"]
accel_lemma_f = data.forms["nom_" .. data.num .. "_f"]
else
accel_lemma = data.forms["nom_sg_m"]
accel_lemma_f = data.forms["nom_sg_f"]
end
if type(accel_lemma) == "table" then
accel_lemma = accel_lemma[1]
end
if type(accel_lemma_f) == "table" then
accel_lemma_f = accel_lemma_f[1]
end
-- Set the accelerators, and determine if there are red links.
for slot in iter_adj_slots() do
local val = data.forms[slot]
if val and val ~= "" and val ~= "—" and #val > 0 then
for _, form in ipairs(val) do
local accel_form = slot
accel_form = accel_form:gsub("_([sp][gl])_", "|%1|")
if data.noneut then
-- If noneut=1, we're being asked to do a noun like
-- Aquītānus or Rōmānus that has masculine and feminine
-- variants, not an adjective. In that case, make the
-- accelerators correspond to nominal case/number forms
-- without the gender, and use the feminine as the
-- lemma for feminine forms.
if slot:find("_f", nil, true) then
data.accel[slot] = {form = accel_form:gsub("|f$", ""), lemma = accel_lemma_f}
else
data.accel[slot] = {form = accel_form:gsub("|m$", ""), lemma = accel_lemma}
end
else
if not data.forms.nom_sg_n and not data.forms.nom_pl_n then
-- use multipart tags if called for
accel_form = accel_form:gsub("|m$", "|m//f//n")
elseif not data.forms.nom_sg_f and not data.forms.nom_pl_f then
accel_form = accel_form:gsub("|m$", "|m//f")
end
-- use the order nom|m|s, which is more standard than nom|s|m
accel_form = accel_form:gsub("|(.-)|(.-)$", "|%2|%1")
data.accel[slot] = {form = accel_form, lemma = accel_lemma}
end
if not redlink and (namespace or get_namespace()) == "" then
local title = ((lang or get_lang()):makeEntryName(form))
local t = mw.title.new(title)
if t and not t.exists then
insert(data.categories, "Latin " .. data.pos .. " with red links in their inflection tables")
redlink = true
end
end
end
end
end
end
-- Convert data.forms for all slots into displayable text. This is
-- an older function, still currently used for nouns but not for adjectives.
-- For adjectives, the adjective table module has special code to combine
-- adjacent slots, and needs the original forms plus other text that will
-- go into the displayable text for the slot; this is handled below by
-- partial_show_forms() and finish_show_form().
local function show_forms(data, is_adj)
local noteindex = 1
local notes = {}
local seen_notes = {}
for slot in iter_slots(is_adj) do
local val = data.forms[slot]
if val and val ~= "" and val ~= "—" then
for i, form in ipairs(val) do
local link = link_if_attested(form, data.accel[slot], data.unattested[slot])
local this_notes = data.notes[slot .. i]
if this_notes and not data.user_specified[slot] then
if type(this_notes) == "string" then
this_notes = {this_notes}
end
local link_indices = {}
for _, this_note in ipairs(this_notes) do
local this_noteindex = seen_notes[this_note]
if not this_noteindex then
-- Generate a footnote index.
this_noteindex = noteindex
noteindex = noteindex + 1
insert(notes, '<sup style="color: red">' .. this_noteindex .. '</sup>' .. this_note)
seen_notes[this_note] = this_noteindex
end
insert_if_not(link_indices, this_noteindex)
end
val[i] = link .. '<sup style="color: red">' .. concat(link_indices, ",") .. '</sup>'
else
val[i] = link
end
end
-- FIXME, do we want this difference?
data.forms[slot] = concat(val, is_adj and ", " or "<br />")
end
end
for _, footnote in ipairs(data.footnotes) do
insert(notes, footnote)
end
data.footnotes = concat(notes, "<br />")
end
-- Generate the display form for a set of slots with identical content. We
-- verify that the slots are actually identical, and throw an assertion error
-- if not. The display form is as in show_forms() but combines together all the
-- accelerator forms for all the slots.
local function finish_show_form(data, slots, is_adj)
assert(#slots > 0)
local slot1 = slots[1]
local forms = data.forms[slot1]
local notetext = data.notetext[slot1]
for _, slot in ipairs(slots) do
if not deep_equals(data.forms[slot], forms) then
error("data.forms[" .. slot1 .. "] = " .. (concat_forms_in_slot(forms) or "nil") ..
", but data.forms[" .. slot .. "] = " .. (concat_forms_in_slot(data.forms[slot]) or "nil"))
end
assert(deep_equals(data.notetext[slot], notetext))
end
if not forms then
return "—"
else
local accel_forms = {}
local accel_lemma = data.accel[slot1].lemma
for _, slot in ipairs(slots) do
assert(data.accel[slot].lemma == accel_lemma)
insert(accel_forms, data.accel[slot].form)
end
local combined_accel_form = concat(accel_forms, "|;|")
local accel = {form = combined_accel_form, lemma = accel_lemma}
local formtext = {}
for i, form in ipairs(forms) do
insert(formtext, link_if_attested(form, accel, data.unattested[slot1]) .. notetext[i])
end
-- FIXME, do we want this difference?
return concat(formtext, is_adj and ", " or "<br />")
end
end
-- Used by the adjective table module. This does some of the work of
-- show_forms(); in particular, it converts all empty forms of any format
-- (nil, "", "—") to nil and, if the forms aren't empty, generates the footnote
-- text associated with each form.
local function partial_show_forms(data, is_adj)
local noteindex = 1
local notes = {}
local seen_notes = {}
data.notetext = {}
-- Store this function in DATA so that it can be called from the adjective
-- table module without needing to require this module, which will (or
-- could) lead to recursive module requiring.
data.finish_show_form = finish_show_form
for slot in iter_slots(is_adj) do
local val = data.forms[slot]
if not val or val == "" or val == "—" then
data.forms[slot] = nil
else
local notetext = {}
for i in ipairs(val) do
local this_notes = data.notes[slot .. i]
if this_notes and not data.user_specified then
if type(this_notes) == "string" then
this_notes = {this_notes}
end
local link_indices = {}
for _, this_note in ipairs(this_notes) do
local this_noteindex = seen_notes[this_note]
if not this_noteindex then
-- Generate a footnote index.
this_noteindex = noteindex
noteindex = noteindex + 1
insert(notes, '<sup style="color: red">' .. this_noteindex .. '</sup>' .. this_note)
seen_notes[this_note] = this_noteindex
end
insert_if_not(link_indices, this_noteindex)
end
insert(notetext, '<sup style="color: red">' .. concat(link_indices, ",") .. '</sup>')
else
insert(notetext, "")
end
end
data.notetext[slot] = notetext
end
end
for _, footnote in ipairs(data.footnotes) do
insert(notes, footnote)
end
data.footnotes = concat(notes, "<br />")
end
-- Given an ending (or possibly a full regex matching the entire lemma, if
-- a regex group is present), return the base minus the ending, or nil if
-- the ending doesn't match.
local function extract_base(lemma, ending)
if ending:find("(", nil, true) then
return umatch(lemma, ending)
end
return umatch(lemma, "^(.*)" .. ending .. "$")
end
-- Given ENDINGS_AND_SUBTYPES (a list of pairs of endings with associated
-- subtypes, where each pair consists of a single ending spec and a list of
-- subtypes), check each ending in turn against LEMMA. If it matches, return
-- the pair BASE, STEM2, SUBTYPES where BASE is the remainder of LEMMA minus
-- the ending, STEM2 is as passed in, and SUBTYPES is the subtypes associated
-- with the ending. But don't return SUBTYPES if any of the subtypes in the
-- list is specifically canceled in SPECIFIED_SUBTYPES (a set, i.e. a table
-- where the keys are strings and the value is always true); instead, consider
-- the next ending in turn. If no endings match, throw an error if DECLTYPE is
-- non-nil, mentioning the DECLTYPE (the user-specified declension); but if
-- DECLTYPE is nil, just return nil, nil, nil.
--
-- The ending spec in ENDINGS_AND_SUBTYPES is one of the following:
--
-- 1. A simple string, e.g. "tūdō", specifying an ending.
-- 2. A regex that should match the entire lemma (it should be anchored at
-- the beginning with ^ and at the end with $), and contains a single
-- capturing group to match the base.
-- 3. A pair {SIMPLE_STRING_OR_REGEX, STEM2_ENDING} where
-- SIMPLE_STRING_OR_REGEX is one of the previous two possibilities and
-- STEM2_ENDING is a string specifying the corresponding ending that must
-- be present in STEM2. If this form is used, the combination of
-- base + STEM2_ENDING must exactly match STEM2 in order for this entry
-- to be considered a match. An example is {"is", ""}, which will match
-- lemma == "follis", stem2 == "foll", but not lemma == "lapis",
-- stem2 == "lapid".
local function get_noun_subtype_by_ending(lemma, stem2, decltype, specified_subtypes,
endings_and_subtypes)
for _, ending_and_subtypes in ipairs(endings_and_subtypes) do
local ending = ending_and_subtypes[1]
local subtypes = ending_and_subtypes[2]
local not_this_subtype = false
if (
specified_subtypes.pl and not contains(subtypes, "pl") or
contains(subtypes, "both") and not specified_subtypes.both
) then
-- We now require that plurale tantum terms specify a plural-form lemma.
-- The autodetected subtypes will include 'pl' for such lemmas; if not,
-- we fail this entry. Additionally, if the rule contains 'both', it
-- must be explicitly specified to match.
not_this_subtype = true
else
for _, subtype in ipairs(subtypes) do
-- A subtype is directly canceled by specifying -SUBTYPE.
-- In addition, M or F as a subtype is canceled by N, and
-- vice-versa, but M doesn't cancel F or vice-versa; instead,
-- we simply ignore the conflicting gender specification when
-- constructing the combination of specified and inferred subtypes.
-- The reason for this is that neuters have distinct declensions
-- from masculines and feminines, but masculines and feminines have
-- the same declension, and various nouns in Latin that are
-- normally masculine are exceptionally feminine and vice-versa
-- (nauta, agricola, fraxinus, malus "apple tree", manus, rēs,
-- etc.).
--
-- In addition, sg as a subtype is canceled by pl and vice-versa.
-- It's also possible to specify both, which will override sg but
-- not cancel it (in the sense that it won't prevent the relevant
-- rule from matching). For example, there's a rule specifying that
-- lemmas beginning with a capital letter and ending in -ius take
-- the ius.voci.sg subtypes. Specifying such a lemma with the
-- subtype both will result in the ius.voci.both subtypes, whereas
-- specifying such a lemma with the subtype pl will cause this rule
-- not to match, and it will fall through to a less specific rule
-- that returns just the ius subtype, which will be combined with
-- the explicitly specified pl subtype to produce ius.pl.
if specified_subtypes["-" .. subtype] or
subtype == "N" and (specified_subtypes.M or specified_subtypes.F) or
(subtype == "M" or subtype == "F") and specified_subtypes.N or
subtype == "sg" and specified_subtypes.pl or
subtype == "pl" and specified_subtypes.sg then
not_this_subtype = true
break
end
end
end
if not not_this_subtype then
if type(ending) == "table" then
local lemma_ending = ending[1]
local stem2_ending = ending[2]
local base = extract_base(lemma, lemma_ending)
if base and base .. stem2_ending == stem2 then
return base, stem2, subtypes
end
else
local base = extract_base(lemma, ending)
if base then
return base, stem2, subtypes
end
end
end
end
if decltype then
error("Unrecognized ending for declension-" .. decltype .. " noun: " .. lemma)
end
return nil, nil, nil
end
-- Autodetect the subtype of a noun given all the information specified by the
-- user: lemma, stem2, declension type and specified subtypes. Three values are
-- returned: the lemma base (i.e. the stem of the lemma, as required by the
-- declension functions), the new stem2 and the autodetected subtypes. Note
-- that this will not detect a given subtype if the explicitly specified
-- subtypes are incompatible (i.e. if -SUBTYPE is specified for any subtype
-- that would be returned; or if M or F is specified when N would be returned,
-- and vice-versa; or if pl is specified when sg would be returned, and
-- vice-versa).
--
-- NOTE: This function has intimate knowledge of the way that the declension
-- functions handle subtypes, particularly for the third declension.
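-- For example (a rough illustration; cf. the parse_segment() examples below),
-- detect_noun_subtype("lūna", nil, "1", {}) returns "lūn", nil, {"F"}.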
local function detect_noun_subtype(lemma, stem2, typ, subtypes)
local base, _
if typ == "1" then
return get_noun_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"ām", {"F", "am"}},
{"ās", {"M", "Greek", "Ma"}},
{"ēs", {"M", "Greek", "Me"}},
{"ē", {"F", "Greek"}},
{"ae", {"F", "pl"}},
{"a", {"F"}},
})
elseif typ == "2" then
local detected_subtypes
lemma, stem2, detected_subtypes = get_noun_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"^(.*r)$", {"M", "er"}},
{"^(.*v)os$", {"M", "vos"}},
{"^(.*v)om$", {"N", "vom"}},
-- If the lemma ends in -os and the user said N or -M, then the
-- following won't apply, and the second (neuter) -os will apply.
{"os", {"M", "Greek"}},
{"os", {"N", "Greek", "us"}},
{"on", {"N", "Greek"}},
-- -ius beginning with a capital letter is assumed a proper name,
-- and takes the voci subtype (vocative in -ī) along with the ius
-- subtype and sg-only. Other nouns in -ius just take the ius
-- subtype. Explicitly specify "sg" so that if .pl is given,
-- this rule won't apply.
{"^(%u.*)ius$", {"M", "ius", "voci", "sg"}},
{"ius", {"M", "ius"}},
{"ium", {"N", "ium"}},
-- If the lemma ends in -us and the user said N or -M, then the
-- following won't apply, and the second (neuter) -us will apply.
{"us", {"M"}},
{"us", {"N", "us"}},
{"um", {"N"}},
{"iī", {"M", "ius", "pl"}},
{"ia", {"N", "ium", "pl"}},
-- If the lemma ends in -ī and the user said N or -M, then the
-- following won't apply, and the second (neuter) -ī will apply.
{"ī", {"M", "pl"}},
{"ī", {"N", "us", "pl"}},
{"oe", {"M", "Greek", "pl"}},
{"a", {"N", "pl"}},
})
stem2 = stem2 or lemma
return lemma, stem2, detected_subtypes
elseif typ == "3" then
if subtypes.pl then
if subtypes.Greek then
base = lemma:match("^(.*)erēs$")
if base then
return base .. "ēr", base .. "er", {"er"}
end
base = lemma:match("^(.*)ontēs$")
if base then
return base .. "ōn", base .. "ont", {"on"}
end
base = lemma:match("^(.*)es$")
if base then
return "foo", stem2 or base, {}
end
error("Unrecognized ending for declension-3 plural Greek noun: " .. lemma)
end
base = lemma:match("^(.*)ia$")
if base then
return "foo", stem2 or base, {"N", "I", "pure"}
end
base = lemma:match("^(.*)a$")
if base then
return "foo", stem2 or base, {"N"}
end
base = lemma:match("^(.*)ēs$")
if base then
return "foo", stem2 or base, {}
end
error("Unrecognized ending for declension-3 plural noun: " .. lemma)
end
stem2 = stem2 or make_stem2(lemma)
local detected_subtypes
if subtypes.Greek then
base, _, detected_subtypes = get_noun_subtype_by_ending(lemma, stem2, nil, subtypes, {
{{"is", ""}, {"I"}},
{"ēr", {"er"}},
{"ōn", {"on"}},
})
if base then
return lemma, stem2, detected_subtypes
end
return lemma, stem2, {}
end
if not subtypes.N then
base, _, detected_subtypes = get_noun_subtype_by_ending(lemma, stem2, nil, subtypes, {
{{"^(%u.*pol)is$", ""}, {"F", "polis", "sg", "loc"}},
{{"tūdō", "tūdin"}, {"F"}},
{{"tās", "tāt"}, {"F"}},
{{"tūs", "tūt"}, {"F"}},
{{"tiō", "tiōn"}, {"F"}},
{{"siō", "siōn"}, {"F"}},
{{"xiō", "xiōn"}, {"F"}},
{{"gō", "gin"}, {"F"}},
{{"or", "ōr"}, {"M"}},
{{"trx", "trīc"}, {"F"}},
{{"is", ""}, {"I"}},
{{"^(%l.*)ēs$", ""}, {"I"}},
})
if base then
return lemma, stem2, detected_subtypes
end
end
base, _, detected_subtypes = get_noun_subtype_by_ending(lemma, stem2, nil, subtypes, {
{{"us", "or"}, {"N"}},
{{"us", "er"}, {"N"}},
{{"ma", "mat"}, {"N"}},
{{"men", "min"}, {"N"}},
{{"^(%u.*)e$", ""}, {"N", "sg"}},
{{"e", ""}, {"N", "I", "pure"}},
{{"al", "āl"}, {"N", "I", "pure"}},
{{"ar", "ār"}, {"N", "I", "pure"}},
})
if base then
return lemma, stem2, detected_subtypes
end
return lemma, stem2, {}
elseif typ == "4" then
if subtypes.echo or subtypes.Callisto then
base = lemma:match("^(.*)ō$")
if not base then
error("Declension-4 noun of subtype .echo or .Callisto should end in -ō: " .. lemma)
end
if subtypes.Callisto then
return base, nil, {"F", "sg"}
else
return base, nil, {"F"}
end
end
return get_noun_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"us", {"M"}},
{"ū̆", {"N"}},
{"ūs", {"M", "pl"}},
{"ua", {"N", "pl"}},
})
elseif typ == "5" then
return get_noun_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"iēs", {"F", "i"}},
{"iēs", {"F", "i", "pl"}},
{"ēs", {"F"}},
{"ēs", {"F", "pl"}},
})
elseif typ == "sgpl" then
return lemma, stem2, {}
elseif typ == "irreg" and lemma == "domus" then
-- [[Module:la-noun/data]] auto-sets data.loc = true, but we need to know this
-- before declining the noun so we can propagate it to other segments.
return lemma, nil, {"loc"}
elseif typ == "indecl" or typ == "irreg" and (
lemma == "Deus" or umatch(lemma, "^ēss$") or
lemma == "Athōs" or lemma == "vēnum"
) then
-- Indeclinable nouns, and certain irregular nouns, set data.num = "sg",
-- but we need to know this before declining the noun so we can
-- propagate it to other segments.
return get_noun_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"", {"both"}},
{"", {"sg"}},
{"", {"pl"}},
})
else
return lemma, nil, {}
end
end
-- Given ENDINGS_AND_SUBTYPES (a list of four-tuples of ENDING, RETTYPE,
-- SUBTYPES, PROCESS_RETVAL), check each ENDING in turn against LEMMA and
-- STEM2. If it matches, return a four-tuple BASE, STEM2, RETTYPE, NEW_SUBTYPES
-- where BASE is normally the remainder of LEMMA minus the ending, STEM2 is
-- as passed in, RETTYPE is as passed in, and NEW_SUBTYPES is the same as
-- SUBTYPES minus any subtypes beginning with a hyphen. If no endings match,
-- throw an error if DECLTYPE is non-nil, mentioning the DECLTYPE
-- (user-specified declension); but if DECLTYPE is nil, just return the tuple
-- nil, nil, nil, nil.
--
-- In order for a given entry to match, ENDING must match and also the subtypes
-- in SUBTYPES (a list) must not be incompatible with the passed-in
-- user-specified subtypes SPECIFIED_SUBTYPES (a set, i.e. a table where the
-- keys are strings and the value is always true). "Incompatible" means that
-- a given SUBTYPE is specified in either one and -SUBTYPE in the other, or
-- that "pl" is found in SPECIFIED_SUBTYPES and not in SUBTYPES.
--
-- The ending spec in ENDINGS_AND_SUBTYPES is one of the following:
--
-- 1. A simple string, e.g. "tūdō", specifying an ending.
-- 2. A regex that should match the entire lemma (it should be anchored at
-- the beginning with ^ and at the end with $), and contains a single
-- capturing group to match the base.
-- 3. A pair {SIMPLE_STRING_OR_REGEX, STEM2_ENDING} where
-- SIMPLE_STRING_OR_REGEX is one of the previous two possibilities and
-- STEM2_ENDING is a string specifying the corresponding ending that must
-- be present in STEM2. If this form is used, the combination of
-- base + STEM2_ENDING must exactly match STEM2 in order for this entry
-- to be considered a match. An example is {"is", ""}, which will match
-- lemma == "follis", stem2 == "foll", but not lemma == "lapis",
-- stem2 == "lapid".
--
-- If PROCESS_STEM2 is given and the returned STEM2 would be nil, call
-- process_stem2(BASE) to get the STEM2 to return.
local function get_adj_type_and_subtype_by_ending(lemma, stem2, decltype,
specified_subtypes, endings_and_subtypes, process_stem2)
for _, ending_and_subtypes in ipairs(endings_and_subtypes) do
local ending = ending_and_subtypes[1]
local rettype = ending_and_subtypes[2]
local subtypes = ending_and_subtypes[3]
local process_retval = ending_and_subtypes[4]
local not_this_subtype = false
if (
specified_subtypes.pl and not contains(subtypes, "pl") or
contains(subtypes, "both") and not specified_subtypes.both
) then
-- We now require that plurale tantum terms specify a plural-form lemma.
-- The autodetected subtypes will include 'pl' for such lemmas; if not,
-- we fail this entry. Additionally, if the rule contains 'both', it
-- must be explicitly specified to match.
not_this_subtype = true
else
for _, subtype in ipairs(subtypes) do
-- A subtype is directly canceled by specifying -SUBTYPE.
if specified_subtypes["-" .. subtype] then
not_this_subtype = true
break
end
-- A subtype is canceled if the user specified SUBTYPE and
-- -SUBTYPE is given in the to-be-returned subtypes.
local must_not_be_present = subtype:match("^%-(.*)$")
if must_not_be_present and specified_subtypes[must_not_be_present] then
not_this_subtype = true
break
end
end
end
if not not_this_subtype then
local base
if type(ending) == "table" then
local lemma_ending = ending[1]
local stem2_ending = ending[2]
base = extract_base(lemma, lemma_ending)
if base and base .. stem2_ending ~= stem2 then
base = nil
end
else
base = extract_base(lemma, ending)
end
if base then
-- Remove subtypes of the form -SUBTYPE from the subtypes
-- to be returned.
local new_subtypes = {}
for _, subtype in ipairs(subtypes) do
if subtype:sub(1, 1) ~= "-" then
insert(new_subtypes, subtype)
end
end
if process_retval then
base, stem2 = process_retval(base, stem2)
end
if process_stem2 then
stem2 = stem2 or process_stem2(base)
end
return base, stem2, rettype, new_subtypes
end
end
end
if not decltype then
return nil, nil, nil, nil
elseif decltype == "" then
error("Unrecognized ending for adjective: " .. lemma)
else
error("Unrecognized ending for declension-" .. decltype .. " adjective: " .. lemma)
end
end
-- Autodetect the type and subtype of an adjective given all the information
-- specified by the user: lemma, stem2, declension type and specified subtypes.
-- Four values are returned: the lemma base (i.e. the stem of the lemma, as
-- required by the declension functions), the value of stem2 to pass to the
-- declension function, the declension type and the autodetected subtypes.
-- Note that this will not detect a given subtype if -SUBTYPE is specified for
-- any subtype that would be returned, or if SUBTYPE is specified and -SUBTYPE
-- is among the subtypes that would be returned (such subtypes are filtered out
-- of the returned subtypes).
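-- For example (a rough illustration; cf. the parse_segment() examples below),
-- detect_adj_type_and_subtype("bonus", nil, "+", {}) detects declension type
-- "1&2+" with base "bon" and no additional subtypes.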
local function detect_adj_type_and_subtype(lemma, stem2, typ, subtypes)
-- FIXME: not clear why "foo" is in production code.
local function base_as_stem2(base, stem2)
return "foo", base
end
local function constant_base(baseval)
return function(base, stem2)
return baseval, nil
end
end
local function decl12_stem2(base)
return base
end
local function decl3_stem2(base)
return make_stem2(base)
end
local decl12_entries = {
{"us", "1&2+", {}},
{"a", "1&2+", {}},
{"um", "1&2+", {}},
{"ī", "1&2+", {"pl"}},
{"ae", "1&2+", {"pl"}},
{"a", "1&2+", {"pl"}},
-- Nearly all -os adjectives are greekA
{"os", "1&2+", {"greekA", "-greekE"}},
{"os", "1&2+", {"greekE", "-greekA"}},
{"ē", "1&2+", {"greekE", "-greekA"}},
{"on", "1&2+", {"greekA", "-greekE"}},
{"on", "1&2+", {"greekE", "-greekA"}},
{"^(.*er)$", "1&2+", {"er"}},
{"^(.*ur)$", "1&2+", {"er"}},
{"^(h)ic$", "1&2+", {"ic"}},
}
local decl3_entries = {
{"^(.*er)$", "3-3+", {}},
{"is", "3-2+", {}},
{"e", "3-2+", {}},
{"^(.*)or$", "3-C+", {}},
{"^(min)or$", "3-C+", {}},
-- Detect -ēs as 3-1 without auto-inferring .pl if .pl
-- not specified. If we don't do this, the later entry for
-- -ēs will auto-infer .pl whenever -ēs is specified (which
-- won't work for adjectives like quadripēs, volucripēs).
-- Essentially, for declension-3 adjectives, we require that
-- .pl is given if the lemma is plural.
--
-- Most 3-1 adjectives are i-stem (e.g. audāx) so we require -I
-- to be given with non-i-stem adjectives. The first entry below
-- will apply when -I isn't given, the second when it is given.
{"^(.*ēs)$", "3-1+", {"I"}},
{"^(.*ēs)$", "3-1+", {"par"}},
{"^(.*)ōrēs$", "3-C+", {"pl"}},
{"^(min)ōrēs$", "3-C+", {"pl"}},
-- If .pl with -ēs, we don't know if the adjective is 3-1, 3-2
-- or 3-3. Since 3-2 is probably the most common, we infer it
-- (as well as the fact that these adjectives *are* in a sense
-- 3-2 since they have a distinct neuter in -(i)a). Note that
-- we have two entries here; the first one will apply unless
-- -I is given, and will infer an i-stem adjective; the second
-- one will apply otherwise (and infer a non-i-stem 3-1 adjective).
{"ēs", "3-2+", {"pl", "I"}},
{"ēs", "3-1+", {"pl", "par"}, base_as_stem2},
-- Same for neuters.
{"ia", "3-2+", {"pl", "I"}},
{"a", "3-1+", {"pl", "par"}, base_as_stem2},
-- As above for -ēs but for miscellaneous singulars.
{"", "3-1+", {"I"}},
{"", "3-1+", {"par"}},
}
if typ == "+" then
local base, new_stem2, rettype, new_subtypes = get_adj_type_and_subtype_by_ending(lemma, stem2, nil, subtypes, decl12_entries, decl12_stem2)
if base then
return base, new_stem2, rettype, new_subtypes
else
return get_adj_type_and_subtype_by_ending(lemma, stem2, typ, subtypes, decl3_entries, decl3_stem2)
end
elseif typ == "3+" then
return get_adj_type_and_subtype_by_ending(lemma, stem2, typ, subtypes, decl3_entries, decl3_stem2)
elseif typ == "1&2+" then
return get_adj_type_and_subtype_by_ending(lemma, stem2, typ, subtypes, decl12_entries, decl12_stem2)
elseif typ == "1-1+" then
return get_adj_type_and_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"a", typ, {}},
{"ae", typ, {"pl"}},
})
elseif typ == "2-2+" then
return get_adj_type_and_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"us", typ, {}},
{"um", typ, {}},
{"ī", typ, {"pl"}},
{"a", typ, {"pl"}},
{"os", typ, {"greek"}},
{"on", typ, {"greek"}},
{"oe", typ, {"greek", "pl"}},
})
elseif typ == "3-1+" then
-- This will cancel out the I if -I is specified in subtypes, and the
-- resulting lack of I will get converted to "par".
return get_adj_type_and_subtype_by_ending(lemma, stem2, typ, subtypes, {
-- Detect -ēs as 3-1 without auto-inferring .pl if .pl
-- not specified. If we don't do this, the later entry for
-- -ēs will auto-infer .pl whenever -ēs is specified.
-- Essentially, for declension-3 adjectives, we require that
-- .pl is given if the lemma is plural.
-- We have two entries here; the first one will apply unless
-- -I is given, and will infer an i-stem adjective; the second
-- one will apply otherwise.
{"^(.*ēs)$", typ, {"I"}},
{"^(.*ēs)$", typ, {"par"}},
{"ēs", typ, {"pl", "I"}, base_as_stem2},
{"ēs", typ, {"pl", "par"}, base_as_stem2},
{"ia", typ, {"pl", "I"}, base_as_stem2},
{"a", typ, {"pl", "par"}, base_as_stem2},
{"", typ, {"I"}},
{"", typ, {"par"}},
}, decl3_stem2)
elseif typ == "3-2+" then
return get_adj_type_and_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"is", typ, {}},
{"e", typ, {}},
-- Detect -ēs as 3-2 without auto-inferring .pl if .pl
-- not specified. If we don't do this, the later entry for
-- -ēs will auto-infer .pl whenever -ēs is specified (which
-- won't work for adjectives like isoscelēs). Essentially,
-- for declension-3 adjectives, we require that .pl is given
-- if the lemma is plural.
{"ēs", typ, {}},
{"ēs", typ, {"pl"}},
{"ia", typ, {"pl"}},
}, decl3_stem2)
elseif typ == "3-3+" or typ == "3-P+" then
return get_adj_type_and_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"ēs", typ, {"pl"}, base_as_stem2},
{"ia", typ, {"pl"}, base_as_stem2},
{"", typ, {}},
}, decl3_stem2)
elseif typ == "3-C+" then
return get_adj_type_and_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"^(.*)or$", typ, {}},
{"^(min)or$", typ, {}},
{"^(.*)ōrēs$", typ, {"pl"}},
{"^(min)ōrēs$", typ, {"pl"}},
}, decl3_stem2)
elseif typ == "irreg+" then
return get_adj_type_and_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"^(duo)$", typ, {"pl"}},
{"^(ambō)$", typ, {"pl"}},
{"^(mīll?ia)$", typ, {"N", "pl"}, constant_base("mīlle")},
-- match ea
{"^(ea)$", typ, {}, constant_base("is")},
-- match id
{"^(id)$", typ, {}, constant_base("is")},
-- match plural eī, iī
{"^(ī)$", typ, {"pl"}, constant_base("is")},
-- match plural ea, eae
{"^(eae?)$", typ, {"pl"}, constant_base("is")},
-- match eadem
{"^(eadem)$", typ, {}, constant_base("īdem")},
-- match īdem, idem
{"^(dem)$", typ, {}, constant_base("īdem")},
-- match plural īdem
{"^(īdem)$", typ, {"pl"}},
-- match plural eadem, eaedem
{"^(eae?dem)$", typ, {"pl"}, constant_base("īdem")},
-- match illa, ipsa, ista; it doesn't matter if we overmatch because
-- we'll get an error as we use the stem itself in the returned base
{"^(i)a$", typ, {}, function(base, stem2) return base .. "e", nil end},
-- match illud, istud; as above, it doesn't matter if we overmatch
{"^(i)ud$", typ, {}, function(base, stem2) return base .. "e", nil end},
-- match ipsum
{"^(ipsum)$", typ, {}, constant_base("ipse")},
-- match plural illī, ipsī, istī; as above, it doesn't matter if we
-- overmatch
{"^(i)ī$", typ, {"pl"}, function(base, stem2) return base .. "e", nil end},
-- match plural illa, illae, ipsa, ipsae, ista, istae; as above, it
-- doesn't matter if we overmatch
{"^(i)ae?$", typ, {"pl"}, function(base, stem2) return base .. "e", nil end},
-- Detect quī as non-plural unless .pl specified.
{"^(quī)$", typ, {}},
-- Otherwise detect quī as plural.
{"^(quī)$", typ, {"pl"}},
-- Same for quae.
{"^(quae)$", typ, {}, constant_base("quī")},
{"^(quae)$", typ, {"pl"}, constant_base("quī")},
{"^(quid)$", typ, {}, constant_base("quis")},
{"^(quod)$", typ, {}, constant_base("quī")},
{"^(quiquid)$", typ, {}, constant_base("quisquis")},
{"^(quīquī)$", typ, {"pl"}, constant_base("quisquis")},
{"^(quaequae)$", typ, {"pl"}, constant_base("quisquis")},
-- match all remaining lemmas in lemma form
{"", typ, {}},
})
elseif typ == "indecl+" then
return get_adj_type_and_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"", typ, {"both"}},
{"", typ, {"sg"}},
{"", typ, {"pl"}},
})
else -- 0+
return lemma, nil, typ, {}
end
end
-- Parse a segment (e.g. "lūna<1>", "aegis/aegid<3.Greek>", "bōs<irreg.F>",
-- "bonus<+>", or "[[vetus]]/veter<3+.-I>"), consisting of a lemma (or optionally
-- a lemma/stem) and declension+subtypes, where a + in the declension indicates
-- an adjective. Brackets can be present to indicate links, for use in
-- {{la-noun}} and {{la-adj}}. The return value is a table, e.g.:
-- {
-- decl = "1",
-- is_adj = false,
-- orig_lemma = "lūna",
-- lemma = "lūna",
-- stem2 = nil,
-- gender = "F",
-- types = {["F"] = true},
-- args = {"lūn"}
-- }
--
-- or
--
-- {
-- decl = "3",
-- is_adj = false,
-- orig_lemma = "aegis",
-- lemma = "aegis",
-- stem2 = "aegid",
-- gender = nil,
-- types = {["Greek"] = true},
-- args = {"aegis", "aegid"}
-- }
--
-- or
--
-- {
-- decl = "irreg",
-- is_adj = false,
-- orig_lemma = "bōs",
-- lemma = "bōs",
-- stem2 = nil,
-- gender = "F",
-- types = {["F"] = true},
-- args = {"bōs"}
-- }
-- or
--
-- {
-- decl = "1&2+",
-- is_adj = true,
-- orig_lemma = "bonus",
-- lemma = "bonus",
-- stem2 = nil,
-- gender = nil,
-- types = {},
-- args = {"bon"}
-- }
--
-- or
--
-- {
-- decl = "3-1+",
-- is_adj = true,
-- orig_lemma = "]",
-- lemma = "vetus",
-- stem2 = "veter",
-- gender = nil,
-- types = {},
-- args = {"vetus", "veter"}
-- }
local function parse_segment(segment)
local stem_part, spec_part = segment:match("^(.*)<(.-)>$")
local stems = split(stem_part, "/", true, true)
local specs = split(spec_part, ".", true, true)
local types = {}
local num = nil
local loc = false
local args = {}
local decl
for j, spec in ipairs(specs) do
if j == 1 then
decl = spec
else
local begins_with_hyphen
begins_with_hyphen, spec = spec:match("^(%-?)(.*)$")
spec = begins_with_hyphen .. spec:gsub("%-", "_")
types[spec] = true
end
end
local orig_lemma = stems[1]
if not orig_lemma or orig_lemma == "" then
orig_lemma = pagename or get_pagename()
end
local lemma = remove_links(orig_lemma)
local stem2 = stems[2]
if stem2 == "" then
stem2 = nil
end
if #stems > 2 then
error("Too many stems, at most 2 should be given: " .. stem_part)
end
local base, detected_subtypes
local is_adj = false
local gender = nil
if decl:find("+", nil, true) then
base, stem2, decl, detected_subtypes = detect_adj_type_and_subtype(lemma, stem2, decl, types)
is_adj = true
for _, subtype in ipairs(detected_subtypes) do
if types["-" .. subtype] then
-- if a "cancel subtype" spec is given, remove the cancel spec
-- and don't apply the subtype
types["-" .. subtype] = nil
else
types[subtype] = true
end
end
else
base, stem2, detected_subtypes = detect_noun_subtype(lemma, stem2, decl, types)
for _, subtype in ipairs(detected_subtypes) do
if types["-" .. subtype] then
-- if a "cancel subtype" spec is given, remove the cancel spec
-- and don't apply the subtype
types["-" .. subtype] = nil
elseif (subtype == "M" or subtype == "F" or subtype == "N") and
(types.M or types.F or types.N) then
-- if gender already specified, don't create conflicting gender spec
elseif (subtype == "sg" or subtype == "pl" or subtype == "both") and
(types.sg or types.pl or types.both) then
-- if number restriction already specified, don't create conflicting
-- number restriction spec
else
types[subtype] = true
end
end
if not types.pl and not types.both and umatch(lemma, "^%u") then
types.sg = true
end
end
if types.loc then
loc = true
types.loc = nil
end
if types.M then
gender = "M"
elseif types.F then
gender = "F"
elseif types.N then
gender = "N"
end
if types.pl then
num = "pl"
types.pl = nil
elseif types.sg then
num = "sg"
types.sg = nil
end
args[1] = base
args[2] = stem2
return {
decl = decl,
is_adj = is_adj,
gender = gender,
orig_lemma = orig_lemma,
lemma = lemma,
stem2 = stem2,
types = types,
num = num,
loc = loc,
args = args,
}
end
-- Parse a segment run (i.e. a string with zero or more segments [see
-- parse_segment] and optional surrounding text, e.g. "foenum<2>-graecum<2>"
-- or "]/part<3.abl-e-occ-i> ]"). The segment run
-- currently cannot contain any alternants (e.g. "((epulum<2.sg>,epulae<1>))").
-- The return value is a table of the following form:
-- {
-- segments = PARSED_SEGMENTS (a list of parsed segments),
-- loc = LOC (a boolean indicating whether any of the individual segments
-- has a locative),
-- num = NUM (the first specified value for a number restriction, or nil if
-- no number restrictions),
-- gender = GENDER (the first specified or inferred gender, or nil if none),
-- is_adj = IS_ADJ (true if all segments are adjective segments, false if
-- there's at least one noun segment, nil if only raw-text segments),
-- propses = PROPSES (list of per-word properties, where each element is an
-- object {
-- decl = DECL (declension),
-- types = TYPES (set describing the subtypes of a given word),
-- }
-- }
-- Each element in PARSED_SEGMENTS is as returned by parse_segment() but will
-- have an additional .orig_prefix field indicating the text before the segment
-- (including bracketed links) and corresponding .prefix field indicating the text
-- with bracketed links resolved. If there is trailing text, the last element will
-- have only .orig_prefix and .prefix fields containing that trailing text.
local function parse_segment_run(segment_run)
local loc = nil
local num = nil
local is_adj = nil
-- If the segment run begins with a hyphen, include the hyphen in the
-- set of allowed characters for a declined segment. This way, e.g. the
-- suffix ] can be declared as {{la-ndecl|-cen/-cin<3>}} rather than
-- {{la-ndecl|-cen/cin<3>}}, which is less intuitive.
local is_suffix = segment_run:sub(1, 1) == "-"
local segments = {}
local propses = {}
-- We want to not break up a bracketed link followed by <> even if it has a space or
-- hyphen in it. So we do an outer capturing split to find the bracketed links followed
-- by <>, then do inner capturing splits on all the remaining text to find the other
-- declined terms.
local bracketed_segments = split(segment_run, "(%[%[[^%]]-%]%]<.->)")
for i, bracketed_segment in ipairs(bracketed_segments) do
if i % 2 == 0 then
insert(segments, bracketed_segment)
else
for _, subsegment in ipairs(split(bracketed_segment, is_suffix and "([^%s]+<.->)" or "([^%s%-]+<.->)")) do
insert(segments, subsegment)
end
end
end
local parsed_segments = {}
local gender = nil
for i = 2, (#segments - 1), 2 do
local parsed_segment = parse_segment(segments[i])
-- Overall locative is true if any segments call for locative.
loc = loc or parsed_segment.loc
-- The first specified value for num becomes the overall value.
num = num or parsed_segment.num
if is_adj == nil then
is_adj = parsed_segment.is_adj
else
is_adj = is_adj and parsed_segment.is_adj
end
gender = gender or parsed_segment.gender
parsed_segment.orig_prefix = segments[i - 1]
parsed_segment.prefix = remove_links(segments[i - 1])
insert(parsed_segments, parsed_segment)
insert(propses, {
decl = parsed_segment.decl,
types = parsed_segment.types,
})
end
if segments ~= "" then
insert(parsed_segments, {
orig_prefix = segments,
prefix = remove_links(segments),
})
end
return {
segments = parsed_segments,
loc = loc,
num = num,
is_adj = is_adj,
gender = gender,
propses = propses,
}
end
-- Parse an alternant, e.g. "((epulum<2.sg>,epulae<1>))",
-- "((Serapis<3>,Serapis/Serapid<3>))" or
-- "((rēs<5>pūblica<1>,rēspūblica<1>))". The return value is a table of the form
-- {
-- alternants = PARSED_ALTERNANTS (a list of segment runs, each of which is a
-- list of parsed segments as returned by parse_segment_run()),
-- loc = LOC (a boolean indicating whether any of the individual segment runs
-- has a locative),
-- num = NUM (the overall number restriction, one of "sg", "pl" or "both"),
-- gender = GENDER (the first specified or inferred gender, or nil if none),
-- is_adj = IS_ADJ (true if all non-constant alternants are adjectives, false
-- if all nouns, nil if only constant alternants; conflicting alternants
-- cause an error),
-- propses = PROPSES (list of lists of per-word property objecs),
-- }
local function parse_alternant(alternant)
local parsed_alternants = {}
local alternant_spec = alternant:match("^%(%((.-)%)%)$")
local alternants = split(alternant_spec, ",", true, true)
local loc = false
local num = nil
local gender = nil
local is_adj = nil
local propses = {}
for i, alternant in ipairs(alternants) do
local parsed_run = parse_segment_run(alternant)
insert(parsed_alternants, parsed_run)
loc = loc or parsed_run.loc
-- First time through, set the overall num to the num of the first run,
-- even if nil. After that, if we ever see a run with a different value
-- of num, set the overall num to "both". That way, if all alternants
-- don't specify a num, we get an unspecified num, but if some do and
-- some don't, we get both, because an unspecified num defaults to
-- both.
if i == 1 then
num = parsed_run.num
elseif num ~= parsed_run.num then
-- FIXME, this needs to be rethought to allow for
-- adjective alternants.
num = "both"
end
gender = gender or parsed_run.gender
if is_adj == nil then
is_adj = parsed_run.is_adj
elseif parsed_run.is_adj ~= nil and parsed_run.is_adj ~= is_adj then
error("Saw both noun and adjective alternants; not allowed")
end
insert(propses, parsed_run.propses)
end
return {
alternants = parsed_alternants,
loc = loc,
num = num,
gender = gender,
is_adj = is_adj,
propses = propses,
}
end
-- Parse a segment run (see parse_segment_run()). Unlike for
-- parse_segment_run(), this can contain alternants such as
-- "((epulum<2.sg>,epulae<1>))" or "((Serapis<3.sg>,Serapis/Serapid<3.sg>))"
-- embedded in it to indicate words composed of multiple declensions.
-- The return value is a table of the following form:
-- {
-- segments = PARSED_SEGMENTS (a list of parsed segments),
-- loc = LOC (a boolean indicating whether any of the individual segments has
-- a locative),
-- num = NUM (the first specified value for a number restriction, or nil if
-- no number restrictions),
-- gender = GENDER (the first specified or inferred gender, or nil if none),
-- is_adj = IS_ADJ (true if all segments are adjective segments, false if
-- there's at least one noun segment, nil if only raw-text segments),
-- propses = PROPSES (list of either per-word property objects or lists of
-- lists of such objects),
-- }.
-- Each element in PARSED_SEGMENTS is one of three types:
--
-- 1. A regular segment, as returned by parse_segment() but with additional
-- .prefix and .orig_prefix fields indicating the text before the segment, as per
-- the return value of parse_segment_run().
-- 2. A raw-text segment, i.e. a table with only .prefix and .orig_prefix fields
-- containing the raw text.
-- 3. An alternating segment, as returned by parse_alternant().
-- Note that each alternant is a segment run rather than a single parsed
-- segment to allow for alternants like "((rēs<5>pūblica<1>,rēspūblica<1>))".
-- The parsed segment runs in PARSED_SEGMENT_RUNS are tables as returned by
-- parse_segment_run() (of the same form as the overall return value of
-- parse_segment_run_allowing_alternants()).
local function parse_segment_run_allowing_alternants(segment_run)
if segment_run:find(" ", nil, true) then
track("has-space")
end
if segment_run:find("((", nil, true) then
track("has-alternant")
end
local alternating_segments = split(segment_run, "(%(%(.-%)%))")
local parsed_segments = {}
local loc = false
local num = nil
local gender = nil
local is_adj = nil
local propses = {}
for i = 1, #alternating_segments do
local alternating_segment = alternating_segments[i]
if alternating_segment ~= "" then
local this_is_adj
if i % 2 == 1 then
local parsed_run = parse_segment_run(alternating_segment)
for _, parsed_segment in ipairs(parsed_run.segments) do
insert(parsed_segments, parsed_segment)
end
loc = loc or parsed_run.loc
num = num or parsed_run.num
gender = gender or parsed_run.gender
this_is_adj = parsed_run.is_adj
for _, props in ipairs(parsed_run.propses) do
insert(propses, props)
end
else
local parsed_alternating_segment = parse_alternant(alternating_segment)
insert(parsed_segments, parsed_alternating_segment)
loc = loc or parsed_alternating_segment.loc
num = num or parsed_alternating_segment.num
gender = gender or parsed_alternating_segment.gender
this_is_adj = parsed_alternating_segment.is_adj
insert(propses, parsed_alternating_segment.propses)
end
if is_adj == nil then
is_adj = this_is_adj
elseif this_is_adj ~= nil then
is_adj = is_adj and this_is_adj
end
end
end
if #parsed_segments > 1 then
track("multiple-segments")
end
return {
segments = parsed_segments,
loc = loc,
num = num,
gender = gender,
is_adj = is_adj,
propses = propses,
}
end
-- Combine each form in FORMS (a list of forms associated with a slot) with each
-- form in NEW_FORMS (either a single string for a single form, or a list of
-- forms) by concatenating EXISTING_FORM .. PREFIX .. NEW_FORM. Also combine
-- NOTES (a table specifying the footnotes associated with each existing form,
-- i.e. a map from form indices to lists of footnotes) with NEW_NOTES (new
-- footnotes associated with the new forms, in the same format as NOTES). Return
-- a pair NEW_FORMS, NEW_NOTES where either or both of FORMS and NOTES (but not
-- the sublists in NOTES) may be destructively modified to generate the return
-- values.
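-- For example (a rough illustration), append_form({"reī"}, nil, {"pūblicae"},
-- nil, "") yields {"reīpūblicae"}.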
local function append_form(forms, notes, new_forms, new_notes, prefix)
if forms == nil then
return
end
new_forms = new_forms or ""
notes = notes or {}
new_notes = new_notes or {}
prefix = prefix or ""
if type(new_forms) == "table" and #new_forms == 1 then
new_forms = new_forms[1]
end
if type(new_forms) == "string" then
-- If there's only one new form, destructively modify the existing
-- forms and notes for this new form and its footnotes.
for i = 1, #forms do
forms[i] = forms[i] .. prefix .. new_forms
if new_notes[1] then
if not notes[i] then
notes[i] = new_notes[1]
else
local combined_notes = deep_copy(notes[i])
for _, note in ipairs(new_notes[1]) do
insert(combined_notes, note)
end
notes[i] = combined_notes
end
end
end
return forms, notes
else
-- If there are multiple new forms, we need to loop over all
-- combinations of new and old forms. In that case, use new tables
-- for the combined forms and notes.
local ret_forms = {}
local ret_notes = {}
for i=1, #forms do
for j=1, #new_forms do
insert(ret_forms, forms .. prefix .. new_forms)
if new_notes then
if not notes then
-- We are constructing a linearized matrix of size
-- NI x NJ where J is in the inner loop. If I and J
-- are zero-based, the linear index of (I, J) is
-- I * NJ + J. However, we are one-based, so the
-- same formula won't work. Instead, we effectively
-- need to convert to zero-based indices, compute
-- the zero-based linear index, and then convert it
-- back to a one-based index, i.e.
--
-- (I - 1) * NJ + (J - 1) + 1
--
-- i.e. (I - 1) * NJ + J.
ret_notes = new_notes
else
local combined_notes = deep_copy(notes)
for _, note in ipairs(new_notes) do
insert(combined_notes, note)
end
ret_notes = combined_notes
end
end
end
end
return ret_forms, ret_notes
end
end
-- Destructively modify any forms in FORMS (a map from a slot to a form or a
-- list of forms) by converting sequences of ae, oe, Ae or Oe to the
-- appropriate ligatures.
local function apply_ligatures(forms, is_adj)
for slot in iter_slots(is_adj) do
if type(forms) == "string" then
forms = forms:gsub("e", ligatures)
elseif type(forms) == "table" then
for i = 1, #forms do
forms = forms:gsub("e", ligatures)
end
end
end
end
-- Modify any forms in FORMS (a map from a slot to a form or a list of forms) by
-- converting final m to optional n or m.
local function apply_sufn(forms, is_adj)
for slot in iter_slots(is_adj) do
if type(forms) == "string" then
if forms:sub(-1) == "m" then
forms = {forms:gsub("m$", "n"), forms}
end
elseif type(forms) == "table" then
-- See if there are any final m's.
local final_m
for i = 1, #forms do
if forms:sub(-1) == "m" then
final_m = true
break
end
end
if final_m then
local newval = {}
for i = 1, #forms do
if forms:sub(-1) == "m" then
insert(newval, (forms:gsub("m$", "n")))
end
insert(newval, forms)
end
forms = newval
end
end
end
end
-- If NUM == "sg", copy the singular forms to the plural ones; vice-versa if
-- NUM == "pl". This should allow for the equivalent of plural
-- "alpha and omega" formed from two singular nouns, and for the equivalent of
-- plural "St. Vincent and the Grenadines" formed from a singular noun and a
-- plural noun. (These two examples actually occur in Russian, at least.)
local function propagate_number_restrictions(forms, num, is_adj)
if num == "sg" or num == "pl" then
for slot in iter_slots(is_adj) do
if slot:find(num, nil, true) then
local other_num_slot = num == "sg" and slot:gsub("sg", "pl") or slot:gsub("pl", "sg")
forms = type(forms) == "table" and deep_copy(forms) or forms
end
end
end
end
local function join_sentences(sentences, joiner)
-- Lowercase the first letter of all but the first sentence, and remove the
-- final period from all but the last sentence. Then join together with the
-- joiner (e.g. " and " or " or ").
-- FIXME: Should we join three or more as e.g. "foo, bar and baz"?
local sentences_to_join = {}
for i, sentence in ipairs(sentences) do
if i < #sentences then
sentence = sentence:gsub("%.$", "")
end
if i > 1 then
sentence = lcfirst(sentence)
end
insert(sentences_to_join, sentence)
end
return concat(sentences_to_join, joiner)
end
-- Construct the declension of a parsed segment run of the form returned by
-- parse_segment_run() or parse_segment_run_allowing_alternants(). Return value
-- is a table
-- {
-- forms = FORMS (keyed by slot, list of forms for that slot),
-- notes = NOTES (keyed by slot, map from form indices to lists of footnotes),
-- title = TITLE (list of titles for each segment in the run),
-- categories = CATEGORIES (combined categories for all segments),
-- }
local function decline_segment_run(parsed_run, pos, is_adj)
local declensions = {
-- For each possible slot (e.g. "abl_sg"), list of possible forms.
forms = {},
-- Keyed by slot (e.g. "abl_sg"). Value is a table indicating the footnotes
-- corresponding to the forms for that slot. Each such table maps indices
-- (the index of the corresponding form) to a list of one or more
-- footnotes.
notes = {},
title = {},
unattested = {},
subtitleses = {},
orig_titles = {},
categories = {},
footnotes = {},
-- May be set true if declining a 1-1 adjective
loc = false,
noneut = false,
nomf = false,
}
for slot in iter_slots(is_adj) do
declensions.forms = {""}
end
for i, seg in ipairs(parsed_run.segments) do
local decl = seg.decl
if decl then -- not an alternant, not a constant segment
seg.loc = parsed_run.loc
seg.num = seg.num or parsed_run.num
seg.gender = seg.gender or parsed_run.gender
local data, potential_lemma_slots
if seg.is_adj then
if not (m_adj_decl or get_m_adj_decl()) then
error("Unrecognized declension '" .. decl .. "'")
end
potential_lemma_slots = potential_adj_lemma_slots
data = {
subtitles = {},
num = seg.num or "",
gender = seg.gender,
loc = seg.loc,
noneut = false,
nomf = false,
pos = is_adj and pos or "adjectives",
forms = {},
types = seg.types,
unattested = {},
categories = {},
notes = {},
}
(m_adj_decl or get_m_adj_decl())(data, seg.args)
local apparent_decl = data.decl or decl
if data.loc then
declensions.loc = true
end
if data.noneut then
declensions.noneut = true
end
if data.nomf then
declensions.nomf = true
end
-- Construct title out of "original title" and subtitles.
if not data.title then
if decl == "irreg+" and apparent_decl ~= decl and #data.subtitles == 0 then
insert(data.subtitles, glossary_link("irregular"))
end
if declension_to_english then
local english = declension_to_english
data.title = "]"
elseif apparent_decl == "irreg+" then
data.title = glossary_link("irregular")
elseif apparent_decl == "indecl+" or apparent_decl == "0+" then
data.title = glossary_link("indeclinable")
else
error("Internal error! Don't recognize adjective declension " .. apparent_decl)
end
data.title = data.title .. " " .. singularize(data.pos)
end
if data.types.sufn then
insert(data.subtitles, {"with", " ''m'' optionally → ''n'' in compounds"})
elseif data.types.not_sufn then
insert(data.subtitles, {"without", " ''m'' optionally → ''n'' in compounds"})
end
-- Record original title and subtitles for use in alternant title-constructing code.
insert(declensions.orig_titles, data.title)
if #data.subtitles > 0 then
local subtitles = {}
for _, subtitle in ipairs(data.subtitles) do
if type(subtitle) == "table" then
-- Occurs e.g. with ''idem'', ''quīdam''
insert(subtitles, concat(subtitle))
else
insert(subtitles, subtitle)
end
end
data.title = data.title .. " (" .. concat(subtitles, ", ") .. ")"
end
insert(declensions.subtitleses, data.subtitles)
else
if not (m_noun_decl or get_m_noun_decl()) then
error("Unrecognized declension '" .. decl .. "'")
end
potential_lemma_slots = potential_noun_lemma_slots
data = {
subtitles = {},
num = seg.num or "",
loc = seg.loc,
pos = pos,
forms = {},
types = seg.types,
unattested = {},
categories = {},
notes = {},
}
(m_noun_decl or get_m_noun_decl())(data, seg.args)
local apparent_decl = data.decl or decl
parsed_run.propses.headword_decl = apparent_decl
-- Construct title out of "original title" and subtitles.
if not data.title then
if decl == "irreg" and apparent_decl ~= decl and #data.subtitles == 0 then
insert(data.subtitles, glossary_link("irregular"))
end
if declension_to_english then
local english = declension_to_english
data.title = "]"
elseif apparent_decl == "irreg" then
data.title = glossary_link("irregular")
elseif apparent_decl == "indecl" or apparent_decl == "0" or apparent_decl == "sgpl" then
data.title = glossary_link("indeclinable")
else
error("Internal error! Don't recognize noun declension " .. apparent_decl)
end
data.title = data.title .. " " .. singularize(data.pos)
end
if data.types.sufn then
insert(data.subtitles, {"with", " ''m'' optionally → ''n'' in compounds"})
elseif data.types.not_sufn then
insert(data.subtitles, {"without", " ''m'' optionally → ''n'' in compounds"})
end
-- Record original title and subtitles for use in alternant title-constructing code.
insert(declensions.orig_titles, data.title)
if #data.subtitles > 0 then
local subtitles = {}
for _, subtitle in ipairs(data.subtitles) do
if type(subtitle) == "table" then
-- Occurs e.g. with 1st-declension ''-ābus'' ending where
-- we want a common prefix to be extracted out if possible
-- in the alternant title-generating code.
insert(subtitles, concat(subtitle))
else
insert(subtitles, subtitle)
end
end
data.title = data.title .. " (" .. concat(subtitles, ", ") .. ")"
end
insert(declensions.subtitleses, data.subtitles)
end
-- Generate linked variants of slots that may be the lemma.
-- If the form is the same as the lemma (with links removed),
-- substitute the original lemma (with links included).
for _, slot in ipairs(potential_lemma_slots) do
local forms = data.forms
if forms then
local linked_forms = {}
if type(forms) ~= "table" then
forms = {forms}
end
for _, form in ipairs(forms) do
if form == seg.lemma then
insert(linked_forms, seg.orig_lemma)
else
insert(linked_forms, form)
end
end
data.forms = linked_forms
end
end
if seg.types.lig then
apply_ligatures(data.forms, is_adj)
end
if seg.types.sufn then
apply_sufn(data.forms, is_adj)
end
propagate_number_restrictions(data.forms, seg.num, is_adj)
for slot in iter_slots(is_adj) do
-- 1. Select the forms to append to the existing ones.
local new_forms
if is_adj then
if not seg.is_adj then
error("Can't decline noun '" .. seg.lemma .. "' when overall term is an adjective")
end
new_forms = data.forms
if not new_forms and slot:find("_$") then
new_forms = data.forms$", "_m")]
end
elseif seg.is_adj then
if not seg.gender then
error("Declining modifying adjective " .. seg.lemma .. " but don't know gender of associated noun")
end
-- Select the appropriately gendered equivalent of the case/number
-- combination. Some adjectives won't have feminine or neuter
-- variants, though (e.g. 3-1 and 3-2 adjectives don't have a
-- distinct feminine), so in that case select the masculine.
new_forms = data.forms
or data.forms
else
new_forms = data.forms
end
-- 2. Extract the new footnotes in the format we require, which is
-- different from the format passed in by the declension functions.
local new_notes = {}
if type(new_forms) == "string" and data.notes then
new_notes = {data.notes}
elseif new_forms then
for j = 1, #new_forms do
if data.notes then
new_notes = {data.notes}
end
end
end
-- 3. Append new forms and footnotes to the existing ones.
new_forms = normalize_form(new_forms)
if new_forms == nil then
declensions.forms = nil
declensions.notes = nil
else
declensions.forms, declensions.notes = append_form(
declensions.forms, declensions.notes, new_forms,
new_notes, slot:find("linked", nil, true) and seg.orig_prefix or seg.prefix)
end
end
for slot, v in pairs(data.unattested) do
if v then
declensions.unattested = true
end
end
if not seg.types.nocat and (is_adj or not seg.is_adj) then
for _, cat in ipairs(data.categories) do
insert_if_not(declensions.categories, cat)
end
end
if data.footnote then
insert(declensions.footnotes, data.footnote)
end
if seg.prefix ~= "" and seg.prefix ~= "-" and seg.prefix ~= " " then
insert(declensions.title, glossary_link("indeclinable") .. " portion")
end
insert(declensions.title, data.title)
elseif seg.alternants then
local seg_declensions = nil
local seg_titles = {}
local seg_subtitleses = {}
local seg_stems_seen = {}
local seg_unattested = {}
local seg_categories = {}
local seg_footnotes = {}
-- If all alternants have exactly one non-constant segment and all are
-- of the same declension, we use special code that displays the
-- differences in the subtitles. Otherwise we use more general code
-- that displays the full title and subtitles of each segment,
-- separating segment combined titles by "and" and the segment-run
-- combined titles by "or".
local title_the_hard_way = false
local alternant_decl = nil
local alternant_decl_title = nil
for _, this_parsed_run in ipairs(seg.alternants) do
local num_non_constant_segments = 0
for _, segment in ipairs(this_parsed_run.segments) do
if segment.decl then
if not alternant_decl then
alternant_decl = segment.decl
elseif alternant_decl ~= segment.decl then
title_the_hard_way = true
num_non_constant_segments = 500
break
end
num_non_constant_segments = num_non_constant_segments + 1
end
end
if num_non_constant_segments ~= 1 then
title_the_hard_way = true
break
end
end
if not title_the_hard_way then
-- If using the special-purpose code, find the subtypes that are
-- not present in a given alternant but are present in at least
-- one other, and record "negative" variants of these subtypes
-- so that the declension-construction code can record subtitles
-- for these negative variants (so we can construct text like
-- "i-stem or imparisyllabic non-i-stem").
local subtypeses = {}
for _, this_parsed_run in ipairs(seg.alternants) do
for _, segment in ipairs(this_parsed_run.segments) do
if segment.decl then
insert(subtypeses, segment.types)
insert_if_not(seg_stems_seen, segment.stem2)
end
end
end
local union = set_union(subtypeses)
for _, this_parsed_run in ipairs(seg.alternants) do
for _, segment in ipairs(this_parsed_run.segments) do
if segment.decl then
local neg_subtypes = set_difference(union, segment.types)
for neg_subtype, _ in pairs(neg_subtypes) do
segment.types = true
end
end
end
end
end
for _, this_parsed_run in ipairs(seg.alternants) do
this_parsed_run.loc = seg.loc
this_parsed_run.num = this_parsed_run.num or seg.num
this_parsed_run.gender = this_parsed_run.gender or seg.gender
local this_declensions = decline_segment_run(this_parsed_run, pos, is_adj)
if this_declensions.noneut then
declensions.noneut = true
end
if this_declensions.nomf then
declensions.nomf = true
end
-- If there's a number restriction on the segment run, blank
-- out the forms outside the restriction. This allows us to
-- e.g. construct heteroclites that decline one way in the
-- singular and a different way in the plural.
if this_parsed_run.num == "sg" or this_parsed_run.num == "pl" then
for slot in iter_slots(is_adj) do
if this_parsed_run.num == "sg" and slot:find("pl", nil, true) or
this_parsed_run.num == "pl" and slot:find("sg", nil, true) then
this_declensions.forms = {}
this_declensions.notes = nil
end
end
end
if not seg_declensions then
seg_declensions = this_declensions
else
for slot in iter_slots(is_adj) do
-- For a given slot, combine the existing and new forms.
-- We do this by checking to see whether a new form is
-- already present and not adding it if so; in the
-- process, we keep a map from indices in the new forms
-- to indices in the combined forms, for use in
-- combining footnotes below.
local curforms = seg_declensions.forms or {}
local newforms = this_declensions.forms or {}
local newform_index_to_new_index = {}
for newj, form in ipairs(newforms) do
local did_break = false
for j = 1, #curforms do
if curforms == form then
newform_index_to_new_index = j
did_break = true
break
end
end
if not did_break then
insert(curforms, form)
newform_index_to_new_index = #curforms
end
end
seg_declensions.forms = curforms
-- Now combine the footnotes. Keep in mind that
-- each form may have its own set of footnotes, and
-- in some cases we didn't add a form from the new
-- list of forms because it already occurred in the
-- existing list of forms; in that case, we combine
-- footnotes from the two sources.
local curnotes = seg_declensions.notes
local newnotes = this_declensions.notes
if newnotes then
if not curnotes then
curnotes = {}
end
for index, notes in pairs(newnotes) do
local combined_index = newform_index_to_new_index
if not curnotes then
curnotes = notes
else
local combined = mw.clone(curnotes)
for _, note in ipairs(newnotes) do
insert_if_not(combined, note)
end
curnotes = combined
end
end
end
end
end
for slot, v in pairs(this_declensions.unattested) do
if v then
seg_unattested = true
end
end
for _, cat in ipairs(this_declensions.categories) do
insert_if_not(seg_categories, cat)
end
for _, footnote in ipairs(this_declensions.footnotes) do
insert_if_not(seg_footnotes, footnote)
end
insert_if_not(seg_titles, this_declensions.title)
for _, subtitles in ipairs(this_declensions.subtitleses) do
insert(seg_subtitleses, subtitles)
end
if not alternant_decl_title then
alternant_decl_title = this_declensions.orig_titles
end
end
-- If overall run is singular, copy singular to plural, and
-- vice-versa. See propagate_number_restrictions() for rationale;
-- also, this should eliminate cases of empty forms, which will
-- cause the overall set of forms for that slot to be empty.
propagate_number_restrictions(seg_declensions.forms, parsed_run.num,
is_adj)
for slot in iter_slots(is_adj) do
local new_forms = normalize_form(seg_declensions.forms)
if new_forms == nil then
declensions.forms = nil
declensions.notes = nil
else
declensions.forms, declensions.notes = append_form(
declensions.forms, declensions.notes,
new_forms, seg_declensions.notes, nil)
end
end
for slot, v in pairs(seg_unattested) do
if v then
declensions.unattested = true
end
end
if is_adj or not seg.is_adj then
for _, cat in ipairs(seg_categories) do
insert_if_not(declensions.categories, cat)
end
end
for _, footnote in ipairs(seg_footnotes) do
insert_if_not(declensions.footnotes, footnote)
end
local title_to_insert
if title_the_hard_way then
title_to_insert = join_sentences(seg_titles, " or ")
else
-- Special-purpose title-generation code, for the common
-- situation where each alternant has single-segment runs and
-- all segments belong to the same declension.
--
-- 1. Find the initial subtitles common to all segments.
local first_subtitles = seg_subtitleses
local num_common_subtitles = #first_subtitles
for j = 2, #seg_subtitleses do
local this_subtitles = seg_subtitleses
for k = 1, num_common_subtitles do
if not deep_equals(first_subtitles, this_subtitles) then
num_common_subtitles = k - 1
break
end
end
end
-- 2. Construct the portion of the text based on the common subtitles.
local common_subtitles = {}
for j = 1, num_common_subtitles do
if type(first_subtitles) == "table" then
insert(common_subtitles, concat(first_subtitles))
else
insert(common_subtitles, first_subtitles)
end
end
local common_subtitle_portion = concat(common_subtitles, ", ")
local non_common_subtitle_portion
-- 3. Special-case the situation where there's one non-common
-- subtitle in each segment and a common prefix or suffix to
-- all of them.
local common_prefix, common_suffix
for j = 1, #seg_subtitleses do
local this_subtitles = seg_subtitleses
if #this_subtitles ~= num_common_subtitles + 1 or
type(this_subtitles) ~= "table" or
#this_subtitles ~= 2 then
break
end
if j == 1 then
common_prefix = this_subtitles
common_suffix = this_subtitles
else
local this_prefix = this_subtitles
local this_suffix = this_subtitles
if this_prefix ~= common_prefix then
common_prefix = nil
end
if this_suffix ~= common_suffix then
common_suffix = nil
end
if not common_prefix and not common_suffix then
break
end
end
end
if common_prefix or common_suffix then
if common_prefix and common_suffix then
error("Something is wrong, first non-common subtitle is actually common to all segments")
end
if common_prefix then
local non_common_parts = {}
for j = 1, #seg_subtitleses do
insert(non_common_parts, seg_subtitleses)
end
non_common_subtitle_portion = common_prefix .. concat(non_common_parts, " or ")
else
local non_common_parts = {}
for j = 1, #seg_subtitleses do
insert(non_common_parts, seg_subtitleses)
end
non_common_subtitle_portion = concat(non_common_parts, " or ") .. common_suffix
end
else
-- 4. Join the subtitles that differ from segment to segment.
-- Record whether there are any such differing subtitles.
-- If some segments have differing subtitles and others don't,
-- we use the text "otherwise" for the segments without
-- differing subtitles.
local saw_non_common_subtitles = false
local non_common_subtitles = {}
for j = 1, #seg_subtitleses do
local this_subtitles = seg_subtitleses
local this_non_common_subtitles = {}
for k = num_common_subtitles + 1, #this_subtitles do
if type(this_subtitles) == "table" then
insert(this_non_common_subtitles, concat(this_subtitles))
else
insert(this_non_common_subtitles, this_subtitles)
end
end
if #this_non_common_subtitles > 0 then
insert(non_common_subtitles, concat(this_non_common_subtitles, ", "))
saw_non_common_subtitles = true
else
insert(non_common_subtitles, "otherwise")
end
end
non_common_subtitle_portion =
saw_non_common_subtitles and concat(non_common_subtitles, " or ") or ""
end
-- 5. Combine the common and non-common subtitle portions.
local subtitle_portions = {}
if common_subtitle_portion ~= "" then
insert(subtitle_portions, common_subtitle_portion)
end
if non_common_subtitle_portion ~= "" then
insert(subtitle_portions, non_common_subtitle_portion)
end
if #seg_stems_seen > 1 then
insert(subtitle_portions,
(number_to_english or "" .. #seg_stems_seen) .. " different stems"
)
end
local subtitle_portion = concat(subtitle_portions, "; ")
if subtitle_portion ~= "" then
title_to_insert = alternant_decl_title .. " (" .. subtitle_portion .. ")"
else
title_to_insert = alternant_decl_title
end
end
-- Don't insert blank title (happens e.g. with "((ali))quis<irreg+>").
if title_to_insert ~= "" then
insert(declensions.title, title_to_insert)
end
else
for slot in iter_slots(is_adj) do
declensions.forms, declensions.notes = append_form(
declensions.forms, declensions.notes,
slot:find("linked", nil, true) and seg.orig_prefix or seg.prefix)
end
insert(declensions.title, glossary_link("indeclinable") .. " portion")
end
end
-- First title is uppercase, remainder have an indefinite article, joined
-- using "with".
local titles = {}
for i, title in ipairs(declensions.title) do
if i == 1 then
insert(titles, ucfirst(title))
else
insert(titles, add_indefinite_article(title))
end
end
declensions.title = concat(titles, " with ")
return declensions
end
local function construct_title(args_title, declensions_title, generate_type, parsed_run)
if args_title then
declensions_title = args_title:gsub("<1>", "]")
declensions_title = declensions_title:gsub("<1&2>", "]/]")
declensions_title = declensions_title:gsub("<2>", "]")
declensions_title = declensions_title:gsub("<3>", "]")
declensions_title = declensions_title:gsub("<4>", "]")
declensions_title = declensions_title:gsub("<5>", "]")
if generate_type == "headword" then
declensions_title = lcfirst((declensions_title:gsub("%.$", "")))
else
declensions_title = ucfirst(declensions_title)
end
else
local post_text_parts = {}
if parsed_run.loc then
insert(post_text_parts, ", with locative")
end
if parsed_run.num == "sg" then
insert(post_text_parts, ", singular only")
elseif parsed_run.num == "pl" then
insert(post_text_parts, ", plural only")
end
local post_text = concat(post_text_parts)
if generate_type == "headword" then
declensions_title = lcfirst(declensions_title) .. post_text
else
declensions_title = ucfirst(declensions_title) .. post_text .. "."
end
end
return declensions_title
end
function export.do_generate_noun_forms(parent_args, pos, generate_type, def)
local params = {
= {required = true, default = def or "aqua<1>"},
footnote = true,
title = true,
num = true,
json = {type = "boolean"},
}
for slot in iter_noun_slots() do
params = true
end
if generate_type == "headword" then
local list = {list = true}
local sublist = {sublist = "/"}
params.lemma = list
params.id = true
params.cat = list
params.m = sublist
params.f = sublist
params.g = list
params.indecl = {type = "boolean"}
end
if pos == "numerals" then
params = true
end
local args = process_params(parent_args, params)
if args.title then
track("overriding-title")
end
local parsed_run = parse_segment_run_allowing_alternants(args)
parsed_run.loc = parsed_run.loc or not not (args.loc_sg or args.loc_pl)
parsed_run.num = args.num or parsed_run.num
local declensions = decline_segment_run(parsed_run, pos, false)
if not parsed_run.loc then
declensions.forms.loc_sg = nil
declensions.forms.loc_pl = nil
end
declensions.title = construct_title(args.title, declensions.title, generate_type, parsed_run)
local all_data = {
title = declensions.title,
footnotes = {},
num = parsed_run.num or "",
gender = parsed_run.gender,
propses = parsed_run.propses,
forms = declensions.forms,
unattested = declensions.unattested,
categories = declensions.categories,
notes = {},
user_specified = {},
overriding_lemma = args.lemma,
id = args.id,
pos = pos,
cat = args.cat,
indecl = args.indecl,
m = args.m,
f = args.f,
overriding_genders = args.g,
num_type = args,
}
if generate_type ~= "bare" then
all_data.accel = {}
end
if args.footnote then
insert_if_not(all_data.footnotes, args.footnote)
end
for _, footnote in ipairs(declensions.footnotes) do
insert_if_not(all_data.footnotes, footnote)
end
for slot in iter_noun_slots() do
if declensions.notes then
for index, notes in pairs(declensions.notes) do
all_data.notes = notes
end
end
end
process_noun_forms_and_overrides(all_data, args, generate_type)
if args.json then
return require(json_module).toJSON(all_data)
end
return all_data
end
function export.do_generate_adj_forms(parent_args, pos, generate_type, degree, def)
local boolean = {type = "boolean"}
local params = {
= {required = true, default = def or "bonus"},
footnote = true,
title = true,
num = true,
noneut = boolean,
nomf = boolean,
json = boolean,
}
for slot in iter_adj_slots() do
params = true
end
if generate_type == "headword" then
local list = {list = true}
local sublist = {sublist = "/"}
params.lemma = list
params.adv = sublist
params.id = true
params.cat = list
params.indecl = boolean
if degree == "comparative" or degree == "superlative" then
params.positive = sublist
end
if degree ~= "comparative" then
params.comp = sublist
end
if degree ~= "superlative" then
params.sup = sublist
end
end
if pos == "numerals" then
params = true
end
local args = process_params(parent_args, params)
if args.title then
track("overriding-title")
end
local segment_run = args
if not segment_run:match("") then
-- If the segment run doesn't have any explicit declension specs or alternants,
-- add a default declension spec of <+> to it (or <0+> for indeclinable
-- adjectives). This allows the majority of adjectives to just specify
-- the lemma.
segment_run = segment_run .. (args.indecl and "<0+>" or "<+>")
end
local parsed_run = parse_segment_run_allowing_alternants(segment_run)
parsed_run.loc = parsed_run.loc or not not (
args.loc_sg_m or args.loc_sg_f or args.loc_sg_n or args.loc_pl_m or args.loc_pl_f or args.loc_pl_n
)
parsed_run.num = args.num or parsed_run.num
local declensions = decline_segment_run(parsed_run, pos, true)
if not parsed_run.loc then
declensions.forms.loc_sg_m = nil
declensions.forms.loc_sg_f = nil
declensions.forms.loc_sg_n = nil
declensions.forms.loc_pl_m = nil
declensions.forms.loc_pl_f = nil
declensions.forms.loc_pl_n = nil
end
declensions.title = construct_title(args.title, declensions.title, generate_type, parsed_run)
local all_data = {
title = declensions.title,
footnotes = {},
num = parsed_run.num or "",
propses = parsed_run.propses,
forms = declensions.forms,
unattested = declensions.unattested,
categories = declensions.categories,
notes = {},
user_specified = {},
accel = {},
loc = declensions.loc,
noneut = args.noneut or declensions.noneut,
nomf = args.nomf or declensions.nomf,
overriding_lemma = args.lemma,
positive = args.positive,
comp = args.comp,
sup = args.sup,
adv = args.adv,
id = args.id,
pos = pos,
cat = args.cat,
indecl = args.indecl,
num_type = args,
}
if generate_type ~= "bare" then
all_data.accel = {}
end
if args.footnote then
insert_if_not(all_data.footnotes, args.footnote)
end
for _, footnote in ipairs(declensions.footnotes) do
insert_if_not(all_data.footnotes, footnote)
end
for slot in iter_adj_slots() do
if declensions.notes then
for index, notes in pairs(declensions.notes) do
all_data.notes = notes
end
end
end
process_adj_forms_and_overrides(all_data, args, generate_type)
if args.json then
return require(json_module).toJSON(all_data)
end
return all_data
end
function export.show_noun(frame)
local parent_args = frame:getParent().args
local data = export.do_generate_noun_forms(parent_args, "nouns")
if type(data) == "string" then -- JSON
return data
end
show_forms(data, false)
local num = data.num
if num == "sg" then
return make_noun_table_sg(data)
elseif num == "pl" then
return make_noun_table_pl(data)
end
return make_noun_table(data)
end
function export.show_adj(frame)
local parent_args = frame:getParent().args
local data = export.do_generate_adj_forms(parent_args, "adjectives")
if type(data) == "string" then -- JSON
return data
end
partial_show_forms(data, true)
return make_adj_table(data)
end
return export