local export = {}
--[=[
Authorship: Ben Wing <benwing2>, with many ideas and a little code coming from
the old [[Module:la-noun]] by KC Kenny Lau.
]=]
-- TODO:
-- (DONE) Eliminate specification of noteindex from la-adj/data
-- (DONE?) Finish autodetection of adjectives
-- (DONE) Remove old noun code
-- (DONE) Implement <.sufn>
-- (DONE) Look into adj voc=false
-- (DONE) Handle loc in adjectives
-- Error on bad subtypes
-- Make sure Google Books link still works.
-- (DONE) Make sure .sufn triggers insertion of 'with m optionally -> n in compounds' in title.
-- (DONE) Make sure title returned to la-adj lowercases the first letter even with a custom title.
--[=[
TERMINOLOGY:
-- "slot" = A particular case/number combination (for nouns) or
case/number/gender combination (for adjectives). Example slot names are
"abl_sg" (for noun) or "acc_pl_f" (for adjectives). Each slot is filled
with zero or more forms.
-- "form" = The declined Latin form representing the value of a given slot.
For example, rēge is a form, representing the value of the abl_sg slot of
the lemma rēx.
-- "lemma" = The dictionary form of a given Latin term. For nouns, it's
generally the nominative singular, but will be the nominative plural of
plurale tantum nouns (e.g. [[castra]]), and may occasionally be another
form (e.g. the genitive singular) if the nominative singular is missing.
For adjectives, it's generally the masculine nominative singular, but
will be the masculine nominative plural of plurale tantum adjectives
(e.g. [[dēnī]]).
-- "plurale tantum" (plural "pluralia tantum") = A noun or adjective that
exists only in the plural. Examples are castra "army camp", faucēs "throat",
and dēnī "ten each" (used for counting pluralia tantum nouns).
-- "singulare tantum" (plural "singularia tantum") = A noun or adjective that
exists only in the singular. Examples are geōlogia "geology" (and in
general most non-count nouns) and the adjective ūnus "one".
]=]
local debug_track_module = "Module:debug/track"
local en_utilities_module = "Module:en-utilities"
local headword_data_module = "Module:headword/data"
local json_module = "Module:JSON"
local la_adj_data_module = "Module:la-adj/data"
local la_adj_table_module = "Module:la-adj/table"
local la_noun_data_module = "Module:la-noun/data"
local la_noun_table_module = "Module:la-noun/table"
local la_utilities_module = "Module:la-utilities"
local languages_module = "Module:languages"
local links_module = "Module:links"
local load_module = "Module:load"
local parameters_module = "Module:parameters"
local string_utilities_module = "Module:string utilities"
local table_module = "Module:table"
local concat = table.concat
local insert = table.insert
local iter_adj_slots -- defined below
local iter_noun_slots -- defined below
local umatch = mw.ustring.match
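-- Lazy-loading wrappers: each of the following functions requires its target
-- module only on first call and then replaces itself with the real function,
-- so later calls go straight through without another require().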
local function add_indefinite_article(...)
add_indefinite_article = require(en_utilities_module).add_indefinite_article
return add_indefinite_article(...)
end
local function contains(...)
contains = require(table_module).contains
return contains(...)
end
local function debug_track(...)
debug_track = require(debug_track_module)
return debug_track(...)
end
local function deep_copy(...)
deep_copy = require(table_module).deepCopy
return deep_copy(...)
end
local function deep_equals(...)
deep_equals = require(table_module).deepEquals
return deep_equals(...)
end
local function full_link(...)
full_link = require(links_module).full_link
return full_link(...)
end
local function insert_if_not(...)
insert_if_not = require(table_module).insertIfNot
return insert_if_not(...)
end
local function lcfirst(...)
lcfirst = require(string_utilities_module).lcfirst
return lcfirst(...)
end
local function load_data(...)
load_data = require(load_module).load_data
return load_data(...)
end
local function make_adj_table(...)
make_adj_table = require(la_adj_table_module).make_table
return make_adj_table(...)
end
local function make_noun_table(...)
make_noun_table = require(la_noun_table_module).make_table
return make_noun_table(...)
end
local function make_noun_table_sg(...)
make_noun_table_sg = require(la_noun_table_module).make_table_sg
return make_noun_table_sg(...)
end
local function make_noun_table_pl(...)
make_noun_table_pl = require(la_noun_table_module).make_table_pl
return make_noun_table_pl(...)
end
local function make_stem2(...)
make_stem2 = require(la_utilities_module).make_stem2
return make_stem2(...)
end
local function normalize_form(...)
normalize_form = require(la_utilities_module).normalize_form
return normalize_form(...)
end
local function process_params(...)
process_params = require(parameters_module).process
return process_params(...)
end
local function remove_links(...)
remove_links = require(links_module).remove_links
return remove_links(...)
end
local function singularize(...)
singularize = require(en_utilities_module).singularize
return singularize(...)
end
local function split(...)
split = require(string_utilities_module).split
return split(...)
end
local function ucfirst(...)
ucfirst = require(string_utilities_module).ucfirst
return ucfirst(...)
end
local m_adj_decl
local function get_m_adj_decl()
m_adj_decl, get_m_adj_decl = require(la_adj_data_module), nil
return m_adj_decl
end
local m_noun_decl
local function get_m_noun_decl()
m_noun_decl, get_m_noun_decl = require(la_noun_data_module), nil
return m_noun_decl
end
local lang
local function get_lang()
lang, get_lang = require(languages_module).getByCode("la")
return lang
end
local namespace
local function get_namespace()
namespace, get_namespace = load_data(headword_data_module).page.namespace, nil
return namespace
end
local pagename
local function get_pagename()
pagename, get_pagename = load_data(headword_data_module).pagename, nil
return pagename
end
local ligatures = {
['Ae'] = 'Æ',
['ae'] = 'æ',
['Oe'] = 'Œ',
['oe'] = 'œ',
}
local cases = {
"nom", "gen", "acc", "dat", "abl", "voc", "loc"
}
local cases_n = #cases
local nums = {
"sg", "pl"
}
local nums_n = #nums
local genders = {
"m", "f", "n"
}
local genders_n = #genders
local declension_to_english = setmetatable({
= "first",
= "first and second",
= "second",
= "third",
= "fourth",
= "fifth",
}, {
__index = function(t, k)
return rawget(t, k:match("^[0-9&]*"))
end
})
local number_to_english = {
"one", "two", "three", "four", "five"
}
local linked_prefixes = {
"", "linked_"
}
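-- Iterate over the noun slots that can potentially serve as the lemma
-- (nom, gen and acc crossed with sg and pl); these are the slots for which
-- "linked_" variants are generated below.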
function export.iter_potential_noun_lemma_slots()
local num, case = 1, 0
return function()
case = case + 1
if case > 3 then
case = 1
num = num + 1
if num > nums_n then
return nil
end
end
return cases[case] .. "_" .. nums[num]
end
end
local potential_noun_lemma_slots = {}
for slot in export.iter_potential_noun_lemma_slots() do
insert(potential_noun_lemma_slots, slot)
end
local linked_to_non_linked_noun_slots = {}
for _, slot in ipairs(potential_noun_lemma_slots) do
linked_to_non_linked_noun_slots["linked_" .. slot] = slot
end
-- Iterate over all the "slots" associated with a noun declension, where a slot
-- is a particular case/number combination. If overridable_only, don't include the
-- "linked_" variants (linked_nom_sg, linked_nom_pl), which aren't overridable.
function export.iter_noun_slots(overridable_only)
local case, num, linked_variant = 1, 1, 0
return function()
linked_variant = linked_variant + 1
local max_linked_variant = (overridable_only or case > 3) and 1 or 2
if linked_variant > max_linked_variant then
linked_variant = 1
num = num + 1
if num > nums_n then
num = 1
case = case + 1
if case > cases_n then
return nil
end
end
end
return linked_prefixes[linked_variant] .. cases[case] .. "_" .. nums[num]
end
end
iter_noun_slots = export.iter_noun_slots
function export.iter_potential_adj_lemma_slots()
local num, case, gen = 1, 1, 0
return function()
gen = gen + 1
if gen > genders_n then
gen = 1
case = case + 1
if case > 3 then
case = 1
num = num + 1
if num > nums_n then
return nil
end
end
end
return cases[case] .. "_" .. nums[num] .. "_" .. genders[gen]
end
end
-- List of adjective slots for which we generate linked variants. Include
-- feminine and neuter variants because they will be needed if the adjective
-- is part of a multiword feminine or neuter noun.
local potential_adj_lemma_slots = {}
for slot in export.iter_potential_adj_lemma_slots() do
insert(potential_adj_lemma_slots, slot)
end
local linked_to_non_linked_adj_slots = {}
for _, slot in ipairs(potential_adj_lemma_slots) do
linked_to_non_linked_adj_slots["linked_" .. slot] = slot
end
-- Iterate over all the "slots" associated with an adjective declension, where a slot
-- is a particular case/number/gender combination. If overridable_only, don't include the
-- "linked_" variants (linked_nom_sg_m, linked_nom_pl_m, etc.), which aren't overridable.
function export.iter_adj_slots(overridable_only)
local case, num, gen, linked_variant = 1, 1, 1, 0
return function()
linked_variant = linked_variant + 1
local max_linked_variant = (overridable_only or case > 3) and 1 or 2
if linked_variant > max_linked_variant then
linked_variant = 1
gen = gen + 1
if gen > genders_n then
gen = 1
num = num + 1
if num > nums_n then
num = 1
case = case + 1
if case > cases_n then
return nil
end
end
end
end
return linked_prefixes[linked_variant] .. cases[case] .. "_" .. nums[num] .. "_" .. genders[gen]
end
end
iter_adj_slots = export.iter_adj_slots
-- Iterate over all the "slots" associated with a noun or adjective declension (depending on
-- the value of IS_ADJ), where a slot is a particular case/number combination (in the case of
-- nouns) or case/number/gender combination (in the case of adjectives). If OVERRIDABLE_ONLY
-- is specified, only include overridable slots (not including linked_ variants).
local function iter_slots(is_adj, overridable_only)
if is_adj then
return iter_adj_slots(overridable_only)
end
return iter_noun_slots(overridable_only)
end
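-- Join the forms in a single slot into a comma-separated string, with any
-- literal | escaped as <!>; returns nil if the slot has no forms.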
local function concat_forms_in_slot(forms)
if forms and forms ~= "" and forms ~= "—" and #forms > 0 then
local new_vals = {}
for _, v in ipairs(forms) do
insert(new_vals, (v:gsub("|", "<!>")))
end
return concat(new_vals, ",")
end
end
local function glossary_link(anchor, text)
text = text or anchor
return "]"
end
local function track(page)
debug_track("la-nominal/" .. page)
return true
end
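-- Return the union of a list of sets (tables whose keys are the set members).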
local function set_union(sets)
local union = {}
for _, set in ipairs(sets) do
for key, _ in pairs(set) do
union[key] = true
end
end
return union
end
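-- Return the set of keys in SET1 that are not in SET2.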
local function set_difference(set1, set2)
local diff = {}
for key, _ in pairs(set1) do
if not set2[key] then
diff[key] = true
end
end
return diff
end
-- If a form is set as '*', that means it is unattested
-- but should still be generated
-- TODO: handle asterisks in forms stored in the data
local function unattested_forms(data, args, is_adj)
for slot in iter_slots(is_adj) do
local arg = args[slot]
if arg ~= nil then
arg = arg:match("^*(.*)")
if arg then
data.unattested[slot] = true
args[slot] = arg ~= "" and arg or nil
end
end
end
end
-- Make a link only if the form is attested
local function link_if_attested(form, accel, is_unattested)
local data = {lang = lang or get_lang()}
if is_unattested then
data.alt = "*" .. form
else
data.term = form
data.accel = accel
end
return full_link(data)
end
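-- Canonicalize the forms for a single slot and apply any user-specified
-- override from ARGS. An override may contain several forms separated by "/",
-- and "" / "-" / "—" cancels the slot. An override of a plain slot (e.g.
-- nom_sg) is also applied to its linked_ variant (e.g. linked_nom_sg), as
-- mapped by LINKED_TO_NON_LINKED.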
local function process_form(slot, data, args, linked_to_non_linked)
local forms = data.forms
-- If nomf=1 passed, clear out all masculine and feminine forms.
if data.nomf and slot:match("%f[%a][mf]%f[%A]") then
forms[slot] = nil
end
-- If noneut=1 passed, clear out all neuter forms.
if data.noneut and slot:match("%f[%a]n%f[%A]") then
forms[slot] = nil
end
local val
if args[slot] then
val = args[slot]
data.user_specified[slot] = true
else
-- Overriding nom_sg/nom_sg_m etc. should override linked_nom_sg
-- so that the correct value gets displayed in the headword, which
-- uses linked_nom_sg.
local non_linked_equiv_slot = linked_to_non_linked[slot]
if non_linked_equiv_slot and args[non_linked_equiv_slot] then
val = args[non_linked_equiv_slot]
data.user_specified[slot] = true
else
val = forms[slot]
end
end
if val then
if type(val) == "string" then
val = split(val, "/", true, true)
end
local num = data.num
if (
(num == "pl" and slot:find("sg", nil, true)) or
(num == "sg" and slot:find("pl", nil, true))
) then
forms[slot] = nil
elseif val[1] == "" or val[1] == "-" or val[1] == "—" then
forms[slot] = "—"
if val[2] then
error("Cannot specify additional forms for " .. slot .. ' if it has been cancelled with "-"')
end
else
forms[slot] = val
end
end
end
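-- Process user overrides for all noun slots, set up accelerator entries for
-- each form, and add a category if any generated form is a red link in
-- mainspace. If generate_type == "bare", skip the accelerator and red-link
-- handling.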
local function process_noun_forms_and_overrides(data, args, generate_type)
local redlink = false
unattested_forms(data, args);
-- Process overrides and canonicalize forms.
for slot in iter_noun_slots() do
process_form(slot, data, args, linked_to_non_linked_noun_slots)
end
-- No accel forms or red link checking if generate_type == "bare".
if generate_type == "bare" then
return
end
-- Compute the lemma for accelerators. Do this after processing
-- overrides in case we overrode the lemma form(s).
local accel_lemma
if data.num and data.num ~= "" then
accel_lemma = data.forms["nom_" .. data.num]
else
accel_lemma = data.forms["nom_sg"]
end
if type(accel_lemma) == "table" then
accel_lemma = accel_lemma[1]
end
-- Set the accelerators, and determine if there are red links.
for slot in iter_noun_slots() do
local val = data.forms[slot]
if val and val ~= "" and val ~= "—" and #val > 0 then
for _, form in ipairs(val) do
local accel_form = slot
accel_form = accel_form:gsub("_([sp][gl])$", "|%1")
data.accel[slot] = {form = accel_form, lemma = accel_lemma}
if not redlink and (namespace or get_namespace()) == "" then
local title = ((lang or get_lang()):makeEntryName(form))
local t = mw.title.new(title)
if t and not t.exists then
insert(data.categories, "Latin " .. data.pos .. " with red links in their inflection tables")
redlink = true
end
end
end
end
end
end
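-- Adjective counterpart of process_noun_forms_and_overrides(). In addition,
-- if the feminine and/or neuter forms turn out to be identical to the
-- masculine across all slots, they are blanked so that a combined table can
-- be shown.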
local function process_adj_forms_and_overrides(data, args, generate_type)
local redlink = false
unattested_forms(data, args, true)
-- Process overrides and canonicalize forms.
for slot in iter_adj_slots() do
process_form(slot, data, args, linked_to_non_linked_adj_slots)
end
-- See if the masculine and feminine/neuter are the same across all slots.
-- If so, blank out the feminine/neuter so we use a table that combines
-- masculine and feminine, or masculine/feminine/neuter.
for _, gender in ipairs({"f", "n"}) do
local other_is_masc = true
for _, case in ipairs(cases) do
for _, num in ipairs(nums) do
if not deep_equals(data.forms[case .. "_" .. num .. "_" .. gender],
data.forms[case .. "_" .. num .. "_m"]) then
other_is_masc = false
break
end
end
if not other_is_masc then
break
end
end
if other_is_masc then
for _, case in ipairs(cases) do
for _, num in ipairs(nums) do
data.forms[case .. "_" .. num .. "_" .. gender] = nil
end
end
end
end
-- No accel forms or red link checking if generate_type == "bare".
if generate_type == "bare" then
return
end
-- Compute the lemma for accelerators. Do this after processing
-- overrides in case we overrode the lemma form(s).
local accel_lemma, accel_lemma_f
if data.num and data.num ~= "" then
accel_lemma = data.forms["nom_" .. data.num .. "_m"]
accel_lemma_f = data.forms["nom_" .. data.num .. "_f"]
else
accel_lemma = data.forms["nom_sg_m"]
accel_lemma_f = data.forms["nom_sg_f"]
end
if type(accel_lemma) == "table" then
accel_lemma = accel_lemma[1]
end
if type(accel_lemma_f) == "table" then
accel_lemma_f = accel_lemma_f[1]
end
-- Set the accelerators, and determine if there are red links.
for slot in iter_adj_slots() do
local val = data.forms[slot]
if val and val ~= "" and val ~= "—" and #val > 0 then
for _, form in ipairs(val) do
local accel_form = slot
accel_form = accel_form:gsub("_([sp][gl])_", "|%1|")
if data.noneut then
-- If noneut=1, we're being asked to do a noun like
-- Aquītānus or Rōmānus that has masculine and feminine
-- variants, not an adjective. In that case, make the
-- accelerators correspond to nominal case/number forms
-- without the gender, and use the feminine as the
-- lemma for feminine forms.
if slot:find("_f", nil, true) then
data.accel[slot] = {form = accel_form:gsub("|f$", ""), lemma = accel_lemma_f}
else
data.accel[slot] = {form = accel_form:gsub("|m$", ""), lemma = accel_lemma}
end
else
if not data.forms.nom_sg_n and not data.forms.nom_pl_n then
-- use multipart tags if called for
accel_form = accel_form:gsub("|m$", "|m//f//n")
elseif not data.forms.nom_sg_f and not data.forms.nom_pl_f then
accel_form = accel_form:gsub("|m$", "|m//f")
end
-- use the order nom|m|s, which is more standard than nom|s|m
accel_form = accel_form:gsub("|(.-)|(.-)$", "|%2|%1")
data.accel[slot] = {form = accel_form, lemma = accel_lemma}
end
if not redlink and (namespace or get_namespace()) == "" then
local title = ((lang or get_lang()):makeEntryName(form))
local t = mw.title.new(title)
if t and not t.exists then
insert(data.categories, "Latin " .. data.pos .. " with red links in their inflection tables")
redlink = true
end
end
end
end
end
end
-- Convert data.forms for all slots into displayable text. This is
-- an older function, still currently used for nouns but not for adjectives.
-- For adjectives, the adjective table module has special code to combine
-- adjacent slots, and needs the original forms plus other text that will
-- go into the displayable text for the slot; this is handled below by
-- partial_show_forms() and finish_show_form().
local function show_forms(data, is_adj)
local noteindex = 1
local notes = {}
local seen_notes = {}
for slot in iter_slots(is_adj) do
local val = data.forms[slot]
if val and val ~= "" and val ~= "—" then
for i, form in ipairs(val) do
local link = link_if_attested(form, data.accel[slot], data.unattested[slot])
local this_notes = data.notes[slot .. i]
if this_notes and not data.user_specified[slot] then
if type(this_notes) == "string" then
this_notes = {this_notes}
end
local link_indices = {}
for _, this_note in ipairs(this_notes) do
local this_noteindex = seen_notes[this_note]
if not this_noteindex then
-- Generate a footnote index.
this_noteindex = noteindex
noteindex = noteindex + 1
insert(notes, '<sup style="color: red">' .. this_noteindex .. '</sup>' .. this_note)
seen_notes[this_note] = this_noteindex
end
insert_if_not(link_indices, this_noteindex)
end
val[i] = link .. '<sup style="color: red">' .. concat(link_indices, ",") .. '</sup>'
else
val[i] = link
end
end
-- FIXME, do we want this difference?
data.forms[slot] = concat(val, is_adj and ", " or "<br />")
end
end
for _, footnote in ipairs(data.footnotes) do
insert(notes, footnote)
end
data.footnotes = concat(notes, "<br />")
end
-- Generate the display form for a set of slots with identical content. We
-- verify that the slots are actually identical, and throw an assertion error
-- if not. The display form is as in show_forms() but combines together all the
-- accelerator forms for all the slots.
local function finish_show_form(data, slots, is_adj)
assert(#slots > 0)
local slot1 = slots[1]
local forms = data.forms[slot1]
local notetext = data.notetext[slot1]
for _, slot in ipairs(slots) do
if not deep_equals(data.forms[slot], forms) then
error("data.forms[" .. slot1 .. "] = " .. (concat_forms_in_slot(forms) or "nil") ..
", but data.forms[" .. slot .. "] = " .. (concat_forms_in_slot(data.forms[slot]) or "nil"))
end
assert(deep_equals(data.notetext[slot], notetext))
end
if not forms then
return "—"
else
local accel_forms = {}
local accel_lemma = data.accel[slot1].lemma
for _, slot in ipairs(slots) do
assert(data.accel[slot].lemma == accel_lemma)
insert(accel_forms, data.accel[slot].form)
end
local combined_accel_form = concat(accel_forms, "|;|")
local accel = {form = combined_accel_form, lemma = accel_lemma}
local formtext = {}
for i, form in ipairs(forms) do
insert(formtext, link_if_attested(form, accel, data.unattested[slot1]) .. notetext[i])
end
-- FIXME, do we want this difference?
return concat(formtext, is_adj and ", " or "<br />")
end
end
-- Used by the adjective table module. This does some of the work of
-- show_forms(); in particular, it converts all empty forms of any format
-- (nil, "", "—") to nil and, if the forms aren't empty, generates the footnote
-- text associated with each form.
local function partial_show_forms(data, is_adj)
local noteindex = 1
local notes = {}
local seen_notes = {}
data.notetext = {}
-- Store this function in DATA so that it can be called from the adjective
-- table module without needing to require this module, which will (or
-- could) lead to recursive module requiring.
data.finish_show_form = finish_show_form
for slot in iter_slots(is_adj) do
local val = data.forms[slot]
if not val or val == "" or val == "—" then
data.forms[slot] = nil
else
local notetext = {}
for i in ipairs(val) do
local this_notes = data.notes[slot .. i]
if this_notes and not data.user_specified then
if type(this_notes) == "string" then
this_notes = {this_notes}
end
local link_indices = {}
for _, this_note in ipairs(this_notes) do
local this_noteindex = seen_notes[this_note]
if not this_noteindex then
-- Generate a footnote index.
this_noteindex = noteindex
noteindex = noteindex + 1
insert(notes, '<sup style="color: red">' .. this_noteindex .. '</sup>' .. this_note)
seen_notes[this_note] = this_noteindex
end
insert_if_not(link_indices, this_noteindex)
end
insert(notetext, '<sup style="color: red">' .. concat(link_indices, ",") .. '</sup>')
else
insert(notetext, "")
end
end
data.notetext[slot] = notetext
end
end
for _, footnote in ipairs(data.footnotes) do
insert(notes, footnote)
end
data.footnotes = concat(notes, "<br />")
end
-- Given an ending (or possibly a full regex matching the entire lemma, if
-- a regex group is present), return the base minus the ending, or nil if
-- the ending doesn't match.
local function extract_base(lemma, ending)
if ending:find("(", nil, true) then
return umatch(lemma, ending)
end
return umatch(lemma, "^(.*)" .. ending .. "$")
end
-- Given ENDINGS_AND_SUBTYPES (a list of pairs of endings with associated
-- subtypes, where each pair consists of a single ending spec and a list of
-- subtypes), check each ending in turn against LEMMA. If it matches, return
-- the pair BASE, STEM2, SUBTYPES where BASE is the remainder of LEMMA minus
-- the ending, STEM2 is as passed in, and SUBTYPES is the subtypes associated
-- with the ending. But don't return SUBTYPES if any of the subtypes in the
-- list is specifically canceled in SPECIFIED_SUBTYPES (a set, i.e. a table
-- where the keys are strings and the value is always true); instead, consider
-- the next ending in turn. If no endings match, throw an error if DECLTYPE is
-- non-nil, mentioning the DECLTYPE (the user-specified declension); but if
-- DECLTYPE is nil, just return nil, nil, nil.
--
-- The ending spec in ENDINGS_AND_SUBTYPES is one of the following:
--
-- 1. A simple string, e.g. "tūdō", specifying an ending.
-- 2. A regex that should match the entire lemma (it should be anchored at
-- the beginning with ^ and at the end with $), and contains a single
-- capturing group to match the base.
-- 3. A pair {SIMPLE_STRING_OR_REGEX, STEM2_ENDING} where
-- SIMPLE_STRING_OR_REGEX is one of the previous two possibilities and
-- STEM2_ENDING is a string specifying the corresponding ending that must
-- be present in STEM2. If this form is used, the combination of
-- base + STEM2_ENDING must exactly match STEM2 in order for this entry
-- to be considered a match. An example is {"is", ""}, which will match
-- lemma == "follis", stem2 == "foll", but not lemma == "lapis",
-- stem2 == "lapid".
local function get_noun_subtype_by_ending(lemma, stem2, decltype, specified_subtypes,
endings_and_subtypes)
for _, ending_and_subtypes in ipairs(endings_and_subtypes) do
local ending = ending_and_subtypes[1]
local subtypes = ending_and_subtypes[2]
local not_this_subtype = false
if (
specified_subtypes.pl and not contains(subtypes, "pl") or
contains(subtypes, "both") and not specified_subtypes.both
) then
-- We now require that plurale tantum terms specify a plural-form lemma.
-- The autodetected subtypes will include 'pl' for such lemmas; if not,
-- we fail this entry. Additionally, if the rule contains 'both', it
-- must be explicitly specified to match.
not_this_subtype = true
else
for _, subtype in ipairs(subtypes) do
-- A subtype is directly canceled by specifying -SUBTYPE.
-- In addition, M or F as a subtype is canceled by N, and
-- vice-versa, but M doesn't cancel F or vice-versa; instead,
-- we simply ignore the conflicting gender specification when
-- constructing the combination of specified and inferred subtypes.
-- The reason for this is that neuters have distinct declensions
-- from masculines and feminines, but masculines and feminines have
-- the same declension, and various nouns in Latin that are
-- normally masculine are exceptionally feminine and vice-versa
-- (nauta, agricola, fraxinus, malus "apple tree", manus, rēs,
-- etc.).
--
-- In addition, sg as a subtype is canceled by pl and vice-versa.
-- It's also possible to specify both, which will override sg but
-- not cancel it (in the sense that it won't prevent the relevant
-- rule from matching). For example, there's a rule specifying that
-- lemmas beginning with a capital letter and ending in -ius take
-- the ius.voci.sg subtypes. Specifying such a lemma with the
-- subtype both will result in the ius.voci.both subtypes, whereas
-- specifying such a lemma with the subtype pl will cause this rule
-- not to match, and it will fall through to a less specific rule
-- that returns just the ius subtype, which will be combined with
-- the explicitly specified pl subtype to produce ius.pl.
if specified_subtypes["-" .. subtype] or
subtype == "N" and (specified_subtypes.M or specified_subtypes.F) or
(subtype == "M" or subtype == "F") and specified_subtypes.N or
subtype == "sg" and specified_subtypes.pl or
subtype == "pl" and specified_subtypes.sg then
not_this_subtype = true
break
end
end
end
if not not_this_subtype then
if type(ending) == "table" then
local lemma_ending = ending[1]
local stem2_ending = ending[2]
local base = extract_base(lemma, lemma_ending)
if base and base .. stem2_ending == stem2 then
return base, stem2, subtypes
end
else
local base = extract_base(lemma, ending)
if base then
return base, stem2, subtypes
end
end
end
end
if decltype then
error("Unrecognized ending for declension-" .. decltype .. " noun: " .. lemma)
end
return nil, nil, nil
end
-- Autodetect the subtype of a noun given all the information specified by the
-- user: lemma, stem2, declension type and specified subtypes. Three values are
-- returned: the lemma base (i.e. the stem of the lemma, as required by the
-- declension functions), the new stem2 and the autodetected subtypes. Note
-- that this will not detect a given subtype if the explicitly specified
-- subtypes are incompatible (i.e. if -SUBTYPE is specified for any subtype
-- that would be returned; or if M or F is specified when N would be returned,
-- and vice-versa; or if pl is specified when sg would be returned, and
-- vice-versa).
--
-- NOTE: This function has intimate knowledge of the way that the declension
-- functions handle subtypes, particularly for the third declension.
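-- For example (a rough illustration; cf. the parse_segment() examples below),
-- detect_noun_subtype("lūna", nil, "1", {}) returns "lūn", nil, {"F"}.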
local function detect_noun_subtype(lemma, stem2, typ, subtypes)
local base, _
if typ == "1" then
return get_noun_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"ām", {"F", "am"}},
{"ās", {"M", "Greek", "Ma"}},
{"ēs", {"M", "Greek", "Me"}},
{"ē", {"F", "Greek"}},
{"ae", {"F", "pl"}},
{"a", {"F"}},
})
elseif typ == "2" then
local detected_subtypes
lemma, stem2, detected_subtypes = get_noun_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"^(.*r)$", {"M", "er"}},
{"^(.*v)os$", {"M", "vos"}},
{"^(.*v)om$", {"N", "vom"}},
-- If the lemma ends in -os and the user said N or -M, then the
-- following won't apply, and the second (neuter) -os will apply.
{"os", {"M", "Greek"}},
{"os", {"N", "Greek", "us"}},
{"on", {"N", "Greek"}},
-- -ius beginning with a capital letter is assumed a proper name,
-- and takes the voci subtype (vocative in -ī) along with the ius
-- subtype and sg-only. Other nouns in -ius just take the ius
-- subtype. Explicitly specify "sg" so that if .pl is given,
-- this rule won't apply.
{"^(%u.*)ius$", {"M", "ius", "voci", "sg"}},
{"ius", {"M", "ius"}},
{"ium", {"N", "ium"}},
-- If the lemma ends in -us and the user said N or -M, then the
-- following won't apply, and the second (neuter) -us will apply.
{"us", {"M"}},
{"us", {"N", "us"}},
{"um", {"N"}},
{"iī", {"M", "ius", "pl"}},
{"ia", {"N", "ium", "pl"}},
-- If the lemma ends in -ī and the user said N or -M, then the
-- following won't apply, and the second (neuter) -ī will apply.
{"ī", {"M", "pl"}},
{"ī", {"N", "us", "pl"}},
{"oe", {"M", "Greek", "pl"}},
{"a", {"N", "pl"}},
})
stem2 = stem2 or lemma
return lemma, stem2, detected_subtypes
elseif typ == "3" then
if subtypes.pl then
if subtypes.Greek then
base = lemma:match("^(.*)erēs$")
if base then
return base .. "ēr", base .. "er", {"er"}
end
base = lemma:match("^(.*)ontēs$")
if base then
return base .. "ōn", base .. "ont", {"on"}
end
base = lemma:match("^(.*)es$")
if base then
return "foo", stem2 or base, {}
end
error("Unrecognized ending for declension-3 plural Greek noun: " .. lemma)
end
base = lemma:match("^(.*)ia$")
if base then
return "foo", stem2 or base, {"N", "I", "pure"}
end
base = lemma:match("^(.*)a$")
if base then
return "foo", stem2 or base, {"N"}
end
base = lemma:match("^(.*)ēs$")
if base then
return "foo", stem2 or base, {}
end
error("Unrecognized ending for declension-3 plural noun: " .. lemma)
end
stem2 = stem2 or make_stem2(lemma)
local detected_subtypes
if subtypes.Greek then
base, _, detected_subtypes = get_noun_subtype_by_ending(lemma, stem2, nil, subtypes, {
{{"is", ""}, {"I"}},
{"ēr", {"er"}},
{"ōn", {"on"}},
})
if base then
return lemma, stem2, detected_subtypes
end
return lemma, stem2, {}
end
if not subtypes.N then
base, _, detected_subtypes = get_noun_subtype_by_ending(lemma, stem2, nil, subtypes, {
{{"^(%u.*pol)is$", ""}, {"F", "polis", "sg", "loc"}},
{{"tūdō", "tūdin"}, {"F"}},
{{"tās", "tāt"}, {"F"}},
{{"tūs", "tūt"}, {"F"}},
{{"tiō", "tiōn"}, {"F"}},
{{"siō", "siōn"}, {"F"}},
{{"xiō", "xiōn"}, {"F"}},
{{"gō", "gin"}, {"F"}},
{{"or", "ōr"}, {"M"}},
{{"trx", "trīc"}, {"F"}},
{{"is", ""}, {"I"}},
{{"^(%l.*)ēs$", ""}, {"I"}},
})
if base then
return lemma, stem2, detected_subtypes
end
end
base, _, detected_subtypes = get_noun_subtype_by_ending(lemma, stem2, nil, subtypes, {
{{"us", "or"}, {"N"}},
{{"us", "er"}, {"N"}},
{{"ma", "mat"}, {"N"}},
{{"men", "min"}, {"N"}},
{{"^(%u.*)e$", ""}, {"N", "sg"}},
{{"e", ""}, {"N", "I", "pure"}},
{{"al", "āl"}, {"N", "I", "pure"}},
{{"ar", "ār"}, {"N", "I", "pure"}},
})
if base then
return lemma, stem2, detected_subtypes
end
return lemma, stem2, {}
elseif typ == "4" then
if subtypes.echo or subtypes.Callisto then
base = lemma:match("^(.*)ō$")
if not base then
error("Declension-4 noun of subtype .echo or .Callisto should end in -ō: " .. lemma)
end
if subtypes.Callisto then
return base, nil, {"F", "sg"}
else
return base, nil, {"F"}
end
end
return get_noun_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"us", {"M"}},
{"ū̆", {"N"}},
{"ūs", {"M", "pl"}},
{"ua", {"N", "pl"}},
})
elseif typ == "5" then
return get_noun_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"iēs", {"F", "i"}},
{"iēs", {"F", "i", "pl"}},
{"ēs", {"F"}},
{"ēs", {"F", "pl"}},
})
elseif typ == "sgpl" then
return lemma, stem2, {}
elseif typ == "irreg" and lemma == "domus" then
-- [[Module:la-noun/data]] auto-sets data.loc = true, but we need to know this
-- before declining the noun so we can propagate it to other segments.
return lemma, nil, {"loc"}
elseif typ == "indecl" or typ == "irreg" and (
lemma == "Deus" or umatch(lemma, "^ēss$") or
lemma == "Athōs" or lemma == "vēnum"
) then
-- Indeclinable nouns, and certain irregular nouns, set data.num = "sg",
-- but we need to know this before declining the noun so we can
-- propagate it to other segments.
return get_noun_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"", {"both"}},
{"", {"sg"}},
{"", {"pl"}},
})
else
return lemma, nil, {}
end
end
-- Given ENDINGS_AND_SUBTYPES (a list of four-tuples of ENDING, RETTYPE,
-- SUBTYPES, PROCESS_RETVAL), check each ENDING in turn against LEMMA and
-- STEM2. If it matches, return a four-tuple BASE, STEM2, RETTYPE, NEW_SUBTYPES
-- where BASE is normally the remainder of LEMMA minus the ending, STEM2 is
-- as passed in, RETTYPE is as passed in, and NEW_SUBTYPES is the same as
-- SUBTYPES minus any subtypes beginning with a hyphen. If no endings match,
-- throw an error if DECLTYPE is non-nil, mentioning the DECLTYPE
-- (user-specified declension); but if DECLTYPE is nil, just return the tuple
-- nil, nil, nil, nil.
--
-- In order for a given entry to match, ENDING must match and also the subtypes
-- in SUBTYPES (a list) must not be incompatible with the passed-in
-- user-specified subtypes SPECIFIED_SUBTYPES (a set, i.e. a table where the
-- keys are strings and the value is always true). "Incompatible" means that
-- a given SUBTYPE is specified in either one and -SUBTYPE in the other, or
-- that "pl" is found in SPECIFIED_SUBTYPES and not in SUBTYPES.
--
-- The ending spec in ENDINGS_AND_SUBTYPES is one of the following:
--
-- 1. A simple string, e.g. "tūdō", specifying an ending.
-- 2. A regex that should match the entire lemma (it should be anchored at
-- the beginning with ^ and at the end with $), and contains a single
-- capturing group to match the base.
-- 3. A pair {SIMPLE_STRING_OR_REGEX, STEM2_ENDING} where
-- SIMPLE_STRING_OR_REGEX is one of the previous two possibilities and
-- STEM2_ENDING is a string specifying the corresponding ending that must
-- be present in STEM2. If this form is used, the combination of
-- base + STEM2_ENDING must exactly match STEM2 in order for this entry
-- to be considered a match. An example is {"is", ""}, which will match
-- lemma == "follis", stem2 == "foll", but not lemma == "lapis",
-- stem2 == "lapid".
--
-- If PROCESS_STEM2 is given and the returned STEM2 would be nil, call
-- process_stem2(BASE) to get the STEM2 to return.
local function get_adj_type_and_subtype_by_ending(lemma, stem2, decltype,
specified_subtypes, endings_and_subtypes, process_stem2)
for _, ending_and_subtypes in ipairs(endings_and_subtypes) do
local ending = ending_and_subtypes[1]
local rettype = ending_and_subtypes[2]
local subtypes = ending_and_subtypes[3]
local process_retval = ending_and_subtypes[4]
local not_this_subtype = false
if (
specified_subtypes.pl and not contains(subtypes, "pl") or
contains(subtypes, "both") and not specified_subtypes.both
) then
-- We now require that plurale tantum terms specify a plural-form lemma.
-- The autodetected subtypes will include 'pl' for such lemmas; if not,
-- we fail this entry. Additionally, if the rule contains 'both', it
-- must be explicitly specified to match.
not_this_subtype = true
else
for _, subtype in ipairs(subtypes) do
-- A subtype is directly canceled by specifying -SUBTYPE.
if specified_subtypes["-" .. subtype] then
not_this_subtype = true
break
end
-- A subtype is canceled if the user specified SUBTYPE and
-- -SUBTYPE is given in the to-be-returned subtypes.
local must_not_be_present = subtype:match("^%-(.*)$")
if must_not_be_present and specified_subtypes[must_not_be_present] then
not_this_subtype = true
break
end
end
end
if not not_this_subtype then
local base
if type(ending) == "table" then
local lemma_ending = ending[1]
local stem2_ending = ending[2]
base = extract_base(lemma, lemma_ending)
if base and base .. stem2_ending ~= stem2 then
base = nil
end
else
base = extract_base(lemma, ending)
end
if base then
-- Remove subtypes of the form -SUBTYPE from the subtypes
-- to be returned.
local new_subtypes = {}
for _, subtype in ipairs(subtypes) do
if subtype:sub(1, 1) ~= "-" then
insert(new_subtypes, subtype)
end
end
if process_retval then
base, stem2 = process_retval(base, stem2)
end
if process_stem2 then
stem2 = stem2 or process_stem2(base)
end
return base, stem2, rettype, new_subtypes
end
end
end
if not decltype then
return nil, nil, nil, nil
elseif decltype == "" then
error("Unrecognized ending for adjective: " .. lemma)
else
error("Unrecognized ending for declension-" .. decltype .. " adjective: " .. lemma)
end
end
-- Autodetect the type and subtype of an adjective given all the information
-- specified by the user: lemma, stem2, declension type and specified subtypes.
-- Four values are returned: the lemma base (i.e. the stem of the lemma, as
-- required by the declension functions), the value of stem2 to pass to the
-- declension function, the declension type and the autodetected subtypes.
-- Note that this will not detect a given subtype if -SUBTYPE is specified for
-- any subtype that would be returned, or if SUBTYPE is specified and -SUBTYPE
-- is among the subtypes that would be returned (such subtypes are filtered out
-- of the returned subtypes).
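-- For example (a rough illustration; cf. the parse_segment() examples below),
-- detect_adj_type_and_subtype("bonus", nil, "+", {}) detects declension type
-- "1&2+" with base "bon" and no additional subtypes.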
local function detect_adj_type_and_subtype(lemma, stem2, typ, subtypes)
-- FIXME: not clear why "foo" is in production code.
local function base_as_stem2(base, stem2)
return "foo", base
end
local function constant_base(baseval)
return function(base, stem2)
return baseval, nil
end
end
local function decl12_stem2(base)
return base
end
local function decl3_stem2(base)
return make_stem2(base)
end
local decl12_entries = {
{"us", "1&2+", {}},
{"a", "1&2+", {}},
{"um", "1&2+", {}},
{"ī", "1&2+", {"pl"}},
{"ae", "1&2+", {"pl"}},
{"a", "1&2+", {"pl"}},
-- Nearly all -os adjectives are greekA
{"os", "1&2+", {"greekA", "-greekE"}},
{"os", "1&2+", {"greekE", "-greekA"}},
{"ē", "1&2+", {"greekE", "-greekA"}},
{"on", "1&2+", {"greekA", "-greekE"}},
{"on", "1&2+", {"greekE", "-greekA"}},
{"^(.*er)$", "1&2+", {"er"}},
{"^(.*ur)$", "1&2+", {"er"}},
{"^(h)ic$", "1&2+", {"ic"}},
}
local decl3_entries = {
{"^(.*er)$", "3-3+", {}},
{"is", "3-2+", {}},
{"e", "3-2+", {}},
{"^(.*)or$", "3-C+", {}},
{"^(min)or$", "3-C+", {}},
-- Detect -ēs as 3-1 without auto-inferring .pl if .pl
-- not specified. If we don't do this, the later entry for
-- -ēs will auto-infer .pl whenever -ēs is specified (which
-- won't work for adjectives like quadripēs, volucripēs).
-- Essentially, for declension-3 adjectives, we require that
-- .pl is given if the lemma is plural.
--
-- Most 3-1 adjectives are i-stem (e.g. audāx) so we require -I
-- to be given with non-i-stem adjectives. The first entry below
-- will apply when -I isn't given, the second when it is given.
{"^(.*ēs)$", "3-1+", {"I"}},
{"^(.*ēs)$", "3-1+", {"par"}},
{"^(.*)ōrēs$", "3-C+", {"pl"}},
{"^(min)ōrēs$", "3-C+", {"pl"}},
-- If .pl with -ēs, we don't know if the adjective is 3-1, 3-2
-- or 3-3. Since 3-2 is probably the most common, we infer it
-- (as well as the fact that these adjectives *are* in a sense
-- 3-2 since they have a distinct neuter in -(i)a). Note that
-- we have two entries here; the first one will apply unless
-- -I is given, and will infer an i-stem adjective; the second
-- one will apply otherwise (and infer a non-i-stem 3-1 adjective).
{"ēs", "3-2+", {"pl", "I"}},
{"ēs", "3-1+", {"pl", "par"}, base_as_stem2},
-- Same for neuters.
{"ia", "3-2+", {"pl", "I"}},
{"a", "3-1+", {"pl", "par"}, base_as_stem2},
-- As above for -ēs but for miscellaneous singulars.
{"", "3-1+", {"I"}},
{"", "3-1+", {"par"}},
}
if typ == "+" then
local base, new_stem2, rettype, new_subtypes = get_adj_type_and_subtype_by_ending(lemma, stem2, nil, subtypes, decl12_entries, decl12_stem2)
if base then
return base, new_stem2, rettype, new_subtypes
else
return get_adj_type_and_subtype_by_ending(lemma, stem2, typ, subtypes, decl3_entries, decl3_stem2)
end
elseif typ == "3+" then
return get_adj_type_and_subtype_by_ending(lemma, stem2, typ, subtypes, decl3_entries, decl3_stem2)
elseif typ == "1&2+" then
return get_adj_type_and_subtype_by_ending(lemma, stem2, typ, subtypes, decl12_entries, decl12_stem2)
elseif typ == "1-1+" then
return get_adj_type_and_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"a", typ, {}},
{"ae", typ, {"pl"}},
})
elseif typ == "2-2+" then
return get_adj_type_and_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"us", typ, {}},
{"um", typ, {}},
{"ī", typ, {"pl"}},
{"a", typ, {"pl"}},
{"os", typ, {"greek"}},
{"on", typ, {"greek"}},
{"oe", typ, {"greek", "pl"}},
})
elseif typ == "3-1+" then
-- This will cancel out the I if -I is specified in subtypes, and the
-- resulting lack of I will get converted to "par".
return get_adj_type_and_subtype_by_ending(lemma, stem2, typ, subtypes, {
-- Detect -ēs as 3-1 without auto-inferring .pl if .pl
-- not specified. If we don't do this, the later entry for
-- -ēs will auto-infer .pl whenever -ēs is specified.
-- Essentially, for declension-3 adjectives, we require that
-- .pl is given if the lemma is plural.
-- We have two entries here; the first one will apply unless
-- -I is given, and will infer an i-stem adjective; the second
-- one will apply otherwise.
{"^(.*ēs)$", typ, {"I"}},
{"^(.*ēs)$", typ, {"par"}},
{"ēs", typ, {"pl", "I"}, base_as_stem2},
{"ēs", typ, {"pl", "par"}, base_as_stem2},
{"ia", typ, {"pl", "I"}, base_as_stem2},
{"a", typ, {"pl", "par"}, base_as_stem2},
{"", typ, {"I"}},
{"", typ, {"par"}},
}, decl3_stem2)
elseif typ == "3-2+" then
return get_adj_type_and_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"is", typ, {}},
{"e", typ, {}},
-- Detect -ēs as 3-2 without auto-inferring .pl if .pl
-- not specified. If we don't do this, the later entry for
-- -ēs will auto-infer .pl whenever -ēs is specified (which
-- won't work for adjectives like isoscelēs). Essentially,
-- for declension-3 adjectives, we require that .pl is given
-- if the lemma is plural.
{"ēs", typ, {}},
{"ēs", typ, {"pl"}},
{"ia", typ, {"pl"}},
}, decl3_stem2)
elseif typ == "3-3+" or typ == "3-P+" then
return get_adj_type_and_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"ēs", typ, {"pl"}, base_as_stem2},
{"ia", typ, {"pl"}, base_as_stem2},
{"", typ, {}},
}, decl3_stem2)
elseif typ == "3-C+" then
return get_adj_type_and_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"^(.*)or$", typ, {}},
{"^(min)or$", typ, {}},
{"^(.*)ōrēs$", typ, {"pl"}},
{"^(min)ōrēs$", typ, {"pl"}},
}, decl3_stem2)
elseif typ == "irreg+" then
return get_adj_type_and_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"^(duo)$", typ, {"pl"}},
{"^(ambō)$", typ, {"pl"}},
{"^(mīll?ia)$", typ, {"N", "pl"}, constant_base("mīlle")},
-- match ea
{"^(ea)$", typ, {}, constant_base("is")},
-- match id
{"^(id)$", typ, {}, constant_base("is")},
-- match plural eī, iī
{"^(ī)$", typ, {"pl"}, constant_base("is")},
-- match plural ea, eae
{"^(eae?)$", typ, {"pl"}, constant_base("is")},
-- match eadem
{"^(eadem)$", typ, {}, constant_base("īdem")},
-- match īdem, idem
{"^(dem)$", typ, {}, constant_base("īdem")},
-- match plural īdem
{"^(īdem)$", typ, {"pl"}},
-- match plural eadem, eaedem
{"^(eae?dem)$", typ, {"pl"}, constant_base("īdem")},
-- match illa, ipsa, ista; it doesn't matter if we overmatch because
-- we'll get an error as we use the stem itself in the returned base
{"^(i)a$", typ, {}, function(base, stem2) return base .. "e", nil end},
-- match illud, istud; as above, it doesn't matter if we overmatch
{"^(i)ud$", typ, {}, function(base, stem2) return base .. "e", nil end},
-- match ipsum
{"^(ipsum)$", typ, {}, constant_base("ipse")},
-- match plural illī, ipsī, istī; as above, it doesn't matter if we
-- overmatch
{"^(i)ī$", typ, {"pl"}, function(base, stem2) return base .. "e", nil end},
-- match plural illa, illae, ipsa, ipsae, ista, istae; as above, it
-- doesn't matter if we overmatch
{"^(i)ae?$", typ, {"pl"}, function(base, stem2) return base .. "e", nil end},
-- Detect quī as non-plural unless .pl specified.
{"^(quī)$", typ, {}},
-- Otherwise detect quī as plural.
{"^(quī)$", typ, {"pl"}},
-- Same for quae.
{"^(quae)$", typ, {}, constant_base("quī")},
{"^(quae)$", typ, {"pl"}, constant_base("quī")},
{"^(quid)$", typ, {}, constant_base("quis")},
{"^(quod)$", typ, {}, constant_base("quī")},
{"^(quiquid)$", typ, {}, constant_base("quisquis")},
{"^(quīquī)$", typ, {"pl"}, constant_base("quisquis")},
{"^(quaequae)$", typ, {"pl"}, constant_base("quisquis")},
-- match all remaining lemmas in lemma form
{"", typ, {}},
})
elseif typ == "indecl+" then
return get_adj_type_and_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"", typ, {"both"}},
{"", typ, {"sg"}},
{"", typ, {"pl"}},
})
else -- 0+
return lemma, nil, typ, {}
end
end
-- Parse a segment (e.g. "lūna<1>", "aegis/aegid<3.Greek>", "bōs<irreg.F>",
-- "bonus<+>", or "[[vetus]]/veter<3+.-I>"), consisting of a lemma (or optionally
-- a lemma/stem) and declension+subtypes, where a + in the declension indicates
-- an adjective. Brackets can be present to indicate links, for use in
-- {{la-noun}} and {{la-adj}}. The return value is a table, e.g.:
-- {
-- decl = "1",
-- is_adj = false,
-- orig_lemma = "lūna",
-- lemma = "lūna",
-- stem2 = nil,
-- gender = "F",
-- types = {["F"] = true},
-- args = {"lūn"}
-- }
--
-- or
--
-- {
-- decl = "3",
-- is_adj = false,
-- orig_lemma = "aegis",
-- lemma = "aegis",
-- stem2 = "aegid",
-- gender = nil,
-- types = {["Greek"] = true},
-- args = {"aegis", "aegid"}
-- }
--
-- or
--
-- {
-- decl = "irreg",
-- is_adj = false,
-- orig_lemma = "bōs",
-- lemma = "bōs",
-- stem2 = nil,
-- gender = "F",
-- types = {["F"] = true},
-- args = {"bōs"}
-- }
-- or
--
-- {
-- decl = "1&2+",
-- is_adj = true,
-- orig_lemma = "bonus",
-- lemma = "bonus",
-- stem2 = nil,
-- gender = nil,
-- types = {},
-- args = {"bon"}
-- }
--
-- or
--
-- {
-- decl = "3-1+",
-- is_adj = true,
-- orig_lemma = "]",
-- lemma = "vetus",
-- stem2 = "veter",
-- gender = nil,
-- types = {},
-- args = {"vetus", "veter"}
-- }
local function parse_segment(segment)
local stem_part, spec_part = segment:match("^(.*)<(.-)>$")
local stems = split(stem_part, "/", true, true)
local specs = split(spec_part, ".", true, true)
local types = {}
local num = nil
local loc = false
local args = {}
local decl
for j, spec in ipairs(specs) do
if j == 1 then
decl = spec
else
local begins_with_hyphen
begins_with_hyphen, spec = spec:match("^(%-?)(.*)$")
spec = begins_with_hyphen .. spec:gsub("%-", "_")
types[spec] = true
end
end
local orig_lemma = stems[1]
if not orig_lemma or orig_lemma == "" then
orig_lemma = pagename or get_pagename()
end
local lemma = remove_links(orig_lemma)
local stem2 = stems[2]
if stem2 == "" then
stem2 = nil
end
if #stems > 2 then
error("Too many stems, at most 2 should be given: " .. stem_part)
end
local base, detected_subtypes
local is_adj = false
local gender = nil
if decl:find("+", nil, true) then
base, stem2, decl, detected_subtypes = detect_adj_type_and_subtype(lemma, stem2, decl, types)
is_adj = true
for _, subtype in ipairs(detected_subtypes) do
if types["-" .. subtype] then
-- if a "cancel subtype" spec is given, remove the cancel spec
-- and don't apply the subtype
types["-" .. subtype] = nil
else
types[subtype] = true
end
end
else
base, stem2, detected_subtypes = detect_noun_subtype(lemma, stem2, decl, types)
for _, subtype in ipairs(detected_subtypes) do
if types["-" .. subtype] then
-- if a "cancel subtype" spec is given, remove the cancel spec
-- and don't apply the subtype
types["-" .. subtype] = nil
elseif (subtype == "M" or subtype == "F" or subtype == "N") and
(types.M or types.F or types.N) then
-- if gender already specified, don't create conflicting gender spec
elseif (subtype == "sg" or subtype == "pl" or subtype == "both") and
(types.sg or types.pl or types.both) then
-- if number restriction already specified, don't create conflicting
-- number restriction spec
else
types[subtype] = true
end
end
if not types.pl and not types.both and umatch(lemma, "^%u") then
types.sg = true
end
end
if types.loc then
loc = true
types.loc = nil
end
if types.M then
gender = "M"
elseif types.F then
gender = "F"
elseif types.N then
gender = "N"
end
if types.pl then
num = "pl"
types.pl = nil
elseif types.sg then
num = "sg"
types.sg = nil
end
args[1] = base
args[2] = stem2
return {
decl = decl,
is_adj = is_adj,
gender = gender,
orig_lemma = orig_lemma,
lemma = lemma,
stem2 = stem2,
types = types,
num = num,
loc = loc,
args = args,
}
end
-- Parse a segment run (i.e. a string with zero or more segments [see
-- parse_segment] and optional surrounding text, e.g. "foenum<2>-graecum<2>"
-- or "]/part<3.abl-e-occ-i> ]"). The segment run
-- currently cannot contain any alternants (e.g. "((epulum<2.sg>,epulae<1>))").
-- The return value is a table of the following form:
-- {
-- segments = PARSED_SEGMENTS (a list of parsed segments),
-- loc = LOC (a boolean indicating whether any of the individual segments
-- has a locative),
-- num = NUM (the first specified value for a number restriction, or nil if
-- no number restrictions),
-- gender = GENDER (the first specified or inferred gender, or nil if none),
-- is_adj = IS_ADJ (true if all segments are adjective segments, false if
-- there's at least one noun segment, nil if only raw-text segments),
-- propses = PROPSES (list of per-word properties, where each element is an
-- object {
-- decl = DECL (declension),
-- types = TYPES (set describing the subtypes of a given word),
-- }
-- }
-- Each element in PARSED_SEGMENTS is as returned by parse_segment() but will
-- have an additional .orig_prefix field indicating the text before the segment
-- (including bracketed links) and corresponding .prefix field indicating the text
-- with bracketed links resolved. If there is trailing text, the last element will
-- have only .orig_prefix and .prefix fields containing that trailing text.
local function parse_segment_run(segment_run)
local loc = nil
local num = nil
local is_adj = nil
-- If the segment run begins with a hyphen, include the hyphen in the
-- set of allowed characters for a declined segment. This way, e.g. the
-- suffix ] can be declared as {{la-ndecl|-cen/-cin<3>}} rather than
-- {{la-ndecl|-cen/cin<3>}}, which is less intuitive.
local is_suffix = segment_run:sub(1, 1) == "-"
local segments = {}
local propses = {}
-- We want to not break up a bracketed link followed by <> even if it has a space or
-- hyphen in it. So we do an outer capturing split to find the bracketed links followed
-- by <>, then do inner capturing splits on all the remaining text to find the other
-- declined terms.
local bracketed_segments = split(segment_run, "(%[%[[^%]]-%]%]<.->)")
for i, bracketed_segment in ipairs(bracketed_segments) do
if i % 2 == 0 then
insert(segments, bracketed_segment)
else
for _, subsegment in ipairs(split(bracketed_segment, is_suffix and "([^%s]+<.->)" or "([^%s%-]+<.->)")) do
insert(segments, subsegment)
end
end
end
local parsed_segments = {}
local gender = nil
for i = 2, (#segments - 1), 2 do
local parsed_segment = parse_segment(segments[i])
-- Overall locative is true if any segments call for locative.
loc = loc or parsed_segment.loc
-- The first specified value for num becomes the overall value.
num = num or parsed_segment.num
if is_adj == nil then
is_adj = parsed_segment.is_adj
else
is_adj = is_adj and parsed_segment.is_adj
end
gender = gender or parsed_segment.gender
parsed_segment.orig_prefix = segments[i - 1]
parsed_segment.prefix = remove_links(segments[i - 1])
insert(parsed_segments, parsed_segment)
insert(propses, {
decl = parsed_segment.decl,
types = parsed_segment.types,
})
end
if segments ~= "" then
insert(parsed_segments, {
orig_prefix = segments,
prefix = remove_links(segments),
})
end
return {
segments = parsed_segments,
loc = loc,
num = num,
is_adj = is_adj,
gender = gender,
propses = propses,
}
end
-- Parse an alternant, e.g. "((epulum<2.sg>,epulae<1>))",
-- "((Serapis<3>,Serapis/Serapid<3>))" or
-- "((rēs<5>pūblica<1>,rēspūblica<1>))". The return value is a table of the form
-- {
-- alternants = PARSED_ALTERNANTS (a list of segment runs, each of which is a
-- list of parsed segments as returned by parse_segment_run()),
-- loc = LOC (a boolean indicating whether any of the individual segment runs
-- has a locative),
-- num = NUM (the overall number restriction, one of "sg", "pl" or "both"),
-- gender = GENDER (the first specified or inferred gender, or nil if none),
-- is_adj = IS_ADJ (true if all non-constant alternants are adjectives, false
-- if all nouns, nil if only constant alternants; conflicting alternants
-- cause an error),
-- propses = PROPSES (list of lists of per-word property objecs),
-- }
local function parse_alternant(alternant)
local parsed_alternants = {}
local alternant_spec = alternant:match("^%(%((.-)%)%)$")
local alternants = split(alternant_spec, ",", true, true)
local loc = false
local num = nil
local gender = nil
local is_adj = nil
local propses = {}
for i, alternant in ipairs(alternants) do
local parsed_run = parse_segment_run(alternant)
insert(parsed_alternants, parsed_run)
loc = loc or parsed_run.loc
-- First time through, set the overall num to the num of the first run,
-- even if nil. After that, if we ever see a run with a different value
-- of num, set the overall num to "both". That way, if all alternants
-- don't specify a num, we get an unspecified num, but if some do and
-- some don't, we get both, because an unspecified num defaults to
-- both.
if i == 1 then
num = parsed_run.num
elseif num ~= parsed_run.num then
-- FIXME, this needs to be rethought to allow for
-- adjective alternants.
num = "both"
end
gender = gender or parsed_run.gender
if is_adj == nil then
is_adj = parsed_run.is_adj
elseif parsed_run.is_adj ~= nil and parsed_run.is_adj ~= is_adj then
error("Saw both noun and adjective alternants; not allowed")
end
insert(propses, parsed_run.propses)
end
return {
alternants = parsed_alternants,
loc = loc,
num = num,
gender = gender,
is_adj = is_adj,
propses = propses,
}
end
-- Parse a segment run (see parse_segment_run()). Unlike for
-- parse_segment_run(), this can contain alternants such as
-- "((epulum<2.sg>,epulae<1>))" or "((Serapis<3.sg>,Serapis/Serapid<3.sg>))"
-- embedded in it to indicate words composed of multiple declensions.
-- The return value is a table of the following form:
-- {
-- segments = PARSED_SEGMENTS (a list of parsed segments),
-- loc = LOC (a boolean indicating whether any of the individual segments has
-- a locative),
-- num = NUM (the first specified value for a number restriction, or nil if
-- no number restrictions),
-- gender = GENDER (the first specified or inferred gender, or nil if none),
-- is_adj = IS_ADJ (true if all segments are adjective segments, false if
-- there's at least one noun segment, nil if only raw-text segments),
-- propses = PROPSES (list of either per-word property objects or lists of
-- lists of such objects),
-- }.
-- Each element in PARSED_SEGMENTS is one of three types:
--
-- 1. A regular segment, as returned by parse_segment() but with additional
-- .prefix and .orig_prefix fields indicating the text before the segment, as per
-- the return value of parse_segment_run().
-- 2. A raw-text segment, i.e. a table with only .prefix and .orig_prefix fields
-- containing the raw text.
-- 3. An alternating segment, as returned by parse_alternant().
-- Note that each alternant is a segment run rather than a single parsed
-- segment to allow for alternants like "((rēs<5>pūblica<1>,rēspūblica<1>))".
-- The parsed segment runs in PARSED_SEGMENT_RUNS are tables as returned by
-- parse_segment_run() (of the same form as the overall return value of
-- parse_segment_run_allowing_alternants()).
local function parse_segment_run_allowing_alternants(segment_run)
if segment_run:find(" ", nil, true) then
track("has-space")
end
if segment_run:find("((", nil, true) then
track("has-alternant")
end
local alternating_segments = split(segment_run, "(%(%(.-%)%))")
local parsed_segments = {}
local loc = false
local num = nil
local gender = nil
local is_adj = nil
local propses = {}
for i = 1, #alternating_segments do
local alternating_segment = alternating_segments[i]
if alternating_segment ~= "" then
local this_is_adj
if i % 2 == 1 then
local parsed_run = parse_segment_run(alternating_segment)
for _, parsed_segment in ipairs(parsed_run.segments) do
insert(parsed_segments, parsed_segment)
end
loc = loc or parsed_run.loc
num = num or parsed_run.num
gender = gender or parsed_run.gender
this_is_adj = parsed_run.is_adj
for _, props in ipairs(parsed_run.propses) do
insert(propses, props)
end
else
local parsed_alternating_segment = parse_alternant(alternating_segment)
insert(parsed_segments, parsed_alternating_segment)
loc = loc or parsed_alternating_segment.loc
num = num or parsed_alternating_segment.num
gender = gender or parsed_alternating_segment.gender
this_is_adj = parsed_alternating_segment.is_adj
insert(propses, parsed_alternating_segment.propses)
end
if is_adj == nil then
is_adj = this_is_adj
elseif this_is_adj ~= nil then
is_adj = is_adj and this_is_adj
end
end
end
if #parsed_segments > 1 then
track("multiple-segments")
end
return {
segments = parsed_segments,
loc = loc,
num = num,
gender = gender,
is_adj = is_adj,
propses = propses,
}
end
-- Combine each form in FORMS (a list of forms associated with a slot) with each
-- form in NEW_FORMS (either a single string for a single form, or a list of
-- forms) by concatenating EXISTING_FORM .. PREFIX .. NEW_FORM. Also combine
-- NOTES (a table specifying the footnotes associated with each existing form,
-- i.e. a map from form indices to lists of footnotes) with NEW_NOTES (new
-- footnotes associated with the new forms, in the same format as NOTES). Return
-- a pair NEW_FORMS, NEW_NOTES where either or both of FORMS and NOTES (but not
-- the sublists in NOTES) may be destructively modified to generate the return
-- values.
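-- For example (a rough illustration), append_form({"reī"}, nil, {"pūblicae"},
-- nil, "") yields {"reīpūblicae"}.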
local function append_form(forms, notes, new_forms, new_notes, prefix)
if forms == nil then
return
end
new_forms = new_forms or ""
notes = notes or {}
new_notes = new_notes or {}
prefix = prefix or ""
if type(new_forms) == "table" and #new_forms == 1 then
new_forms = new_forms[1]
end
if type(new_forms) == "string" then
-- If there's only one new form, destructively modify the existing
-- forms and notes for this new form and its footnotes.
for i = 1, #forms do
forms[i] = forms[i] .. prefix .. new_forms
if new_notes[1] then
if not notes[i] then
notes[i] = new_notes[1]
else
local combined_notes = deep_copy(notes[i])
for _, note in ipairs(new_notes[1]) do
insert(combined_notes, note)
end
notes[i] = combined_notes
end
end
end
return forms, notes
else
-- If there are multiple new forms, we need to loop over all
-- combinations of new and old forms. In that case, use new tables
-- for the combined forms and notes.
local ret_forms = {}
local ret_notes = {}
for i=1, #forms do
for j=1, #new_forms do
insert(ret_forms, forms .. prefix .. new_forms)
if new_notes then
if not notes then
-- We are constructing a linearized matrix of size
-- NI x NJ where J is in the inner loop. If I and J
-- are zero-based, the linear index of (I, J) is
-- I * NJ + J. However, we are one-based, so the
-- same formula won't work. Instead, we effectively
-- need to convert to zero-based indices, compute
-- the zero-based linear index, and then convert it
-- back to a one-based index, i.e.
--
-- (I - 1) * NJ + (J - 1) + 1
--
-- i.e. (I - 1) * NJ + J.
ret_notes = new_notes
else
local combined_notes = deep_copy(notes)
for _, note in ipairs(new_notes) do
insert(combined_notes, note)
end
ret_notes = combined_notes
end
end
end
end
return ret_forms, ret_notes
end
end
-- Destructively modify any forms in FORMS (a map from a slot to a form or a
-- list of forms) by converting sequences of ae, oe, Ae or Oe to the
-- appropriate ligatures.
local function apply_ligatures(forms, is_adj)
for slot in iter_slots(is_adj) do
if type(forms) == "string" then
forms = forms:gsub("e", ligatures)
elseif type(forms) == "table" then
for i = 1, #forms do
forms = forms:gsub("e", ligatures)
end
end
end
end
-- Modify any forms in FORMS (a map from a slot to a form or a list of forms) by
-- converting final m to optional n or m.
local function apply_sufn(forms, is_adj)
for slot in iter_slots(is_adj) do
if type(forms) == "string" then
if forms:sub(-1) == "m" then
forms = {forms:gsub("m$", "n"), forms}
end
elseif type(forms) == "table" then
-- See if there are any final m's.
local final_m
for i = 1, #forms do
if forms:sub(-1) == "m" then
final_m = true
break
end
end
if final_m then
local newval = {}
for i = 1, #forms do
if forms:sub(-1) == "m" then
insert(newval, (forms:gsub("m$", "n")))
end
insert(newval, forms)
end
forms = newval
end
end
end
end
-- If NUM == "sg", copy the singular forms to the plural ones; vice-versa if
-- NUM == "pl". This should allow for the equivalent of plural
-- "alpha and omega" formed from two singular nouns, and for the equivalent of
-- plural "St. Vincent and the Grenadines" formed from a singular noun and a
-- plural noun. (These two examples actually occur in Russian, at least.)
local function propagate_number_restrictions(forms, num, is_adj)
if num == "sg" or num == "pl" then
for slot in iter_slots(is_adj) do
if slot:find(num, nil, true) then
local other_num_slot = num == "sg" and slot:gsub("sg", "pl") or slot:gsub("pl", "sg")
forms = type(forms) == "table" and deep_copy(forms) or forms
end
end
end
end
local function join_sentences(sentences, joiner)
-- Lowercase the first letter of all but the first sentence, and remove the
-- final period from all but the last sentence. Then join together with the
-- joiner (e.g. " and " or " or ").
-- FIXME: Should we join three or more as e.g. "foo, bar and baz"?
local sentences_to_join = {}
for i, sentence in ipairs(sentences) do
if i < #sentences then
sentence = sentence:gsub("%.$", "")
end
if i > 1 then
sentence = lcfirst(sentence)
end
insert(sentences_to_join, sentence)
end
return concat(sentences_to_join, joiner)
end
-- Construct the declension of a parsed segment run of the form returned by
-- parse_segment_run() or parse_segment_run_allowing_alternants(). Return value
-- is a table
-- {
-- forms = FORMS (keyed by slot, list of forms for that slot),
-- notes = NOTES (keyed by slot, map from form indices to lists of footnotes),
-- title = TITLE (list of titles for each segment in the run),
-- categories = CATEGORIES (combined categories for all segments),
-- }
local function decline_segment_run(parsed_run, pos, is_adj)
local declensions = {
-- For each possible slot (e.g. "abl_sg"), list of possible forms.
forms = {},
-- Keyed by slot (e.g. "abl_sg"). Value is a table indicating the footnotes
-- corresponding to the forms for that slot. Each such table maps indices
-- (the index of the corresponding form) to a list of one or more
-- footnotes.
notes = {},
title = {},
unattested = {},
subtitleses = {},
orig_titles = {},
categories = {},
footnotes = {},
-- May be set true if declining a 1-1 adjective
loc = false,
noneut = false,
nomf = false,
}
for slot in iter_slots(is_adj) do
declensions.forms = {""}
end
for i, seg in ipairs(parsed_run.segments) do
local decl = seg.decl
if decl then -- not an alternant, not a constant segment
seg.loc = parsed_run.loc
seg.num = seg.num or parsed_run.num
seg.gender = seg.gender or parsed_run.gender
local data, potential_lemma_slots
if seg.is_adj then
if not (m_adj_decl or get_m_adj_decl()) then
error("Unrecognized declension '" .. decl .. "'")
end
potential_lemma_slots = potential_adj_lemma_slots
data = {
subtitles = {},
num = seg.num or "",
gender = seg.gender,
loc = seg.loc,
noneut = false,
nomf = false,
pos = is_adj and pos or "adjectives",
forms = {},
types = seg.types,
unattested = {},
categories = {},
notes = {},
}
(m_adj_decl or get_m_adj_decl())(data, seg.args)
local apparent_decl = data.decl or decl
if data.loc then
declensions.loc = true
end
if data.noneut then
declensions.noneut = true
end
if data.nomf then
declensions.nomf = true
end
-- Construct title out of "original title" and subtitles.
if not data.title then
if decl == "irreg+" and apparent_decl ~= decl and #data.subtitles == 0 then
insert(data.subtitles, glossary_link("irregular"))
end
if declension_to_english then
local english = declension_to_english
data.title = "]"
elseif apparent_decl == "irreg+" then
data.title = glossary_link("irregular")
elseif apparent_decl == "indecl+" or apparent_decl == "0+" then
data.title = glossary_link("indeclinable")
else
error("Internal error! Don't recognize adjective declension " .. apparent_decl)
end
data.title = data.title .. " " .. singularize(data.pos)
end
if data.types.sufn then
insert(data.subtitles, {"with", " ''m'' optionally → ''n'' in compounds"})
elseif data.types.not_sufn then
insert(data.subtitles, {"without", " ''m'' optionally → ''n'' in compounds"})
end
-- Record original title and subtitles for use in alternant title-constructing code.
insert(declensions.orig_titles, data.title)
if #data.subtitles > 0 then
local subtitles = {}
for _, subtitle in ipairs(data.subtitles) do
if type(subtitle) == "table" then
-- Occurs e.g. with ''idem'', ''quīdam''
insert(subtitles, concat(subtitle))
else
insert(subtitles, subtitle)
end
end
data.title = data.title .. " (" .. concat(subtitles, ", ") .. ")"
end
insert(declensions.subtitleses, data.subtitles)
else
if not (m_noun_decl or get_m_noun_decl()) then
error("Unrecognized declension '" .. decl .. "'")
end
potential_lemma_slots = potential_noun_lemma_slots
data = {
subtitles = {},
num = seg.num or "",
loc = seg.loc,
pos = pos,
forms = {},
types = seg.types,
unattested = {},
categories = {},
notes = {},
}
(m_noun_decl or get_m_noun_decl())(data, seg.args)
local apparent_decl = data.decl or decl
parsed_run.propses.headword_decl = apparent_decl
-- Construct title out of "original title" and subtitles.
if not data.title then
if decl == "irreg" and apparent_decl ~= decl and #data.subtitles == 0 then
insert(data.subtitles, glossary_link("irregular"))
end
if declension_to_english then
local english = declension_to_english
data.title = "]"
elseif apparent_decl == "irreg" then
data.title = glossary_link("irregular")
elseif apparent_decl == "indecl" or apparent_decl == "0" or apparent_decl == "sgpl" then
data.title = glossary_link("indeclinable")
else
error("Internal error! Don't recognize noun declension " .. apparent_decl)
end
data.title = data.title .. " " .. singularize(data.pos)
end
if data.types.sufn then
insert(data.subtitles, {"with", " ''m'' optionally → ''n'' in compounds"})
elseif data.types.not_sufn then
insert(data.subtitles, {"without", " ''m'' optionally → ''n'' in compounds"})
end
-- Record original title and subtitles for use in alternant title-constructing code.
insert(declensions.orig_titles, data.title)
if #data.subtitles > 0 then
local subtitles = {}
for _, subtitle in ipairs(data.subtitles) do
if type(subtitle) == "table" then
-- Occurs e.g. with 1st-declension ''-ābus'' ending where
-- we want a common prefix to be extracted out if possible
-- in the alternant title-generating code.
insert(subtitles, concat(subtitle))
else
insert(subtitles, subtitle)
end
end
data.title = data.title .. " (" .. concat(subtitles, ", ") .. ")"
end
insert(declensions.subtitleses, data.subtitles)
end
-- Generate linked variants of slots that may be the lemma.
-- If the form is the same as the lemma (with links removed),
-- substitute the original lemma (with links included).
for _, slot in ipairs(potential_lemma_slots) do
local forms = data.forms
if forms then
local linked_forms = {}
if type(forms) ~= "table" then
forms = {forms}
end
for _, form in ipairs(forms) do
if form == seg.lemma then
insert(linked_forms, seg.orig_lemma)
else
insert(linked_forms, form)
end
end
data.forms = linked_forms
end
end
if seg.types.lig then
apply_ligatures(data.forms, is_adj)
end
if seg.types.sufn then
apply_sufn(data.forms, is_adj)
end
propagate_number_restrictions(data.forms, seg.num, is_adj)
for slot in iter_slots(is_adj) do
-- 1. Select the forms to append to the existing ones.
local new_forms
if is_adj then
if not seg.is_adj then
error("Can't decline noun '" .. seg.lemma .. "' when overall term is an adjective")
end
new_forms = data.forms
if not new_forms and slot:find("_$") then
new_forms = data.forms$", "_m")]
end
elseif seg.is_adj then
if not seg.gender then
error("Declining modifying adjective " .. seg.lemma .. " but don't know gender of associated noun")
end
-- Select the appropriately gendered equivalent of the case/number
-- combination. Some adjectives won't have feminine or neuter
-- variants, though (e.g. 3-1 and 3-2 adjectives don't have a
-- distinct feminine), so in that case select the masculine.
new_forms = data.forms
or data.forms
else
new_forms = data.forms
end
-- 2. Extract the new footnotes in the format we require, which is
-- different from the format passed in by the declension functions.
local new_notes = {}
if type(new_forms) == "string" and data.notes then
new_notes = {data.notes}
elseif new_forms then
for j = 1, #new_forms do
if data.notes then
new_notes = {data.notes}
end
end
end
-- 3. Append new forms and footnotes to the existing ones.
new_forms = normalize_form(new_forms)
if new_forms == nil then
declensions.forms = nil
declensions.notes = nil
else
declensions.forms, declensions.notes = append_form(
declensions.forms, declensions.notes, new_forms,
new_notes, slot:find("linked", nil, true) and seg.orig_prefix or seg.prefix)
end
end
for slot, v in pairs(data.unattested) do
if v then
declensions.unattested = true
end
end
if not seg.types.nocat and (is_adj or not seg.is_adj) then
for _, cat in ipairs(data.categories) do
insert_if_not(declensions.categories, cat)
end
end
if data.footnote then
insert(declensions.footnotes, data.footnote)
end
if seg.prefix ~= "" and seg.prefix ~= "-" and seg.prefix ~= " " then
insert(declensions.title, glossary_link("indeclinable") .. " portion")
end
insert(declensions.title, data.title)
elseif seg.alternants then
local seg_declensions = nil
local seg_titles = {}
local seg_subtitleses = {}
local seg_stems_seen = {}
local seg_unattested = {}
local seg_categories = {}
local seg_footnotes = {}
-- If all alternants have exactly one non-constant segment and all are
-- of the same declension, we use special code that displays the
-- differences in the subtitles. Otherwise we use more general code
-- that displays the full title and subtitles of each segment,
-- separating segment combined titles by "and" and the segment-run
-- combined titles by "or".
local title_the_hard_way = false
local alternant_decl = nil
local alternant_decl_title = nil
for _, this_parsed_run in ipairs(seg.alternants) do
local num_non_constant_segments = 0
for _, segment in ipairs(this_parsed_run.segments) do
if segment.decl then
if not alternant_decl then
alternant_decl = segment.decl
elseif alternant_decl ~= segment.decl then
title_the_hard_way = true
num_non_constant_segments = 500
break
end
num_non_constant_segments = num_non_constant_segments + 1
end
end
if num_non_constant_segments ~= 1 then
title_the_hard_way = true
break
end
end
if not title_the_hard_way then
-- If using the special-purpose code, find the subtypes that are
-- not present in a given alternant but are present in at least
-- one other, and record "negative" variants of these subtypes
-- so that the declension-construction code can record subtitles
-- for these negative variants (so we can construct text like
-- "i-stem or imparisyllabic non-i-stem").
local subtypeses = {}
for _, this_parsed_run in ipairs(seg.alternants) do
for _, segment in ipairs(this_parsed_run.segments) do
if segment.decl then
insert(subtypeses, segment.types)
insert_if_not(seg_stems_seen, segment.stem2)
end
end
end
local union = set_union(subtypeses)
for _, this_parsed_run in ipairs(seg.alternants) do
for _, segment in ipairs(this_parsed_run.segments) do
if segment.decl then
local neg_subtypes = set_difference(union, segment.types)
for neg_subtype, _ in pairs(neg_subtypes) do
segment.types = true
end
end
end
end
end
for _, this_parsed_run in ipairs(seg.alternants) do
this_parsed_run.loc = seg.loc
this_parsed_run.num = this_parsed_run.num or seg.num
this_parsed_run.gender = this_parsed_run.gender or seg.gender
local this_declensions = decline_segment_run(this_parsed_run, pos, is_adj)
if this_declensions.noneut then
declensions.noneut = true
end
if this_declensions.nomf then
declensions.nomf = true
end
-- If there's a number restriction on the segment run, blank
-- out the forms outside the restriction. This allows us to
-- e.g. construct heteroclites that decline one way in the
-- singular and a different way in the plural.
if this_parsed_run.num == "sg" or this_parsed_run.num == "pl" then
for slot in iter_slots(is_adj) do
if this_parsed_run.num == "sg" and slot:find("pl", nil, true) or
this_parsed_run.num == "pl" and slot:find("sg", nil, true) then
this_declensions.forms = {}
this_declensions.notes = nil
end
end
end
if not seg_declensions then
seg_declensions = this_declensions
else
for slot in iter_slots(is_adj) do
-- For a given slot, combine the existing and new forms.
-- We do this by checking to see whether a new form is
-- already present and not adding it if so; in the
-- process, we keep a map from indices in the new forms
-- to indices in the combined forms, for use in
-- combining footnotes below.
local curforms = seg_declensions.forms or {}
local newforms = this_declensions.forms or {}
local newform_index_to_new_index = {}
for newj, form in ipairs(newforms) do
local did_break = false
for j = 1, #curforms do
if curforms == form then
newform_index_to_new_index = j
did_break = true
break
end
end
if not did_break then
insert(curforms, form)
newform_index_to_new_index = #curforms
end
end
seg_declensions.forms = curforms
-- Now combine the footnotes. Keep in mind that
-- each form may have its own set of footnotes, and
-- in some cases we didn't add a form from the new
-- list of forms because it already occurred in the
-- existing list of forms; in that case, we combine
-- footnotes from the two sources.
local curnotes = seg_declensions.notes
local newnotes = this_declensions.notes
if newnotes then
if not curnotes then
curnotes = {}
end
for index, notes in pairs(newnotes) do
local combined_index = newform_index_to_new_index
if not curnotes then
curnotes = notes
else
local combined = mw.clone(curnotes)
for _, note in ipairs(newnotes) do
insert_if_not(combined, note)
end
curnotes = combined
end
end
end
end
end
for slot, v in pairs(this_declensions.unattested) do
if v then
seg_unattested = true
end
end
for _, cat in ipairs(this_declensions.categories) do
insert_if_not(seg_categories, cat)
end
for _, footnote in ipairs(this_declensions.footnotes) do
insert_if_not(seg_footnotes, footnote)
end
insert_if_not(seg_titles, this_declensions.title)
for _, subtitles in ipairs(this_declensions.subtitleses) do
insert(seg_subtitleses, subtitles)
end
if not alternant_decl_title then
alternant_decl_title = this_declensions.orig_titles
end
end
-- If overall run is singular, copy singular to plural, and
-- vice-versa. See propagate_number_restrictions() for rationale;
-- also, this should eliminate cases of empty forms, which will
-- cause the overall set of forms for that slot to be empty.
propagate_number_restrictions(seg_declensions.forms, parsed_run.num,
is_adj)
for slot in iter_slots(is_adj) do
local new_forms = normalize_form(seg_declensions.forms)
if new_forms == nil then
declensions.forms = nil
declensions.notes = nil
else
declensions.forms, declensions.notes = append_form(
declensions.forms, declensions.notes,
new_forms, seg_declensions.notes, nil)
end
end
for slot, v in pairs(seg_unattested) do
if v then
declensions.unattested = true
end
end
if is_adj or not seg.is_adj then
for _, cat in ipairs(seg_categories) do
insert_if_not(declensions.categories, cat)
end
end
for _, footnote in ipairs(seg_footnotes) do
insert_if_not(declensions.footnotes, footnote)
end
local title_to_insert
if title_the_hard_way then
title_to_insert = join_sentences(seg_titles, " or ")
else
-- Special-purpose title-generation code, for the common
-- situation where each alternant has single-segment runs and
-- all segments belong to the same declension.
--
-- 1. Find the initial subtitles common to all segments.
local first_subtitles = seg_subtitleses
local num_common_subtitles = #first_subtitles
for j = 2, #seg_subtitleses do
local this_subtitles = seg_subtitleses
for k = 1, num_common_subtitles do
if not deep_equals(first_subtitles, this_subtitles) then
num_common_subtitles = k - 1
break
end
end
end
-- 2. Construct the portion of the text based on the common subtitles.
local common_subtitles = {}
for j = 1, num_common_subtitles do
if type(first_subtitles) == "table" then
insert(common_subtitles, concat(first_subtitles))
else
insert(common_subtitles, first_subtitles)
end
end
local common_subtitle_portion = concat(common_subtitles, ", ")
local non_common_subtitle_portion
-- 3. Special-case the situation where there's one non-common
-- subtitle in each segment and a common prefix or suffix to
-- all of them.
local common_prefix, common_suffix
for j = 1, #seg_subtitleses do
local this_subtitles = seg_subtitleses
if #this_subtitles ~= num_common_subtitles + 1 or
type(this_subtitles) ~= "table" or
#this_subtitles ~= 2 then
break
end
if j == 1 then
common_prefix = this_subtitles
common_suffix = this_subtitles
else
local this_prefix = this_subtitles
local this_suffix = this_subtitles
if this_prefix ~= common_prefix then
common_prefix = nil
end
if this_suffix ~= common_suffix then
common_suffix = nil
end
if not common_prefix and not common_suffix then
break
end
end
end
if common_prefix or common_suffix then
if common_prefix and common_suffix then
error("Something is wrong, first non-common subtitle is actually common to all segments")
end
if common_prefix then
local non_common_parts = {}
for j = 1, #seg_subtitleses do
insert(non_common_parts, seg_subtitleses)
end
non_common_subtitle_portion = common_prefix .. concat(non_common_parts, " or ")
else
local non_common_parts = {}
for j = 1, #seg_subtitleses do
insert(non_common_parts, seg_subtitleses)
end
non_common_subtitle_portion = concat(non_common_parts, " or ") .. common_suffix
end
else
-- 4. Join the subtitles that differ from segment to segment.
-- Record whether there are any such differing subtitles.
-- If some segments have differing subtitles and others don't,
-- we use the text "otherwise" for the segments without
-- differing subtitles.
local saw_non_common_subtitles = false
local non_common_subtitles = {}
for j = 1, #seg_subtitleses do
local this_subtitles = seg_subtitleses
local this_non_common_subtitles = {}
for k = num_common_subtitles + 1, #this_subtitles do
if type(this_subtitles) == "table" then
insert(this_non_common_subtitles, concat(this_subtitles))
else
insert(this_non_common_subtitles, this_subtitles)
end
end
if #this_non_common_subtitles > 0 then
insert(non_common_subtitles, concat(this_non_common_subtitles, ", "))
saw_non_common_subtitles = true
else
insert(non_common_subtitles, "otherwise")
end
end
non_common_subtitle_portion =
saw_non_common_subtitles and concat(non_common_subtitles, " or ") or ""
end
-- 5. Combine the common and non-common subtitle portions.
local subtitle_portions = {}
if common_subtitle_portion ~= "" then
insert(subtitle_portions, common_subtitle_portion)
end
if non_common_subtitle_portion ~= "" then
insert(subtitle_portions, non_common_subtitle_portion)
end
if #seg_stems_seen > 1 then
insert(subtitle_portions,
(number_to_english or "" .. #seg_stems_seen) .. " different stems"
)
end
local subtitle_portion = concat(subtitle_portions, "; ")
if subtitle_portion ~= "" then
title_to_insert = alternant_decl_title .. " (" .. subtitle_portion .. ")"
else
title_to_insert = alternant_decl_title
end
end
-- Don't insert blank title (happens e.g. with "((ali))quis<irreg+>").
if title_to_insert ~= "" then
insert(declensions.title, title_to_insert)
end
else
for slot in iter_slots(is_adj) do
declensions.forms, declensions.notes = append_form(
declensions.forms, declensions.notes,
slot:find("linked", nil, true) and seg.orig_prefix or seg.prefix)
end
insert(declensions.title, glossary_link("indeclinable") .. " portion")
end
end
-- First title is uppercase, remainder have an indefinite article, joined
-- using "with".
local titles = {}
for i, title in ipairs(declensions.title) do
if i == 1 then
insert(titles, ucfirst(title))
else
insert(titles, add_indefinite_article(title))
end
end
declensions.title = concat(titles, " with ")
return declensions
end
local function construct_title(args_title, declensions_title, generate_type, parsed_run)
if args_title then
declensions_title = args_title:gsub("<1>", "]")
declensions_title = declensions_title:gsub("<1&2>", "]/]")
declensions_title = declensions_title:gsub("<2>", "]")
declensions_title = declensions_title:gsub("<3>", "]")
declensions_title = declensions_title:gsub("<4>", "]")
declensions_title = declensions_title:gsub("<5>", "]")
if generate_type == "headword" then
declensions_title = lcfirst((declensions_title:gsub("%.$", "")))
else
declensions_title = ucfirst(declensions_title)
end
else
local post_text_parts = {}
if parsed_run.loc then
insert(post_text_parts, ", with locative")
end
if parsed_run.num == "sg" then
insert(post_text_parts, ", singular only")
elseif parsed_run.num == "pl" then
insert(post_text_parts, ", plural only")
end
local post_text = concat(post_text_parts)
if generate_type == "headword" then
declensions_title = lcfirst(declensions_title) .. post_text
else
declensions_title = ucfirst(declensions_title) .. post_text .. "."
end
end
return declensions_title
end
function export.do_generate_noun_forms(parent_args, pos, generate_type, def)
local params = {
= {required = true, default = def or "aqua<1>"},
footnote = true,
title = true,
num = true,
json = {type = "boolean"},
}
for slot in iter_noun_slots() do
params = true
end
if generate_type == "headword" then
local list = {list = true}
local sublist = {sublist = "/"}
params.lemma = list
params.id = true
params.cat = list
params.m = sublist
params.f = sublist
params.g = list
params.indecl = {type = "boolean"}
end
if pos == "numerals" then
params = true
end
local args = process_params(parent_args, params)
if args.title then
track("overriding-title")
end
local parsed_run = parse_segment_run_allowing_alternants(args)
parsed_run.loc = parsed_run.loc or not not (args.loc_sg or args.loc_pl)
parsed_run.num = args.num or parsed_run.num
local declensions = decline_segment_run(parsed_run, pos, false)
if not parsed_run.loc then
declensions.forms.loc_sg = nil
declensions.forms.loc_pl = nil
end
declensions.title = construct_title(args.title, declensions.title, generate_type, parsed_run)
local all_data = {
title = declensions.title,
footnotes = {},
num = parsed_run.num or "",
gender = parsed_run.gender,
propses = parsed_run.propses,
forms = declensions.forms,
unattested = declensions.unattested,
categories = declensions.categories,
notes = {},
user_specified = {},
overriding_lemma = args.lemma,
id = args.id,
pos = pos,
cat = args.cat,
indecl = args.indecl,
m = args.m,
f = args.f,
overriding_genders = args.g,
num_type = args,
}
if generate_type ~= "bare" then
all_data.accel = {}
end
if args.footnote then
insert_if_not(all_data.footnotes, args.footnote)
end
for _, footnote in ipairs(declensions.footnotes) do
insert_if_not(all_data.footnotes, footnote)
end
for slot in iter_noun_slots() do
if declensions.notes then
for index, notes in pairs(declensions.notes) do
all_data.notes = notes
end
end
end
process_noun_forms_and_overrides(all_data, args, generate_type)
if args.json then
return require(json_module).toJSON(all_data)
end
return all_data
end
function export.do_generate_adj_forms(parent_args, pos, generate_type, degree, def)
local boolean = {type = "boolean"}
local params = {
= {required = true, default = def or "bonus"},
footnote = true,
title = true,
num = true,
noneut = boolean,
nomf = boolean,
json = boolean,
}
for slot in iter_adj_slots() do
params = true
end
if generate_type == "headword" then
local list = {list = true}
local sublist = {sublist = "/"}
params.lemma = list
params.adv = sublist
params.id = true
params.cat = list
params.indecl = boolean
if degree == "comparative" or degree == "superlative" then
params.positive = sublist
end
if degree ~= "comparative" then
params.comp = sublist
end
if degree ~= "superlative" then
params.sup = sublist
end
end
if pos == "numerals" then
params = true
end
local args = process_params(parent_args, params)
if args.title then
track("overriding-title")
end
local segment_run = args
if not segment_run:match("") then
-- If the segment run doesn't have any explicit declension specs or alternants,
-- add a default declension spec of <+> to it (or <0+> for indeclinable
-- adjectives). This allows the majority of adjectives to just specify
-- the lemma.
segment_run = segment_run .. (args.indecl and "<0+>" or "<+>")
end
local parsed_run = parse_segment_run_allowing_alternants(segment_run)
parsed_run.loc = parsed_run.loc or not not (
args.loc_sg_m or args.loc_sg_f or args.loc_sg_n or args.loc_pl_m or args.loc_pl_f or args.loc_pl_n
)
parsed_run.num = args.num or parsed_run.num
local declensions = decline_segment_run(parsed_run, pos, true)
if not parsed_run.loc then
declensions.forms.loc_sg_m = nil
declensions.forms.loc_sg_f = nil
declensions.forms.loc_sg_n = nil
declensions.forms.loc_pl_m = nil
declensions.forms.loc_pl_f = nil
declensions.forms.loc_pl_n = nil
end
declensions.title = construct_title(args.title, declensions.title, generate_type, parsed_run)
local all_data = {
title = declensions.title,
footnotes = {},
num = parsed_run.num or "",
propses = parsed_run.propses,
forms = declensions.forms,
unattested = declensions.unattested,
categories = declensions.categories,
notes = {},
user_specified = {},
accel = {},
loc = declensions.loc,
noneut = args.noneut or declensions.noneut,
nomf = args.nomf or declensions.nomf,
overriding_lemma = args.lemma,
positive = args.positive,
comp = args.comp,
sup = args.sup,
adv = args.adv,
id = args.id,
pos = pos,
cat = args.cat,
indecl = args.indecl,
num_type = args,
}
if generate_type ~= "bare" then
all_data.accel = {}
end
if args.footnote then
insert_if_not(all_data.footnotes, args.footnote)
end
for _, footnote in ipairs(declensions.footnotes) do
insert_if_not(all_data.footnotes, footnote)
end
for slot in iter_adj_slots() do
if declensions.notes then
for index, notes in pairs(declensions.notes) do
all_data.notes = notes
end
end
end
process_adj_forms_and_overrides(all_data, args, generate_type)
if args.json then
return require(json_module).toJSON(all_data)
end
return all_data
end
function export.show_noun(frame)
local parent_args = frame:getParent().args
local data = export.do_generate_noun_forms(parent_args, "nouns")
if type(data) == "string" then -- JSON
return data
end
show_forms(data, false)
local num = data.num
if num == "sg" then
return make_noun_table_sg(data)
elseif num == "pl" then
return make_noun_table_pl(data)
end
return make_noun_table(data)
end
function export.show_adj(frame)
local parent_args = frame:getParent().args
local data = export.do_generate_adj_forms(parent_args, "adjectives")
if type(data) == "string" then -- JSON
return data
end
partial_show_forms(data, true)
return make_adj_table(data)
end
return export