--[==[
Module for working with Latin text.

Functions:

make_stem2(stem)
: Return the third-declension stem based on the nominative singular.

See also: (the list of related modules is missing from this copy of the file)
]==]
local export = {}
local debug_track_module = "Module:debug/track"
local string_utilities_module = "Module:string utilities"
local table_module = "Module:table"
local gsub = string.gsub
local ipairs = ipairs
local join -- defined below
local match = string.match
local normalize_form -- defined below
local pairs = pairs
local remove = table.remove
local require = require
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
local type = type
local u = mw.ustring.char
local umatch = mw.ustring.match
-- Combining macron (U+0304); appears on its own code point once text has been
-- passed through toNFD.
local MACRON = u(0x304)
-- Pattern fragment matching a single vowel letter. It is only applied to
-- NFD-decomposed text, so accented vowels begin with one of these base letters.
-- NOTE(review): this value was empty in the reviewed copy (the bracketed
-- character class appears to have been stripped), which made "^" .. VOWEL match
-- every string. The class below is a reconstruction -- confirm the exact
-- letter set against the upstream revision.
local VOWEL = "[AEIOUYaeiouy]"
-- Lazy loader: the first call fetches `contains` from the table module,
-- rebinds this local to it, and forwards the arguments.
local function contains(...)
	local impl = require(table_module).contains
	contains = impl
	return impl(...)
end
-- Lazy loader: the first call loads the debug-tracking module (whose return
-- value is called directly), rebinds this local to it, and forwards the
-- arguments.
local function debug_track(...)
	local impl = require(debug_track_module)
	debug_track = impl
	return impl(...)
end
-- Lazy loader: the first call fetches `decode_entities` from the string
-- utilities module, rebinds this local to it, and forwards the arguments.
local function decode_entities(...)
	local impl = require(string_utilities_module).decode_entities
	decode_entities = impl
	return impl(...)
end
-- Lazy loader: the first call fetches `deepEquals` from the table module,
-- rebinds this local to it, and forwards the arguments.
local function deep_equals(...)
	local impl = require(table_module).deepEquals
	deep_equals = impl
	return impl(...)
end
-- Lazy loader: the first call fetches `insertIfNot` from the table module,
-- rebinds this local to it, and forwards the arguments.
local function insert_if_not(...)
	local impl = require(table_module).insertIfNot
	insert_if_not = impl
	return impl(...)
end
-- Lazy loader: the first call fetches `length` from the table module, rebinds
-- this local to it, and forwards the arguments.
local function table_len(...)
	local impl = require(table_module).length
	table_len = impl
	return impl(...)
end
-- Lazy loader: the first call fetches `trim` from the string utilities module,
-- rebinds this local to it, and forwards the arguments.
local function trim(...)
	local impl = require(string_utilities_module).trim
	trim = impl
	return impl(...)
end
-- Lazy loader: the first call fetches `gsub` from the string utilities module,
-- rebinds this local to it, and forwards the arguments.
local function ugsub(...)
	local impl = require(string_utilities_module).gsub
	ugsub = impl
	return impl(...)
end
-- Maps a case abbreviation to the full case name.
-- NOTE(review): the keys were missing in the reviewed copy (each entry read
-- `= "nominative"` etc., which is a syntax error). They are reconstructed here
-- as the conventional three-letter abbreviations -- confirm against the
-- upstream revision and against callers that index this table.
export.cases = {
	nom = "nominative",
	gen = "genitive",
	dat = "dative",
	acc = "accusative",
	abl = "ablative",
	voc = "vocative",
	loc = "locative",
}
-- Replacement table for gsub: maps consonantal "j"/"v" (either case) to the
-- corresponding vowel letter "i"/"u".
-- NOTE(review): the keys were missing in the reviewed copy (a syntax error);
-- they are restored from the values and from the comment in export.join that
-- describes converting "j" or "v" to "i" or "u".
local cons_to_vowel = {
	j = "i", J = "I",
	v = "u", V = "U",
}
-- Wraps `text` in wiki-link brackets unless it already contains a [[...]] link.
-- Leading and trailing whitespace is kept outside the brackets.
-- NOTE(review): both string literals had lost their bracket characters in the
-- reviewed copy (the replacement read "%1]%3", discarding the captured text).
-- They are restored to the evident intent; confirm the exact detection pattern
-- against the upstream revision.
local function link_if_unlinked(text)
	if match(text, "%[%[.-%]%]") then
		return text
	end
	return ugsub(text, "^(%s*)(.-)(%s*)$", "%1[[%2]]%3")
end
-- Joins two pieces of a Latin form (typically stem and ending).
--
-- Both parts are decomposed with toNFD first and the result is recomposed with
-- toNFC. Returns the concatenated string.
function export.join(a, b)
	a, b = toNFD(a), toNFD(b)
	-- If the first part ends in "j" or "v", convert it to "i" or "u" unless
	-- the second part begins with a vowel. The character class restricts the
	-- match to a final j/v so that the table lookup in cons_to_vowel succeeds;
	-- a bare "$" pattern only matches the empty string and converts nothing.
	if not umatch(b, "^" .. VOWEL) then
		a = gsub(a, "[JVjv]$", cons_to_vowel)
	end
	-- If there is a space between the two forms, link both parts separately.
	if umatch(a, "%s$") or umatch(b, "^%s") then
		a, b = link_if_unlinked(a), link_if_unlinked(b)
	end
	return toNFC(a .. b)
end
join = export.join
-- Returns true if `form` is nil/false or is one of the "cancelled" markers
-- (the empty string or one of several dash characters).
--
-- The set of markers is built on the first call; the function then replaces
-- itself with a closure over that set so the set is only constructed once.
local function normalized_form_is_empty(form)
	local cancelled_forms = require(table_module).listToSet{"", "-", "–", "—", "―", "⸺", "⸻"}
	function normalized_form_is_empty(form)
		-- Look the form up in the set; testing the set table itself would
		-- always be truthy and make every form count as empty.
		return not (form and not cancelled_forms[form])
	end
	return normalized_form_is_empty(form)
end
-- Normalizes a form value.
--
-- * nil is returned unchanged.
-- * An empty/cancelled value becomes "-".
-- * A string is entity-decoded and trimmed; if the result is empty, "-" is
--   returned.
-- * Any other non-table value is returned as-is.
-- * A table is treated as a list and is modified in place: each element is
--   normalized recursively, and empty or duplicate elements are removed. The
--   result is "-" if nothing remains, the sole element if exactly one remains,
--   and otherwise the (mutated) table itself.
function export.normalize_form(form)
	if form == nil then
		return nil
	elseif normalized_form_is_empty(form) then
		return "-"
	end
	local form_type = type(form)
	if form_type ~= "table" then
		if form_type == "string" then
			form = trim(decode_entities(form))
		end
		return normalized_form_is_empty(form) and "-" or form
	end
	-- Walk the list by index rather than with ipairs, since elements are
	-- removed while iterating; `i` only advances when an element is kept.
	local i, subform = 1, form[1]
	while subform ~= nil do
		subform = normalize_form(subform)
		if normalized_form_is_empty(subform) then
			remove(form, i)
		else
			-- Check against all earlier subforms, in case it's a duplicate.
			-- Use a loop rather than a table lookup, as it's more efficient
			-- when the number of subforms is less than 5, which is almost
			-- always the case.
			local duplicate = false
			for j = 1, i - 1 do
				if deep_equals(subform, form[j]) then
					duplicate = true
					remove(form, i)
					break
				end
			end
			if not duplicate then
				form[i] = subform
				i = i + 1
			end
		end
		subform = form[i]
	end
	local form_len = i - 1
	if form_len == 0 then
		return "-"
	elseif form_len == 1 then
		-- A one-element list is collapsed to its only element.
		return form[1]
	end
	return form
end
normalize_form = export.normalize_form
-- Reports whether `form`, once normalized, counts as empty or cancelled.
function export.form_is_empty(form)
	local normalized = normalize_form(form)
	return normalized_form_is_empty(normalized)
end
-- Compares two forms for equality after normalization. A form may be either a
-- string (a single form) or a list of forms; a string is treated as equivalent
-- to a one-element list.
function export.forms_equal(form1, form2)
	local first = normalize_form(form1)
	local second = normalize_form(form2)
	return deep_equals(first, second)
end
-- Reports whether the normalized `form` contains the normalized `str`.
-- An empty form contains nothing; a non-list form is compared directly.
function export.form_contains(form, str)
	local haystack = normalize_form(form)
	if normalized_form_is_empty(haystack) then
		return false
	end
	local needle = normalize_form(str)
	if type(haystack) == "table" then
		return contains(haystack, needle)
	end
	return deep_equals(haystack, needle)
end
-- Add a value to a given form key, e.g. "1s_pres_actv_indc". If the value is
-- already present in the key, it won't be added again.
--
-- The value is formed by concatenating `stem` and `suf`. `suf` can be a list,
-- in which case `stem` will be concatenated in turn to each value in the list
-- and all the resulting forms added to the key.
--
-- `pos` is the position to insert the form(s) at; default is at the end. To
-- insert at the beginning, specify 1 for `pos`.
do
	-- Options table handed to insert_if_not; reused across calls, with only
	-- its `pos` field rewritten each time.
	local options = {}

	-- Adds one form (stem joined with suf, or stem alone when suf is nil) to
	-- forms[slot]. Returns the position at which the next form should be
	-- inserted, or nil to append.
	local function _add_form(forms, slot, stem, suf, pos)
		local curr_form = normalize_form(forms[slot])
		local new_form = normalize_form(suf == nil and stem or join(stem, suf))
		if normalized_form_is_empty(curr_form) then
			-- Nothing (or only a cancelled marker) in the slot yet.
			forms[slot] = new_form
			return
		elseif deep_equals(curr_form, new_form) then
			-- Already present; store the normalized value back.
			forms[slot] = curr_form
			return pos
		elseif type(curr_form) ~= "table" then
			curr_form = {curr_form}
		end
		options.pos = pos
		local success = insert_if_not(curr_form, new_form, options)
		forms[slot] = curr_form
		-- Advance the insertion point only if something was inserted.
		return pos ~= nil and success and pos + 1 or pos
	end

	-- Adds stem combined with each suffix (or the single suffix) to the slot.
	local function add_stem(forms, slot, stem, suf, pos)
		if suf and type(suf) == "table" then
			for _, s in ipairs(suf) do
				pos = _add_form(forms, slot, stem, s, pos)
			end
			return pos
		else
			return _add_form(forms, slot, stem, suf, pos)
		end
	end

	-- Adds every stem/suffix combination to a single slot.
	local function add_slot(forms, slot, stem, suf, pos)
		-- Bound `pos` between 1 and the current number of forms + 1.
		if pos then
			local form = forms[slot]
			if not form then
				pos = nil
			elseif pos <= 1 then
				pos = 1
			elseif not (type(form) == "table" and pos <= table_len(form)) then
				pos = nil
			end
		end
		if type(stem) == "table" then
			for _, s in ipairs(stem) do
				pos = add_stem(forms, slot, s, suf, pos)
			end
		else
			add_stem(forms, slot, stem, suf, pos)
		end
	end

	--[==[
	Adds one or more forms to `forms`, which is a table of inflections:
	* `slot` is the specifier for the form (e.g. "gen_sg"). This does not need to exist before this function is called.
	* `stem` is the stem for the form. If `suf` is supplied, `stem` and `suf` will be combined using `export.join()` (which accounts for i/j and u/v alternation), and added to the relevant slot(s). If `suf` is not supplied, `stem` will be taken as the form.
	* Any or all of `slot`, `stem` and `suf` can optionally be given as a list, in which case all stem/suffix combinations will be generated, which will be added to each of the listed slots. This is useful when adding multiple forms to the same slot, or when the forms are identical between slots (or both).
	* If `pos` is supplied, the form(s) will be added at the specified position in each slot specified.
	Note that this function automatically handles duplicates, forms as strings, forms as lists, and cancelled forms (specified with "-").]==]
	function export.add_form(forms, slot, stem, suf, pos)
		if type(slot) == "table" then
			for _, s in ipairs(slot) do
				-- Pass the caller's `pos` to every slot. add_slot returns
				-- nothing, so assigning its result back to `pos` would make
				-- every slot after the first ignore the requested position.
				add_slot(forms, s, stem, suf, pos)
			end
		else
			add_slot(forms, slot, stem, suf, pos)
		end
	end
end
do
	-- Forward declaration: check_exceptions and check_keytypes call each other.
	local check_keytypes

	-- Returns true if `slot` matches any pattern in `exceptions`. Otherwise,
	-- when `keytypes` is given, goes on to test `slot` against `keytypes`
	-- (removing it from `forms` on a match) and returns nothing.
	local function check_exceptions(slot, forms, keytypes, exceptions)
		for _, keytype in ipairs(exceptions) do
			if match(slot, keytype) then
				return true
			end
		end
		if keytypes then
			check_keytypes(slot, forms, keytypes)
		end
	end

	-- Removes forms[slot] if `slot` matches any pattern in `keytypes` and,
	-- when `exceptions` is given, matches none of the patterns in it.
	function check_keytypes(slot, forms, keytypes, exceptions)
		for _, keytype in ipairs(keytypes) do
			if match(slot, keytype) and not (exceptions and check_exceptions(slot, forms, nil, exceptions)) then
				-- Delete the entry from the table; assigning nil to the
				-- `forms` parameter itself would leave the table untouched.
				forms[slot] = nil
				return
			end
		end
	end

	-- Remove all forms with a key matching any of the keys in the list
	-- `keytypes`, unless they match any keytypes listed in `exceptions`.
	-- Entries of both lists are used as patterns for string.match.
	function export.remove_forms(forms, keytypes, exceptions)
		-- Check the shorter list first.
		local func = (exceptions == nil or #exceptions >= #keytypes) and check_keytypes or check_exceptions
		-- Clearing existing keys while traversing with pairs is permitted.
		for slot in pairs(forms) do
			func(slot, forms, keytypes, exceptions)
		end
	end
end
-- Ordered list of {key, replacement} pairs consumed by export.make_stem2.
-- Each key is anchored at the end of the input with "$"; entries are tried in
-- order and the first one that produces a substitution wins, so the order of
-- this list matters. A replacement is either a replacement string or a
-- function receiving the capture.
--
-- NOTE(review): several keys in this copy look damaged -- e.g. "()()" consists
-- only of empty captures, one key is the empty string, and several short keys
-- ("s", "e", "a") appear more general than their position in the list
-- suggests. Bracketed character classes were presumably stripped from the
-- text. The entries are left exactly as found; restore them from the upstream
-- revision before relying on this table.
local patterns = {
{"a", "%0t"},
{"e", ""},
{"()()", "%1" .. MACRON .. "%2"},
{"l", "%0l"},
{"()en", "%1in"},
{"(ūd)ō", "%1in"},
{"()ō", "%1in"},
{"", "%1n"},
{"er", "r"},
{"s", ""},
-- Function replacement: decompose the capture, strip any macrons from it,
-- then append "t". The outer parentheses discard gsub's second return value.
{"(n)s", function(v)
return (gsub(toNFD(v), MACRON, "") .. "t")
end},
{"()eps", "%1ipit"},
{"()s", "%1"},
{"us", "or"},
{"s", "t"},
{"ex", "ic"},
{"x", "c"},
}
-- Derives a second stem from `stem` by applying the first entry of `patterns`
-- whose key matches at the end of the string.
--
-- On a match, the result is returned NFC-normalized and the matching key is
-- recorded via debug_track. If nothing matches, `stem` is returned unchanged
-- (and not re-normalized).
function export.make_stem2(stem)
	local n
	for _, pattern in ipairs(patterns) do
		-- Each entry is a {key, replacement} pair; index into it rather than
		-- using the pair table itself as the pattern or replacement.
		local key = pattern[1]
		stem, n = ugsub(stem, key .. "$", pattern[2])
		if n > 0 then
			debug_track("la-utilities/" .. key)
			return toNFC(stem)
		end
	end
	debug_track("la-utilities")
	return stem
end
return export