-- fi_pron_inflect: Generates fi-pronunciation inputs for inflected forms.
--
-- Inputs:
-- lemma (string): The spelling of the lemma.
-- lemma_pron (string): The fi-pronunciation input for the lemma. May be an empty string or nil for no input, or shorthand * or (*).
-- infl_class (number): The inflection class: 1-49 for nominals, 52-78 for verbs (see the respective inflection type reference pages).
-- grad_class (string | nil): The gradation class: nil for no gradation, or a letter A-M (see the gradation class reference page).
-- form_tag (string): The tag for the form; see the table below.
-- form (string): The spelling of the inflected form.
--
-- Outputs:
-- The fi-p input for the inflected form. May be shorthand, e.g. empty string
-- for no input, * for an input with only *, etc.
--
-- Note that the output may change based only on the form_tag, so the
-- pronunciation for each value must be checked separately and then merged
-- as needed.
--
-- Returns nil only if no input could be determined.
--[[
Allowed values for form_tag:
Nominals:
nom_sg: nominative singular
nom_pl: nominative plural
gen_sg: genitive singular
gen_pl: genitive plural
par_sg: partitive singular
par_pl: partitive plural
ine_sg: inessive singular
ine_pl: inessive plural
ela_sg: elative singular
ela_pl: elative plural
ill_sg: illative singular
ill_pl: illative plural
ade_sg: adessive singular
ade_pl: adessive plural
abl_sg: ablative singular
abl_pl: ablative plural
all_sg: allative singular
all_pl: allative plural
ess_sg: essive singular
ess_pl: essive plural
tra_sg: translative singular
tra_pl: translative plural
abe_sg: abessive singular
abe_pl: abessive plural
ins_sg: instructive singular
ins_pl: instructive plural
com_pl: comitative plural
Any of these can be suffixed with /1s, /2s, /1p, /2p, /3,
for possessive forms (first/second/third-person, singular/plural).
Verbs:
pres_1sg: indicative present, first-person singular
pres_2sg: indicative present, second-person singular
pres_3sg: indicative present, third-person singular
pres_1pl: indicative present, first-person plural
pres_2pl: indicative present, second-person plural
pres_3pl: indicative present, third-person plural
pres_conn: indicative present, non-passive connegative
pres_pasv: indicative present, passive
pres_pasv_conn: indicative present, passive connegative
past_1sg: indicative past, first-person singular
past_2sg: indicative past, second-person singular
past_3sg: indicative past, third-person singular
past_1pl: indicative past, first-person plural
past_2pl: indicative past, second-person plural
past_3pl: indicative past, third-person plural
past_pasv: indicative past, passive
cond_1sg: conditional, first-person singular
cond_2sg: conditional, second-person singular
cond_3sg: conditional, third-person singular
cond_1pl: conditional, first-person plural
cond_2pl: conditional, second-person plural
cond_3pl: conditional, third-person plural
cond_conn: conditional, non-passive connegative
cond_pasv: conditional, passive
cond_pasv_conn: conditional, passive connegative
impr_2sg: imperative, second-person singular
impr_3sg: imperative, third-person singular
impr_1pl: imperative, first-person plural
impr_2pl: imperative, second-person plural
impr_3pl: imperative, third-person plural
impr_conn: imperative, non-passive connegative
impr_pasv: imperative, passive
impr_pasv_conn: imperative, passive connegative
potn_1sg: potential, first-person singular
potn_2sg: potential, second-person singular
potn_3sg: potential, third-person singular
potn_1pl: potential, first-person plural
potn_2pl: potential, second-person plural
potn_3pl: potential, third-person plural
potn_conn: potential, non-passive connegative
potn_pasv: potential, passive
potn_pasv_conn: potential, passive connegative
inf1: first infinitive (lemma for verbs)
inf1_long/1s: long first infinitive, first-person singular possessive
inf1_long/2s: long first infinitive, second-person singular possessive
inf1_long/1p: long first infinitive, first-person plural possessive
inf1_long/2p: long first infinitive, second-person plural possessive
inf1_long/3: long first infinitive, third-person singular
inf2_ine: second active infinitive, inessive
inf2_ine/1s: second active infinitive, first-person singular possessive
inf2_ine/2s: second active infinitive, second-person singular possessive
inf2_ine/1p: second active infinitive, first-person plural possessive
inf2_ine/2p: second active infinitive, second-person plural possessive
inf2_ine/3: second active infinitive, third-person singular
inf2_ins: second active infinitive, instructive
inf2_pasv_ine: second passive infinitive, inessive
inf3_ine: third active infinitive, inessive
inf3_ela: third active infinitive, elative
inf3_ill: third active infinitive, illative
inf3_ade: third active infinitive, adessive
inf3_abe: third active infinitive, abessive
inf3_ins: third active infinitive, instructive
inf3_pasv_ins: third passive infinitive, instructive
inf4: -minen verbal noun
inf5/1s: fifth infinitive, first-person singular possessive
inf5/2s: fifth infinitive, second-person singular possessive
inf5/1p: fifth infinitive, first-person plural possessive
inf5/2p: fifth infinitive, second-person plural possessive
inf5/3: fifth infinitive, third-person singular
pres_part: present active participle
pres_pasv_part: present passive participle
past_part: past active participle
past_part_pl: past active participle, plural form
past_pasv_part: past passive participle
agnt_part: agent participle
nega_part: negative participle
]]--
-- Module table: the public function fi_pron_inflect is attached to it and
-- it is returned at the end of the file.
local export = {}
-- Lowercase letters classified as consonants (presumably meant to be spliced
-- into pattern character classes; confirm against the places that use it).
local consonants = "bdfghjklmnprsštvxzž"
-- Lowercase letters classified as vowels (same presumed use as above).
local vowels = "aeiouyäö"
-- inflected form ends in * (1) or (*) (2)
-- The value 1 makes fi_pron_inflect append "*", the value 2 makes it
-- append "(*)".
-- NOTE(review): every entry below has lost its key (only "= 1," / "= 2,"
-- remains), so this constructor is not valid Lua as it stands. The keys
-- cannot be recovered from this file; restore them from an intact copy.
local infl_finalgem = {
= 1,
= 1,
= 1,
= 1,
= 1,
= 1,
= 1,
= 1,
= 1,
= 1,
= 2,
}
-- nominal forms with weak grade
-- Used as a set: a truthy entry marks the form as weak grade.
-- NOTE(review): every entry below has lost its key (only "= true," remains),
-- so this constructor is not valid Lua as it stands. The keys cannot be
-- recovered from this file; restore them from an intact copy.
local infl_grad_weak = {
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
}
-- infl classes with reverse gradation
-- Used as a set: a truthy entry flips the weak/strong decision made in
-- fi_pron_inflect for nominals containing "ts".
-- NOTE(review): every entry below has lost its key (only "= true," remains),
-- so this constructor is not valid Lua as it stands. The keys cannot be
-- recovered from this file; restore them from an intact copy.
local infl_grad_reverse = {
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
}
-- infl class 69: forms in which ts is weak
-- Consulted by fi_pron_inflect only when infl_class == 69; a truthy result
-- makes reapply write "ts" in the form suffix as "t!s".
-- NOTE(review): every entry below has lost its key (only "= true," remains),
-- so this constructor is not valid Lua as it stands. The keys cannot be
-- recovered from this file; restore them from an intact copy.
local infl69_ts_weak = {
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
}
-- Split form_tag into primary form and possessive suffix.
--
-- form_tag (string): a tag such as "gen_sg" or "gen_sg/1s".
-- Returns two values: the text before the first "/" and the text after it.
-- When form_tag contains no "/", returns form_tag and nil.
--
-- The previous pattern "(*)/(.*)" had lost the character class in its first
-- capture, so it could only match a literal "*" before the slash and
-- returned nil for every real tag. A plain-text split on the first "/" needs
-- no pattern at all ("/" is ASCII, so byte offsets are safe in UTF-8).
local function split_tag_decl(form_tag)
	local slash = form_tag:find("/", 1, true) -- plain find, no pattern magic
	if not slash then
		return form_tag, nil
	end
	return form_tag:sub(1, slash - 1), form_tag:sub(slash + 1)
end
-- Wye: for the lemma and the inflected form, returns the common prefix
-- and the two suffixes.
--
-- lemma (string), form (string): the two spellings to compare, character by
-- character (via mw.ustring, so multi-byte letters are compared whole).
-- Returns three strings: the shared prefix, the rest of lemma, the rest of
-- form. Either remainder may be the empty string.
local function wye(lemma, form)
	local n = 1
	while true do
		local c = mw.ustring.sub(lemma, n, n)
		-- Stop at the end of lemma as well as at the first mismatch. Without
		-- the end check, identical strings compare "" == "" forever once n
		-- runs past the end.
		if c == "" or c ~= mw.ustring.sub(form, n, n) then
			break
		end
		n = n + 1
	end
	return mw.ustring.sub(lemma, 1, n - 1), mw.ustring.sub(lemma, n), mw.ustring.sub(form, n)
end
-- Set of spelling characters that pron_unravel skips over without removing
-- anything from the pronunciation.
-- NOTE(review): all four entries have lost their keys (only "= true"
-- remains), so this constructor is not valid Lua as it stands. The keys
-- cannot be recovered from this file; restore them from an intact copy.
local unravel_spelling_ignore = {
= true, = true, = true, = true
}
-- Unravel fi-p to remove a suffix. Returns nil in case of error.
--
-- pron (string): the fi-p input to shorten.
-- suf (string): the spelling suffix to strip, processed from its last
-- character to its first.
-- Returns the shortened fi-p input, or nil when a character of suf is not
-- found at the end of pron or is not a supported letter.
--
-- NOTE(review): the bracketed parts of this function (the table index and
-- the pattern character classes) were missing and have been restored from
-- the surrounding comments and names; each restored spot is marked below.
-- Confirm them against an intact copy of the module.
local function pron_unravel(pron, suf)
	local n = mw.ustring.len(suf)
	-- restored: the test for a supported letter had an empty pattern, which
	-- accepts anything and made the "unsupported character" branch dead
	local letter = "[" .. consonants .. vowels .. "]"

	while true do
		-- always remove these before
		-- (both substitutions are no-ops unless pron ends in "(*)" or "*",
		-- so no guard is needed)
		pron = mw.ustring.gsub(pron, "%(%*%)$", "")
		pron = mw.ustring.gsub(pron, "%*$", "")

		if n == 0 then
			break
		end

		local c = mw.ustring.sub(suf, n, n)
		-- restored: index by the current character; testing the table itself
		-- is always truthy, so nothing was ever removed from pron
		if unravel_spelling_ignore[c] then
			-- do nothing more
		elseif mw.ustring.match(c, letter) then
			-- local: this used to leak into the global environment
			local mask
			if c == "d" then
				-- either D gets removed (restored class: reapply writes a
				-- spelled "d" as "D", so both spellings may occur in pron)
				mask = "[dD]"
			else
				mask = c
			end

			if mw.ustring.match(pron, mask .. "$") then
				pron = mw.ustring.sub(pron, 1, -2)
			else
				return nil -- unexpected character
			end

			if c == "s" then
				-- remove ts specs
				-- NOTE(review): restored as "!" only, the one ts marker
				-- visible in this file ("t!s"); confirm whether other
				-- markers must be stripped too.
				pron = mw.ustring.gsub(pron, "%!$", "")
			end
		else
			return nil -- unsupported character
		end

		n = n - 1

		-- always remove these after
		-- (no-ops unless pron ends in "(.)" or ".")
		pron = mw.ustring.gsub(pron, "%(%.%)$", "")
		pron = mw.ustring.gsub(pron, "%.$", "")
	end

	return pron
end
-- Wye fi-p: given the fi-p for the lemma, the lemma and the inflected form,
-- return the common fi-p prefix, lemma suffix and inflected form suffix.
-- The common prefix is nil in case of error.
--
-- lemma_pron (string): fi-p input for the lemma (shorthand already expanded).
-- lemma (string), form (string): spellings of the lemma and inflected form.
-- Returns: the fi-p prefix (or nil), the spelling suffix of the lemma, and
-- the spelling suffix of the form.
local function pron_wye(lemma_pron, lemma, form)
	local common, lemma_suf, form_suf = wye(lemma, form)
	local prefix = pron_unravel(lemma_pron, lemma_suf)

	-- add an <i> if the spelling ends in a vowel and the pronunciation does not (unadapted loanwords)
	-- NOTE(review): both character classes were missing, leaving bare "$"
	-- patterns that always match, so "i" was appended to every lemma with an
	-- empty lemma_suf. Restored from the comment above as "pronunciation
	-- ends in a consonant" and "spelling ends in a vowel"; confirm against
	-- an intact copy.
	if prefix and lemma_suf == ""
		and mw.ustring.match(prefix, "[" .. consonants .. "]$")
		and mw.ustring.match(common, "[" .. vowels .. "]$") then
		prefix = prefix .. "i"
	end

	return prefix, lemma_suf, form_suf
end
-- Lookup set consulted by dotter when deciding whether to append "(.)".
-- NOTE(review): all 18 entries have lost their keys (only "= true" remains),
-- so this constructor is not valid Lua as it stands. The keys cannot be
-- recovered from this file; restore them from an intact copy.
local diphthongs_pure = {
= true, = true, = true,
= true, = true, = true, = true,
= true, = true, = true, = true,
= true, = true, = true, = true,
= true, = true, = true,
}
-- Rewrites a "ts" near the end of pron between its plain form "ts" and its
-- marked form "t!s".
--
-- pron (string): the fi-p prefix to adjust.
-- weak_grade (boolean): truthy to rewrite "ts" as "t!s", falsy to rewrite
-- "t!s" as "ts". Both patterns are anchored at the end of pron.
--
-- NOTE(review): in standard Lua pattern syntax the capture "(*)" matches
-- only a literal "*"; a character class in front of the "*" appears to be
-- missing from both patterns. As written, the substitution can only fire on
-- a pron that ends in "ts*" / "t!s*". The intended class cannot be
-- determined from this file.
-- NOTE(review): the gsub call is returned directly, so the substitution
-- count is passed on to the caller as a second return value.
local function ts_fix(pron, weak_grade)
if weak_grade then
-- final ts > t!s
return mw.ustring.gsub(pron, "ts(*)$", "t!s%1")
else
-- final t!s > ts
return mw.ustring.gsub(pron, "t%!s(*)$", "ts%1")
end
end
-- adds/removes syllable breaks
--
-- pron (string): fi-p prefix shared by the lemma and the inflected form.
-- grad_class (string | nil): gradation class letter.
-- lemma_suf (string), form_suf (string): spelling suffixes of the lemma and
-- of the inflected form.
-- Returns pron, possibly with a trailing "(.)" removed or appended.
--
-- NOTE(review): the vowel class in the first test and the two indices
-- (syl[#syl], diphthongs_pure[syl_last]) were missing and have been restored
-- from the comments and variable names. Without them the first branch never
-- ran and the second always appended "(.)". Confirm against an intact copy.
local function dotter(pron, grad_class, lemma_suf, form_suf)
	if not mw.ustring.match(form_suf, "^[" .. vowels .. "]") then
		-- remove trailing dot if the suffix does not start with a vowel
		pron = mw.ustring.gsub(pron, "%(%.%)$", "")
	end

	if grad_class == "D" and mw.ustring.match(lemma_suf, "^k") and not mw.ustring.match(form_suf, "^k") then
		-- may need to add trailing dot if we have a diphthong
		local m_fi_p = require("Module:fi-pronunciation")
		local partial = pron .. mw.ustring.sub(form_suf, 1, 1)
		-- presumably syllabify returns a sequence of syllables; verify
		-- against Module:fi-pronunciation
		local syl = m_fi_p.syllabify(partial)
		local syl_last = syl[#syl]
		if diphthongs_pure[syl_last] then
			pron = pron .. "(.)"
		end
	end

	return pron
end
-- Joins the fi-p prefix with the spelling suffix of the inflected form.
-- Every "d" in the suffix is written as "D"; when ts_weak is truthy, every
-- "ts" in the suffix is additionally written as "t!s".
local function reapply(prefix, suffix, ts_weak)
	local tail = mw.ustring.gsub(suffix, "d", "D")
	if ts_weak then
		tail = mw.ustring.gsub(tail, "ts", "t!s")
	end
	return prefix .. tail
end
-- Generates the fi-pronunciation input for one inflected form.
--
-- lemma (string): spelling of the lemma.
-- lemma_pron (string | nil): fi-p input for the lemma; nil, "", "*" and
-- "(*)" are shorthand and are expanded using the lemma spelling.
-- infl_class (number): inflection class.
-- grad_class (string | nil): gradation class letter.
-- form_tag (string): tag of the form, optionally with a "/possessive" part.
-- form (string): spelling of the inflected form.
--
-- Returns the fi-p input for the form (possibly shorthand such as "" or
-- "*"), or nil if the lemma pronunciation could not be unravelled.
-- Raises an error if lemma_pron ends in "]".
--
-- NOTE(review): the four table lookups and the vowel class in the "ns"
-- pattern had lost their bracketed parts, so the tables were tested for
-- truthiness or compared to 1/2 as whole tables. They have been restored
-- from the table comments ("infl classes ...", "nominal forms ...") and the
-- variable names. The key used for infl_finalgem (the primary tag) is an
-- assumption. Confirm all of them against an intact copy of the module.
function export.fi_pron_inflect(lemma, lemma_pron, infl_class, grad_class, form_tag, form)
	local infl, poss = split_tag_decl(form_tag)

	-- undo shorthand
	lemma_pron = lemma_pron or ""
	if mw.ustring.match(lemma_pron, "^%(?%*?%)?$") then
		lemma_pron = lemma .. lemma_pron
	end

	-- wye split
	local pron, lemma_suf, form_suf = pron_wye(lemma_pron, lemma, form)
	if pron == nil then
		return nil
	end

	-- nom_sg and nom_pl are gen_sg with possessive
	if poss and infl:match("^nom_") then
		infl = "gen_sg"
	end

	if lemma_pron:match("%]$") then
		error("syllabification hints must currently be dealt with manually")
	end

	-- ts fixes for nominals
	if 1 <= infl_class and infl_class <= 49 and mw.ustring.find(lemma, "ts") then
		local reverse_gradation = not not infl_grad_reverse[infl_class]
		local grade_weak = not not infl_grad_weak[infl]
		-- strong grade for nom/gen_sg with possessive
		if poss and infl == "gen_sg" then
			grade_weak = false
		end
		-- reverse gradation flips the weak/strong decision
		grade_weak = grade_weak ~= reverse_gradation
		pron = ts_fix(pron, grade_weak)
	end

	-- apply form_suf to pron
	local ts_weak = infl_class == 69 and infl69_ts_weak[infl]
	local final = reapply(dotter(pron, grad_class, lemma_suf, form_suf), form_suf, ts_weak)
	if final == form then
		-- shorthand
		final = ""
	end

	if poss == "3" and mw.ustring.match(form, "ns[aä]$") then
		-- third-person possessive ending in -nsa/-nsä
		final = final .. "*"
	else
		local finalgem = infl_finalgem[infl]
		if finalgem == 1 then
			final = final .. "*"
		elseif finalgem == 2 then
			final = final .. "(*)"
		end
	end

	return final
end
return export