local export = {}
local romut_module = "Module:romance utilities"
local u = mw.ustring.char
local rsplit = mw.text.split
local rsubn = mw.ustring.gsub
local rfind = mw.ustring.find
local rmatch = mw.ustring.match
-- Placeholder characters (U+FFF1..U+FFF4) used while syllabifying; export.syllabify() inserts them and
-- maps them back before returning.
-- NOTE(review): presumably chosen because they never occur in real Galician text -- confirm.
-- TEMPC1: NOTE(review): presumably stands in for an "i" treated as a consonant -- confirm.
local TEMPC1 = u(0xFFF1)
-- TEMPC2: stands in for a "u" that must be treated as a consonant.
local TEMPC2 = u(0xFFF2)
-- TEMPV1: stands in for a "y" that must be treated as a vowel.
local TEMPV1 = u(0xFFF3)
-- DIV: boundary marker wrapped around the word at the start of export.syllabify().
local DIV = u(0xFFF4)
-- Vowel inventories. Both cases are listed explicitly because the patterns built from them are
-- case-sensitive.
local unaccented_vowel = "aeiouüAEIOUÜ"
local accented_vowel = "áéíóúýÁÉÍÓÚÝ"
local vowel = unaccented_vowel .. accented_vowel
-- Pattern fragments (bracket expressions) meant to be concatenated into larger Ustring patterns.
-- They must be bracket expressions rather than empty strings: an empty fragment makes every pattern
-- built from it degenerate (e.g. "(" .. V .. ")l$" would become "()l$").
-- Any vowel, accented or not.
local V = "[" .. vowel .. "]"
export.V = V
-- Vowel carrying an acute accent.
local AV = "[" .. accented_vowel .. "]"
export.AV = AV
-- Vowel without an acute accent.
local NAV = "[" .. unaccented_vowel .. "]"
export.NAV = NAV
-- NOTE(review): the members of the glide class are a reconstruction -- confirm.
local W = "[iyuw]" -- glide
export.W = W
-- Consonant: any character that is neither a vowel nor the syllable divider ".".
-- NOTE(review): reconstructed as the complement of `vowel` plus "." -- confirm.
local C = "[^" .. vowel .. ".]"
export.C = C
-- Lookup table mapping each acute-accented vowel (lower and upper case) to its unaccented counterpart.
-- Keys must be bracketed string keys because they are not valid Lua identifiers.
local remove_accent = {
	["á"] = "a", ["é"] = "e", ["í"] = "i", ["ó"] = "o", ["ú"] = "u", ["ý"] = "y",
	["Á"] = "A", ["É"] = "E", ["Í"] = "I", ["Ó"] = "O", ["Ú"] = "U", ["Ý"] = "Y",
}
export.remove_accent = remove_accent
-- Lookup table mapping each plain vowel (lower and upper case) to its acute-accented counterpart;
-- the inverse of the accent-removal table.
local add_accent = {
	["a"] = "á", ["e"] = "é", ["i"] = "í", ["o"] = "ó", ["u"] = "ú", ["y"] = "ý",
	["A"] = "Á", ["E"] = "É", ["I"] = "Í", ["O"] = "Ó", ["U"] = "Ú", ["Y"] = "Ý",
}
export.add_accent = add_accent
-- Wrapper around rsubn() that hands back only the substituted string and drops the substitution
-- count.
local function rsub(term, foo, bar)
	-- The enclosing parentheses truncate rsubn()'s multiple results to the first one.
	return (rsubn(term, foo, bar))
end
export.rsub = rsub
-- Wrapper around rsubn() whose second result is a boolean: true when at least one substitution
-- was performed, false otherwise. The first result is the substituted string.
local function rsubb(term, foo, bar)
	local result, count = rsubn(term, foo, bar)
	local changed = count > 0
	return result, changed
end
export.rsubb = rsubb
-- Keep applying rsub() with the same pattern and replacement until the string stops changing,
-- then return that stable string.
local function rsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		term = rsub(previous, foo, bar)
	until term == previous
	return term
end
export.rsub_repeatedly = rsub_repeatedly
-- Remove the accent from the last vowel of `stem` when that vowel is accented and only consonants
-- (possibly none) follow it up to the end of the string. Returns `stem` unchanged otherwise.
function export.remove_final_accent(stem)
	return rsub(stem, "(" .. AV .. ")(" .. C .. "*)$", function(v, c)
		-- Look the vowel up in the table; concatenating the table itself would raise an error.
		return (remove_accent[v] or v) .. c
	end)
end
-- Add an accent to the last vowel of `stem` when that vowel is unaccented and only consonants
-- (possibly none) follow it up to the end of the string. Returns `stem` unchanged otherwise.
function export.add_final_accent(stem)
	return rsub(stem, "(" .. NAV .. ")(" .. C .. "*)$", function(v, c)
		-- Look the vowel up in the table; concatenating the table itself would raise an error.
		-- Vowels with no accented counterpart in the table (e.g. ü) are left as they are.
		return (add_accent[v] or v) .. c
	end)
end
-- Patterns for prepositions (optionally contracted with an article), each including the trailing
-- separator. The list is passed to handle_multiword() in [[Module:romance utilities]].
local prepositions = {
	-- a + optional article
	"a ",
	"ás? ",
	"aos? ",
	-- con + optional article
	"con ",
	"coa?s? ",
	-- de + optional article (do, da, dos, das)
	"de ",
	"d[oa]s? ",
	"d'",
	-- en/em + optional article (no, na, nos, nas)
	"en ",
	"n[oa]s? ",
	-- por + optional article (polo, pola, polos, polas)
	"por ",
	"pol[oa]s? ",
	-- para + optional article
	"para ",
	-- NOTE(review): vowel class reconstructed; confirm which contracted forms are intended.
	"pr[ao]s? ",
	-- others
	-- NOTE(review): reconstructed to cover "ata"/"até"; confirm.
	"at[aé] ",
	"com ",
	"entre ",
	"sen ",
	"so ",
	"sobre ",
}
-- Delegate multiword handling of `term` to handle_multiword() in [[Module:romance utilities]],
-- passing it `special`, the per-word function `make_fun` and the list of prepositions.
-- Returns nil when handle_multiword() returns a false value; otherwise that result must be a list
-- with exactly one element, and that single element is returned (so callers get the same kind of
-- value as on their single-word path). `fun_name` is used only in the error message.
-- Raises an internal error if the list does not have exactly one element.
local function call_handle_multiword(term, special, make_fun, fun_name)
	local retval = require(romut_module).handle_multiword(term, special, make_fun, prepositions)
	if not retval then
		return nil
	end
	if #retval ~= 1 then
		error("Internal error: Should have one return value for " .. fun_name .. ": " .. table.concat(retval, ","))
	end
	-- Unwrap the one-element list instead of returning the list itself.
	return retval[1]
end
-- Build a "try" closure bound to `word`. Calling try(from, to) substitutes pattern `from` with `to`
-- in `word` and yields the new string if at least one substitution happened, or nil if none did,
-- so that several attempts can be chained with `or`.
local function make_try(word)
	return function(from, to)
		local result, did_change = rsubb(word, from, to)
		if not did_change then
			return nil
		end
		return result
	end
end
-- Generate the default plural of `term`. Multiword terms are first offered to
-- call_handle_multiword() (with `special` passed through); if that yields a result it is returned.
-- Otherwise the first matching rule below is applied; the rules are ordered, so earlier ones take
-- precedence.
function export.make_plural(term, special)
	local retval = call_handle_multiword(term, special, export.make_plural, "make_plural")
	if retval then
		return retval
	end
	local try = make_try(term)
	-- Based on https://www.lingua.gal/c/document_library/get_file?file_path=/portal-lingua/celga/celga-1/material-alumno/Manual_Aula_de_Galego_1_resumo_gramatical.pdf
	return try("r$", "res") or
		try("z$", "ces") or
		try("(" .. V .. "be)l$", "%1is") or -- vowel + -bel
		try("(" .. AV .. ".*" .. V .. "l)$", "%1es") or -- non-final stress + -l e.g. túnel -> 'túneles'
		try("^(" .. C .. "*" .. V .. C .. "*l)$", "%1es") or -- monosyllable ending in -l e.g. sol -> 'soles'
		try("il$", "ís") or -- final stressed -il e.g. civil -> 'civís'
		try("(" .. V .. ")l$", "%1is") or -- any other vowel + -l e.g. papel -> 'papeis'
		try("(" .. V .. "[íú])s$", "%1ses") or -- vowel + stressed í/ú + -s e.g. país -> 'países'
		try("(" .. AV .. ")s$", -- other final accented vowel + -s e.g. autobús -> 'autobuses'
			-- index the table; concatenating the table itself would raise an error
			function(av) return remove_accent[av] .. "ses" end) or
		-- NOTE(review): the glide class is a reconstruction and is made mandatory here so that this
		-- rule only fires on diphthongs; an optional glide would swallow every vowel + -s word and
		-- make the two rules below unreachable for them. Confirm against the intended behavior.
		try("(" .. V .. "[iu]s)$", "%1es") or -- diphthong + final -s e.g. deus -> 'deuses'
		-- `C` must be the consonant class, not a literal letter "C"
		try("^(" .. C .. "*" .. V .. "s)$", "%1es") or -- monosyllable + final -s e.g. fros -> 'froses', gas -> 'gases'
		-- other final -s or -x (stressed on penult or antepenult or ending in cluster): invariable.
		-- The substitution replaces the letter with itself, which still counts as a match, so the
		-- unchanged term is returned instead of falling through to the default rule.
		try("([sx])$", "%1") or
		term .. "s" -- ending in vowel, -n or other consonant
end
-- Generate the default feminine of `term`. `is_noun` enables the rule that only applies to nouns
-- (-z -> -za). Multiword terms are first offered to call_handle_multiword() (with `special` passed
-- through); if that yields a result it is returned. Otherwise the first matching rule below is
-- applied, and `term` is returned unchanged when no rule matches.
function export.make_feminine(term, is_noun, special)
	local retval = call_handle_multiword(term, special, function(term) return export.make_feminine(term, is_noun) end,
		"make_feminine")
	if retval then
		return retval
	end
	local try = make_try(term)
	-- Based on https://www.lingua.gal/c/document_library/get_file?file_path=/portal-lingua/celga/celga-1/material-alumno/Manual_Aula_de_Galego_1_resumo_gramatical.pdf
	return
		try("o$", "a") or
		try("º$", "ª") or -- ordinal indicator
		try("^(" .. C .. "*)u$", "%1úa") or -- nu -> núa, cru -> crúa
		try("eu$", "ía") or -- sandeu -> sandía, xudeu -> xudía
		-- Many nouns and adjectives in -án take -á; a sizeable group takes -ana instead, which this
		-- default does not produce.
		try("án$", "á") or
		-- Nouns in -z take -za; adjectives in -z are mostly invariable, the only such adjectives
		-- being andaluz -> andaluza, rapaz -> rapaza.
		is_noun and try("z$", "za") or
		try("ín$", "ina") or -- many words in -ín, with some exceptions
		-- Words in -ón default to -ona; some take -oa instead and some are invariable in the
		-- singular, neither of which this default produces.
		try("ón$", "ona") or
		try("és$", "esa") or -- with some exceptions
		-- Adjectives in -dor, -tor and -sor take -a; other words in -or (comparatives and the
		-- like) are deliberately left alone, hence the consonant class before "or".
		try("([dts]or)$", "%1a") or
		term
end
-- Generate the default masculine of a feminine `term`. Multiword terms are first offered to
-- call_handle_multiword() (with `special` passed through); if that yields a result it is returned.
-- Otherwise the first matching rule below is applied, and `term` is returned unchanged when no rule
-- matches.
function export.make_masculine(term, special)
	local retval = call_handle_multiword(term, special, export.make_masculine, "make_masculine")
	if retval then
		return retval
	end
	local try = make_try(term)
	return
		-- -dora/-tora/-sora -> -dor/-tor/-sor. The capture must hold the consonant: an empty
		-- capture is a position capture and "%1" would insert a number.
		try("([dts])ora$", "%1or") or
		try("a$", "o") or
		-- ordinal indicator
		try("ª$", "º") or
		term
end
-- Syllabify a word and return the list of its syllables. This implements a full syllabification
-- algorithm adapted from the Spanish counterpart, which is more than is needed for the purpose of
-- this module (it doesn't care so much about syllable boundaries), but won't hurt.
-- NOTE(review): several character classes below are reconstructions modelled on the Spanish
-- algorithm; they are flagged individually and should be confirmed for Galician.
function export.syllabify(word)
	-- Wrap the word in boundary markers so word edges behave like (non-vowel) context.
	word = DIV .. word .. DIV
	-- gu/qu + front vowel; make sure we treat the u as a consonant; a following i should not be treated as a
	-- consonant (may make no difference for Galician; necessary in Spanish).
	word = rsub(word, "([gq])u([eiéí])", "%1" .. TEMPC2 .. "%2")
	local vowel_to_glide = { ["i"] = TEMPC1, ["u"] = TEMPC2 }
	-- i and u between vowels should behave like consonants.
	word = rsub_repeatedly(word, "(" .. V .. ")([iu])(" .. V .. ")",
		function(v1, iu, v2) return v1 .. vowel_to_glide[iu] .. v2 end
	)
	-- y between consonants or after a consonant at the end of the word should behave like a vowel.
	word = rsub_repeatedly(word, "(" .. C .. ")y(" .. C .. ")",
		function(c1, c2) return c1 .. TEMPV1 .. c2 end
	)
	-- Insert syllable dividers: V.CV (with optional glide), VC.CV, VC+.CCV.
	word = rsub_repeatedly(word, "(" .. V .. ")(" .. C .. W .. "?" .. V .. ")", "%1.%2")
	word = rsub_repeatedly(word, "(" .. V .. C .. ")(" .. C .. V .. ")", "%1.%2")
	word = rsub_repeatedly(word, "(" .. V .. C .. "+)(" .. C .. C .. V .. ")", "%1.%2")
	-- Obstruent + liquid clusters belong together at the start of the following syllable.
	-- NOTE(review): the two consonant classes are reconstructed from the Spanish algorithm -- confirm.
	word = rsub(word, "([pbfvkctg])%.([lr])", ".%1%2")
	-- An s between two consonants goes with the preceding syllable.
	word = rsub_repeatedly(word, "(" .. C .. ")%.s(" .. C .. ")", "%1s.%2")
	-- Any aeo, or stressed iu, should be syllabically divided from a following aeo or stressed iu.
	-- NOTE(review): the three pairs of vowel classes are reconstructed from the Spanish algorithm -- confirm.
	word = rsub_repeatedly(word, "([aeoáéíóúý])([aeoáéíóúý])", "%1.%2")
	word = rsub_repeatedly(word, "([ií])([ií])", "%1.%2")
	word = rsub_repeatedly(word, "([uú])([uú])", "%1.%2")
	-- Drop the boundary markers and turn the temporary placeholders back into their letters.
	word = rsub(word, "([" .. DIV .. TEMPC1 .. TEMPC2 .. TEMPV1 .. "])", {
		[DIV] = "",
		[TEMPC1] = "i",
		[TEMPC2] = "u",
		[TEMPV1] = "y",
	})
	return rsplit(word, "%.")
end
-- Return the index of the (last) stressed syllable in the list `syllables`.
function export.stressed_syllable(syllables)
	-- If a syllable carries an accent mark, it is the stressed one; scan from the end so the last
	-- such syllable wins. Each syllable string is tested individually.
	for i = #syllables, 1, -1 do
		if rfind(syllables[i], AV) then
			return i
		end
	end
	-- Monosyllabic words are stressed on that syllable.
	if #syllables == 1 then
		return 1
	end
	local i = #syllables
	-- Unaccented words ending in a vowel or a vowel + n/s/ns are stressed on the preceding syllable.
	if rfind(syllables[i], V .. "n?s?$") then
		return i - 1
	end
	-- Remaining words are stressed on the last syllable.
	return i
end
-- Add an accent to the appropriate vowel in a syllable, if not already accented, and return the
-- resulting syllable.
function export.add_accent_to_syllable(syllable)
	-- Don't do anything if syllable already stressed.
	if rfind(syllable, AV) then
		return syllable
	end
	-- Prefer to accent an a/e/o in case of a diphthong or triphthong (the first one if for some reason
	-- there are multiple, which should not occur with the standard syllabification algorithm);
	-- otherwise, do the first i or u in case of a diphthong ui or iu.
	-- NOTE(review): only lowercase vowels are handled here -- confirm whether uppercase is needed.
	if rfind(syllable, "[aeo]") then
		-- "^(.-)" lazily captures everything before the first a/e/o, so only that vowel is replaced.
		return rsub(syllable, "^(.-)([aeo])", function(prev, v) return prev .. add_accent[v] end)
	end
	return rsub(syllable, "^(.-)([iu])", function(prev, v) return prev .. add_accent[v] end)
end
-- Strip the accent mark from every accented vowel in `syllable`; the accent-removal lookup table
-- is used directly as the replacement for each match.
function export.remove_accent_from_syllable(syllable)
	local unaccented = rsub(syllable, AV, remove_accent)
	return unaccented
end
-- Return true if an accent is needed on syllable number `sylno` if that syllable were to receive the stress,
-- given the syllables of a word. The current accent may be on any syllable.
-- NOTE(review): the vowel classes in this function are reconstructions derived from the comments
-- below; they are flagged individually and should be confirmed.
function export.accent_needed(syllables, sylno)
	-- Diphthongs iu and ui are normally stressed on the first vowel, so if the accent is on the second vowel,
	-- it's needed.
	-- NOTE(review): the class before "í" is assumed to be u/ü -- confirm.
	if rfind(syllables[sylno], "iú") or rfind(syllables[sylno], "[uü]í") then
		return true
	end
	-- If the default-stressed syllable is different from `sylno`, accent is needed.
	local unaccented_syllables = {}
	for _, syl in ipairs(syllables) do
		table.insert(unaccented_syllables, export.remove_accent_from_syllable(syl))
	end
	local would_be_stressed_syl = export.stressed_syllable(unaccented_syllables)
	if would_be_stressed_syl ~= sylno then
		return true
	end
	-- At this point, we know that the stress would by default go on `sylno`, given the syllabification in
	-- `syllables`. Now we have to check for situations where removing the accent mark would result in a
	-- different syllabification. For example, países -> `pa.i.ses` but removing the accent mark would lead
	-- to `pai.ses`. Similarly, río -> `ri.o` but removing the accent mark would lead to single-syllable `rio`.
	-- We need to check whether (a) the stress falls on an i or u; (b) in the absence of an accent mark, the
	-- i or u would form a diphthong with a preceding or following vowel and the stress would be on that vowel.
	-- The conditions are slightly different when dealing with preceding or following vowels because iu and ui
	-- diphthongs are by default stressed on the first vowel.
	local accented_syllable = export.add_accent_to_syllable(unaccented_syllables[sylno])
	if sylno > 1 then
		local preceding = unaccented_syllables[sylno - 1]
		-- A preceding vowel (including the other high vowel) would absorb the stress.
		-- NOTE(review): classes [aeou] / [aeoi] are reconstructed -- confirm.
		if rfind(preceding, "[aeou]$") and rfind(accented_syllable, "^í") or
			rfind(preceding, "[aeoi]$") and rfind(accented_syllable, "^ú") then
			return true
		end
	end
	-- A following a/e/o would absorb the stress; a following i/u would not, since iu/ui are
	-- stressed on the first vowel.
	-- NOTE(review): classes [íú] / [aeo] are reconstructed -- confirm.
	if sylno < #syllables and rfind(accented_syllable, "[íú]$") and
		rfind(unaccented_syllables[sylno + 1], "^[aeo]") then
		return true
	end
	return false
end
-- FIXME: The next two functions were copied from another module. Move to a utilities module.
-- Add links around words. If multiword_only, do it only in multiword forms.
-- Forms that are empty, a single space, or already contain "[[" are returned unchanged.
function export.add_links(form, multiword_only)
	if form == "" or form == " " then
		return form
	end
	if not form:find("%[%[") then
		-- Only forms containing whitespace or punctuation can be multiword.
		-- NOTE(review): the class [%s%p] is a reconstruction -- confirm.
		if rfind(form, "[%s%p]") then --optimization to avoid loading Module:headword on single-word forms
			local m_headword = require("Module:headword")
			if m_headword.head_is_multiword(form) then
				form = m_headword.add_multiword_links(form)
			end
		end
		-- Still unlinked: wrap the whole form in a single link unless only multiword linking was requested.
		if not multiword_only and not form:find("%[%[") then
			form = "[[" .. form .. "]]"
		end
	end
	return form
end
-- Strip redundant brackets surrounding the entire form: if `form` is exactly one link "[[...]]"
-- whose content contains no further brackets, return that content; otherwise return `form` unchanged.
function export.strip_redundant_links(form)
	return rmatch(form, "^%[%[([^%[%]]*)%]%]$") or form
end
return export