-- This module is not meant to be used directly. It is used by {{la-IPA}};
-- see that template for usage.
local export = {}
local m_a = require("Module:accent qualifier")
local m_IPA = require("Module:IPA")
local m_str_utils = require("Module:string utilities")
local m_table = require("Module:table")
local lang = require("Module:languages").getByCode("la")
local concat = table.concat
local deep_equals = m_table.deepEquals
local gsplit = m_str_utils.gsplit
local insert = table.insert
local invert = m_table.invert
local list_to_set = m_table.listToSet
local remove = table.remove
local rfind = m_str_utils.find
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
local trim = m_str_utils.trim
local u = m_str_utils.char
local ugsub = m_str_utils.gsub
local ulower = m_str_utils.lower
local usub = m_str_utils.sub
local ulen = m_str_utils.len
local umatch = m_str_utils.match
-- Combining diacritics, built from their code points with `u`.
local MACRON = u(0x304) -- ̄
local BREVE = u(0x306) -- ̆
local TREMA = u(0x308) -- ̈
-- Pattern fragment: an optional macron, then an optional breve, then an
-- optional trema.
local LENGTH = MACRON .. "?" .. BREVE .. "?" .. TREMA .. "?"
local TIE = u(0x361) -- ͡
-- Vowel qualities that can occur in the IPA output.
local VOWELS = "aeɛiɪoɔuʊyʏ"
-- Character class matching one of VOWELS. It has to be a bracketed set: the
-- phonetic rules place it directly after `%f`, and a frontier pattern is only
-- valid when followed by a set. (The previous value was the empty string.)
local VOWEL = "[" .. VOWELS .. "]"
local TILDE = u(0x303) -- ̃
-- IPA length marks.
local HALF_LONG = "ˑ"
local LONG = "ː"
-- Maps Latin spellings to their Classical IPA values. letters_to_ipa() picks the
-- longest spelling that prefixes the remaining word and appends the value; a
-- table value (see the last entry of the third row) is appended phoneme by phoneme.
-- NOTE(review): every `["..."]` key appears to have been stripped from this
-- table (each entry starts with a bare `=`), so it does not parse as written.
-- The keys must be restored from the upstream module — TODO confirm.
local letters_ipa = {
= "aː", = "eː", = "iː", = "oː", = "uː", = "yː",
= "ae̯", = "au̯", = "ei̯", = "eu̯", = "oe̯", = "uː",
= "k", = "ɡ", = "k", = "w", = {"k", "s"},
= "pʰ", = "tʰ", = "kʰ", = "rʰ",
= "kʷ", = "ɡʷ", = "sʷ",
= "ˈ",
}
-- Spelling-to-IPA overrides for Ecclesiastical pronunciation.
-- Only includes changes from letters_ipa above.
-- Presumably "Module:table/setParent" makes lookups that miss here fall back to
-- letters_ipa — verify against that module.
-- NOTE(review): the `["..."]` keys appear to have been stripped from every
-- entry, so the table does not parse as written — TODO restore from upstream.
local letters_ipa_eccl = require("Module:table/setParent")({
= "i", = "iː",
= "eː", = "eː",
= "c", -- becomes /k/, /t͡ʃ/ or /ʃ/
= "v",
= "f",
}, letters_ipa)
-- Maps each tense short vowel to its lax counterpart. The keys had been lost
-- (entries began with a bare `=`); they are restored here from the values.
local lax_vowel = {
	["e"] = "ɛ",
	["i"] = "ɪ",
	["o"] = "ɔ",
	["u"] = "ʊ",
	-- No evidence for this, and Greek did not have a near-close lax front-rounded vowel as far as we can tell
	-- ["y"] = "ʏ",
}
-- Reverse mapping (lax vowel -> tense vowel), built with `invert`.
local tense_vowel = invert(lax_vowel)
-- Maps each voiceless stop to its voiced counterpart. The keys had been lost
-- (entries began with a bare `=`); they are restored here from the values.
local voicing = {
	["p"] = "b",
	["t"] = "d",
	["k"] = "ɡ",
}
-- Reverse mapping (voiced stop -> voiceless stop), built with `invert`.
local devoicing = invert(voicing)
-- Ordered list of rewrite rules for the Classical phonetic transcription. Each
-- entry is a pair: a pattern, and either a replacement string or a replacement
-- function. convert_word() walks the list in order when `phonetic` is set and
-- `eccl` is not.
-- NOTE(review): the bracketed character classes inside most patterns, and the
-- table indices inside the replacement functions (e.g. `tense_vowel or v`),
-- appear to have been stripped. Patterns such as "(*)()" or "()(*)%f" are not
-- valid as written; restore them from the upstream module — TODO confirm.
local phonetic_rules = {
-- Bibliography included at the end
-- Assimilation of /ɡ/ to [ŋ] before a following /n/
{"ɡ(*)n", "ŋ%1n"},
-- Per Allen (1978: 23), although note the reservations expressed on the next page.
-- Assimilation of word-internal /n/ and /m/ to following consonants. Exception: /m/ does not assimilate to a following /n/.
{"(*)()", "ŋ%1%2"},
{"m(*)()", "n%1%2"},
{"n(*)()", "m%1%2"},
-- Per George M. Lane: “Nasals changed their place of articulation to that of the following consonant. Thus, dental n before the labials p and b became the labial m... labial m before the gutturals c and g became guttural n...labial m before the dentals t, d, s became dental n...” (§164.3); “One nasal, n, is assimilated to another, m...but an m before n is never assimilated..." (§166.5). -- Per Lloyd (1987: 84): “The opposition between nasals was neutralized in syllable-final position, with the realization of the nasality being assimilated to the point of articulation of the following consonant, e.g., is found only before labials, only before dentals or alveolars, and only before velars and /n/."
-- Potential addition: assimilation of final /m/ and /n/ across word boundaries, per e.g. Allen (1987: 28, 31).
-- No additional labialization before high back vowels
{"ʷ%f", ""},
-- Tensing of short vowels before another vowel
{
"(" .. VOWEL .. ")(+?)%f" .. VOWEL,
function (v, following)
return (tense_vowel or v) .. following
end,
},
-- But not before consonantal glides
{"e(̯)", "ɛ%1"},
-- Nasal vowels
{
"(" .. VOWEL .. ")(" .. LONG .. "?)m$",
function (v, long)
-- 2025-05-15: Change per ]
-- The condition below is hard-wired to true, so the HALF_LONG return after
-- the `if` is currently unreachable.
if true then -- long == LONG then
return (tense_vowel or v) .. TILDE .. LONG
end
return (lax_vowel or v) .. TILDE .. HALF_LONG
end,
},
{
"(" .. VOWEL .. ")(*)",
function (v, following)
return (tense_vowel or v) .. TILDE .. LONG .. following
end,
},
-- Realization of /r/ as a tap
-- Pultrová (2013) argues for Latin /r/ being an alveolar tap.
-- Lloyd (1987: 81) agrees: “The /r/ was doubtlessly an alveolar flap."
-- Allen (1978: 33) expresses doubt: “By the classical period there is no reason to think that the sound had not strengthened to the trill described by later writers.”
-- Unconditional transcription is preferable to unconditional per 18 September 2021 discussion at ]
-- No consensus yet on how to implement conditional allophony of vs.
-- Voicing and loss of intervocalic /h/.
{"(.)h", "%1(ɦ)"},
-- Per Allen (1978: 43–45).
-- Phonetic (as opposed to lexical/phonemic) assimilations
-- Place
-- First because this accounts for 'atque' seemingly escaping total assimilation (and 'adque' presumably not)
{"d(*s%f)", "s%1"}, -- leave out since etsi has , not
{"s(*)s%f", "s(ː)%1"},
{"st(+)()", "s(t)%1%2"},
{"d(+)()", "%2%1%2"}, --leave out since dr does not assimilate, even when heterosyllabic (e.g. quadrans), except in prefixed words
{"b(+)()", "%2%1%2"},
{"s(+)(f)", "%2%1%2"},
-- Regressive voicing assimilation in consonant clusters
{
"()(*)%f",
function (consonant, following)
return (devoicing or consonant) .. following
end,
},
{
"()(*)%f",
function (consonant, following)
return (voicing or consonant) .. following
end,
},
-- 2025-05-15: Numerous changes per ]:
-- (1) simplify l-pinguis vs. l-exilis to just (formerly ) vs. (formerly in some circumstances);
-- consider further simplifying further to use before non-high-front vowels
-- (2) don't mark dental or alveolar notations on coronals
-- (3) don't mark centralized ä on
-- Allophones of /l/
{"l", "ɫ"},
-- “Pinguis”. Dark/velarized.
-- Per Weiss (2009: 117): “... pinguis (velar). l is exīlis before i and when geminate, otherwise l is pinguis.”
-- Page 82: “... l is pinguis even before e, e.g. Herculēs < Hercolēs ... < Hercelēs ...”
-- Per Sihler (1995: 174): “l exilis was found before the vowels -i- and -ī-, and before another -l-; l pinguis occurred before any other vowel; before any consonant except l; and in word-final position l pinguis actually had two degrees of avoirdupois, being fatter before a consonant than before a vowel...”
-- Page 41: “... velarized l (that is, ‘l pinguis’)...”
-- Sen (2015: §2) states that /l/ was velarized in word-final position or before consonants–other than another /l/–and that it had varying degrees of “dark resonance (velarization in articulatory terms)” (p. 23) before e, a, o, and u (p. 33).
-- Both Sen and Sihler indicate different degrees of velarization, depending on the environment. IPA lacks a way to represent these gradations, unfortunately.
{"ɫ(*)ɫ", "l%1l"},
{"ɫ(*)", "l%1"},
-- “Exīlis”. Not dark/velarized. Possibly palatalized.
-- Per Sen (2015: 29): It is plausible that simple onset /l/ was palatalized before /i/, thus it seems likely that geminate /ll/ was also palatalized, given the similar behaviour of the two...”
-- Per Weiss (2009: 82): “In Latin, l developed... a non-velar (possibly palatal) allophone called exīlis before i and when geminate...”
-- Per Sihler (1995: 174): “l exilis was found before the vowels -i- and -ī-, and before another -l-.”
-- Per Sihler (2000: §133.1): "It is less clear whether the 'thin' lateral was specifically palatal, or palatalized, or only neutral."
-- Giannini and Marotta apparently argue that it was not palatalized (https://i.imgur.com/ytM1QDn.png). I do not have access to the book in question.
-- Retracted /s/
-- {"s", "s̠"},
-- Lloyd (1987: 80–81) expresses some uncertainty about this, but appears to overall be in favour of it: “... the evidence that the apico-alveolar pronunciation was ancient in Latin and inherited from Indo-European is quite strong.”
-- Per Zampaulo (2019: 93), “... in many instances, Latin s was likely pronounced as an apical segment (rather than laminal )."
-- Per Widdison (1987: 64), "In all, it would be fair to state that the apico-alveolar articulation represented the main allophonic variant of Latin and possibly IE /s/..."
-- dental Z
-- {"z", "z̪"},
-- Dental articulations
-- {"", "%0̪"} ,
-- {"n(*)", "n̪%1"}, --it's not as clear as for the stops
--Allophones of A
-- {"a", "ä"},
-- Works cited
-- Allen, William Sidney. 1978. Vox Latina: A Guide to the pronunciation of Classical Latin.
-- Lane, George M. A Latin grammar for schools and colleges.
-- Lloyd, Paul M. 1987. From Latin to Spanish.
-- Pultrová, Lucie. 2013. On the phonetic nature of the Latin R.
-- Sen, Ranjan. 2015. Syllable and segment in Latin.
-- Sihler, Andrew L. 1995. New comparative grammar of Greek and Latin.
-- Sihler, Andrew L. 2000. Language history: An introduction.
-- Weiss, Michael. 2009. Outline of the historical and comparative grammar of Latin.
-- Widdison, Kirk A. 16th century Spanish sibilant reordering: Reasons for divergence.
-- Zampaulo, André. 2019. Palatal Sound Change in the Romance languages: Diachronic and Synchronic Perspectives.
}
-- Ordered list of {pattern, replacement} rewrite rules for the Ecclesiastical
-- phonetic transcription. convert_word() walks the list in order when both
-- `phonetic` and `eccl` are set.
-- NOTE(review): the bracketed character classes inside most patterns appear to
-- have been stripped (e.g. "()(*)", "()$"); several rules now share an
-- identical, invalid pattern. Restore from the upstream module — TODO confirm.
local phonetic_rules_eccl = {
-- Specifically the Roman Ecclesiastical for singing from the Liber Usualis
{"(*)s(*)%f", "%1s̬%2"}, --partial voicing of s between vowels
{"s(*)%f", "z%1"}, --full voicing of s before voiced consonants
{"ek(*)s()", "eɡ%1z%2"}, --voicing of the prefix ex-
{"kz", "ɡz"},
-- Tapped R intervocalically and in complex onset
-- ^ Citation needed for this being the case in Ecclesiastical pronunciation
-- {"(+)r(?)", "%1ɾ%2"},
-- {"()r", "%1ɾ"},
-- Dental articulations
{"()(*)", "%1̪%2"}, --assimilation of n to dentality.
--Note that the quality of n might not be dental otherwise--it may be alveolar in most contexts in Italian, according to Wikipedia.
{"()()", "%1̪%2"}, --t and d are dental, except as the first element of a palatal affricate
{"t͡s", "t̪͡s̪"}, -- dental affricates
{"d͡z", "d̪͡z̪"}, --dental affricates
{"t̪(*t͡ʃ)", "t%1"},
{"d̪(*d͡ʒ)", "d%1"},
--end of words
{"()t$", "%1̪t̪"},
{"()$", "%1̪"},
--Partial assimilation of l and n before palatal affricates, as in Italian
{"()(*t͡ʃ)", "%1̠ʲ%2"},
{"()(*d͡ʒ)", "%1̠ʲ%2"},
{"()(*ʃ)", "%1̠ʲ%2"},
-- other coda nasal assimilation, full and partial. Per Canepari, only applies to /n/ and not to /m/
{"n(*)", "ŋ%1"},
{"n(*)", "ɱ%1"},
}
-- Lookup table whose values are long vowels and long-nucleus diphthongs;
-- convert_word() consults it for the accented syllable in the Ecclesiastical
-- phonetic transcription.
-- NOTE(review): the `["..."]` keys appear to have been stripped from every
-- entry, so the table does not parse as written. Judging by the paired values,
-- the keys were presumably the short and long forms of each vowel — TODO
-- confirm against the upstream module.
local lengthen_vowel = {
= "aː", = "aː",
= "ɛː", = "ɛː",
= "eː", = "eː",
= "iː", = "iː",
= "ɔː", = "ɔː",
= "oː", = "oː",
= "uː", = "uː",
= "aːu̯",
= "ɛːu̯",
= "eːu̯",
}
-- Phoneme strings that count as a syllable nucleus, passed through
-- `list_to_set` so membership can be tested by indexing.
local vowels = list_to_set({
	-- Short monophthongs
	"a", "ɛ", "e", "ɪ", "i", "ɔ", "o", "ʊ", "u", "y",
	-- Long monophthongs
	"aː", "ɛː", "eː", "iː", "ɔː", "oː", "uː", "yː",
	-- Diphthongs
	"ae̯", "au̯", "ei̯", "eu̯", "oe̯", "ou̯",
})
-- Permitted syllable onsets. Starts as a list; the loop below appends an
-- "s" + onset variant for every entry matched by the pattern, and the list is
-- then converted to a set with `list_to_set`.
local onsets = {
"p", "pʰ", "b",
"t", "tʰ", "d",
"k", "kʰ", "kʷ", "ɡ", "ɡʷ",
"s", "sʷ", "z", "f", "v", "h",
"t͡s", "d͡z", "t͡ʃ", "d͡ʒ", "ʃ",
"l", "r", "rʰ",
"m", "n", "ɲ",
"j", "w",
"pl", "pʰl", "bl",
"kl", "kʰl", "ɡl",
"fl",
"pr", "pʰr", "br",
"tr", "tʰr", "dr",
"kr", "kʰr", "ɡr",
"fr",
}
-- The loop bound is evaluated once, so entries appended inside the loop are
-- not themselves revisited.
-- NOTE(review): `local v = onsets` presumably lost an index (`onsets[i]`), and
-- the pattern "^*?$" appears to have lost its character classes; as written,
-- `v` is the whole table and the pattern is not meaningful — TODO restore from
-- the upstream module.
for i = 1, #onsets do
local v = onsets
if umatch(v, "^*?$") then
insert(onsets, "s" .. v)
end
end
onsets = list_to_set(onsets)
-- Permitted syllable codas, passed through `list_to_set` so membership can be
-- tested by indexing.
local codas = list_to_set({
	-- Single consonants
	"p", "pʰ", "b",
	"t", "tʰ", "d",
	"k", "kʰ", "ɡ",
	"s", "z", "f",
	"ʃ",
	"l", "r",
	"m", "n", "ɲ",
	"j",
	-- Stop + s, and s + stop
	"ps", "ts", "ks",
	"sp", "st", "sk",
	"spʰ", "stʰ", "skʰ",
	-- Clusters beginning with l
	"lp", "lpʰ", "lb", "lps",
	"lt", "ltʰ", "ld",
	"lk", "lkʰ", "lɡ", "lks",
	"ls",
	"lm", "ln", "lms", "lns",
	-- Clusters beginning with r
	"rp", "rpʰ", "rb", "rps",
	"rt", "rtʰ", "rd",
	"rk", "rkʰ", "rɡ", "rks",
	"rs",
	"rl", "rls",
	"rm", "rn", "rms", "rns",
	-- Clusters beginning with a nasal
	"mp", "mpʰ", "mb", "mps",
	"nt", "ntʰ", "nd",
	"nk", "nkʰ", "nɡ", "nks",
	"ns",
})
-- Prefixes that end in a consonant; can be patterns. Occurrences of such
-- prefixes + i + vowel cause the i to convert to j (to suppress this, add a
-- dot, i.e. syllable boundary, after the i).
-- Two entries had lost their bracketed character classes: "a" (which would
-- have treated a bare initial a- as a consonant-final prefix) and "trns"
-- (which cannot match any word). They are restored as ab-/ad- and trans-/trāns-.
local cons_ending_prefixes = {
	"a[bd]", "circum", "con", "dis", "ex", "inter", "in", "ob", "per",
	"subter", "sub", "super", "tr[aā]ns"
}
-- Maps each macron vowel to the corresponding breve vowel. Used as the
-- replacement table when generating the short-vowel variant of a vowel that is
-- marked with both a macron and a breve. The keys had been lost (entries began
-- with a bare `=`); they are restored here from the values.
local macrons_to_breves = {
	["ā"] = "ă",
	["ē"] = "ĕ",
	["ī"] = "ĭ",
	["ō"] = "ŏ",
	["ū"] = "ŭ",
	-- Unicode doesn't have breve-y
	["ȳ"] = "y" .. BREVE,
}
-- Expand a ligature into two letters, placing `diacritic` after the first
-- letter: "æ" yields a + diacritic + e, anything else yields o + diacritic + e.
local function normalize_ligatures(ligature, diacritic)
	local first_letter = "o"
	if ligature == "æ" then
		first_letter = "a"
	end
	return first_letter .. diacritic .. "e"
end
-- NOTE: Everything is lowercased very early on, so we don't have to worry
-- about capitalized letters.
-- FIXME: handle ǟë̄ï̄ȫǖÿ̄ etc.
local short_vowels_string = "aeiouyăĕĭŏŭäëïöüÿ" -- no breve-y in Unicode
local long_vowels_string = "āēīōūȳ"
local vowels_string = short_vowels_string .. long_vowels_string
-- Character classes matching one orthographic vowel / one non-vowel. These
-- were empty strings, which made every pattern built from them match the wrong
-- thing (e.g. "qu(" .. vowels_c .. ")" degenerated to "qu()").
local vowels_c = "[" .. vowels_string .. "]"
local non_vowels_c = "[^" .. vowels_string .. "]"
-- Record a tracking entry named "la-IPA/<page>" through Module:debug/track.
-- Always returns true.
local function track(page)
	local record = require("Module:debug/track")
	record("la-IPA/" .. page)
	return true
end
-- Strip every occurrence of the pattern `ch` from the NFD (decomposed) form of
-- `word`, then return the result recomposed to NFC.
local function remove_diacritic(word, ch)
	local decomposed = toNFD(word)
	-- Keep only the first result of ugsub (the string), not the match count.
	local stripped = ugsub(decomposed, ch, "")
	return toNFC(stripped)
end
-- Return true if `ch` is truthy and matches `pattern`, otherwise false.
-- A nil `ch` (e.g. an index past the end of a phoneme list) yields false
-- without calling umatch.
local function match_phoneme(ch, pattern)
	if not ch then
		return false
	end
	if umatch(ch, pattern) then
		return true
	end
	return false
end
-- Convert `word` (a string) into a list of phoneme strings, which is returned.
-- First pass: repeatedly find the longest dictionary spelling that prefixes the
-- remaining word (letters_ipa_eccl when `eccl` is truthy, otherwise
-- letters_ipa) and append its value, or each element of it when the value is a
-- table. A character with no dictionary match is appended unchanged.
-- Second pass (only when `eccl`): walk the phoneme list and apply the
-- Ecclesiastical adjustments described by the inline comments.
-- `phonetic` is accepted but not read in this function.
-- NOTE(review): bracketed text appears to have been stripped throughout this
-- function. The index expressions on `ph`, `dictionary` and
-- `letters_ipa_eccl`, and the character classes in the patterns (e.g.
-- `front_vowel = "^"`), are missing; `vowels]` is a visible remnant. The
-- function does not parse as written and the offsets cannot be recovered from
-- this file alone — TODO restore from the upstream module.
local function letters_to_ipa(word, phonetic, eccl)
local ph = {}
local dictionary = eccl and letters_ipa_eccl or letters_ipa
while ulen(word) > 0 do
local longestmatch = ""
-- Longest-prefix search over all dictionary spellings.
for letter in pairs(dictionary) do
local letter_len = ulen(letter)
if letter_len > ulen(longestmatch) and usub(word, 1, letter_len) == letter then
longestmatch = letter
end
end
if ulen(longestmatch) > 0 then
local ipa = dictionary
if type(ipa) == "table" then
for _, phoneme in ipairs(ipa) do
insert(ph, phoneme)
end
else
insert(ph, ipa)
end
word = usub(word, ulen(longestmatch) + 1)
else
-- No dictionary entry: pass the character through unchanged.
insert(ph, usub(word, 1, 1))
word = usub(word, 2)
end
end
if eccl then
local front_vowel = "^"
local i, n = 1, #ph
while i <= n do
local cur = ph
if cur == "c" then -- c, but not k/q/x
ph = "k" -- default
local nxt = ph
if nxt == "c" then -- cc{e|i}: t.t͡ʃ
if match_phoneme(ph, front_vowel) then
ph, ph = "t", "t͡ʃ"
i = i + 2
end
elseif match_phoneme(nxt, front_vowel) then -- c{e|i}: t͡ʃ
ph = "t͡ʃ"
i = i + 1
end
elseif cur == "ɡ" then
local nxt = ph
if nxt == "ɡ" then -- gg{e|i}: d.d͡ʒ
if match_phoneme(ph, front_vowel) then
ph, ph = "d", "d͡ʒ"
i = i + 2
end
elseif nxt == "n" then -- gn: ɲ.ɲ
ph, ph = "ɲ", "ɲ"
i = i + 1
elseif match_phoneme(nxt, front_vowel) then -- g{e|i}: d͡ʒ
ph = "d͡ʒ"
end
elseif cur == "h" then
ph = ""
elseif cur == "m" then -- mihī̆ = michī̆
if ph == "i" and ph == "h" and match_phoneme(ph, "^ː?$") then
ph = letters_ipa_eccl
i = i + 3
end
elseif cur == "n" then -- nihil = nichil
if ph == "i" and ph == "h" and ph == "i" and ph == "l" then
ph = letters_ipa_eccl
i = i + 4
end
elseif cur == "s" then -- sc{e|i}: ʃ.ʃ
if ph == "c" and match_phoneme(ph, front_vowel) then
ph, ph = "ʃ", "ʃ"
i = i + 2
end
elseif cur == "t" then -- ti{V}: t͡si
if match_phoneme(ph, "^") and not match_phoneme(ph, "ʰ?$") and vowels] then
ph = "t͡s"
i = i + 2
end
elseif cur == "z" then
if ph == "z" then -- zz: d.d͡z
ph, ph = "d", "d͡z"
i = i + 1
else -- z: d͡z
ph = "d͡z"
end
end
i = i + 1
end
end
return ph
end
-- Return the onset of `syll` (a list of phoneme strings) as one string: every
-- phoneme before the first vowel, concatenated, with the stress mark "ˈ" left
-- out. Returns "" when the syllable starts with a vowel or is empty.
-- The `syll[i]` index expressions had been lost and are restored here.
local function get_onset(syll)
	local consonants = {}
	for i = 1, #syll do
		local phoneme = syll[i]
		if vowels[phoneme] then
			break
		end
		if phoneme ~= "ˈ" then
			insert(consonants, phoneme)
		end
	end
	return concat(consonants)
end
-- Return the coda of `syll` (a list of phoneme strings) as one string: every
-- phoneme after the last vowel, concatenated in their original order.
-- Returns "" when the syllable ends in a vowel or is empty.
-- The `syll[i]` index expressions had been lost and are restored here.
local function get_coda(syll)
	local consonants = {}
	-- Scan backwards from the end, prepending so the order is preserved.
	for i = #syll, 1, -1 do
		local phoneme = syll[i]
		if vowels[phoneme] then
			break
		end
		insert(consonants, 1, phoneme)
	end
	return concat(consonants)
end
-- Return the first vowel phoneme in `syll` (a list of phoneme strings), or
-- nil when the syllable contains no vowel.
-- The `syll[i]` index expressions had been lost and are restored here.
local function get_vowel(syll)
	for i = 1, #syll do
		local phoneme = syll[i]
		if vowels[phoneme] then
			return phoneme
		end
	end
	return nil
end
-- Split the word into syllables of CV shape.
-- `remainder` is a list of phoneme strings; "." marks an explicit syllable
-- break and "ˈ" an explicit stress. Returns a list of syllables, each a list
-- of phoneme strings. Calls track() for any resulting syllable whose onset or
-- coda is not in `onsets` / `codas`.
-- The set lookups and index expressions (`vowels[phoneme]`, `current[1]`,
-- `syllables[i - 1]`, `onsets[onset]`, `codas[coda]`, ...) had been lost and
-- are restored here.
local function split_syllables(remainder)
	local syllables, syll = {}, {}
	for _, phoneme in ipairs(remainder) do
		if phoneme == "." then
			if #syll > 0 then
				insert(syllables, syll)
				syll = {}
			end
			-- Insert a special syllable consisting only of a period.
			-- We remove it later but it forces no movement of consonants across
			-- the period.
			insert(syllables, {"."})
		elseif phoneme == "ˈ" then
			if #syll > 0 then
				insert(syllables, syll)
			end
			syll = {"ˈ"}
		elseif vowels[phoneme] then
			-- A vowel closes the current (C)V syllable.
			insert(syll, phoneme)
			insert(syllables, syll)
			syll = {}
		else
			insert(syll, phoneme)
		end
	end
	-- If there are phonemes left, then the word ends in a consonant.
	-- Add another syllable for them, which will get joined to the preceding
	-- syllable down below.
	if #syll > 0 then
		insert(syllables, syll)
	end
	-- Split consonant clusters between syllables
	for i, current in ipairs(syllables) do
		if #current == 1 and current[1] == "." then
			-- If the current syllable is just a period (explicit syllable
			-- break), remove it. The loop will then skip the next syllable,
			-- which will prevent movement of consonants across the syllable
			-- break (since movement of consonants happens from the current
			-- syllable to the previous one).
			remove(syllables, i)
		elseif i > 1 then
			local previous = syllables[i - 1]
			local onset = get_onset(current)
			-- Shift over consonants until the syllable onset is valid
			while not (onset == "" or onsets[onset]) do
				insert(previous, remove(current, 1))
				onset = get_onset(current)
			end
			-- If the preceding syllable still ends with a vowel,
			-- and the current one begins with s + another consonant, then shift it over.
			if get_coda(previous) == "" and (current[1] == "s" and not vowels[current[2]]) then
				insert(previous, remove(current, 1))
			end
			-- Check if there is no vowel at all in this syllable. That
			-- generally happens either (1) with an explicit syllable division
			-- specified, like 'cap.ra', which will get divided into the syllables
			-- [ca], [p], [.], [ra]; or (2) at the end of a word that ends with
			-- one or more consonants. We move the consonants onto the preceding
			-- syllable, then remove the resulting empty syllable. If the
			-- new current syllable is [.], remove it, too. The loop will then
			-- skip the next syllable, which will prevent movement of consonants
			-- across the syllable break (since movement of consonants happens
			-- from the current syllable to the previous one).
			if not get_vowel(current) then
				for _ = 1, #current do
					insert(previous, remove(current, 1))
				end
				remove(syllables, i)
				if syllables[i] and #syllables[i] == 1 and syllables[i][1] == "." then
					remove(syllables, i)
				end
			end
		end
	end
	-- Sanity check: track (but do not reject) syllables with an unknown onset or coda.
	for _, syll in ipairs(syllables) do
		local onset = get_onset(syll)
		local coda = get_coda(syll)
		if not (onset == "" or onsets[onset]) then
			track("bad onset")
			--error("onset error:[" .. onset .. "]")
		end
		if not (coda == "" or codas[coda]) then
			track("bad coda")
			--error("coda error:[" .. coda .. "]")
		end
	end
	return syllables
end
-- Return a truthy value (the result of `rfind`) if `phoneme` is exactly one
-- short vowel.
-- The pattern had lost its character class and read "^$", which only matches
-- the empty string, so no phoneme was ever considered short. The class used
-- here lists the short monophthongs from the `vowels` set.
-- NOTE(review): the upstream module may use a narrower class (only the tense
-- vowels) — confirm; the callers in this file run before vowels are laxed, so
-- the wider class gives the same result for them.
local function phoneme_is_short_vowel(phoneme)
	return rfind(phoneme, "^[aɛeɪiɔoʊuy]$")
end
-- Determine which syllable carries the stress.
-- `syllables` is a list of syllables (each a list of phoneme strings).
-- Returns the 1-based index of the stressed syllable, -1 when the form gets no
-- accent (prefixes, and suffixes that would not carry the stress), or nothing
-- when `syllables` is empty.
-- Side effect: a manual stress mark "ˈ" found in a syllable is removed from it.
-- The syllable/phoneme index expressions had been lost and are restored here.
local function detect_accent(syllables, is_prefix, is_suffix)
	-- Manual override
	for i = 1, #syllables do
		local syll = syllables[i]
		for j = 1, #syll do
			if syll[j] == "ˈ" then
				remove(syll, j)
				return i
			end
		end
	end
	-- Prefixes have no accent.
	if is_prefix then
		return -1
	end
	-- Suffixes have an accent only if the stress would be on the suffix when the
	-- suffix is part of a word. Don't get tripped up by the first syllable being
	-- nonsyllabic (e.g. in -rnus).
	if is_suffix then
		local syllables_with_vowel = #syllables - (get_vowel(syllables[1]) and 0 or 1)
		if syllables_with_vowel < 2 then
			return -1
		end
		if syllables_with_vowel == 2 then
			local penult = syllables[#syllables - 1]
			if phoneme_is_short_vowel(penult[#penult]) then
				return -1
			end
		end
	end
	-- Detect accent placement
	if #syllables > 2 then
		-- Does the penultimate syllable end in a single vowel?
		local penult = syllables[#syllables - 1]
		if phoneme_is_short_vowel(penult[#penult]) then
			-- Light penult: stress the antepenult.
			return #syllables - 2
		else
			return #syllables - 1
		end
	elseif #syllables == 2 then
		return #syllables - 1
	elseif #syllables == 1 then
		return #syllables --mark stress on monosyllables so that stress-conditioned sound rules work correctly. Then, delete it prior to display
	end
end
-- Collapse every run of syllable-break characters ("." and "ˈ") in `word` into
-- a single character: "ˈ" if the run contains a stress mark, otherwise ".".
-- Returns only the resulting string (the substitution count is dropped).
-- The pattern had lost its character class and read "+"; it is restored to the
-- set of the two characters the replacement function distinguishes.
local function clean_syllable_breaks(word)
	return (ugsub(word, "[.ˈ]+", function(m)
		return m:find("ˈ") and "ˈ" or "."
	end))
end
-- Convert a single orthographic `word` into an IPA string.
-- `phonetic` selects the phonetic (as opposed to phonemic) transcription and
-- `eccl` the Ecclesiastical (as opposed to Classical) pronunciation.
-- Stages, in order: orthographic normalization (i/j, u/v, qu/gu, breves,
-- tremas, prefixes); stripping of a leading/trailing hyphen to detect
-- prefixes/suffixes; letters_to_ipa(); split_syllables(); detect_accent();
-- per-phoneme and per-syllable adjustments; joining; and, when `phonetic`,
-- application of phonetic_rules / phonetic_rules_eccl.
-- NOTE(review): bracketed text appears to have been stripped throughout this
-- function. Most patterns have lost their character classes (e.g. "%f",
-- "()(u)$", "+"), and index expressions are missing (`syll = syll:gsub(...)`,
-- `lax_vowel] or syll`, `ugsub(word, rule, rule)`, `syllables = ...`). The
-- function does not parse as written and cannot be reliably reconstructed from
-- this file alone — TODO restore from the upstream module.
local function convert_word(word, phonetic, eccl)
-- Normalize i/j/u/v; do this before removing breves, so we keep the
-- ŭ in langŭī (perfect of languēscō) as a vowel.
word = ugsub(word, "(" .. vowels_c .. ")(" .. non_vowels_c .. ")", "%1u%2")
word = ugsub(word, "qu(" .. vowels_c .. ")", "qw%1")
word = word:gsub("%fw", "qw")
word = ugsub(word, "%fgu(" .. vowels_c .. ")", "gw%1") -- nguV or initial guV
word = ugsub(word, "^i(" .. vowels_c .. ")", "j%1")
word = ugsub(word, "^u(" .. vowels_c .. ")", "v%1")
-- Per the August 31 2019 recommendation by ] in
-- ], we convert i/j between vowels to jj if the
-- preceding vowel is short but to single j if the preceding vowel is long.
word = ugsub(word, "(" .. vowels_c .. ")('?)()()", function (vowel, stress, cons, pos)
if vowels_string:find(usub(word, pos, pos)) then
if cons == "u" then
cons = stress .. "v"
-- FIXME: this should also catch diphthongs.
elseif eccl or long_vowels_string:find(vowel) then
cons = stress .. "j"
else
cons = "j" .. stress .. "j"
end
return vowel .. cons
end
end)
--Convert v/w to u syllable-finally
word = word:gsub("%f", "u")
-- Convert i to j before vowel and after any prefix that ends in a consonant,
-- per the August 23 2019 discussion in ].
for _, pref in ipairs(cons_ending_prefixes) do
word = ugsub(word, "^(" .. pref .. ")i(" .. vowels_c .. ")", "%1j%2")
end
-- Convert z to zz between vowels so that the syllable weight and stress assignment will be correct.
word = ugsub(word, "(" .. vowels_c .. ")z(" .. vowels_c .. ")", "%1zz%2")
if eccl then
word = ugsub(word, "(" .. vowels_c .. ")ti(" .. vowels_c .. ")", "%1tt͡si%2")
end
-- Now remove breves.
word = remove_diacritic(word, BREVE)
-- Normalize syllabic vowels like aë, oë; do this after removing breves but
-- before any other normalizations.
word = ugsub(word, "(" .. vowels_c .. ")()", "%1.%2")
word = remove_diacritic(word, TREMA)
-- Assume the u in a final -us or -um is not part of a diphthong
word = word:gsub("()(u)$", "%1.%2")
word = word:gsub("%f", "%0.")
word = word:gsub("_", "")
-- Vowel length before nasal + fricative is allophonic
word = toNFC(toNFD(word):gsub("()" .. MACRON .. "(*)", "%1%2"))
if eccl then
word = toNFC(word:gsub("%f", "%0" .. MACRON))
end
-- Per May 10 2019 discussion in ], we syllabify
-- prefixes ab-, ad-, ob-, sub- separately from following l or r.
word = word:gsub("^a()()", "a%1.%2")
word = word:gsub("^ob()", "ob.%1")
word = word:gsub("^sub()", "sub.%1")
-- Remove hyphens indicating prefixes or suffixes; do this after the above,
-- some of which are sensitive to beginning or end of word and shouldn't
-- apply to end of prefix or beginning of suffix.
local is_prefix, is_suffix
word = word:gsub("^(%-?)(.-)(%-?)$", function(m1, m2, m3)
is_prefix, is_suffix = m1 == "-", m3 == "-"
return m2
end)
-- Convert word to IPA
local phonemes = letters_to_ipa(word, phonetic, eccl)
-- Split into syllables
local syllables = split_syllables(phonemes)
-- Add accent
local accent = detect_accent(syllables, is_prefix, is_suffix)
for _, syll in ipairs(syllables) do
for j in ipairs(syll) do
if eccl then
syll = syll:gsub("ː", "")
elseif phonetic then
syll = lax_vowel] or syll
end
end
end
for i, syll in ipairs(syllables) do
if eccl and i == accent and phonetic and vowels] then
syll = lengthen_vowel] or syll
end
for j = 1, #syll - 1 do
if syll == syll then
syll = ""
end
end
end
for i, syll in ipairs(syllables) do
syll = concat(syll)
-- Atonic /ɔ/ and /ɛ/ merge with /o/ and /e/ respectively
if eccl and phonetic and i == accent then
syll = syll:gsub("o", "ɔ")
syll = syll:gsub("e", "ɛ")
-- Syllable-initial /ɡn/ becomes /n/ (e.g. "gnōscō")
elseif not eccl then
syll = syll:gsub("^ɡn", "n")
end
syllables = (i == accent and "ˈ" or "") .. syll
end
word = clean_syllable_breaks(concat(syllables, "."))
-- poetic meter shows that a consonant before "h" was syllabified as an onset, not as a coda.
-- Based on outcome of talk page discussion, this will be indicated by the omission of /h/ in this context.
word = ugsub(word, "()(*)h", "%2%1")
if eccl then
-- Replace ʃ.ʃ or ʃˈʃ with .ʃ or ˈʃ after any consonant.
word = ugsub(word, "()ʃ(*)ʃ", "%1%2ʃ")
word = word:gsub("ʰ", "")
else
-- ]
-- word = word:gsub("j", "i̯") -- normalize glide spelling
-- word = word:gsub("w", "u̯")
end
if phonetic then
local rules = eccl and phonetic_rules_eccl or phonetic_rules
for _, rule in ipairs(rules) do
word = ugsub(word, rule, rule)
end
-- [2025-05-15: now that we've disabled the phonemic notation, we should put the syllable breaks in the
-- phonetic notation, as otherwise they don't display at all]
-- word = word:gsub("%.+", "") -- remove the dots
end
if phonetic then
word = ugsub(word, "(%a(?))%1", "%1" .. LONG) -- convert double consonants into long ones
word = ugsub(word, "+", "ː") -- maximum of one full length mark
end
return clean_syllable_breaks(word)
end
-- Convert a whole `text` (possibly several words) to IPA and return the
-- space-joined, trimmed result.
-- `phonetic` and `eccl` are passed through to convert_word().
-- Raises an error naming the offending characters if any are left in `text`
-- after the first substitution below.
-- Steps: validate characters; decompose to NFD; lowercase; expand ligatures
-- with normalize_ligatures(); turn the tie character into "_"; split on runs
-- of spaces and then on hyphens; convert each piece with convert_word().
-- NOTE(review): the character classes in several patterns appear to have been
-- stripped (the `ugsub(text, "", "")` validity filter, `text:gsub("", "")`,
-- the ligature capture "()", the frontier sets in "%f%-+%f", and
-- `word:match("")`). As written the validity check cannot work as the error
-- message implies — TODO restore from the upstream module.
function export.convert_words(text, phonetic, eccl)
local disallowed = ugsub(text, "", "")
local n = ulen(disallowed)
if n > 0 then
-- The doubled %% survive the first format() so that the second format()
-- can fill in the plural suffix and the verb.
local msg = ("The character%%s %s %%s not allowed."):format(mw.dumpObject(disallowed))
if n == 1 then
error(msg:format("", "is"))
else
error(msg:format("s", "are"))
end
end
text = toNFD(text)
-- Call ulower() even though it's also called in phoneticize,
-- in case convert_words() is called externally.
text = ulower(text)
text = text:gsub("", "")
text = ugsub(text, "()(" .. LENGTH .. ")", normalize_ligatures)
-- Treat a tie as "_".
text = text:gsub(TIE, "_")
local result = {}
-- Split on spaces and hyphens, but hyphens preceded/followed by a space
-- are included in the word (e.g. prefixes and suffixes).
for chunk in gsplit(text, " +") do
for word in gsplit(chunk, "%f%-+%f") do
if word:match("") then
insert(result, convert_word(toNFC(word), phonetic, eccl))
end
end
end
return trim(concat(result, " "))
end
-- Phoneticize Latin TEXT. Return a list of one or more phoneticizations,
-- each of which is a two-element list {PHONEMIC, PHONETIC}. If ECCL, use
-- Ecclesiastical pronunciation. Otherwise, use Classical pronunciation.
--
-- Two phoneticizations are produced when TEXT contains a macron vowel followed
-- by a breve (long and short readings), or a ";" (tautosyllabic and
-- heterosyllabic readings); if both readings come out identical only one is
-- returned.
--
-- The macron-vowel character classes had been lost from the three patterns
-- below (leaving a bare BREVE match and an empty capture); they are restored to
-- the vowels that macrons_to_breves is meant to translate.
function export.phoneticize(text, eccl)
	local macron_vowel = "[āēīōūȳ]"

	-- Build the {PHONEMIC, PHONETIC} pair for one reading.
	local function do_phoneticize(text, eccl)
		return {
			export.convert_words(text, false, eccl),
			export.convert_words(text, true, eccl),
		}
	end

	text = ulower(text)
	-- If we have a macron-breve sequence, generate two pronunciations, one for
	-- the long vowel and one for the short.
	if rfind(text, macron_vowel .. BREVE) then
		-- Long reading: drop the breve. Short reading: swap macron for breve.
		local longvar = ugsub(text, "(" .. macron_vowel .. ")" .. BREVE, "%1")
		local shortvar = ugsub(text, "(" .. macron_vowel .. ")" .. BREVE, macrons_to_breves)
		local longipa = do_phoneticize(longvar, eccl)
		local shortipa = do_phoneticize(shortvar, eccl)
		-- Make sure long and short variants are actually different (they won't
		-- be in Ecclesiastical pronunciation).
		if not deep_equals(longipa, shortipa) then
			return {longipa, shortipa}
		else
			return {longipa}
		end
	elseif rfind(text, ";") then
		-- ";" marks an optional syllable break: try it both without and with one.
		local tautosyllabicvar = text:gsub(";", "")
		local heterosyllabicvar = text:gsub(";", ".")
		local tautosyllabicipa = do_phoneticize(tautosyllabicvar, eccl)
		local heterosyllabicipa = do_phoneticize(heterosyllabicvar, eccl)
		if not deep_equals(tautosyllabicipa, heterosyllabicipa) then
			return {tautosyllabicipa, heterosyllabicipa}
		else
			return {tautosyllabicipa}
		end
	else
		return {do_phoneticize(text, eccl)}
	end
end
-- Format one output row: the accent qualifiers followed by the IPA for every
-- phoneticization.
-- `phoneticizations` is a list of {PHONEMIC, PHONETIC} pairs as returned by
-- export.phoneticize(); `dials` is the list of accent qualifiers; when
-- `include_phonemic` is truthy the /phonemic/ form is shown before the
-- [phonetic] one, otherwise only the [phonetic] form is shown.
-- The `[1]` / `[2]` indices and the square brackets around the phonetic form
-- had been lost (leaving an empty string as the pronunciation); they are
-- restored here.
local function make_row(phoneticizations, dials, include_phonemic)
	local IPA_items = {}
	for _, phoneticization in ipairs(phoneticizations) do
		local phonemic = phoneticization[1]
		local phonetic = phoneticization[2]
		local phonetic_brackets = "[" .. phonetic .. "]"
		local pron
		if include_phonemic then
			pron = "/" .. phonemic .. "/ " .. phonetic_brackets
		else
			pron = phonetic_brackets
		end
		insert(IPA_items, {pron = pron})
	end
	return m_a.format_qualifiers(lang, dials) .. " " .. m_IPA.format_IPA_full { lang = lang, items = IPA_items }
end
-- Template entry point: build the full pronunciation output (Classical and/or
-- Ecclesiastical rows) from the arguments of the parent frame, and append any
-- categories.
--
-- Fixes relative to the previous text:
--  * the `[1] =` key of the first parameter spec and the `args[1]` read had
--    been lost and are restored;
--  * `include_phonemic` was parsed but never handed to make_row(), so the
--    parameter had no effect; it is now passed for both rows.
function export.show_full(frame)
	local boolean_default_true = {type = "boolean", default = true}
	local args = require("Module:parameters").process(frame:getParent().args, {
		-- On template pages fall back to a sample word; elsewhere to the page name.
		[1] = {default = mw.title.getCurrentTitle().nsText == "Template" and "īnspīrāre" or mw.loadData("Module:headword/data").pagename},
		classical = boolean_default_true,
		cl = {alias_of = "classical"},
		ecclesiastical = boolean_default_true,
		eccl = {alias_of = "ecclesiastical"},
		vul = {type = "boolean"}, -- To be removed.
		-- 2025-05-15: Add include_phonemic (not by default) per ]
		include_phonemic = {type = "boolean"},
		ann = true,
		accent = {list = true},
		indent = true
	})
	-- Track down any remaining uses of |vul=
	if args.vul ~= nil then
		track("vul")
	end
	local text = args[1]
	local categories = {}
	local accent = args.accent
	local include_phonemic = args.include_phonemic
	local indent = (args.indent or "*") .. " "
	local out = ""
	if args.indent then
		out = indent
	end
	if args.classical then
		out = out .. make_row(
			export.phoneticize(text, false),
			#accent > 0 and accent or {"Classical"},
			include_phonemic
		)
	else
		insert(categories, lang:getCanonicalName() .. " terms with Ecclesiastical IPA pronunciation only")
	end
	-- NOTE(review): the pattern in `text:gsub("", "")` looks like it lost a
	-- character class; as written it leaves `text` unchanged. Left as-is because
	-- the intended class cannot be determined from this file — TODO confirm.
	local anntext = (
		args.ann == "1" and "'''" .. text:gsub("", "") .. "''': " or
		args.ann and "'''" .. args.ann .. "''': " or
		"")
	out = anntext .. out
	if args.ecclesiastical then
		if args.classical then
			-- Second row: start a new line and repeat the annotation.
			out = out .. "\n" .. indent .. anntext
		end
		out = out .. make_row(
			export.phoneticize(text, true),
			#accent > 0 and accent or {'Ecclesiastical'},
			include_phonemic
		)
	end
	return out .. require("Module:utilities").format_categories(categories)
end
return export