local export = {}
local m_a = require("Module:accent qualifier")
local m_IPA = require("Module:IPA")
local ut = require("Module:utils")
local lang = require("Module:languages").getByCode("la")
local u = mw.ustring.char
local rfind = mw.ustring.find
local rsubn = mw.ustring.gsub
local rmatch = mw.ustring.match
local rsplit = mw.text.split
local ulower = mw.ustring.lower
local usub = mw.ustring.sub
local ulen = mw.ustring.len
local BREVE = u(0x0306) -- breve = ̆
local TILDE = u(0x0303) -- ̃
local HALF_LONG = "ˑ"
local LONG = "ː"
-- Spelling-to-phoneme map used by letters_to_ipa() when neither the
-- Ecclesiastical nor the Vulgar dictionary is selected. Values are IPA
-- strings; letters_to_ipa() special-cases the value "ks".
-- NOTE(review): every entry below starts with a bare `=`, i.e. the bracketed
-- string keys are missing and the table does not parse as written. Restore the
-- keys from the upstream revision rather than guessing them from the values.
local letters_ipa = {
= "a", = "e", = "i", = "o", = "u", = "y",
= "aː", = "eː", = "iː", = "oː", = "uː", = "yː",
= "ae̯", = "oe̯", = "ei̯", = "au̯", = "eu̯",
= "b", = "d", = "f",
= "k", = "ɡ", = "w", = "ks",
= "pʰ", = "tʰ", = "kʰ", = "r", = "kʷ", = "ɡʷ",
= "ˈ", = "ˈ",
}
-- Spelling-to-phoneme map selected by letters_to_ipa() when `eccl` is truthy.
-- The placeholder value "q" is turned back into "k" by letters_to_ipa() after
-- its palatalization pass.
-- NOTE(review): as in letters_ipa, the bracketed keys are missing from every
-- entry; restore them from the upstream revision.
local letters_ipa_eccl = {
= "a", = "e", = "i", = "o", = "u", = "i",
= "aː", = "eː", = "iː", = "oː", = "uː", = "iː",
= "eː", = "eː", = "ei̯", = "au̯", = "eu̯",
= "b", = "d", = "f",
= "q", -- dirty hack to make sure k isn't palatalized
= "k", = "ɡ", = "v", = "ks",
= "f", = "tʰ", = "kʰ", = "r", = "kw", = "ɡw", = "sw", --"sw" is needed to avoid in words like suavium
= "",
= "ˈ", = "ˈ",
}
-- Lookup tables between vowel qualities: lax_vowel yields the lax IPA symbol
-- and tense_vowel the tense one. They are consulted by the replacement
-- functions in phonetic_rules and by convert_word(), each falling back to the
-- original vowel when the lookup yields nothing.
-- NOTE(review): the bracketed keys are missing from every entry in both
-- tables; restore them from the upstream revision.
local lax_vowel = {
= "ɛ",
= "ɪ",
= "ɔ",
= "ʊ",
= "ʏ",
}
local tense_vowel = {
= "e",
= "i",
= "o",
= "u",
= "y",
}
-- Stop-consonant lookup tables used by the regressive voicing-assimilation
-- rules in phonetic_rules: `voicing` yields voiced stops and `devoicing`
-- yields voiceless ones.
-- NOTE(review): the bracketed keys are missing from every entry; restore them
-- from the upstream revision.
local voicing = {
= "b",
= "d",
= "ɡ",
}
local devoicing = {
= "p",
= "t",
= "k",
}
-- IPA vowel letters (tense and lax) that can occur in a Classical
-- transcription.
local classical_vowel_letters = "aeɛiɪoɔuʊyʏ"
-- Pattern character class matching exactly one of those vowel letters. It is
-- spliced into the nasal-vowel rules of phonetic_rules as
-- "(" .. classical_vowel .. ")", so it must be a bracketed class; an empty
-- string would turn that capture into a position capture.
local classical_vowel = "[" .. classical_vowel_letters .. "]"
-- Ordered list of {pattern, replacement} pairs. convert_word() walks this list
-- with ipairs() and applies each pair to the whole word when `phonetic` is set
-- and neither the Ecclesiastical nor the Vulgar rule list is selected. A
-- replacement is either a string with %n back-references or a function of the
-- captures. Order matters: later rules see the output of earlier ones.
-- NOTE(review): the bracketed character classes inside many patterns (and some
-- bracketed IPA inside the comments) appear to have been stripped, leaving
-- fragments such as "(?)" and "()" and table lookups without an index
-- expression. The entries are reproduced unchanged; compare them with the
-- upstream revision before relying on them.
local phonetic_rules = {
-- Bibliography included at the end
-- Assimilation of /ɡ/ to a velar nasal before a following /n/.
{"ɡ(?)n", "ŋ%1n"},
-- Per Allen (1978: 23), although note the reservations expressed on the next page.
-- Assimilation of word-internal /n/ and /m/ to following consonants. Exception: /m/ does not assimilate to a following /n/.
{"n(?)()", "m%1%2"},
{"n(?)()", "ŋ%1%2"},
{"m(?)()", "n%1%2"},
{"m(?)()", "ŋ%1%2"},
-- Per George M. Lane: “Nasals changed their place of articulation to that of the following consonant. Thus, dental n before the labials p and b became the labial m... labial m before the gutturals c and g became guttural n...labial m before the dentals t, d, s became dental n…” (§164.3); “One nasal, n, is assimilated to another, m...but an m before n is never assimilated..." (§166.5). -- Per Lloyd (1987: 84): “The opposition between nasals was neutralized in syllable-final position, with the realization of the nasality being assimilated to the point of articulation of the following consonant, e.g., is found only before labials, only before dentals or alveolars, and only before velars and /n/."
-- Potential addition: assimilation of final /m/ and /n/ across word boundaries, per e.g. Allen (1987: 28, 31).
-- No additional labialization before high back vowels
{"ʷ%f", ""},
-- Tensing of short vowels before another vowel
{
"()(?)%f",
function (vowel, following)
return (tense_vowel or vowel) .. following
end,
},
-- But not before consonantal glides
{"ei̯", "ɛi̯"},
{"eu̯", "ɛu̯"},
-- Nasal vowels
-- Word-final vowel + m: nasalized and half-long.
{
"(" .. classical_vowel .. ")m$",
function (vowel)
return (lax_vowel or vowel) .. TILDE .. HALF_LONG
end,
},
-- Vowel in the (stripped) following context: nasalized and long.
{
"(" .. classical_vowel .. ")(?)",
function (vowel, following)
return (tense_vowel or vowel) .. TILDE .. LONG .. following
end,
},
-- Dissimilation after homorganic glides (the tuom volgus-type)
--{"()(?)(?)ʊ", "%1%2%3o"},
--{"()(?)(?)ɪ", "%1%2%3e"},
---Disabled per 19 September 2021 discussion at Template_talk:la-IPA#Transcription_of_syllable-initial_semivowels
-- Realization of /r/ as a tap
-- Pultrová (2013) argues for Latin /r/ being an alveolar tap.
-- Lloyd (1987: 81) agrees: “The /r/ was doubtlessly an alveolar flap."
-- Allen (1978: 33) expresses doubt: “By the classical period there is no reason to think that the sound had not strengthened to the trill described by later writers.”
-- Unconditional transcription is preferable to unconditional per 18 September 2021 discussion at Module_talk:la-pronunc#Transcription_of_Latin's_rhotic_consonant
-- No consensus yet on how to implement conditional allophony of vs.
-- Voicing and loss of intervocalic /h/.
{"(.)h", "%1(ɦ)"},
-- Per Allen (1978: 43–45).
-- Phonetic (as opposed to lexical/phonemic) assimilations
-- Place
-- First because this accounts for 'atque' seemingly escaping total assimilation (and 'adque' presumably not)
{"(?)s", "s%1s"}, -- leave out since etsi has , not
{"s(?)s%f", "s(ː)%1"},
{"st()()", "s(t)%1%2"},
{"d()()", "%2%1%2"}, --leave out since dr does not assimilate, even when heterosyllabic (e.g. quadrans), except in prefixed words
{"b()()", "%2%1%2"},
{"s()f", "f%1f"},
-- Regressive voicing assimilation in consonant clusters
{
"()(?)%f",
function (consonant, following)
return (devoicing or consonant) .. following
end,
},
{
"()(?)%f",
function (consonant, following)
return (voicing or consonant) .. following
end,
},
-- Allophones of /l/
-- Every /l/ is first made dark; the two rules after the citations then
-- rewrite selected environments as the non-dark variant.
{"l", "ɫ̪"},
-- “Pinguis”. Dark/velarized.
-- Per Weiss (2009: 117): “…pinguis (velar). l is exīlis before i and when geminate, otherwise l is pinguis.”
-- Page 82: “…l is pinguis even before e, e.g. Herculēs < Hercolēs … < Hercelēs …”
-- Per Sihler (1995: 174): “l exilis was found before the vowels -i- and -ī-, and before another -l-; l pinguis occurred before any other vowel; before any consonant except l; and in word-final position l pinguis actually had two degrees of avoirdupois, being fatter before a consonant than before a vowel…”
-- Page 41: “…velarized l (that is, ‘l pinguis’)…”
-- Sen (2015: §2) states that /l/ was velarized in word-final position or before consonants–other than another /l/–and that it had varying degrees of “dark resonance (velarization in articulatory terms)” (p. 23) before e, a, o, and u (p. 33).
-- Both Sen and Sihler indicate different degrees of velarization, depending on the environment. IPA lacks a way to represent these gradations, unfortunately.
{"ɫ̪(?)ɫ̪", "l%1lʲ"},
{"ɫ̪(?)", "lʲ%1"},
-- “Exīlis”. Not dark/velarized. Possibly palatalized.
-- Per Sen (2015: 29): It is plausible that simple onset /l/ was palatalized before /i/, thus it seems likely that geminate /ll/ was also palatalized, given the similar behaviour of the two…”
-- Per Weiss (2009: 82): “In Latin, l developed…a non-velar (possibly palatal) allophone called exīlis before i and when geminate…”
-- Per Sihler (1995: 174): “l exilis was found before the vowels -i- and -ī-, and before another -l-.”
-- Per Sihler (2000: §133.1): "It is less clear whether the 'thin' lateral was specifically palatal, or palatalized, or only neutral."
-- Giannini and Marotta apparently argue that it was not palatalized (https://i.imgur.com/ytM1QDn.png). I do not have access to the book in question.
-- Retracted /s/
{"s", "s̠"},
-- Lloyd (1987: 80–81) expresses some uncertainty about this, but appears to overall be in favour of it: “…the evidence that the apico-alveolar pronunciation was ancient in Latin and inherited from Indo-European is quite strong.”
-- Per Zampaulo (2019: 93), “…in many instances, Latin s was likely pronounced as an apical segment (rather than laminal )."
-- Per Widdison (1987: 64), "In all, it would be fair to state that the apico-alveolar articulation represented the main allophonic variant of Latin and possibly IE /s/..."
-- dental Z
{"z()", "d͡z%1"}, --See discussion
{"z()z", "z%1(d͡)z"},
{"z", "z̪"},
-- Dental articulations
{"t", "t̪"},
{"d", "d̪"},
{"n(?)()", "n̪%1%2"}, --it's not as clear as for the stops
--Allophones of A
{"a", "ä"},
-- Works cited
-- Allen, William Sidney. 1978. Vox Latina: A Guide to the pronunciation of Classical Latin.
-- Lane, George M. A Latin grammar for schools and colleges.
-- Lloyd, Paul M. 1987. From Latin to Spanish.
-- Pultrová, Lucie. 2013. On the phonetic nature of the Latin R.
-- Sen, Ranjan. 2015. Syllable and segment in Latin.
-- Sihler, Andrew L. 1995. New comparative grammar of Greek and Latin.
-- Sihler, Andrew L. 2000. Language history: An introduction.
-- Weiss, Michael. 2009. Outline of the historical and comparative grammar of Latin.
-- Widdison, Kirk A. 16th century Spanish sibilant reordering: Reasons for divergence.
-- Zampaulo, André. 2019. Palatal Sound Change in the Romance languages: Diachronic and Synchronic Perspectives.
}
-- Ordered list of {pattern, replacement} pairs that convert_word() applies, in
-- sequence, when both `phonetic` and `eccl` are set. Same shape as
-- phonetic_rules; all replacements here are strings.
-- NOTE(review): the bracketed character classes inside most patterns appear to
-- have been stripped (e.g. "(*)s(*)%f", "n(?)()"). The entries are reproduced
-- unchanged; compare them with the upstream revision before relying on them.
local phonetic_rules_eccl = {
-- Specifically the Roman Ecclesiastical for singing from the Liber Usualis
{"(*)s(*)%f", "%1s̬%2"}, --partial voicing of s between vowels
{"s(*)%f", "z%1"}, --full voicing of s before voiced consonants
{"ek(*)s()", "eɡ%1z%2"}, --voicing of the prefix ex-
{"kz", "ɡz"}, --i give up, without this /ksˈl/ gives
-- Tapped R intervocalically and in complex onset
-- ^ Citation needed for this being the case in Ecclesiastical pronunciation
-- {"(ː?)r(?)", "%1ɾ%2"},
-- {"()r", "%1ɾ"},
{"a", "ä"}, --a is open and central per 17 September 2021 discussion at Template_talk:la-IPA#Ecclesiastical_a
-- /e/ and /o/ realization is phonetic but handled in convert_word below as it is sensitive to stress
-- Dental articulations
{"n(?)()()", "n̪%1%2%3"}, --assimilation of n to dentality.
{"l(?)()()", "l̪%1%2%3"},
--Note that the quality of n might not be dental otherwise--it may be alveolar in most contexts in Italian, according to Wikipedia.
{"t()", "t̪%1"}, --t is dental, except as the first element of a palatal affricate
{"d()", "d̪%1"}, --d is dental, except as the first element of a palatal affricate
{"t͡s", "t̪͡s̪"}, -- dental affricates
{"d͡z", "d̪͡z̪"}, --dental affricates
-- Undo the dental mark on the first half of a geminate palatal affricate.
{"t̪(?)t͡ʃ", "t%1t͡ʃ"},
{"d̪(?)d͡ʒ", "d%1d͡ʒ"},
--end of words
{"lt$", "l̪t̪"},
{"nt$", "n̪t̪"},
{"t$", "t̪"},
{"d$", "d̪"},
--Partial assimilation of l and n before palatal affricates, as in Italian
{"l(?)t͡ʃ", "l̠ʲ%1t͡ʃ"},
{"l(?)d͡ʒ", "l̠ʲ%1d͡ʒ"},
{"l(?)ʃ", "l̠ʲ%1ʃ"},
{"n(?)t͡ʃ", "n̠ʲ%1t͡ʃ"},
{"n(?)d͡ʒ", "n̠ʲ%1d͡ʒ"},
{"n(?)ʃ", "n̠ʲ%1ʃ"},
-- other coda nasal assimilation, full and partial. Per Canepari, only applies to /n/ and not to /m/
{"n(?)()", "ŋ%1%2"},
{"n(?)()", "ɱ%1%2"},
}
-- Lookup from a stop to a fricative value.
-- NOTE(review): `lenition` is not referenced anywhere else in the visible part
-- of this file — confirm whether it is still needed.
-- NOTE(review): the bracketed keys are missing from every entry of both tables
-- below; restore them from the upstream revision.
local lenition = {
= "ɣ", = "ð",
}
-- Lookup yielding the long counterpart of a vowel or diphthong. convert_word()
-- consults it for the accented syllable when `phonetic` is set together with
-- `eccl` or `vul`, falling back to the unchanged value when nothing is found.
local lengthen_vowel = {
= "aː", = "aː",
= "ɛː", = "ɛː",
= "eː", = "eː",
= "iː", = "iː",
= "ɔː", = "ɔː",
= "oː", = "oː",
= "uː", = "uː",
= "aːu̯",
= "ɛːu̯",
= "eːu̯",
}
-- Phoneme inventories used by the syllabifier. Each table starts out as a
-- plain list of IPA strings; get_onset(), get_coda(), get_vowel() and
-- split_syllables() consult them when locating syllable nuclei and when
-- checking whether a consonant cluster is an acceptable onset or coda.

-- Syllable nuclei: short vowels, long vowels, and diphthongs.
local vowels = {
"a", "ɛ", "e", "ɪ", "i", "ɔ", "o", "ʊ", "u", "y",
"aː", "ɛː", "eː", "iː", "ɔː", "oː", "uː", "yː",
"ae̯", "oe̯", "ei̯", "au̯", "eu̯",
}
-- Consonant sequences accepted at the start of a syllable. split_syllables()
-- moves consonants back to the preceding syllable until what remains is
-- either empty or listed here.
local onsets = {
"b", "p", "pʰ", "d", "t", "tʰ", "β",
"ɡ", "k", "kʰ", "kʷ", "ɡʷ", "kw", "ɡw", "t͡s", "t͡ʃ", "d͡ʒ", "ʃ",
"f", "s", "z", "d͡z", "h",
"l", "m", "n", "ɲ", "r", "j", "v", "w",
"bl", "pl", "pʰl", "br", "pr", "pʰr",
"dr", "tr", "tʰr",
"ɡl", "kl", "kʰl", "ɡr", "kr", "kʰr",
"fl", "fr",
"sp", "st", "sk", "skʷ", "sw",
"spr", "str", "skr",
"spl", "skl",
}
-- Consonant sequences accepted at the end of a syllable. Only used for the
-- "bad coda" tracking check at the end of split_syllables().
local codas = {
"b", "p", "pʰ", "d", "t", "tʰ", "ɡ", "k", "kʰ", "β",
"f", "s", "z",
"l", "m", "n", "ɲ", "r", "j", "ʃ",
"sp", "st", "sk",
"spʰ", "stʰ", "skʰ",
"lp", "lt", "lk",
"lb", "ld", "lɡ",
"lpʰ", "ltʰ", "lkʰ",
"lf",
"rp", "rt", "rk",
"rb", "rd", "rɡ",
"rpʰ", "rtʰ", "rkʰ",
"rf",
"mp", "nt", "nk",
"mb", "nd", "nɡ",
"mpʰ", "ntʰ", "nkʰ",
"lm", "rl", "rm", "rn",
"ps", "ts", "ks", "ls", "ns", "rs",
"lks", "nks", "rks",
"rps", "mps",
"lms", "rls", "rms", "rns",
}
-- Prefixes that end in a consonant; can be patterns. Occurrences of such
-- prefixes + i + vowel cause the i to convert to j (to suppress this, add a
-- dot, i.e. syllable boundary, after the i).
-- convert_word() splices each entry into a pattern anchored at the start of
-- the word, so pattern-magic characters in an entry are interpreted.
-- NOTE(review): "a" and "trns" look truncated (neither ends in a consonant
-- plus vowel context as the comment describes) — presumably bracketed
-- character classes were stripped from them; confirm against upstream.
local cons_ending_prefixes = {
"a", "circum", "con", "dis", "ex", "in", "inter", "ob", "per",
"sub", "subter", "super", "trns"
}
-- Substitution tables for vowel diacritics and ligatures. Each is passed as
-- the replacement argument of a pattern substitution (or indexed directly)
-- elsewhere in this file.
-- NOTE(review): the bracketed keys are missing from every entry of all four
-- tables; restore them from the upstream revision.

-- Yields the plain vowel; used by convert_word() for vowels before a
-- nasal + fricative sequence.
local remove_macrons = {
= "a",
= "e",
= "i",
= "o",
= "u",
= "y",
}
-- Yields the breve-marked vowel; used by export.phoneticize() to build the
-- short-vowel variant of a macron-breve sequence.
local macrons_to_breves = {
= "ă",
= "ĕ",
= "ĭ",
= "ŏ",
= "ŭ",
-- Unicode doesn't have breve-y
= "y" .. BREVE,
}
-- Yields the plain vowel; used by convert_word() once breves are no longer
-- needed.
local remove_breves = {
= "a",
= "e",
= "i",
= "o",
= "u",
-- Unicode doesn't have breve-y
}
-- Yields the two-letter spelling of a ligature; used by
-- initial_canonicalize_text().
local remove_ligatures = {
= "ae",
= "oe",
}
-- Index each phoneme inventory by value so that membership can be tested with
-- a plain lookup (vowels[x], onsets[x], codas[x]). The list part is left in
-- place; ipairs() only walks the integer keys, so adding string keys while
-- iterating is safe.
-- The assignment must target the entry (`vowels[val]`), not the variable:
-- assigning `true` to the variable itself would discard the whole table.
for _, val in ipairs(vowels) do
	vowels[val] = true
end
for _, val in ipairs(onsets) do
	onsets[val] = true
end
for _, val in ipairs(codas) do
	codas[val] = true
end
-- NOTE: Everything is lowercased very early on, so we don't have to worry
-- about capitalized letters.
local short_vowels_string = "aeiouyăĕĭŏŭäëïöüÿ" -- no breve-y in Unicode
local long_vowels_string = "āēīōūȳ"
local vowels_string = short_vowels_string .. long_vowels_string
-- Pattern character classes matching one orthographic vowel / one character
-- that is not an orthographic vowel. convert_word() wraps them in captures,
-- e.g. "(" .. vowels_c .. ")", so they must be bracketed classes; an empty
-- string would turn those captures into position captures.
local vowels_c = "[" .. vowels_string .. "]"
local non_vowels_c = "[^" .. vowels_string .. "]"
-- Substitute like rsubn(), but hand back only the resulting string; the
-- substitution count is dropped by the parentheses around the call.
local function rsub(term, foo, bar)
	return (rsubn(term, foo, bar))
end
-- Substitute like rsubn(), returning the resulting string followed by a
-- boolean that is true when at least one substitution took place.
local function rsubb(term, foo, bar)
	local result, count = rsubn(term, foo, bar)
	local changed = count > 0
	return result, changed
end
-- Convert a spelled word into a list of phoneme strings.
--   word     - the (normalized) spelling to convert
--   phonetic - accepted but not read inside this function
--   eccl     - when truthy, use letters_ipa_eccl and run the Ecclesiastical
--              adjustment pass over the result
--   vul      - when truthy (and eccl is not), use letters_ipa_vul
-- Returns the list of phonemes. Characters with no dictionary entry are
-- passed through unchanged, one character at a time.
-- NOTE(review): letters_ipa_vul is not defined in the visible part of this
-- file.
-- NOTE(review): several index expressions and one character class in this
-- function appear to have been stripped (`dictionary == "ks"`,
-- `phonemes, phonemes, phonemes`, "^ː?$", `vowels]`). The code is reproduced
-- unchanged and will not parse until they are restored from upstream.
local function letters_to_ipa(word,phonetic,eccl,vul)
local phonemes = {}
local dictionary = eccl and letters_ipa_eccl or (vul and letters_ipa_vul or letters_ipa)
-- Greedy tokenizer: at each position take the longest dictionary key that
-- prefixes the remaining word.
while ulen(word) > 0 do
local longestmatch = ""
for letter, ipa in pairs(dictionary) do
if ulen(letter) > ulen(longestmatch) and usub(word, 1, ulen(letter)) == letter then
longestmatch = letter
end
end
if ulen(longestmatch) > 0 then
-- "ks" is emitted as two separate phonemes so that the syllabifier can
-- place a boundary between them.
if dictionary == "ks" then
table.insert(phonemes, "k")
table.insert(phonemes, "s")
else
table.insert(phonemes, dictionary)
end
word = usub(word, ulen(longestmatch) + 1)
else
-- No key matches: copy one character through and move on.
table.insert(phonemes, usub(word, 1, 1))
word = usub(word, 2)
end
end
-- Ecclesiastical-only pass over each phoneme together with its neighbours.
-- NOTE(review): the local `next` shadows Lua's builtin next() inside the loop.
if eccl then for i=1,#phonemes do
local prev, cur, next = phonemes, phonemes, phonemes
-- Palatalization of k / ɡ before the vowels matched by the pattern.
if next and (cur == "k" or cur == "ɡ") and rfind(next, "^ː?$") then
if cur == "k" then
if prev == "s" then --and ((not phonemes) or phonemes ~= "k")
prev = "ʃ"
cur = "ʃ"
else
cur = "t͡ʃ"
if prev == "k" then prev = "t" end
end
else
cur = "d͡ʒ"
if prev == "ɡ" then prev = "d" end
end
end
-- dirty hack to make sure k isn't palatalized
if cur == "q" then
cur = "k"
end
-- t before i becomes an affricate unless preceded by s or t.
if cur == "t" and next == "i" and not (prev == "s" or prev == "t")
and vowels] then
cur = "t͡s"
end
-- z becomes an affricate; a doubled z becomes d + d͡z.
if cur == "z" then
if next == "z" then
cur = "d"
next = "d͡z"
else
cur = "d͡z"
end
end
-- Aspirated stops lose their aspiration.
if cur == "kʰ" then cur = "k" end
if cur == "tʰ" then cur = "t" end
-- ɡ + n becomes a geminate palatal nasal.
if cur == "ɡ" and next == "n" then
cur = "ɲ"
next = "ɲ"
end
-- Write the (possibly modified) window back into the list.
phonemes, phonemes, phonemes = prev, cur, next
end end
return phonemes
end
-- Return the onset of a syllable as a single string.
--   syll - list of phoneme strings making up one syllable
-- Collects every phoneme before the first vowel, skipping the stress mark
-- "ˈ", and concatenates them. Returns "" when the syllable starts with a
-- vowel or is empty; when the syllable has no vowel, every non-stress phoneme
-- is returned.
local function get_onset(syll)
	local consonants = {}
	for i = 1, #syll do
		local phoneme = syll[i]
		if vowels[phoneme] then
			break
		end
		if phoneme ~= "ˈ" then
			table.insert(consonants, phoneme)
		end
	end
	return table.concat(consonants)
end
-- Return the coda of a syllable as a single string.
--   syll - list of phoneme strings making up one syllable
-- Walks backwards from the end of the syllable until a vowel is found and
-- concatenates the phonemes passed on the way, in their original order.
-- Returns "" when the syllable ends in a vowel or is empty. Unlike
-- get_onset(), the stress mark is not filtered out.
local function get_coda(syll)
	local consonants = {}
	for i = #syll, 1, -1 do
		local phoneme = syll[i]
		if vowels[phoneme] then
			break
		end
		-- Insert at the front to keep left-to-right order.
		table.insert(consonants, 1, phoneme)
	end
	return table.concat(consonants)
end
-- Return the first vowel phoneme of a syllable, or nil when the syllable
-- contains no vowel.
--   syll - list of phoneme strings making up one syllable
local function get_vowel(syll)
	for i = 1, #syll do
		local phoneme = syll[i]
		if vowels[phoneme] then
			return phoneme
		end
	end
end
-- Split a phoneme list into syllables.
--   remainder - list of phoneme strings; may contain "." (explicit syllable
--               break) and "ˈ" (explicit stress mark)
-- Returns a list of syllables, each itself a list of phonemes. A stress mark,
-- when present, is kept as the first element of the syllable it precedes.
-- The work is done in three passes: (1) cut after every vowel, giving
-- open (C)V chunks; (2) move consonants back to the previous syllable until
-- each onset is valid; (3) record a tracking hit for any onset or coda that is
-- still not in the `onsets` / `codas` tables.
local function split_syllables(remainder)
	local syllables = {}
	local syll = {}
	for _, phoneme in ipairs(remainder) do
		if phoneme == "." then
			if #syll > 0 then
				table.insert(syllables, syll)
				syll = {}
			end
			-- Insert a special syllable consisting only of a period.
			-- We remove it later but it forces no movement of consonants across
			-- the period.
			table.insert(syllables, {"."})
		elseif phoneme == "ˈ" then
			-- A stress mark closes any pending consonants and opens a new
			-- syllable that starts with the mark.
			if #syll > 0 then
				table.insert(syllables, syll)
			end
			syll = {"ˈ"}
		elseif vowels[phoneme] then
			-- A vowel completes the current chunk.
			table.insert(syll, phoneme)
			table.insert(syllables, syll)
			syll = {}
		else
			table.insert(syll, phoneme)
		end
	end
	-- If there are phonemes left, then the word ends in a consonant.
	-- Add another syllable for them, which will get joined to the preceding
	-- syllable down below.
	if #syll > 0 then
		table.insert(syllables, syll)
	end
	-- Split consonant clusters between syllables.
	-- Removing entries while iterating with ipairs is deliberate here: after a
	-- removal the loop index still advances, so the entry that slid into the
	-- removed slot is skipped.
	for i, current in ipairs(syllables) do
		if #current == 1 and current[1] == "." then
			-- If the current syllable is just a period (explicit syllable
			-- break), remove it. The loop will then skip the next syllable,
			-- which will prevent movement of consonants across the syllable
			-- break (since movement of consonants happens from the current
			-- syllable to the previous one).
			table.remove(syllables, i)
		elseif i > 1 then
			local previous = syllables[i - 1]
			local onset = get_onset(current)
			-- Shift over consonants until the syllable onset is valid
			while not (onset == "" or onsets[onset]) do
				table.insert(previous, table.remove(current, 1))
				onset = get_onset(current)
			end
			-- If the preceding syllable still ends with a vowel,
			-- and the current one begins with s + another consonant, then shift it over.
			if get_coda(previous) == "" and (current[1] == "s" and not vowels[current[2]]) then
				table.insert(previous, table.remove(current, 1))
			end
			-- Check if there is no vowel at all in this syllable. That
			-- generally happens either (1) with an explicit syllable division
			-- specified, like 'cap.ra', which will get divided into the syllables
			-- {ca}, {p}, {.}, {ra}; or (2) at the end of a word that ends with
			-- one or more consonants. We move the consonants onto the preceding
			-- syllable, then remove the resulting empty syllable. If the
			-- new current syllable is a lone period, remove it, too. The loop
			-- will then skip the next syllable, which will prevent movement of
			-- consonants across the syllable break (since movement of
			-- consonants happens from the current syllable to the previous one).
			if not get_vowel(current) then
				for _ = 1, #current do
					table.insert(previous, table.remove(current, 1))
				end
				table.remove(syllables, i)
				if syllables[i] and #syllables[i] == 1 and syllables[i][1] == "." then
					table.remove(syllables, i)
				end
			end
		end
	end
	-- Sanity pass: nothing is changed, but unexpected onsets and codas are
	-- reported through the tracking module.
	for _, checked in ipairs(syllables) do
		local onset = get_onset(checked)
		local coda = get_coda(checked)
		if not (onset == "" or onsets[onset]) then
			require("Module:debug").track("la-pronunc/bad onset")
			--error("onset error:[" .. onset .. "]")
		end
		if not (coda == "" or codas[coda]) then
			require("Module:debug").track("la-pronunc/bad coda")
			--error("coda error:[" .. coda .. "]")
		end
	end
	return syllables
end
-- Test whether `phoneme` is a single short vowel. Returns the result of
-- rfind(): a truthy match position on success, nil otherwise. detect_accent()
-- uses it to decide whether the penultimate syllable is light.
-- NOTE(review): the character class between "^" and "$" appears to have been
-- stripped; as written the pattern "^$" can only match the empty string.
-- Restore the vowel class from the upstream revision.
local function phoneme_is_short_vowel(phoneme)
return rfind(phoneme, "^$")
end
-- Determine which syllable carries the stress.
--   syllables - list of syllables (each a list of phonemes) as produced by
--               split_syllables(); an explicit stress mark "ˈ" found inside
--               a syllable is removed from it in place
--   is_prefix - truthy when the word was written with a trailing hyphen
--   is_suffix - truthy when the word was written with a leading hyphen
-- Returns the 1-based index of the stressed syllable, -1 for "no stress", or
-- nil when `syllables` is empty and none of the earlier returns applies.
local function detect_accent(syllables, is_prefix, is_suffix)
	-- Manual override: an explicit stress mark wins over every rule below.
	for i = 1, #syllables do
		local syll = syllables[i]
		for j = 1, #syll do
			if syll[j] == "ˈ" then
				table.remove(syll, j)
				return i
			end
		end
	end
	-- Prefixes have no accent.
	if is_prefix then
		return -1
	end
	-- Suffixes have an accent only if the stress would be on the suffix when the
	-- suffix is part of a word. Don't get tripped up by the first syllable being
	-- nonsyllabic (e.g. in -rnus).
	if is_suffix then
		local syllables_with_vowel = #syllables - (get_vowel(syllables[1]) and 0 or 1)
		if syllables_with_vowel < 2 then
			return -1
		end
		if syllables_with_vowel == 2 then
			local penult = syllables[#syllables - 1]
			if phoneme_is_short_vowel(penult[#penult]) then
				return -1
			end
		end
	end
	-- Detect accent placement
	if #syllables > 2 then
		-- Does the penultimate syllable end in a single short vowel? If so it
		-- is light and the stress moves to the antepenult.
		local penult = syllables[#syllables - 1]
		if phoneme_is_short_vowel(penult[#penult]) then
			return #syllables - 2
		else
			return #syllables - 1
		end
	elseif #syllables == 2 then
		return #syllables - 1
	elseif #syllables == 1 then
		-- Mark stress on monosyllables so that stress-conditioned sound rules
		-- work correctly; the mark is deleted again prior to display.
		return #syllables
	end
end
-- Convert a single word to an IPA string.
--   word     - the spelling to convert (callers lowercase and canonicalize it
--              first); a trailing hyphen marks a prefix and a leading hyphen
--              marks a suffix
--   phonetic - when truthy, apply the phonetic rule list and rewrite doubled
--              consonants as long ones
--   eccl     - when truthy, use the Ecclesiastical letter map and rules
--   vul      - when truthy, select the Vulgar branches
-- Returns the transcription as a string, without enclosing slashes or
-- brackets.
-- Stages: orthographic normalization -> letters_to_ipa() ->
-- split_syllables() -> detect_accent() -> per-syllable vowel adjustments ->
-- joining with "." / "ˈ" -> phonetic rules -> glide and length cleanup.
-- NOTE(review): many patterns and index expressions in this function appear to
-- have lost their bracketed parts (e.g. "()()", `remove_macrons .. nasalfric`,
-- `lax_vowel]`, `rsub(word, rule, rule)`, keyless table entries). The code is
-- reproduced unchanged and must be compared with the upstream revision before
-- it can run.
-- NOTE(review): phonetic_rules_vul is not defined in the visible part of this
-- file.
local function convert_word(word, phonetic, eccl, vul)
-- Normalize i/j/u/v; do this before removing breves, so we keep the
-- ŭ in langŭī (perfect of languēscō) as a vowel.
word = rsub(word, "w", "v")
word = rsub(word, "(" .. vowels_c .. ")v(" .. non_vowels_c .. ")", "%1u%2")
word = rsub(word, "qu", "qv")
word = rsub(word, "ngu(" .. vowels_c .. ")", "ngv%1")
word = rsub(word, "^i(" .. vowels_c .. ")", "j%1")
word = rsub(word, "^u(" .. vowels_c .. ")", "v%1")
-- Per an August 31 2019 recommendation (the author and discussion links were
-- lost from this comment), we convert i/j between vowels to jj if the
-- preceding vowel is short but to single j if the preceding vowel is long.
-- The trailing "()" is a position capture: `pos` is the index of the character
-- that follows the match, which the callback inspects to see whether a vowel
-- comes next. When it does not, the callback returns nothing and the match is
-- left unchanged.
word = rsub(
word,
"(" .. vowels_c .. ")()()",
function (vowel, potential_consonant, pos)
if vowels_string:find(usub(word, pos, pos)) then
if potential_consonant == "u" then
return vowel .. "v"
else
if long_vowels_string:find(vowel) then
return vowel .. "j"
else
return vowel .. "jj"
end
end
end
end)
--Convert v to u syllable-finally
word = rsub(word, "v%.", "u.")
word = rsub(word, "v$", "u")
-- Convert i to j before vowel and after any prefix that ends in a consonant,
-- per an August 23 2019 discussion (link lost from this comment).
for _, pref in ipairs(cons_ending_prefixes) do
word = rsub(word, "^(" .. pref .. ")i(" .. vowels_c .. ")", "%1j%2")
end
-- Ecclesiastical has neither geminate j.j, nor geminate w.w in Greek words
if eccl then
word = rsub(word, "(" .. vowels_c .. ")u(?)v(" .. vowels_c .. ")", "%1%2v%3")
word = rsub(word, "(" .. vowels_c .. ")j(?)j(" .. vowels_c .. ")", "%1%2j%3")
end
-- Convert z to zz between vowels so that the syllable weight and stress assignment will be correct.
word = rsub(word, "(" .. vowels_c .. ")z(" .. vowels_c .. ")", "%1zz%2")
if eccl then
word = rsub(word, "(" .. vowels_c .. ")ti(" .. vowels_c .. ")", "%1tt͡si%2")
end
-- Now remove breves.
word = rsub(word, "()", remove_breves)
-- BREVE sits uncombined in y+breve and vowel-macron + breve
word = rsub(word, BREVE, "")
-- Normalize aë, oë; do this after removing breves but before any
-- other normalizations involving e.
word = rsub(word, "()ë", "%1.e")
-- Eu and ei diphthongs
word = rsub(word, "e(u)$", "e.%1")
word = rsub(word, "ei", "e.i")
-- Underscores are dropped at this point.
word = rsub(word, "_", "")
-- Vowel length before nasal + fricative is allophonic
word = rsub(word, "()()",
function(vowel, nasalfric)
return remove_macrons .. nasalfric
end
)
-- Ecclesiastical only: substitution table applied to the (stripped) pattern
-- below; the values are a long vowel followed by j, or a bare long vowel.
local vowel_before_yod = {
= "āj",
= "ēj",
= "ōj",
= "ūj",
= "ȳ",
}
if eccl then
word = rsub(word, "()()", vowel_before_yod)
end
-- Apply some basic phoneme-level assimilations for Ecclesiastical, which reads as written; in living varieties the assimilations were phonetic
-- Italian (and therefore, by implication, Ecclesiastical Latin) does not show assimilation in clusters like /bk/
-- Source: "How can Italian phonology lack voice assimilation?", by Bálint Huszthy (2019): https://www.academia.edu/39347303/How_can_Italian_phonology_lack_voice_assimilation
word = rsub(word, "xs", "x")
-- Per a May 10 2019 discussion (link lost from this comment), we syllabify
-- prefixes ab-, ad-, ob-, sub- separately from following l or r.
word = rsub(word, "^a()()", "a%1.%2")
word = rsub(word, "^ob()", "ob.%1")
word = rsub(word, "^sub()", "sub.%1")
-- Remove hyphens indicating prefixes or suffixes; do this after the above,
-- some of which are sensitive to beginning or end of word and shouldn't
-- apply to end of prefix or beginning of suffix.
local is_prefix, is_suffix
word, is_prefix = rsubb(word, "%-$", "")
word, is_suffix = rsubb(word, "^%-", "")
-- Convert word to IPA
local phonemes = letters_to_ipa(word,phonetic,eccl,vul)
-- Split into syllables
local syllables = split_syllables(phonemes)
-- Add accent
local accent = detect_accent(syllables, is_prefix, is_suffix)
-- poetic meter shows that a consonant before "h" was syllabified as an onset, not as a coda.
-- Based on outcome of talk page discussion, this will be indicated by the omission of /h/ in this context.
-- NOTE(review): this assignment cannot affect the result as written: the
-- phonemes were already derived from `word` above, `word` is not read again
-- before it is overwritten from `syllables` further down.
word = rsub(word, "()(?)h", "%1")
-- Per-phoneme pass: Ecclesiastical/Vulgar drop length marks; Classical
-- phonetic output laxes vowels via lax_vowel.
for i, syll in ipairs(syllables) do
for j, phoneme in ipairs(syll) do
if eccl or vul then
syll = rsub(syll, "ː", "")
elseif phonetic then
syll = lax_vowel] or syll
end
end
end
-- Lengthen the vowel of the accented syllable (Ecclesiastical/Vulgar phonetic
-- output only), then blank out the second of two identical adjacent phonemes
-- within a syllable.
for i, syll in ipairs(syllables) do
if (eccl or vul) and i == accent and phonetic and vowels] then
syll = lengthen_vowel] or syll
end
for j=1, #syll-1 do
if syll==syll then
syll = ""
end
end
end
-- Atonic /ɔ/ and /ɛ/ merge with /o/ and /e/ respectively
-- Each syllable is flattened to a string here and prefixed with the stress
-- mark when it is the accented one.
for i, syll in ipairs(syllables) do
syll = table.concat(syll)
if vul and i ~= accent then
syll = rsub(syll, "ɔ", "o")
syll = rsub(syll, "ɛ", "e")
end
if eccl and phonetic and i == accent then
syll = rsub(syll, "o", "ɔ")
syll = rsub(syll, "e", "ɛ")
end
syllables = (i == accent and "ˈ" or "") .. syll
end
-- Join syllables with dots; a dot directly before the stress mark is dropped.
word = (rsub(table.concat(syllables, "."), "%.ˈ", "ˈ"))
if #syllables == 1 then
word = rsub(word, "^ˈ", "") --remove word-initial accent marks in monosyllables
end
if eccl then
word = rsub(word, "()ʃ(?)ʃ", "%1%2ʃ") -- replace ʃ.ʃ or ʃˈʃ with .ʃ or ˈʃ after any consonant
end
if phonetic then
local rules = eccl and phonetic_rules_eccl or (vul and phonetic_rules_vul or phonetic_rules)
-- Apply every rule in list order; each rule sees the output of the previous.
for i, rule in ipairs(rules) do
word = rsub(word, rule, rule)
end
word = rsub(word, "", "") --remove the dots! >_<
end
if not eccl then
word = rsub(word, "j", "i̯") -- normalize glide spelling
word = rsub(word, "w", "u̯")
end
if phonetic then
word = rsub(word, "(%a(?))%1", "%1" .. LONG) --convert double consonants into long ones
word = rsub(word, "ːː", "ː")
end
return word
end
-- Lowercase `text` and apply two substitutions to it (the second one maps
-- matches through remove_ligatures); returns the resulting string.
-- NOTE(review): both patterns are empty strings as written — presumably the
-- bracketed character classes were stripped — so neither substitution can
-- match any character. Restore them from the upstream revision.
-- NOTE(review): this function is declared without `local` and is therefore a
-- global; within the visible source it is only called from
-- export.convert_words(). Confirm nothing else relies on the global before
-- making it local.
function initial_canonicalize_text(text)
-- Call ulower() even though it's also called in phoneticize,
-- in case convert_words() is called externally.
text = ulower(text)
text = rsub(text, '', '')
text = rsub(text, '', remove_ligatures)
return text
end
-- Convert a phrase to IPA, word by word.
--   text     - the phrase; it is canonicalized with
--              initial_canonicalize_text() and then split on single spaces
--   phonetic, eccl, vul - passed through unchanged to convert_word()
-- Returns the converted words joined with single spaces.
-- Raises an error naming the offending character(s) when anything is left
-- after the allowed characters have been removed from the text.
-- NOTE(review): the pattern that removes the allowed characters is an empty
-- string as written (presumably a stripped character class), so `disallowed`
-- equals the whole text and any non-empty input raises. Restore the class
-- from the upstream revision.
function export.convert_words(text, phonetic, eccl, vul)
text = initial_canonicalize_text(text)
local disallowed = rsub(text, '', '')
if ulen(disallowed) > 0 then
-- Singular vs. plural wording of the error message.
if ulen(disallowed) == 1 then
error('The character "' .. disallowed .. '" is not allowed.')
else
error('The characters "' .. disallowed .. '" are not allowed.')
end
end
local result = {}
for word in mw.text.gsplit(text, " ") do
table.insert(result, convert_word(word, phonetic, eccl, vul))
end
return table.concat(result, " ")
end
-- Phoneticize Latin TEXT. Return a list of one or more phoneticizations,
-- each of which is a two-element list {PHONEMIC, PHONETIC}. If ECCL, use
-- Ecclesiastical pronunciation. If VUL, use Vulgar Latin pronunciation.
-- Otherwise, use Classical pronunciation.
-- Two kinds of input yield two candidate phoneticizations, which are both
-- returned only when they actually differ:
--   * a macron followed by a breve (long-or-short vowel): one variant with
--     the breve dropped, one with the vowel mapped through macrons_to_breves;
--   * a ";" in the text: one variant with the ";" removed and one with it
--     replaced by "." (an explicit syllable break).
-- The macron-breve case takes precedence; when it applies, ";" is not
-- expanded.
-- NOTE(review): the vowel classes in front of `BREVE` appear to have been
-- stripped ("" and "()"), so as written the first branch fires on any breve.
-- Restore them from the upstream revision.
function export.phoneticize(text, eccl, vul)
-- Build the {phonemic, phonetic} pair for one spelling.
local function do_phoneticize(text, eccl, vul)
return {
export.convert_words(text, false, eccl, vul),
export.convert_words(text, true, eccl, vul),
}
end
text = ulower(text)
-- If we have a macron-breve sequence, generate two pronunciations, one for
-- the long vowel and one for the short.
if rfind(text, "" .. BREVE) then
local longvar = rsub(text, "()" .. BREVE, "%1")
local shortvar = rsub(text, "()" .. BREVE, macrons_to_breves)
local longipa = do_phoneticize(longvar, eccl, vul)
local shortipa = do_phoneticize(shortvar, eccl, vul)
-- Make sure long and short variants are actually different (they won't
-- be in Ecclesiastical pronunciation).
if not ut.equals(longipa, shortipa) then
return {longipa, shortipa}
else
return {longipa}
end
elseif rfind(text, ";") then
-- ";" marks a boundary that may or may not be a syllable break.
local tautosyllabicvar = rsub(text, ";", "")
local heterosyllabicvar = rsub(text, ";", ".")
local tautosyllabicipa = do_phoneticize(tautosyllabicvar, eccl, vul)
local heterosyllabicipa = do_phoneticize(heterosyllabicvar, eccl, vul)
if not ut.equals(tautosyllabicipa, heterosyllabicipa) then
return {tautosyllabicipa, heterosyllabicipa}
else
return {tautosyllabicipa}
end
else
return {do_phoneticize(text, eccl, vul)}
end
end
-- Format one output row: the accent qualifiers followed by every
-- pronunciation, separated by " or ".
--   phoneticizations - list of {PHONEMIC, PHONETIC} pairs as returned by
--                      export.phoneticize()
--   dials            - list of qualifier labels handed to
--                      m_a.format_qualifiers()
-- Each pair is rendered as two IPA items: the phonemic form between slashes
-- and the phonetic form between square brackets.
local function make_row(phoneticizations, dials)
	local full_pronuns = {}
	for _, phoneticization in ipairs(phoneticizations) do
		local phonemic = phoneticization[1]
		local phonetic = phoneticization[2]
		local IPA_args = {
			{pron = '/' .. phonemic .. '/'},
			{pron = '[' .. phonetic .. ']'},
		}
		table.insert(full_pronuns, m_IPA.format_IPA_full { lang = lang, items = IPA_args })
	end
	return m_a.format_qualifiers(lang, dials) .. ' ' .. table.concat(full_pronuns, ' or ')
end
-- Template entry point: render the pronunciation rows for a Latin term.
--   frame - the invoking frame; arguments are read from its parent
-- Parameters read from the parent frame:
--   1                      - the term; defaults to "īnspīrāre" in the
--                            Template namespace, else to the page title
--   classical / cl         - boolean, default true: emit the Classical row
--   ecclesiastical / eccl  - boolean, default true: emit the Ecclesiastical
--                            row and add its tracking category
--   vul                    - boolean, default false
--   ann                    - annotation text; "1" means "use the term itself"
--   accent                 - list of qualifier labels replacing the default
--                            "Classical" / "Ecclesiastical" label
--   indent                 - list marker placed before each row
-- Returns wikitext: the formatted rows followed by the formatted categories.
function export.show_full(frame)
	local title = mw.title.getCurrentTitle()
	local params = {
		[1] = {default = title.nsText == 'Template' and 'īnspīrāre' or title.text},
		classical = {type = 'boolean', default = true},
		cl = {type = 'boolean', alias_of = 'classical', default = true},
		ecclesiastical = {type = 'boolean', default = true},
		eccl = {type = 'boolean', alias_of = 'ecclesiastical', default = true},
		vul = {type = 'boolean', default = false},
		ann = {},
		accent = {list = true},
		indent = {}
	}
	local args = require("Module:parameters").process(frame:getParent().args, params)
	-- Keep the term in a local; a bare assignment here would leak a global
	-- named `text`.
	local text = args[1]
	local categories = {}
	local accent = args.accent
	local indent = (args.indent or "*") .. " "
	local out = ''
	-- The first row only gets a list marker when one was explicitly requested.
	if args.indent then
		out = indent
	end
	if args.classical then
		out = out .. make_row(export.phoneticize(text, false, false), #accent > 0 and accent or {'Classical'})
	end
	-- NOTE(review): the pattern in the rsub() call below is an empty string as
	-- written (presumably a stripped character class), so the term is used
	-- unchanged as its own annotation. Restore the class from upstream.
	local anntext = (
		args.ann == "1" and "'''" .. rsub(text, "", "") .. "''': " or
		args.ann and "'''" .. args.ann .. "''': " or
		"")
	out = anntext .. out
	if args.ecclesiastical then
		-- Start a new line (with marker and annotation) when a row precedes.
		if args.classical or args.vul then
			out = out .. '\n' .. indent .. anntext
		end
		out = out .. make_row(
			export.phoneticize(text, true, false),
			#accent > 0 and accent or {'Ecclesiastical'}
		)
		table.insert(categories, lang:getCanonicalName() .. ' terms with Ecclesiastical IPA pronunciation')
	end
	return out .. require("Module:utilities").format_categories(categories)
end
-- Convert `text` to IPA; callable either from Lua with positional arguments
-- or with a frame as the first argument.
--   text     - a string, or a table (assumed to be a frame) whose `args`
--              supply the text and the eccl/vul flags; when a frame supplies
--              no text, the current page title is used
--   phonetic - truthy for phonetic output; forced to true when `vul` is set
--   eccl, vul - pronunciation selectors, passed to export.convert_words()
-- Returns the string produced by export.convert_words().
-- NOTE(review): the index expressions after `text.args` appear to have been
-- stripped: as written all three assignments read the whole args table.
-- Restore the argument keys from the upstream revision.
function export.show(text, phonetic, eccl, vul)
if type(text) == "table" then -- assume a frame
eccl = text.args
vul = text.args
text = text.args or mw.title.getCurrentTitle().text
end
if vul then
phonetic = true
end
return export.convert_words(text, phonetic, eccl, vul)
end
-- Shorthand for export.show() with phonetic output always switched on.
function export.allophone(word, eccl, vul)
	local phonetic = true
	return export.show(word, phonetic, eccl, vul)
end
return export
-- For Vim, so we get 4-space tabs
-- vim: set ts=4 sw=4 noet: