Module:la-IPA

The following documentation is located at Module:la-IPA/documentation. Categories were auto-generated by Module:module categorization.
Useful links: subpage list • links • transclusions • testcases • sandbox
This module is not meant to be used directly. It is used by {{la-IPA}}; see that template for usage.
local export = {}

local m_a = require("Module:accent qualifier")
local m_IPA = require("Module:IPA")
local m_str_utils = require("Module:string utilities")
local m_table = require("Module:table")
local lang = require("Module:languages").getByCode("la")

local concat = table.concat
local deep_equals = m_table.deepEquals
local gsplit = m_str_utils.gsplit
local insert = table.insert
local invert = m_table.invert
local list_to_set = m_table.listToSet
local remove = table.remove
local rfind = m_str_utils.find
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
local trim = m_str_utils.trim
local u = m_str_utils.char
local ugsub = m_str_utils.gsub
local ulower = m_str_utils.lower
local usub = m_str_utils.sub
local ulen = m_str_utils.len
local umatch = m_str_utils.match

-- Combining diacritics that may follow a vowel letter in the input.
local MACRON = u(0x304) -- ̄
local BREVE = u(0x306) -- ̆
local TREMA = u(0x308) -- ̈
-- Pattern fragment: an optional macron, breve and trema, in that order.
local LENGTH = MACRON .. "?" .. BREVE .. "?" .. TREMA .. "?"
local TIE = u(0x361) -- ͡
-- IPA vowel symbols that can appear in the output.
local VOWELS = "aeɛiɪoɔuʊyʏ"
-- Character class matching exactly one of VOWELS. It has to be a bracketed
-- set: the rules below splice it in after `%f` (which requires a set) and
-- inside captures. It was previously the empty string, which made those
-- patterns malformed.
local VOWEL = "[" .. VOWELS .. "]"

-- Marks appended to IPA vowels by the phonetic rules.
local TILDE = u(0x303) -- ̃
local HALF_LONG = "ˑ"
local LONG = "ː"

-- Letter-to-phoneme table for the Classical pronunciation. A value is either a
-- single phoneme string or a list of phonemes (see the {"k", "s"} entry);
-- letters_to_ipa() consumes the longest key that prefixes the remaining word.
-- NOTE(review): every field below is missing its key (the bracketed ["..."]
-- key expressions appear to have been lost), so this constructor does not
-- parse as written. Restore the keys from the upstream module.
local letters_ipa = {
	 = "aː",  = "eː",  = "iː",  = "oː",  = "uː",  = "yː",
	 = "ae̯",  = "au̯",  = "ei̯",  = "eu̯",  = "oe̯",  = "uː",
	 = "k",  = "ɡ",  = "k",  = "w",  = {"k", "s"},
	 = "pʰ",  = "tʰ",  = "kʰ",  = "rʰ",
	 = "kʷ",  = "ɡʷ",  = "sʷ",
	 = "ˈ",
}

-- Letter-to-phoneme table for the Ecclesiastical pronunciation.
-- Only includes changes from letters_ipa above; letters_ipa is passed as the
-- second argument to Module:table/setParent (presumably so that missing keys
-- fall back to it — confirm against that module).
-- NOTE(review): the keys of these fields are missing (bracketed key
-- expressions appear to have been lost); restore them from upstream.
local letters_ipa_eccl = require("Module:table/setParent")({
	 = "i",  = "iː",
	 = "eː",  = "eː",
	 = "c", -- becomes /k/, /t͡ʃ/ or /ʃ/
	 = "v",
	 = "f",
}, letters_ipa)

-- Maps each tense short vowel to its lax counterpart. Used to lax short vowels
-- in the Classical phonetic transcription. The keys had been lost, which left
-- the constructor unparseable; they are restored here from the tense/lax
-- pairing of the values.
local lax_vowel = {
	["e"] = "ɛ",
	["i"] = "ɪ",
	["o"] = "ɔ",
	["u"] = "ʊ",
	-- No evidence for this, and Greek did not have a near-close lax front-rounded vowel as far as we can tell
	-- ["y"] = "ʏ",
}

-- Reverse mapping (lax -> tense), used by the phonetic rules that re-tense a
-- vowel in specific environments.
local tense_vowel = invert(lax_vowel)

-- Maps each voiceless stop to its voiced counterpart; used by the regressive
-- voicing-assimilation rules. The keys had been lost, which left the
-- constructor unparseable; they are restored from the voiced values.
local voicing = {
	["p"] = "b",
	["t"] = "d",
	["k"] = "ɡ",
}

-- Reverse mapping (voiced stop -> voiceless stop).
local devoicing = invert(voicing)

-- Ordered list of rewrite rules for the Classical phonetic transcription.
-- Each entry is a two-element list {pattern, replacement}; the replacement is
-- either a string or a function. convert_word() walks the list with ipairs()
-- and feeds each rule to ugsub() on the syllabified word, so order matters.
-- NOTE(review): the bracketed character classes inside the patterns (and the
-- table lookups inside the replacement functions, e.g. `(tense_vowel or v)`)
-- appear to have been lost; patterns such as "(*)()" are not what was
-- intended. Restore them from the upstream module.
local phonetic_rules = {

	-- Bibliography included at the end

	-- Assimilation of /ɡ/ to [ŋ] before a following /n/
	{"ɡ(*)n", "ŋ%1n"},
	-- Per Allen (1978: 23), although note the reservations expressed on the next page.

	-- Assimilation of word-internal /n/ and /m/ to following consonants. Exception: /m/ does not assimilate to a following /n/.
	{"(*)()", "ŋ%1%2"},
	{"m(*)()", "n%1%2"},
	{"n(*)()", "m%1%2"},
		-- Per George M. Lane: “Nasals changed their place of articulation to that of the following consonant. Thus, dental n before the labials p and b became the labial m... labial m before the gutturals c and g became guttural n...labial m before the dentals t, d, s became dental n...” (§164.3); “One nasal, n, is assimilated to another, m...but an m before n is never assimilated..." (§166.5).
		-- Per Lloyd (1987: 84): “The opposition between nasals was neutralized in syllable-final position, with the realization of the nasality being assimilated to the point of articulation of the following consonant, e.g.,  is found only before labials,  only before dentals or alveolars, and  only before velars and /n/."
		-- Potential addition: assimilation of final /m/ and /n/ across word boundaries, per e.g. Allen (1987: 28, 31).
	
	-- No additional labialization before high back vowels
	{"ʷ%f", ""},
	
	-- Tensing of short vowels before another vowel
	{
		"(" .. VOWEL .. ")(+?)%f" .. VOWEL,
		function (v, following)
			return (tense_vowel or v) .. following
		end,
	},

	-- But not before consonantal glides
	{"e(̯)", "ɛ%1"},

	-- Nasal vowels
	{
		"(" .. VOWEL .. ")(" .. LONG .. "?)m$",
		function (v, long)
			-- 2025-05-15: Change per ]
			-- The condition is hard-wired to true, so the half-long branch
			-- below is currently unreachable.
			if true then -- long == LONG then
				return (tense_vowel or v) .. TILDE .. LONG
			end
			return (lax_vowel or v) .. TILDE .. HALF_LONG
		end,
	},
	{
		"(" .. VOWEL .. ")(*)",
		function (v, following)
			return (tense_vowel or v) .. TILDE .. LONG .. following
		end,
	},
	
	-- Realization of /r/ as a tap
		-- Pultrová (2013) argues for Latin /r/ being an alveolar tap.
		-- Lloyd (1987: 81) agrees: “The /r/ was doubtlessly an alveolar flap."
		-- Allen (1978: 33) expresses doubt: “By the classical period there is no reason to think that the sound had not strengthened to the trill described by later writers.”
		-- Unconditional  transcription is preferable to unconditional  per 18 September 2021 discussion at ]
		-- No consensus yet on how to implement conditional allophony of  vs. 
		
	-- Voicing and loss of intervocalic /h/.
	{"(.)h", "%1(ɦ)"},
	-- Per Allen (1978: 43–45).

	-- Phonetic (as opposed to lexical/phonemic) assimilations
		-- Place
			-- First because this accounts for 'atque' seemingly escaping total assimilation (and 'adque' presumably not)
	{"d(*s%f)", "s%1"}, -- leave  out since etsi has , not 
	{"s(*)s%f", "s(ː)%1"},
	{"st(+)()", "s(t)%1%2"},

	{"d(+)()", "%2%1%2"}, --leave  out since dr does not assimilate, even when heterosyllabic (e.g. quadrans), except in prefixed words
	{"b(+)()", "%2%1%2"},
	{"s(+)(f)", "%2%1%2"},

	-- Regressive voicing assimilation in consonant clusters
	{
		"()(*)%f",
		function (consonant, following)
			return (devoicing or consonant) .. following
		end,
	},
	{
		"()(*)%f",
		function (consonant, following)
			return (voicing or consonant) .. following
		end,
	},

	-- 2025-05-15: Numerous changes per ]:
	-- (1) simplify l-pinguis vs. l-exilis to just  (formerly ) vs.  (formerly  in some circumstances);
	--     consider further simplifying further to use  before non-high-front vowels
	-- (2) don't mark dental or alveolar notations on coronals
	-- (3) don't mark centralized ä on 

	-- Allophones of /l/
	{"l", "ɫ"},
		-- “Pinguis”. Dark/velarized.
		-- Per Weiss (2009: 117): “... pinguis (velar). l is exīlis before i and when geminate, otherwise l is pinguis.”
		-- Page 82: “... l is pinguis even before e, e.g. Herculēs < Hercolēs ... < Hercelēs ...”
		-- Per Sihler (1995: 174): “l exilis was found before the vowels -i- and -ī-, and before another -l-; l pinguis occurred before any other vowel; before any consonant except l; and in word-final position  l pinguis actually had two degrees of avoirdupois, being fatter before a consonant than before a vowel...” 
		-- Page 41: “... velarized l (that is, ‘l pinguis’)...”
		-- Sen (2015: §2) states that /l/ was velarized in word-final position or before consonants–other than another /l/–and that it had varying degrees of “dark resonance (velarization in articulatory terms)” (p. 23) before e, a, o, and u (p. 33).
		-- Both Sen and Sihler indicate different degrees of velarization, depending on the environment. IPA lacks a way to represent these gradations, unfortunately.
	{"ɫ(*)ɫ", "l%1l"},
	{"ɫ(*)", "l%1"},
		-- “Exīlis”. Not dark/velarized. Possibly palatalized.
		-- Per Sen (2015: 29): It is plausible  that simple onset /l/ was palatalized before /i/, thus   it seems likely that geminate /ll/ was also palatalized, given the similar behaviour of the two...”
		-- Per Weiss (2009: 82): “In Latin, l developed... a non-velar (possibly palatal) allophone called exīlis before i and when geminate...”
		-- Per Sihler (1995: 174): “l exilis was found before the vowels -i- and -ī-, and before another -l-.”
		-- Per Sihler (2000: §133.1): "It is less clear whether the 'thin' lateral  was specifically palatal, or palatalized, or only neutral."
		-- Giannini and Marotta apparently argue that it was not palatalized (https://i.imgur.com/ytM1QDn.png). I do not have access to the book in question.

	-- Retracted /s/
	-- {"s", "s̠"}, 
		-- Lloyd (1987: 80–81) expresses some uncertainty about this, but appears to overall be in favour of it: “... the evidence that the apico-alveolar pronunciation was ancient in Latin and inherited from Indo-European is quite strong.”
		-- Per Zampaulo (2019: 93), “... in many instances, Latin s was likely pronounced as an apical segment  (rather than laminal )."
		-- Per Widdison (1987: 64), "In all, it would be fair to state that the apico-alveolar  articulation represented the main allophonic variant of Latin and possibly IE /s/..."

	-- dental Z
	-- {"z", "z̪"}, 

	-- Dental articulations
	-- {"", "%0̪"} ,
	-- {"n(*)", "n̪%1"}, --it's not as clear as for the stops 

	--Allophones of A
	-- {"a", "ä"}, 

	-- Works cited
		-- Allen, William Sidney. 1978. Vox Latina: A Guide to the pronunciation of Classical Latin.
		-- Lane, George M. A Latin grammar for schools and colleges.
		-- Lloyd, Paul M. 1987. From Latin to Spanish.
		-- Pultrová, Lucie. 2013. On the phonetic nature of the Latin R.
		-- Sen, Ranjan. 2015. Syllable and segment in Latin.
		-- Sihler, Andrew L. 1995. New comparative grammar of Greek and Latin.
		-- Sihler, Andrew L. 2000. Language history: An introduction.
		-- Weiss, Michael. 2009. Outline of the historical and comparative grammar of Latin.
		-- Widdison, Kirk A. 16th century Spanish sibilant reordering: Reasons for divergence.
		-- Zampaulo, André. 2019. Palatal Sound Change in the Romance languages: Diachronic and Synchronic Perspectives.
}

-- Ordered list of {pattern, replacement} rewrite rules for the Ecclesiastical
-- phonetic transcription; convert_word() applies them in order with ugsub()
-- when both `eccl` and `phonetic` are set.
-- NOTE(review): the bracketed character classes inside the patterns appear to
-- have been lost (e.g. "()(*)"); restore them from the upstream module.
local phonetic_rules_eccl = {
	-- Specifically the Roman Ecclesiastical for singing from the Liber Usualis

	{"(*)s(*)%f", "%1s̬%2"}, --partial voicing of s between vowels
	{"s(*)%f", "z%1"}, --full voicing of s before voiced consonants
	{"ek(*)s()", "eɡ%1z%2"}, --voicing of the prefix ex-
	{"kz", "ɡz"},

	-- Tapped R intervocalically and in complex onset
	-- ^ Citation needed for this being the case in Ecclesiastical pronunciation
	-- {"(+)r(?)", "%1ɾ%2"},
	-- {"()r", "%1ɾ"},

	-- Dental articulations
	{"()(*)", "%1̪%2"}, --assimilation of n to dentality. 
	--Note that the quality of n might not be dental otherwise--it may be alveolar in most contexts in Italian, according to Wikipedia.
	{"()()", "%1̪%2"}, --t and d are dental, except as the first element of a palatal affricate
	{"t͡s", "t̪͡s̪"}, -- dental affricates
	{"d͡z", "d̪͡z̪"}, --dental affricates
	{"t̪(*t͡ʃ)", "t%1"},
	{"d̪(*d͡ʒ)", "d%1"},

	--end of words
	{"()t$", "%1̪t̪"},
	{"()$", "%1̪"},

	--Partial assimilation of l and n before palatal affricates, as in Italian
	{"()(*t͡ʃ)", "%1̠ʲ%2"},
	{"()(*d͡ʒ)", "%1̠ʲ%2"},
	{"()(*ʃ)", "%1̠ʲ%2"},

	-- other coda nasal assimilation, full and partial. Per Canepari, only applies to /n/ and not to /m/
	{"n(*)", "ŋ%1"},
	{"n(*)", "ɱ%1"},
}

-- Maps a vowel (or diphthong) to its lengthened form. convert_word() consults
-- this table for the stressed syllable of the Ecclesiastical phonetic
-- transcription.
-- NOTE(review): the keys of these fields are missing (bracketed key
-- expressions appear to have been lost); restore them from upstream.
local lengthen_vowel = {
	 = "aː",  = "aː",
	 = "ɛː",  = "ɛː",
	 = "eː",  = "eː",
	 = "iː",  = "iː",
	 = "ɔː",  = "ɔː",
	 = "oː",  = "oː",
	 = "uː",  = "uː",
	 = "aːu̯",
	 = "ɛːu̯",
	 = "eːu̯",
}

-- Set of every phoneme string that counts as a syllable nucleus.
local vowels = list_to_set({
	-- short monophthongs
	"a", "ɛ", "e", "ɪ", "i", "ɔ", "o", "ʊ", "u", "y",
	-- long monophthongs
	"aː", "ɛː", "eː", "iː", "ɔː", "oː", "uː", "yː",
	-- diphthongs
	"ae̯", "au̯", "ei̯", "eu̯", "oe̯", "ou̯",
})


-- Permitted syllable onsets. Built as a list, extended with "s" + onset for
-- the onsets selected by the loop below, and finally converted to a set for
-- O(1) membership tests in split_syllables().
local onsets = {
	"p", "pʰ", "b",
	"t", "tʰ", "d",
	"k", "kʰ", "kʷ", "ɡ", "ɡʷ",
	"s", "sʷ", "z", "f", "v", "h",
	"t͡s", "d͡z", "t͡ʃ", "d͡ʒ", "ʃ",
	"l", "r", "rʰ",
	"m", "n", "ɲ",
	"j", "w",
	
	"pl", "pʰl", "bl",
	"kl", "kʰl", "ɡl",
	"fl",
	
	"pr", "pʰr", "br",
	"tr", "tʰr", "dr",
	"kr", "kʰr", "ɡr",
	"fr",
}
-- The loop bound #onsets is evaluated once, so entries appended inside the
-- loop are not themselves revisited.
-- NOTE(review): `onsets` is used bare where the i-th element is clearly
-- intended, and the pattern has lost its character classes; restore both from
-- upstream.
for i = 1, #onsets do
	local v = onsets
	if umatch(v, "^*?$") then
		insert(onsets, "s" .. v)
	end
end
onsets = list_to_set(onsets)

-- Set of permitted syllable codas; split_syllables() only uses it to flag
-- ("track") syllables whose coda is not listed here.
local codas = list_to_set({
	-- single consonants
	"p", "pʰ", "b",
	"t", "tʰ", "d",
	"k", "kʰ", "ɡ",
	"s", "z", "f",
	"ʃ",
	"l", "r",
	"m", "n", "ɲ",
	"j",

	-- stop + s
	"ps", "ts", "ks",

	-- s + stop
	"sp", "st", "sk",
	"spʰ", "stʰ", "skʰ",

	-- l + consonant(s)
	"lp", "lpʰ", "lb", "lps",
	"lt", "ltʰ", "ld",
	"lk", "lkʰ", "lɡ", "lks",
	"ls",
	"lm", "ln", "lms", "lns",

	-- r + consonant(s)
	"rp", "rpʰ", "rb", "rps",
	"rt", "rtʰ", "rd",
	"rk", "rkʰ", "rɡ", "rks",
	"rs",
	"rl", "rls",
	"rm", "rn", "rms", "rns",

	-- nasal + consonant(s)
	"mp", "mpʰ", "mb", "mps",
	"nt", "ntʰ", "nd",
	"nk", "nkʰ", "nɡ", "nks",
	"ns",
})

-- Prefixes that end in a consonant; can be patterns. Occurrences of such
-- prefixes + i + vowel cause the i to convert to j (to suppress this, add a
-- dot, i.e. syllable boundary, after the i).
-- Each entry is spliced into an anchored pattern in convert_word().
-- NOTE(review): "a" and "trns" do not end in a consonant / look truncated;
-- presumably they contained bracketed character classes that were lost —
-- confirm against the upstream module.
local cons_ending_prefixes = {
	"a", "circum", "con", "dis", "ex", "inter", "in", "ob", "per",
	"subter", "sub", "super", "trns"
}

-- Maps each macron-bearing (long) vowel letter to its breve-bearing (short)
-- form. phoneticize() passes this table as a gsub replacement to build the
-- short-vowel variant of a macron+breve sequence. The keys had been lost,
-- which left the constructor unparseable; they are restored from the values.
local macrons_to_breves = {
	["ā"] = "ă",
	["ē"] = "ĕ",
	["ī"] = "ĭ",
	["ō"] = "ŏ",
	["ū"] = "ŭ",
	-- Unicode doesn't have breve-y
	["ȳ"] = "y" .. BREVE,
}

-- gsub callback that expands a ligature into two letters, keeping any
-- diacritic on the first: "æ" gives "a" .. diacritic .. "e", anything else
-- gives "o" .. diacritic .. "e".
local function normalize_ligatures(ligature, diacritic)
	local first_letter
	if ligature == "æ" then
		first_letter = "a"
	else
		first_letter = "o"
	end
	return first_letter .. diacritic .. "e"
end

-- NOTE: Everything is lowercased very early on, so we don't have to worry
-- about capitalized letters.
-- FIXME: handle ǟë̄ï̄ȫǖÿ̄ etc.
local short_vowels_string = "aeiouyăĕĭŏŭäëïöüÿ" -- no breve-y in Unicode
local long_vowels_string = "āēīōūȳ"
local vowels_string = short_vowels_string .. long_vowels_string
-- Character classes matching one orthographic vowel / one non-vowel. Both were
-- previously the empty string, which made every pattern built from them in
-- convert_word() degenerate (an empty capture instead of a vowel match).
local vowels_c = "[" .. vowels_string .. "]"
local non_vowels_c = "[^" .. vowels_string .. "]"

-- Record a tracking hit under "la-IPA/<page>" via Module:debug/track.
-- Always returns true.
local function track(page)
	local record = require("Module:debug/track")
	record("la-IPA/" .. page)
	return true
end

-- Strip every match of the pattern CH from WORD: decompose to NFD, delete the
-- matches, then recompose to NFC.
local function remove_diacritic(word, ch)
	local decomposed = toNFD(word)
	-- Keep only the string result of the substitution, not the match count.
	local stripped = ugsub(decomposed, ch, "")
	return toNFC(stripped)
end

-- Return true if CH is non-nil/non-false and matches PATTERN, false otherwise.
-- Always yields a real boolean, so callers can pass a possibly-missing
-- phoneme safely.
local function match_phoneme(ch, pattern)
	if not ch then
		return false
	end
	if umatch(ch, pattern) then
		return true
	end
	return false
end

-- Convert WORD (an already-normalized orthographic string) into a list of
-- phoneme strings and return that list.
--   word     : string to convert; consumed left to right.
--   phonetic : accepted but not read anywhere in this function.
--   eccl     : if truthy, use letters_ipa_eccl instead of letters_ipa and run
--              the Ecclesiastical post-processing pass over the phoneme list.
-- NOTE(review): throughout this function index expressions appear to be
-- missing (`dictionary`, `ph`, `letters_ipa_eccl` and `vowels]` are used bare
-- where a single element is clearly intended) and the character classes in the
-- patterns are empty. The code does not parse as written; restore it from the
-- upstream module rather than guessing the offsets.
local function letters_to_ipa(word, phonetic, eccl)
	local ph = {}
	
	local dictionary = eccl and letters_ipa_eccl or letters_ipa
	
	-- Greedy longest-prefix tokenizer: at each position take the longest
	-- dictionary key that matches, otherwise pass one character through.
	while ulen(word) > 0 do
		local longestmatch = ""
		
		for letter in pairs(dictionary) do
			local letter_len = ulen(letter)
			if letter_len > ulen(longestmatch) and usub(word, 1, letter_len) == letter then
				longestmatch = letter
			end
		end
		
		if ulen(longestmatch) > 0 then
			local ipa = dictionary
			-- A table value stands for several phonemes in sequence.
			if type(ipa) == "table" then
				for _, phoneme in ipairs(ipa) do
					insert(ph, phoneme)
				end
			else
				insert(ph, ipa)
			end
			word = usub(word, ulen(longestmatch) + 1)
		else
			insert(ph, usub(word, 1, 1))
			word = usub(word, 2)
		end
	end

	if eccl then
		-- Context-dependent Ecclesiastical adjustments. `i` is advanced
		-- manually so that a rule can skip the phonemes it has consumed.
		local front_vowel = "^"
		local i, n = 1, #ph
		while i <= n do
			local cur = ph
			if cur == "c" then -- c, but not k/q/x
				ph = "k" -- default
				local nxt = ph
				if nxt == "c" then -- cc{e|i}: t.t͡ʃ
					if match_phoneme(ph, front_vowel) then
						ph, ph = "t", "t͡ʃ"
						i = i + 2
					end
				elseif match_phoneme(nxt, front_vowel) then -- c{e|i}: t͡ʃ
					ph = "t͡ʃ"
					i = i + 1
				end
			elseif cur == "ɡ" then
				local nxt = ph
				if nxt == "ɡ" then -- gg{e|i}: d.d͡ʒ
					if match_phoneme(ph, front_vowel) then
						ph, ph = "d", "d͡ʒ"
						i = i + 2
					end
				elseif nxt == "n" then -- gn: ɲ.ɲ
					ph, ph = "ɲ", "ɲ"
					i = i + 1
				elseif match_phoneme(nxt, front_vowel) then -- g{e|i}: d͡ʒ
					ph = "d͡ʒ"
				end
			elseif cur == "h" then
				ph = ""
			elseif cur == "m" then -- mihī̆ = michī̆
				if ph == "i" and ph == "h" and match_phoneme(ph, "^ː?$") then
					ph = letters_ipa_eccl
					i = i + 3
				end
			elseif cur == "n" then -- nihil = nichil
				if ph == "i" and ph == "h" and ph == "i" and ph == "l" then
					ph = letters_ipa_eccl
					i = i + 4
				end
			elseif cur == "s" then -- sc{e|i}: ʃ.ʃ
				if ph == "c" and match_phoneme(ph, front_vowel) then
					ph, ph = "ʃ", "ʃ"
					i = i + 2
				end
			elseif cur == "t" then -- ti{V}: t͡si
				if match_phoneme(ph, "^") and not match_phoneme(ph, "ʰ?$") and vowels] then
					ph = "t͡s"
					i = i + 2
				end
			elseif cur == "z" then
				if ph == "z" then -- zz: d.d͡z
					ph, ph = "d", "d͡z"
					i = i + 1
				else -- z: d͡z
					ph = "d͡z"
				end
			end
			i = i + 1
		end
	end
	
	return ph
end


-- Return the onset of SYLL (a list of phonemes) as one concatenated string:
-- every phoneme before the first vowel, with the stress mark "ˈ" left out.
-- Returns "" if the syllable starts with a vowel or is empty.
-- (The element indexing `syll[i]` / `vowels[...]` had been lost, leaving the
-- function unparseable; it is restored here.)
local function get_onset(syll)
	local consonants = {}
	
	for i = 1, #syll do
		local phoneme = syll[i]
		if vowels[phoneme] then
			break
		end
		-- The stress mark travels with the syllable but is not part of the onset.
		if phoneme ~= "ˈ" then
			insert(consonants, phoneme)
		end
	end
	
	return concat(consonants)
end


-- Return the coda of SYLL (a list of phonemes) as one concatenated string:
-- every phoneme after the last vowel, in original order. Returns "" if the
-- syllable ends in a vowel or is empty.
-- (The element indexing `syll[i]` / `vowels[...]` had been lost, leaving the
-- function unparseable; it is restored here.)
local function get_coda(syll)
	local consonants = {}
	
	-- Walk backwards from the end until a vowel is hit, prepending as we go so
	-- the result keeps left-to-right order.
	for i = #syll, 1, -1 do
		local phoneme = syll[i]
		if vowels[phoneme] then
			break
		end
		
		insert(consonants, 1, phoneme)
	end
	
	return concat(consonants)
end


-- Return the first phoneme of SYLL (a list of phonemes) that is a vowel, or
-- nil if the syllable contains none.
-- (The element indexing `syll[i]` / `vowels[...]` had been lost, leaving the
-- function unparseable; it is restored here.)
local function get_vowel(syll)
	for i = 1, #syll do
		local phoneme = syll[i]
		if vowels[phoneme] then
			return phoneme
		end
	end
	return nil
end


-- Split the word into syllables.
--   remainder : list of phoneme strings; may contain "." (explicit syllable
--               break) and "ˈ" (explicit stress mark).
-- Returns a list of syllables, each itself a list of phonemes. A first pass
-- cuts after every vowel (CV shape); a second pass moves consonants back onto
-- the preceding syllable until each onset is a permitted one. Syllables whose
-- onset or coda is still not permitted are only reported through track().
-- (The element indexing and set lookups in this function — `current[1]`,
-- `syllables[i - 1]`, `onsets[onset]`, `codas[coda]`, `vowels[...]` — had been
-- lost, leaving it unparseable; they are restored here.)
local function split_syllables(remainder)
	local syllables, syll = {}, {}

	for _, phoneme in ipairs(remainder) do
		if phoneme == "." then
			if #syll > 0 then
				insert(syllables, syll)
				syll = {}
			end
			-- Insert a special syllable consisting only of a period.
			-- We remove it later but it forces no movement of consonants across
			-- the period.
			insert(syllables, {"."})
		elseif phoneme == "ˈ" then
			if #syll > 0 then
				insert(syllables, syll)
			end
			-- The stress mark opens the next syllable.
			syll = {"ˈ"}
		elseif vowels[phoneme] then
			-- A vowel closes the current (C)V syllable.
			insert(syll, phoneme)
			insert(syllables, syll)
			syll = {}
		else
			insert(syll, phoneme)
		end
	end
	
	-- If there are phonemes left, then the word ends in a consonant.
	-- Add another syllable for them, which will get joined the preceding
	-- syllable down below.
	if #syll > 0 then
		insert(syllables, syll)
	end
	
	-- Split consonant clusters between syllables.
	-- NB: entries are deliberately removed while iterating with ipairs; the
	-- resulting skip of the following syllable is relied upon (see below).
	for i, current in ipairs(syllables) do
		if #current == 1 and current[1] == "." then
			-- If the current syllable is just a period (explicit syllable
			-- break), remove it. The loop will then skip the next syllable,
			-- which will prevent movement of consonants across the syllable
			-- break (since movement of consonants happens from the current
			-- syllable to the previous one).
			remove(syllables, i)
		elseif i > 1 then
			local previous = syllables[i - 1]
			local onset = get_onset(current)
			-- Shift over consonants until the syllable onset is valid
			while not (onset == "" or onsets[onset]) do
				insert(previous, remove(current, 1))
				onset = get_onset(current)
			end
			
			-- If the preceding syllable still ends with a vowel,
			-- and the current one begins with s + another consonant, then shift it over.
			if get_coda(previous) == "" and (current[1] == "s" and not vowels[current[2]]) then
				insert(previous, remove(current, 1))
			end
			
			-- Check if there is no vowel at all in this syllable. That
			-- generally happens either (1) with an explicit syllable division
			-- specified, like 'cap.ra', where the consonant before the period
			-- ends up in a vowelless syllable of its own, followed by the
			-- period syllable; or (2) at the end of a word that ends with
			-- one or more consonants. We move the consonants onto the preceding
			-- syllable, then remove the resulting empty syllable. If the
			-- new current syllable is a period syllable, remove it, too. The
			-- loop will then skip the next syllable, which will prevent
			-- movement of consonants across the syllable break (since movement
			-- of consonants happens from the current syllable to the previous
			-- one).
			if not get_vowel(current) then
				for _ = 1, #current do
					insert(previous, remove(current, 1))
				end
				remove(syllables, i)
				if syllables[i] and #syllables[i] == 1 and syllables[i][1] == "." then
					remove(syllables, i)
				end
			end
		end
	end
	
	-- Sanity check: report (but do not reject) syllables with an onset or coda
	-- that is not in the permitted sets.
	for _, syll in ipairs(syllables) do
		local onset = get_onset(syll)
		local coda = get_coda(syll)
		
		if not (onset == "" or onsets[onset]) then
			track("bad onset")
			--error("onset error:[" .. onset .. "]")
		end
		
		if not (coda == "" or codas[coda]) then
			track("bad coda")
			--error("coda error:[" .. coda .. "]")
		end
	end
	
	return syllables
end

-- Return a truthy value if PHONEME matches the short-vowel pattern below
-- (the result of rfind is passed through unchanged).
-- NOTE(review): the pattern is "^$", which only matches the empty string; the
-- bracketed vowel class between the anchors appears to have been lost —
-- restore it from the upstream module.
local function phoneme_is_short_vowel(phoneme)
	return rfind(phoneme, "^$")
end

-- Decide which syllable carries the stress.
--   syllables : list of syllables (each a list of phonemes). If a syllable
--               contains an explicit "ˈ", that mark is removed from it in
--               place and the syllable's index is returned.
--   is_prefix : truthy if the word is a prefix (never stressed).
--   is_suffix : truthy if the word is a suffix (stressed only if the stress
--               would fall on the suffix itself).
-- Returns the 1-based index of the stressed syllable, -1 for "no stress", or
-- nothing when `syllables` is empty.
-- (The element indexing — `syllables[i][j]`, `syllables[1]`,
-- `penult[#penult]` — had been lost; it is restored here.)
local function detect_accent(syllables, is_prefix, is_suffix)
	-- Manual override
	for i = 1, #syllables do
		for j = 1, #syllables[i] do
			if syllables[i][j] == "ˈ" then
				remove(syllables[i], j)
				return i
			end
		end
	end
	-- Prefixes have no accent.
	if is_prefix then
		return -1
	end
	-- Suffixes have an accent only if the stress would be on the suffix when the
	-- suffix is part of a word. Don't get tripped up by the first syllable being
	-- nonsyllabic (e.g. in -rnus).
	if is_suffix then
		local syllables_with_vowel = #syllables - (get_vowel(syllables[1]) and 0 or 1)
		if syllables_with_vowel < 2 then
			return -1
		end
		if syllables_with_vowel == 2 then
			local penult = syllables[#syllables - 1]
			if phoneme_is_short_vowel(penult[#penult]) then
				return -1
			end
		end
	end
	-- Detect accent placement
	if #syllables > 2 then
		-- Does the penultimate syllable end in a single vowel?
		local penult = syllables[#syllables - 1]
		
		if phoneme_is_short_vowel(penult[#penult]) then
			-- Light penult: stress the antepenult.
			return #syllables - 2
		else
			-- Heavy penult: stress the penult.
			return #syllables - 1
		end
	elseif #syllables == 2 then
		return #syllables - 1
	elseif #syllables == 1 then
		return #syllables --mark stress on monosyllables so that stress-conditioned sound rules work correctly. Then, delete it prior to display
	end
end

-- Collapse every run of syllable-break characters ("." and "ˈ") in WORD into a
-- single character: "ˈ" if the run contains a stress mark, "." otherwise.
-- Returns only the resulting string (the substitution count is dropped).
-- (The pattern had been reduced to "+", i.e. a literal plus sign, so nothing
-- was ever collapsed; the character class is restored here.)
local function clean_syllable_breaks(word)
	return (ugsub(word, "[%.ˈ]+", function(m)
		return m:find("ˈ") and "ˈ" or "."
	end))
end

-- Convert one orthographic WORD to an IPA string.
--   word     : a single word (may carry a leading/trailing hyphen marking a
--              suffix/prefix, "." syllable breaks, "_" joiners and a stress
--              mark).
--   phonetic : if truthy, additionally apply the phonetic rule list and the
--              geminate-to-length conversion at the end.
--   eccl     : if truthy, use the Ecclesiastical letter table and rules.
-- Pipeline: orthographic normalization -> letters_to_ipa -> split_syllables ->
-- detect_accent -> per-syllable vowel adjustments -> join with "." and "ˈ" ->
-- optional phonetic rules -> clean_syllable_breaks.
-- NOTE(review): many patterns in this function have lost their bracketed
-- character classes (e.g. "%f", "()()", ""), and several element accesses are
-- bare (`syll`, `rule`, `syllables`, `lax_vowel]`). The code does not parse as
-- written; restore it from the upstream module rather than guessing.
local function convert_word(word, phonetic, eccl)
	-- Normalize i/j/u/v; do this before removing breves, so we keep the
	-- ŭ in langŭī (perfect of languēscō) as a vowel.
	word = ugsub(word, "(" .. vowels_c .. ")(" .. non_vowels_c .. ")", "%1u%2")
	word = ugsub(word, "qu(" .. vowels_c .. ")", "qw%1")
	word = word:gsub("%fw", "qw")
	word = ugsub(word, "%fgu(" .. vowels_c .. ")", "gw%1") -- nguV or initial guV

	word = ugsub(word, "^i(" .. vowels_c .. ")", "j%1")
	word = ugsub(word, "^u(" .. vowels_c .. ")", "v%1")
	-- Per the August 31 2019 recommendation by ] in
	-- ], we convert i/j between vowels to jj if the
	-- preceding vowel is short but to single j if the preceding vowel is long.
	word = ugsub(word, "(" .. vowels_c .. ")('?)()()", function (vowel, stress, cons, pos)
		-- `pos` is a position capture; only rewrite when the character at
		-- that position is a vowel. Returning nothing keeps the match as is.
		if vowels_string:find(usub(word, pos, pos)) then
			if cons == "u" then
				cons = stress .. "v"
			-- FIXME: this should also catch diphthongs.
			elseif eccl or long_vowels_string:find(vowel) then
				cons = stress .. "j"
			else
				cons = "j" .. stress .. "j"
			end
			return vowel .. cons
		end
	end)

	--Convert v/w to u syllable-finally
	word = word:gsub("%f", "u")

	-- Convert i to j before vowel and after any prefix that ends in a consonant,
	-- per the August 23 2019 discussion in ].
	for _, pref in ipairs(cons_ending_prefixes) do
		word = ugsub(word, "^(" .. pref .. ")i(" .. vowels_c .. ")", "%1j%2")
	end

	-- Convert z to zz between vowels so that the syllable weight and stress assignment will be correct.
	word = ugsub(word, "(" .. vowels_c .. ")z(" .. vowels_c .. ")", "%1zz%2")

	if eccl then
		word = ugsub(word, "(" .. vowels_c .. ")ti(" .. vowels_c .. ")", "%1tt͡si%2")
	end

	-- Now remove breves.
	word = remove_diacritic(word, BREVE)

	-- Normalize syllabic vowels like aë, oë; do this after removing breves but
	-- before any other normalizations.
	word = ugsub(word, "(" .. vowels_c .. ")()", "%1.%2")
	word = remove_diacritic(word, TREMA)

	-- Assume the u in a final -us or -um is not part of a diphthong
	word = word:gsub("()(u)$", "%1.%2")
	word = word:gsub("%f", "%0.")
	word = word:gsub("_", "")
	
	-- Vowel length before nasal + fricative is allophonic
	word = toNFC(toNFD(word):gsub("()" .. MACRON .. "(*)", "%1%2"))

	if eccl then
		word = toNFC(word:gsub("%f", "%0" .. MACRON))
	end
	
	-- Per May 10 2019 discussion in ], we syllabify
	-- prefixes ab-, ad-, ob-, sub- separately from following l or r.
	word = word:gsub("^a()()", "a%1.%2")	
	word = word:gsub("^ob()", "ob.%1")	
	word = word:gsub("^sub()", "sub.%1")

	-- Remove hyphens indicating prefixes or suffixes; do this after the above,
	-- some of which are sensitive to beginning or end of word and shouldn't
	-- apply to end of prefix or beginning of suffix.
	local is_prefix, is_suffix
	word = word:gsub("^(%-?)(.-)(%-?)$", function(m1, m2, m3)
		-- Note the assignment as written: a LEADING hyphen sets is_prefix and
		-- a TRAILING hyphen sets is_suffix.
		is_prefix, is_suffix = m1 == "-", m3 == "-"
		return m2
	end)

	-- Convert word to IPA
	local phonemes = letters_to_ipa(word, phonetic, eccl)
	
	-- Split into syllables
	local syllables = split_syllables(phonemes)
	
	-- Add accent
	local accent = detect_accent(syllables, is_prefix, is_suffix)

	-- Per-phoneme adjustments: drop length marks (Ecclesiastical) or lax the
	-- short vowels (Classical phonetic).
	for _, syll in ipairs(syllables) do
		for j in ipairs(syll) do
			if eccl then
				syll = syll:gsub("ː", "")
			elseif phonetic then
				syll = lax_vowel] or syll
			end
		end
	end

	for i, syll in ipairs(syllables) do
		-- Ecclesiastical phonetic: lengthen the vowel of the stressed syllable.
		if eccl and i == accent and phonetic and vowels] then
			syll = lengthen_vowel] or syll
		end
	
		-- Blank out the first of two adjacent identical phonemes.
		for j = 1, #syll - 1 do
			if syll == syll then
				syll = ""
			end
		end
	end

	for i, syll in ipairs(syllables) do
		syll = concat(syll)
		-- Atonic /ɔ/ and /ɛ/ merge with /o/ and /e/ respectively
		if eccl and phonetic and i == accent then
			syll = syll:gsub("o", "ɔ")
			syll = syll:gsub("e", "ɛ")
		-- Syllable-initial /ɡn/ becomes /n/ (e.g. "gnōscō")
		elseif not eccl then
			syll = syll:gsub("^ɡn", "n")
		end
		syllables = (i == accent and "ˈ" or "") .. syll
	end

	word = clean_syllable_breaks(concat(syllables, "."))

	-- poetic meter shows that a consonant before "h" was syllabified as an onset, not as a coda. 
	-- Based on outcome of talk page discussion, this will be indicated by the omission of /h/  in this context.
	word = ugsub(word, "()(*)h", "%2%1")
	
	if eccl then
		-- Replace ʃ.ʃ or ʃˈʃ with .ʃ or ˈʃ after any consonant.
		word = ugsub(word, "()ʃ(*)ʃ", "%1%2ʃ")
		word = word:gsub("ʰ", "")
	else
		-- ]
		-- word = word:gsub("j", "i̯") -- normalize glide spelling
		-- word = word:gsub("w", "u̯")
	end

	if phonetic then
		local rules = eccl and phonetic_rules_eccl or phonetic_rules
		-- Apply every rewrite rule in list order.
		for _, rule in ipairs(rules) do
			word = ugsub(word, rule, rule)
		end
		-- [2025-05-15: now that we've disabled the phonemic notation, we should put the syllable breaks in the
		-- phonetic notation, as otherwise they don't display at all]
		-- word = word:gsub("%.+", "") -- remove the dots
	end

	if phonetic then
		word = ugsub(word, "(%a(?))%1", "%1" .. LONG) -- convert double consonants into long ones
		word = ugsub(word, "+", "ː") -- maximum of one full length mark
	end

	return clean_syllable_breaks(word)
end

-- Convert TEXT (one or more words) to an IPA string.
--   text     : orthographic input.
--   phonetic : passed through to convert_word().
--   eccl     : passed through to convert_word().
-- Returns the converted words joined by single spaces and trimmed.
-- Raises an error naming the offending characters if anything is left after
-- the `disallowed` filter below.
-- NOTE(review): the character classes in several patterns here are empty
-- ("" and "()"), apparently lost; as written the disallowed-character filter
-- and the word test cannot work as intended. Restore from upstream.
function export.convert_words(text, phonetic, eccl)
	-- Whatever survives this substitution is reported as disallowed.
	local disallowed = ugsub(text, "", "")
	local n = ulen(disallowed)
	if n > 0 then
		-- Two-stage format: the first fills in the characters, the second the
		-- singular/plural wording ("%%s" survives the first pass as "%s").
		local msg = ("The character%%s %s %%s not allowed."):format(mw.dumpObject(disallowed))
		if n == 1 then
			error(msg:format("", "is"))
		else
			error(msg:format("s", "are"))
		end
	end
	
	text = toNFD(text)
	-- Call ulower() even though it's also called in phoneticize,
	-- in case convert_words() is called externally.
	text = ulower(text)
	text = text:gsub("", "")
	text = ugsub(text, "()(" .. LENGTH .. ")", normalize_ligatures)
	-- Treat a tie as "_".
	text = text:gsub(TIE, "_")
	
	local result = {}
	-- Split on spaces and hyphens, but hyphens preceded/followed by a space
	-- are included in the word (e.g. prefixes and suffixes).
	for chunk in gsplit(text, " +") do
		for word in gsplit(chunk, "%f%-+%f") do
			if word:match("") then
				insert(result, convert_word(toNFC(word), phonetic, eccl))
			end
		end
	end
	
	return trim(concat(result, " "))
end

-- Phoneticize Latin TEXT. Return a list of one or more phoneticizations,
-- each of which is a two-element list {PHONEMIC, PHONETIC}. If ECCL, use
-- Ecclesiastical pronunciation. Otherwise, use Classical pronunciation.
-- Two variants are produced when the text contains a macron+breve sequence
-- (long vs. short vowel) or a ";" (removed vs. replaced by a "." syllable
-- break); identical variants are collapsed to one.
-- NOTE(review): the vowel character class in front of BREVE is empty in the
-- three patterns below ("" and "()"), apparently lost; restore from upstream.
function export.phoneticize(text, eccl)
	-- Build one {phonemic, phonetic} pair for a single spelling.
	local function do_phoneticize(text, eccl)
		return {
			export.convert_words(text, false, eccl),
			export.convert_words(text, true, eccl),
		}
	end

	text = ulower(text)
	-- If we have a macron-breve sequence, generate two pronunciations, one for
	-- the long vowel and one for the short.
	if rfind(text, "" .. BREVE) then
		local longvar = ugsub(text, "()" .. BREVE, "%1")
		local shortvar = ugsub(text, "()" .. BREVE, macrons_to_breves)
		local longipa = do_phoneticize(longvar, eccl)
		local shortipa = do_phoneticize(shortvar, eccl)
		-- Make sure long and short variants are actually different (they won't
		-- be in Ecclesiastical pronunciation).
		if not deep_equals(longipa, shortipa) then
			return {longipa, shortipa}
		else
			return {longipa}
		end
	elseif rfind(text, ";") then
		-- ";" marks an optional syllable break: try it both ways.
		local tautosyllabicvar = text:gsub(";", "")
		local heterosyllabicvar = text:gsub(";", ".")
		local tautosyllabicipa = do_phoneticize(tautosyllabicvar, eccl)
		local heterosyllabicipa = do_phoneticize(heterosyllabicvar, eccl)
		if not deep_equals(tautosyllabicipa, heterosyllabicipa) then
			return {tautosyllabicipa, heterosyllabicipa}
		else
			return {tautosyllabicipa}
		end
	else
		return {do_phoneticize(text, eccl)}
	end
end

-- Format one output row (one pronunciation system).
--   phoneticizations : list of {PHONEMIC, PHONETIC} pairs, as returned by
--                      export.phoneticize().
--   dials            : list of accent qualifier labels shown before the IPA.
--   include_phonemic : if truthy, show "/phonemic/ [phonetic]"; otherwise only
--                      "[phonetic]".
-- Returns the formatted qualifier text, a space, and the formatted IPA.
-- (The pair indexing and the bracketed phonetic string had been lost, so every
-- item came out empty or errored on concatenation; they are restored here.)
local function make_row(phoneticizations, dials, include_phonemic)
	local IPA_items = {}
	for _, phoneticization in ipairs(phoneticizations) do
		local phonemic = phoneticization[1]
		local phonetic = phoneticization[2]
		local IPA_arg
		local phonetic_brackets = "[" .. phonetic .. "]"
		if include_phonemic then
			IPA_arg = {pron = "/" .. phonemic .. "/ " .. phonetic_brackets}
		else
			IPA_arg = {pron = phonetic_brackets}
		end
		insert(IPA_items, IPA_arg)
	end
	return m_a.format_qualifiers(lang, dials) .. " " .. m_IPA.format_IPA_full { lang = lang, items = IPA_items }
end

-- Template entry point. Reads the arguments of the parent frame, and returns
-- wikitext with a Classical row and/or an Ecclesiastical row (each built by
-- make_row), optionally preceded by an annotation, followed by any categories.
-- NOTE(review): the positional-parameter key in the spec table and the index
-- on `args` for `text` are missing (` = {default = ...}`, `local text = args`),
-- and the pattern in the annotation gsub is empty; these appear to have been
-- lost. Restore from upstream.
-- NOTE(review): `include_phonemic` is declared as a parameter but is not
-- passed to make_row() in either call below — confirm whether that is intended.
function export.show_full(frame)
	local boolean_default_true = {type = "boolean", default = true}
	local args = require("Module:parameters").process(frame:getParent().args, {
		 = {default = mw.title.getCurrentTitle().nsText == "Template" and "īnspīrāre" or mw.loadData("Module:headword/data").pagename},
		classical = boolean_default_true,
		cl = {alias_of = "classical"},
		ecclesiastical = boolean_default_true,
		eccl = {alias_of = "ecclesiastical"},
		vul = {type = "boolean"}, -- To be removed.
		-- 2025-05-15: Add include_phonemic (not by default) per ]
		include_phonemic = {type = "boolean"},
		ann = true,
		accent = {list = true},
		indent = true
	})

	-- Track down any remaining uses of |vul=
	if args.vul ~= nil then
		track("vul")
	end

	local text = args
	local categories = {}
	local accent = args.accent

	-- List marker used for continuation rows; the first row only gets it when
	-- |indent= was given explicitly.
	local indent = (args.indent or "*") .. " "
	local out = ""
	
	if args.indent then
		out = indent
	end
	
	if args.classical then
		out = out .. make_row(export.phoneticize(text, false), #accent > 0 and accent or {"Classical"})
	else
		insert(categories, lang:getCanonicalName() .. " terms with Ecclesiastical IPA pronunciation only")
	end
	
	-- |ann=1 annotates with the (filtered) input text; any other value is used
	-- verbatim as the annotation.
	local anntext = (
		args.ann == "1" and "'''" .. text:gsub("", "") .. "''':&#32;" or
		args.ann and "'''" .. args.ann .. "''':&#32;" or
		"")

	out = anntext .. out
	
	if args.ecclesiastical then
		if args.classical then
			out = out .. "\n" .. indent .. anntext
		end
		out = out .. make_row(
			export.phoneticize(text, true),
			#accent > 0 and accent or {'Ecclesiastical'}
		)
	end
	
	return out .. require("Module:utilities").format_categories(categories)
end

return export
Module:la-IPA

Wikious

Boobota

Sagapedia