Módulo:generar-pron/it

La documentación para este módulo puede ser creada en Módulo:generar-pron/it/doc
--[=[
Basado en la versión de en.wikt

This module implements the templates {{it-pr}} and {{it-IPA}}.

Author: benwing2
Implementado por Tmagc

FIXME:

1. Support raw pronunciations in {{it-pr}}. (DONE)
2. ahimè should generate aj.mɛ not a.i.mɛ. (DONE)
3. oriuòlo should divide as o.riuò.lo, both in phonemic and hyphenation. (DONE)
4. Handle <hmp:...> for homophones. (DONE)
5. Raw pronunciations need to use raw:, esp. for  phonetic pronunciation. (DONE)
6. Handle hyphenation of Uppsala, massmediale correctly. (DONE)
7. Homophone may end up before rhyme/hyphenation when it should come after, e.g. in ].
8. Cases like [[Katmandu]] with respelling ''Katmandù'' should auto-hyphenate.
]=]

local export = {}

local insert = table.insert
local concat = table.concat
local unpack = unpack or table.unpack

local m_table = require("Módulo:tabla")
local m_str = require("Módulo:String")

local u = m_str.char
local strfind = m_str.find
local strsubn = m_str.gsub
local strsubb = m_str.gsubb
local strmatch = m_str.match
local strmatchit = m_str.gmatch
local strsubrep = m_str.gsub_rep
local strsplit = m_str.split
local strstrip = m_str.strip
local strupper = m_str.upper
local strlower = m_str.lower
local strnfd = m_str.toNFD
local strnfc = m_str.toNFC
local strstrip = m_str.strip
local substr = m_str.sub
local strlen = m_str.len
local strexplode = m_str.explode_utf8
local strhtml = m_str.encode_html

-- Substitution helper: runs strsubn and hands back only the resulting string,
-- discarding any further return values (such as the replacement count).
local function strsub(text, pattern, repl, n)
	return (strsubn(text, pattern, repl, n))
end

-- Placeholder code points used as temporary markers while the text is being transformed.
-- NOTE(review): TEMP1 and SYLDIV are both U+FFF0, so they cannot be told apart if both are ever in play.
-- Temporarily substitutes for comma+space.
local TEMP1 = u(0xFFF0)
local SYLDIV = u(0xFFF0) -- used to represent a user-specific syllable divider (.) so we won't change it
local WORDDIV = u(0xFFF1) -- used to represent a user-specific word divider (.) so we won't change it
local TEMP_Z = u(0xFFF2)
local TEMP_S = u(0xFFF3)
local TEMP_H = u(0xFFF4)
local TEMP_X = u(0xFFF5)
-- Combining diacritics.
local AC = u(0x301) -- acute
local GR = u(0x300) -- grave
local CFLEX = u(0x302) -- circumflex
local DOTOVER = u(0x0307) -- dot above: signals an unstressed word
local DOTUNDER = u(0x0323) -- dot below: unstressed vowel with quality marker
local LINEUNDER = u(0x0331) -- macron below: secondary-stressed vowel with quality marker
local DIA = u(0x0308) -- diaeresis
local TIE = u(0x0361) -- tie

-- Naming convention: a lowercase name holds the raw member characters of a set; the matching `*_c` (or
-- uppercase) name wraps them in brackets to form a pattern character class. The bracketed classes below had
-- been reduced to empty strings, which made every pattern built from them meaningless.
local primary_stress = "ˈ"
local secondary_stress = "ˌ"
local stress = "ˈˌ"
local stress_c = "[" .. stress .. "]"
local quality = AC .. GR
local quality_c = "[" .. quality .. "]"
local accent = stress .. quality .. CFLEX .. DOTOVER .. DOTUNDER .. LINEUNDER
local accent_c = "[" .. accent .. "]"

local sepsil = "-"

local separadores_silabicos = "%."..sepsil..SYLDIV..stress
local SEPARADORES_SILABICOS = "[" .. separadores_silabicos .. "]"

-- NOTE(review): the leading members of these two classes (assumed here to be round and square brackets)
-- were reconstructed; confirm against the live module.
local PUNTUACION = "[%(%)%[%]%{%}¡!¿?.,;:–—]"
local PUNTUACION_EXTRA = "[%(%)%[%]%{%}¡!¿?.,;:–—\"“”„‟‘’«»»«‹››‹]"

local glide = "jwJW"
local liquid = "lrLR"
local tie = "‿⁀'"
local W = "[" .. glide .. "]"
local W_OR_TIE = "[" .. glide .. tie .. "]"
-- We include both phonemic and spelling forms of vowels and both lowercase and uppercase
-- for flexibility in applying at various stages of the transformation from spelling -> phonemes.
local vowel_not_high = "aeɛoɔøöüAEƐOƆØÖÜ"
local vowel_not_i = vowel_not_high .. "uU"
local vowel_not_u = vowel_not_high .. "iyIY"
local vowel = vowel_not_high .. "iuyIUY"
local V = "[" .. vowel .. "]"
local V_NOT_HIGH = "[" .. vowel_not_high .. "]"
local V_NOT_I = "[" .. vowel_not_i .. "]"
local V_NOT_U = "[" .. vowel_not_u .. "]"
local VW = "[" .. vowel .. "jw]" -- vowel or glide
local NV = "[^" .. vowel .. "]" -- anything that is not a vowel
local charsep_not_tie = accent .. "." .. SYLDIV
local charsep_not_tie_c = "[" .. charsep_not_tie .. "]"
local charsep = charsep_not_tie .. tie
local charsep_c = "[" .. charsep .. "]"
local wordsep_not_tie = charsep_not_tie .. " #"
local wordsep = charsep .. " #"
local wordsep_c = "[" .. wordsep .. "]"
local cons_guts = "^" .. vowel .. wordsep .. "_" -- guts of consonant class
local C = "[" .. cons_guts .. "]" -- consonant
local C_NOT_SRZ = "[" .. cons_guts .. "srzSRZ]" -- consonant not including srz
-- NOTE(review): exact sibilant inventory reconstructed from the affricate/sibilant symbols used in this file.
local C_NOT_SIBILANT_OR_R = "[" .. cons_guts .. "rszʃʒʦʣʧʤ]" -- consonant not including r or sibilant
local C_NOT_H = "[" .. cons_guts .. "hH]" -- consonant not including h
local C_OR_EOW_NOT_GLIDE_LIQUID = "[^" .. vowel .. charsep .. " _" .. glide .. liquid .. "]" -- consonant not lrjw, or end of word
local C_OR_TIE = "[^" .. vowel .. wordsep_not_tie .. "_]" -- consonant or tie (‿⁀')
local front = "eɛij"
local front_c = "[" .. front .. "]"
-- NOTE(review): membership of the voiced-consonant class was reconstructed; confirm against the live module.
local voiced_C_c = "[bdglmnrvʣʤŋʎɲ]"
local pron_sign = "#!*°"
local pron_sign_c = "[" .. pron_sign .. "]"
local pron_sign_or_punc = pron_sign .. "?|,"
local pron_sign_or_punc_c = "[" .. pron_sign_or_punc .. "]"

-- Maps each single-character affricate used internally to its tied two-character IPA form.
local full_affricates = { ["ʦ"] = "t͡s", ["ʣ"] = "d͡z", ["ʧ"] = "t͡ʃ", ["ʤ"] = "d͡ʒ" }

-- Ordered list of {suffix pattern, accented respelling} pairs. canonicalize_and_auto_accent() tries each pattern
-- anchored at the end of a word (pattern .. "$") and stops at the first one that matches, so the order of the
-- entries matters (see the "must precede"/"must follow" remarks).
-- NOTE(review): every empty capture "()" below looks like it has lost a bracketed character class (e.g. the
-- vowel alternation that "%1"/"%2" in the replacement is meant to echo). As written, "()" is a position capture.
-- Restore the classes from the live module before relying on this table.
local recognized_suffixes = {
	-- -(m)ente, -(m)ento
	{"izzamento", "iddzaménto"}, -- must precede -mento below
	{"ment()", "mént%1"}, -- must precede -ente/o below; must follow -izzamento above
	{"ent()", "ènt%1"}, -- must follow -mente/o above
	-- verbs
	{"izzare", "iddzàre"}, -- must precede -are below
	{"izzarsi", "iddzàrsi"}, -- must precede -arsi below
	{"()re", "%1" .. GR .. "re"}, -- must follow -izzare above
	{"()rsi", "%1" .. GR .. "rsi"}, -- must follow -izzarsi above
	-- nouns
	{"izzatore", "iddzatóre"}, -- must precede -tore below
	{"()ore", "%1óre"}, -- must follow -izzatore above
	{"izzatrice", "iddzatrìce"}, -- must precede -trice below
	{"trice", "trìce"}, -- must follow -izzatrice above
	{"izzazione", "iddzatsióne"}, -- must precede -zione below
	{"zione", "tsióne"}, -- must precede -one below and follow -izzazione above
	{"one", "óne"}, -- must follow -zione above
	{"acchio", "àcchio"},
	{"acci()", "àcci%1"},
	{"()ggine", "%1" .. GR .. "ggine"},
	{"aggio", "àggio"},
	{"()gli()", "%1" .. GR .. "gli%2"},
	{"ai()", "ài%1"},
	{"()me", "%1" .. GR .. "me"},
	{"()nza", "%1" .. GR .. "ntsa"},
	{"ario", "àrio"},
	{"()orio", "%1òrio"},
	{"astr()", "àstr%1"},
	{"ell()", "èll%1"},
	-- exceptions to the following: antimatèria, artèria, cattivèria, fèria, Ibèria, Libèria, matèria, misèria, Nigèria, Sibèria, Valèria
	{"eria", "erìa"},
	{"etta", "étta"},
	-- do not include -etto, both ètto and étto are common
	{"ezza", "éttsa"},
	{"ficio", "fìcio"},
	{"ier()", "ièr%1"},
	-- do not include -iere, lots of verbs in unstressed -iere
	{"ifero", "ìfero"},
	{"ismo", "ìsmo"},
	{"ista", "ìsta"},
	{"izi()", "ìtsi%1"},
	{"logia", "logìa"},
	-- do not include -otto, both òtto and ótto are common
	{"tudine", "tùdine"},
	{"ura", "ùra"},
	{"()uro", "%1ùro"},
	-- adjectives
	{"izzante", "iddzànte"}, -- must precede -ante below
	{"ante", "ànte"}, -- must follow -izzante above
	{"izzando", "iddzàndo"}, -- must precede -ando below
	{"()ndo", "%1" .. GR .. "ndo"}, -- must follow -izzando above
	{"izzabile", "iddzàbile"}, -- must precede -abile below
	{"()bile", "%1" .. GR .. "bile"}, --must follow -izzabile above
	{"ale", "àle"},
	{"()nico", "%1" .. GR .. "nico"},
	{"()stic()", "%1" .. GR .. "stic%2"},
	{"izzat()", "iddzàt%1"}, -- must precede -at below
	-- exceptions to the following: àbato, àcato, acròbata, àgata, apòstata, àstato, cìato, fégato, omeòpata,
	-- sàb(b)ato, others?
	{"at()", "àt%1"}, -- must follow -izzat above
	-- exceptions to the following: (s)còmputo, (pre)scòrbuto, tànguto; cùscuta, dìsputa, rècluta, lui vàluta
	{"()ut()", "%1ùt%2"},
	{"()tic()", "%1" .. GR .. "tic%2"},
	{"ense", "ènse"},
	{"esc()", "ésc%1"},
	{"evole", "évole"},
	-- FIXME: Systematic exceptions to the following in 3rd plural present tense verb forms
	{"ian()", "iàn%1"},
	{"iv()", "ìv%1"},
	{"oide", "òide"},
	{"oso", "óso"},
}

-- Set of words that the auto-accenting step treats as inherently unstressed, built from the list below.
local unstressed_words = m_table.listToSet({
	-- definite articles
	"il", "lo", "la", "i", "gli", "le",
	-- indefinite articles
	"un",
	-- object pronouns
	"mi", "ti", "si", "ci", "vi", "li",
	-- conjunctive object pronouns
	"me", "te", "se", "ce", "ve", "ne",
	-- conjunctions
	"e", "ed", "o", "od",
	-- forms of avere
	"ho", "hai", "ha",
	-- misc particles
	"chi", "che", "non",
	-- prepositions (plain and combined with the article)
	"di", "del", "dei",
	"a", "ad", "al", "ai",
	"da", "dal", "dai",
	"in", "nel", "nei",
	"con", "col", "coi",
	"su", "sul", "sui",
	"per", "pei",
	"tra", "fra",
})

-- One entry per position, each a list of one or more spellings.
-- NOTE(review): presumably the Italian names of the 26 letters a-z, in alphabetical order; the letter tags
-- on each line follow that assumption.
local pron_abc = {
	{"a"},                          -- a
	{"bi"},                         -- b
	{"ci"},                         -- c
	{"di"},                         -- d
	{"e"},                          -- e
	{"effe"},                       -- f
	{"gi"},                         -- g
	{"acca"},                       -- h
	{"i"},                          -- i
	{"i lunga"},                    -- j
	{"kappa"},                      -- k
	{"elle"},                       -- l
	{"emme"},                       -- m
	{"enne"},                       -- n
	{"o"},                          -- o
	{"pi"},                         -- p
	{"cu"},                         -- q
	{"erre"},                       -- r
	{"esse"},                       -- s
	{"ti"},                         -- t
	{"u"},                          -- u
	{"vi", "vu"},                   -- v
	{"doppia vi", "doppia vu"},     -- w
	{"ics"},                        -- x
	{"i greca", "ipsilon"},         -- y
	{"dzeta"},                      -- z
}


-- Collapse every doubled percent sign ("%%") in `s` into a single "%" and return the resulting string.
-- Note that, despite the name ("escape percent"), the substitution goes from two percent signs to one.
-- Only the string is returned: going through strsub() drops the replacement count, which would otherwise
-- leak out as a second return value and could be picked up by a caller as an extra argument.
local function escapar_porcentaje(s)
    return strsub(s, "%%%%", "%%")
end

-- Apply canonical Unicode decomposition to text, e.g. è → e + ◌̀. But recompose ö and ü so we can treat them as single
-- vowels, and put LINEUNDER/DOTUNDER/DOTOVER after acute/grave (canonical decomposition puts LINEUNDER and DOTUNDER
-- first).
-- Returns the adjusted, decomposed string.
local function decompose(text)
	text = strnfd(text)
	-- Any character + diaeresis is looked up in the table; sequences without an entry are left untouched.
	-- (The table keys had been lost, which left the constructor syntactically invalid.)
	text = strsub(text, "." .. DIA, {
		["o" .. DIA] = "ö",
		["O" .. DIA] = "Ö",
		["u" .. DIA] = "ü",
		["U" .. DIA] = "Ü",
	})
	-- Swap "stress-level mark + quality mark" into "quality mark + stress-level mark".
	text = strsub(text, "([" .. LINEUNDER .. DOTUNDER .. DOTOVER .. "])(" .. quality_c .. ")", "%2%1")
	return text
end

-- Recompose `text` into canonical composed form via strnfc, e.g. e + ◌̀ → è.
local function compose(text)
	local composed = strnfc(text)
	return composed
end

-- Split into words. Hyphens separate words but not when used to denote affixes, i.e. hyphens between non-spaces
-- separate words. Return value includes alternating words and separators (words at odd indices, separators at
-- even indices). Use concat(words) to reconstruct the initial text.
local function split_but_rejoin_affixes(text)
	-- Fast path: nothing to split on. (The class had been lost, leaving an empty pattern that always matches.)
	if not strfind(text, "[%s%-]") then
		return {text}
	end
	-- First replace hyphens separating words with a special character. Remaining hyphens denote affixes and don't
	-- get split. After splitting, replace the special character with a hyphen again.
	local TEMP_HYPH = u(0xFFF0)
	text = strsubrep(text, "([^%s])%-([^%s])", "%1" .. TEMP_HYPH .. "%2")
	-- The capture makes the split keep the separators in the result.
	local words = strsplit(text, "([%s" .. TEMP_HYPH .. "]+)")
	for i, word in ipairs(words) do
		if word == TEMP_HYPH then
			-- Write back into the list slot; assigning to `words` itself would replace the whole list with "-".
			words[i] = "-"
		end
	end
	return words
end

-- Strip secondary-stress and unstressed-quality marking from every word of `text`, leaving at most one
-- quality (stress) mark per word, and return the reassembled text.
local function remove_secondary_stress(text)
	local words = split_but_rejoin_affixes(text)
	for i, word in ipairs(words) do
		if (i % 2) == 1 then -- an actual word, not a separator
			-- Remove unstressed quality marks.
			word = strsub(word, quality_c .. DOTUNDER, "")
			-- Remove secondary stresses. Specifically:
			-- (1) Remove secondary stresses marked with LINEUNDER if there's a previously stressed vowel.
			-- (2) Otherwise, just remove the LINEUNDER, leaving the accent mark, which will then be removed if there's
			--     a following stressed vowel, but left if it's the only stress in the word, as in có̱lle = con le.
			--     (In the process, we remove other non-stress marks.)
			-- (3) Remove stress mark if there's a following stressed vowel.
			word = strsubrep(word, "(" .. quality_c .. ".*)" .. quality_c .. LINEUNDER, "%1")
			-- NOTE(review): class reconstructed from step (2) above (LINEUNDER plus the other non-stress marks).
			word = strsub(word, "[" .. CFLEX .. DOTOVER .. DOTUNDER .. LINEUNDER .. "]", "")
			word = strsubrep(word, quality_c .. "(.*" .. quality_c .. ")", "%1")
			-- Store the result in its slot; assigning to `words` would replace the list with a string.
			words[i] = word
		end
	end
	return concat(words)
end

-- Delete every character matched by accent_c from `text` and return the result.
-- NOTE: `text` on entry must be decomposed using decompose().
local function remove_accents(text)
	local stripped = strsub(text, accent_c, "")
	return stripped
end

-- Remove non-word-final accents, i.e. every accent character that is followed by at least one more character
-- in its word. Returns the reassembled text. NOTE: `text` on entry must be decomposed using decompose().
local function remove_non_final_accents(text)
	local words = split_but_rejoin_affixes(text)
	for i, word in ipairs(words) do
		if (i % 2) == 1 then -- an actual word, not a separator
			word = strsubrep(word, accent_c .. "(.)", "%1")
			-- Store the result in its slot; assigning to `words` would replace the list with a string and
			-- break the concat() below.
			words[i] = word
		end
	end
	return concat(words)
end

-- Remove word-final accents on monosyllabic words: a word made of non-vowels, exactly one vowel and a single
-- trailing accent character loses that accent. Returns the reassembled text.
-- NOTE: `text` on entry must be decomposed using decompose().
local function remove_final_monosyllabic_accents(text)
	local words = split_but_rejoin_affixes(text)
	for i, word in ipairs(words) do
		if (i % 2) == 1 then -- an actual word, not a separator
			word = strsub(word, "^(" .. NV .. "*" .. V .. ")" .. accent_c .. "$", "%1")
			-- Store the result in its slot; assigning to `words` would replace the list with a string and
			-- break the concat() below.
			words[i] = word
		end
	end
	return concat(words)
end

-- Tell whether every word of `term` contains at least one vowel (a match for V).
-- NOTE: `term` on entry must be decomposed using decompose().
local function all_words_have_vowels(term)
	local pieces = split_but_rejoin_affixes(term)
	-- Words sit at the odd positions; the even positions hold separators and are skipped.
	for idx = 1, #pieces, 2 do
		if not strfind(pieces[idx], V) then
			return false
		end
	end
	return true
end

-- Convert respelling conventions back to the original spelling. This does not affect accents or syllable dividers.
-- Handled conventions: (d)dz/(t)ts -> z(z), bracketed [s]/[z]/[h] -> the bare letter, [w] -> u, ʎ(i) -> gli.
-- Returns the converted string only.
local function convert_respelling_to_original(respelling)
	-- A single assignment target keeps just the first return value of the last gsub (the string).
	-- The two bracket patterns had been truncated to "%)%]" and "%"; the latter is a malformed pattern.
	respelling = respelling:gsub("ddz", "zz"):gsub("tts", "zz"):gsub("dz", "z"):gsub("ts", "z")
		:gsub("Dz", "Z"):gsub("Ts", "Z"):gsub("%[([szh])%]", "%1"):gsub("%[w%]", "u")
		:gsub("ʎi", "gli"):gsub("ʎ", "gli")
	return respelling
end


-- Given raw respelling, canonicalize it and apply auto-accenting where warranted. As currently written this:
-- (1) Lowercases and decomposes the text.
-- (2) Replaces matches of PUNTUACION with the foot boundary " | ", deletes matches of PUNTUACION_EXTRA, then
--     tidies up repeated separators and whitespace.
-- (3) Rewrites an acute after the captured character to a grave (described upstream as áíú -> àìù).
-- (4) For each word that does not end in "-" and carries no quality mark, applies the first matching entry of
--     recognized_suffixes, e.g -zione -> -tsióne.
-- (5) Tries to auto-accent words that still have no accent: words with more than two vowels get a grave on the
--     second-to-last captured vowel; shorter ones get it on their first vowel.
-- Only `text` is taken (no page name), and no error is raised for unaccented words: both err() calls are
-- commented out.
-- Returns the list produced by split_but_rejoin_affixes (alternating words and separators).
--
-- NOTE(review): several patterns below are empty strings or empty captures and look like they lost a
-- bracketed character class; likewise `words = word` and the bare `unstressed_words` test look like they lost
-- an index (`words[i]`, `unstressed_words[...]`). As written, `not unstressed_words` is false whenever
-- listToSet returned a table, so the auto-stress branch is never entered. Confirm against the live module.
local function canonicalize_and_auto_accent(text)
	text = strlower(text)
	text = decompose(text)
	text = strsubrep(text, PUNTUACION, " | ") -- turn whatever delimits fragments into IPA foot boundaries |
	text = strsubrep(text, PUNTUACION_EXTRA, "") -- drop any remaining punctuation
	text = strsubrep(text, "", " ") -- hyphens become spaces (austro-húngaro, franco-italiano) --> TO REVIEW: this is a PATCH; prefix/suffix hyphens are detected further down, but removing this line raises an error when the stress mark (ˈ) is looked up later

    text = strsubrep(text, "%s*|%s*|%s*", " | ") -- finally, collapse doubled bars and the spaces around them
    text = strsubrep(text, "%s+", " ")
	text = strstrip(text, "+")

	text = strsub(text, "()" .. AC, "%1" .. GR) -- áíú -> àìù

	local words = split_but_rejoin_affixes(text)
	for i, word in ipairs(words) do
		if (i % 2) == 1 then -- an actual word, not a separator
			local is_prefix = word:find("%-$")
			local is_suffix = word:find("^%-")

			if not is_prefix then
				if not strfind(word, quality_c) then
					-- Apply suffix respellings.
					for _, suffix_pair in ipairs(recognized_suffixes) do
						local orig, respelling = unpack(suffix_pair)
						local replaced
						word, replaced = strsubb(word, orig .. "$", respelling)
						if replaced then
							-- Decompose again because suffix replacements may have accented chars.
							word = decompose(word)
							break
						end
					end
				end

				-- Auto-stress some monosyllabic and bisyllabic words. Don't auto-stress inherently unstressed words
				-- (including those with a * at the end of them indicating syntactic gemination).
				if not unstressed_words and not strfind(word, "") then
					-- Number of characters left after deleting every non-vowel.
					local vowel_count = strlen(strsub(word, NV, ""))
					if vowel_count > 2 then
						-- err("With more than two vowels and an unrecognized suffix, stress must be explicitly given")

						-- Instead of raising an error, assume the stress falls on the second-to-last vowel
						-- (the same fallback generar-pron/es uses); the stress can still be given explicitly.
						local c1,v1,c2,v2,c3,v3,c4 = strmatch(word, "^" .. "(.*)".."(" .. V .. ")".."(.-)".."(" .. V .. ")".. "(.-)".."(" .. V .. ")".. "(.-)".."$")
						word = c1..v1..c2..v2..GR..c3..v3..c4
					elseif not is_suffix or vowel_count == 2 then -- don't try to stress suffixes with only one vowel
						local before, vow, after = strmatch(word, "^(.-)(" .. V .. ")(.*)$")
						if before then
							--if strfind(vow, "^$") then
								--err("When stressed vowel is e or o, it must be marked é/è or ó/ò to indicate quality")
							--end
							word = before .. vow .. GR .. after
						end
					end
				end
			end

			words = word
		end
	end

	return words
end


-- Convert a respelled term into its phonemic representation.
-- The text is canonicalized and auto-accented, stress and vowel-quality marks are turned into IPA symbols,
-- spelling sequences are rewritten to phonemes, the result is divided into syllables, and stress marks are
-- moved to the start of their syllable.
-- Returns a one-element list whose only element is a one-element list holding strhtml() of the final string.
--
-- NOTE(review): many patterns in this function are empty strings, empty captures "()", a bare "%" or "(?)",
-- and several table constructors/lookups have no key (`{ = "ɛ", ...}`, `words = word`, `full_affricates`).
-- They look like bracketed character classes and indices that were lost; as written the function does not
-- parse. Restore them from the live module before changing anything else here.
-- `orig_respelling` and `canon_respelling` are only referenced from commented-out code, and `words` is
-- declared twice.
local function generar_pron(text)
	local orig_respelling = text
	local words = canonicalize_and_auto_accent(text)
	text = concat(words)
	local canon_respelling = text

	text = strsub(text, CFLEX, "") -- eliminate circumflex over î, etc.
	text = strsub(text, "y", "i")
	text = strsubrep(text, "()'()", "%1‿%2") -- apostrophe between letters is a tie
	text = strsub(text, "(" .. C .. ")'$", "%1‿") -- final apostrophe after a consonant is a tie
	text = strsub(text, "(" .. C .. ")' ", "%1‿ ") -- final apostrophe in non-utterance-final word is a tie
	text = strsub(text, "'", "") -- other apostrophes just get removed
	 -- For now, use a special marker of syntactic gemination at beginning of word; later we will
	 -- convert to ‿ and remove the space.

	-- This used to detect asterisks, but for uniformity syntactic gemination is not going to be implemented
	-- (at most it will remain automatic for multi-word expressions).
	-- text = strsub(text, "%*()(" .. C .. ")", "%1⁀%2")
	-- if strfind(text, "%*") then
	--	error("* for syntactic gemination can only be used when the next word begins with a consonant: " .. canon_respelling)
	-- end

	local words = split_but_rejoin_affixes(text)
	for i, word in ipairs(words) do
		if (i % 2) == 1 then -- an actual word, not a separator
			-- Words marked with an acute or grave (quality marker) not followed by an indicator of secondary stress
			-- or non-stress, and not marked with DOTOVER (unstressed word), get primary stress.
			if not word:find(DOTOVER) then
				word = strsub(word, "(" .. quality_c .. ")()", "%1ˈ%2")
				word = strsub(word, "(" .. quality_c .. ")$", "%1ˈ")
			end
			-- Apply quality markers: è -> ɛ, ò -> ɔ
			word = strsub(word, "" .. GR, {
				 = "ɛ",
				 = "ɔ",
			})
			-- Eliminate quality markers and DOTOVER/DOTUNDER, which have served their purpose.
			word = strsub(word, "", "")

			-- LINEUNDER means secondary stress.
			word = strsub(word, LINEUNDER, "ˌ")

			-- Make prefixes unstressed. Primary stress markers become secondary.
			if word:find("%-$") then
				word = strsub(word, "ˈ", "ˌ")
			end

			words = word
		end
	end
	text = concat(words)

	-- Convert hyphens to spaces, so that hyphenated compounds are handled as separate words.
	text = strsub(text, "%-", " ")
	-- canonicalize multiple spaces again, which may have been introduced by hyphens
	text = strstrip(text)
	-- put # at word beginning and end and double ## at text/foot boundary beginning/end
	text = strsub(text, " | ", "# | #")
	text = "##" .. strsub(text, " ", "# #") .. "##"

	-- Random consonant substitutions.
	-- NOTE(review): the "%" patterns below presumably matched bracketed respelling codes such as a literal
	-- "[w]", "[x]", "[z]", "[s]", "[h]" -- confirm.
	text = strsub(text, "%", "w") -- bracketed w means /w/ when the spelling is ⟨u⟩, esp. in ⟨ui⟩ sequences. This helps with hyphenation.
	text = strsub(text, "%", TEMP_X) -- bracketed x means /x/
	text = strsub(text, "#ex(" .. V .. ")", "eg%1")
	text = text:gsub("x", "ks"):gsub("ck", "k"):gsub("sh", "ʃ")
	text = strsub(text, TEMP_X, "x")
	text = strsub(text, "%", TEMP_Z) -- bracketed z means /z/
	text = strsub(text, "%", TEMP_S) -- bracketed s means /s/
	text = strsub(text, "%", TEMP_H) -- bracketed h means /h/

	-- ci, gi + vowel
	-- Do ci, gi + e, é, è sometimes contain /j/?
	text = strsub(text,
		"()(?)i(" .. V .. ")", function(c, double, v)
			local out_cons
			if c == "c" then
				out_cons = "ʧ"
			else
				out_cons = "ʤ"
			end

			if double ~= "" then
				-- A doubled consonant must repeat the same letter (cc/gg); mixed pairs are rejected.
				if double ~= c then
					error("Invalid sequence " .. c .. double .. ".")
				end

				out_cons = out_cons .. out_cons
			end

			return out_cons .. v
		end)

	-- Handle gl and gn.
	text = strsub(text, "gn", "ɲ")
	-- The vast majority of words beginning with gli- have /ɡl/ not /ʎ/ so don't substitute here, although we special-case
	-- the standalone word "gli". Use ʎ explicitly to get it elsewhere.
	text = strsub(text, "#gli#", "ʎi")
	text = strsubrep(text, "()gli(" .. V .. ")", "%1ʎ%2")
	text = strsubrep(text, "()gl(‿?i)", "%1ʎ%2")

	-- Handle other cases of c, g.
	text = strsub(text, "()(?)(h?)(" .. charsep_c .. "*.)", function(first, double, h, after)
		-- Don't allow the combinations cg, gc. Or do something else?
		if double ~= "" and double ~= first then
			error("Invalid sequence " .. first .. double .. ".")
		end

		-- c, g is soft before e, i.
		local cons
		if strfind(after, front_c) and not strfind(h, "h") then
			if first == "c" then
				cons = "ʧ"
			else
				cons = "ʤ"
			end
		else
			if first == "c" then
				cons = "k"
			else
				cons = "g"
			end
		end

		if double ~= "" then
			cons = cons .. cons
		end

		return cons .. after
	end)

	-- sc before e, i is /ʃ/, doubled after a vowel.
	text = text:gsub("sʧ", "ʃ")

	-- NOTE(review): presumably bracketed respelling codes for ʧ and ʤ -- confirm.
	text = strsub(text, "%", "ʧ")
	text = strsub(text, "%", "ʤ")

	if strfind(text, "z") then
		-- error("z must be respelled (d)dz or (t)ts: " .. canon_respelling) -- instead of raising an error, assume the most likely pronunciation
		-- Rule used: a z between two vowels is mapped to /ts/; otherwise it is mapped to /dz/.
		text = strsub(text, "("..V..")".."("..SEPARADORES_SILABICOS..")".."zz?".."("..SEPARADORES_SILABICOS..")".."("..V..")", "%1%2".."ts".."%3%4")
		text = strsub(text, "("..V..")".."("..SEPARADORES_SILABICOS..")".."zz?".."("..V..")", "%1%2".."ts".."%3")
		text = strsub(text, "("..V..")".."zz?".."("..SEPARADORES_SILABICOS..")".."("..V..")", "%1".."ts".."%2%3")
		text = strsub(text, "("..V..")".."zz?".."("..V..")", "%1".."ts".."%2")
		text = strsub(text, "()".."("..SEPARADORES_SILABICOS..")".."zz?", "%1%2dz")
		text = strsub(text, "()zz?", "%1dz")
	end

	-- Collapse the two-letter respellings into single-character affricates (longest sequences first).
	text = strsub(text, "ddz", "ʣʣ")
	text = strsub(text, "dz", "ʣ")
	text = strsub(text, "tts", "ʦʦ")
	text = strsub(text, "ts", "ʦ")

	-- ⟨qu⟩ represents /kw/.
	text = text:gsub("qu", "kw")
	-- ⟨gu⟩ (unstressed) + vowel represents /gw/.
	text = text:gsub("gu(" .. V .. ")", "gw%1")
	text = strsub(text, "q", "k") -- any remaining q is /k/

	-- Assimilate n before labial, including across word boundaries; DiPI marks pronunciations like
	-- /ʤanˈpaolo/ as wrong. To prevent this, use _ or h between n and following labial.
	text = strsub(text, "n(" .. wordsep_c .. "*)", "m%1")

	-- Remove 'h' before converting vowels to glides; h should not block e.g. ahimè -> aj.mɛ.
	text = text:gsub("h", "")

	-- Unaccented u or i following a non-high vowel (with or without accent) is a semivowel. Exclude high vowels because
	-- 'iu' should be interpreted as /ju/ not /iw/, and 'ii' and ''uu'' should
	-- remain as vowels. We handle ui specially. By preceding the conversion of glides before vowels, this works
	-- correctly in the common sequence 'aiuo'. Note that ci, gi + vowel, gli, qu
	-- must be dealt with beforehand.
	text = strsubrep(text, "(" .. V_NOT_HIGH .. accent_c .. "*)()()", function(v, gl, acc)
		return v .. (gl == "i" and "j" or "w") .. acc
	end)
	text = strsubrep(text, "(u" .. accent_c .. "*)i()", "%1j%2")

	-- Unaccented i or u before another vowel is a glide. Separate into i and u cases to avoid converting ii or uu
	-- except in the sequences iiV or uuV. Do i first so oriuolo -> or.jwɔ.lo.
	text = strsub(text, "i(" .. V_NOT_I .. ")", "j%1")
	text = strsub(text, "u(" .. V_NOT_U .. ")", "w%1")

	-- Double consonant followed by end of word, or followed by a consonant
	-- other than a glide or liquid, should be reduced to single. Should not affect double
	-- consonants between vowels or before glides or liquids,
	-- or words before a tie.
	text = strsubrep(text, "(" .. C .. ")%1(" .. charsep_not_tie_c .. "*" .. C_OR_EOW_NOT_GLIDE_LIQUID .. ")", "%1%2")

	-- Between vowels (including glides), /ʃ ʎ ɲ t͡s d͡z/ are doubled (unless already doubled).
	-- Not simply after a vowel; 'z' is not doubled word-initially.
	text = strsubrep(text, "(" .. VW .. stress_c .. "?" .. charsep_c .. "*)()(" .. charsep_c .. "*" .. VW .. ")",
		"%1%2%2%3")

	-- Change user-specified . into SYLDIV so we don't shuffle it around when dividing into syllables.
	text = strsub(text, "%.", SYLDIV)

	-- Divide into syllables.
	-- First remove '_', which has served its purpose of preventing context-dependent changes.
	-- It should not interfere with syllabification.
	text = text:gsub("_", "")
	-- Also now convert ⁀ into a copy of the following consonant with the preceding space converted to ⁀
	-- (which we will eventually convert to a tie symbol ‿, but for awhile we need to distinguish the two
	-- because automatic syllabic gemination in final-stress words happens only in multisyllabic words,
	-- and we don't want it to happen in monosyllabic words joined to a previous word by ⁀). We want to do
	-- this after all consonants have been converted to IPA (so the correct consonant is geminated)
	-- but before syllabification, since e.g. 'va* bène' should be treated as a single word 'va⁀b.bɛne' for
	-- syllabification.
	text = strsub(text, "# #⁀(‿?)(.)", "⁀%2%2")
	-- Divide before the last consonant (possibly followed by a glide). We then move the syllable division marker
	-- leftwards over clusters that can form onsets.
	text = strsubrep(text, "(" .. V .. accent_c .. "*?" .. C_OR_TIE .. "-)(" .. C .. W_OR_TIE .. "*" .. V .. ")", "%1.%2")
	-- The previous regex divided VjjV as V.jjV but we want Vj.jV; same for VwwV. Correct this now.
	text = strsubrep(text, "(" .. V .. accent_c .. "*?)%.(" .. W .. ")(?)(%2?" .. V .. ")", "%1%2.%3%4")
	-- Existing hyphenations all divide as .tl,
	-- and none divide as t.l. No examples of -dl- but it should be the same per
	-- http://www.italianlanguageguide.com/pronunciation/syllabication.asp.
	text = strsub(text, "(?)%.()", ".%1%2")
	-- Italian appears to divide sCV as .sCV e.g. pé.sca, and similarly for sCh, sCl, sCr. Exceptions are
	-- ss, sr, sz and possibly others.
	text = strsub(text, "(s?)%.(" .. C_NOT_SIBILANT_OR_R .. ")", ".%1%2")
	-- Several existing hyphenations divide .pn and .ps and Olivetti agrees. We do this after moving across s so that
	-- dispnea is divided dis.pnea. Olivetti has tec.no.lo.gì.a, showing that cn divides as c.n, and
	-- clàc.son, fuc.sì.na, ric.siò, showing that cs divides as c.s.
	text = strsub(text, "(p?)%.()", ".%1%2")
	-- Separate two adjacent vowels (the first optionally followed by accent marks) into different syllables.
	text = strsubrep(text, "(" .. V .. accent_c .. "*?)(" .. V .. ")", "%1.%2")

	-- User-specified syllable divider should now be treated like regular one.
	text = strsub(text, SYLDIV, ".")
	text = strsub(text, TEMP_H, "h")

	-- Do the following after syllabification so we can distinguish written s from z, e.g. u.sbè.co but uz.bè.co per Olivetti.
	-- Single ⟨s⟩ between vowels is /z/.
	text = strsubrep(text, "(" .. VW .. stress_c .. "?" .. charsep_c .. "*)s(" .. charsep_c .. "*" .. VW .. ")", "%1z%2")
	-- ⟨s⟩ immediately before a voiced consonant is always /z/
	text = strsub(text, "s(" .. charsep_c .. "*" .. voiced_C_c .. ")", "z%1")
	text = strsub(text, TEMP_Z, "z")
	text = strsub(text, TEMP_S, "s")

	-- French/German vowels
	text = strsub(text, "ü", "y")
	text = strsub(text, "ö", "ø")
	text = strsub(text, "g", "ɡ") -- U+0261 LATIN SMALL LETTER SCRIPT G

	-- These two flags are only referenced from the commented-out gemination code further down.
	local last_word_self_gemination = strfind(text, "" .. stress_c .."*##$") and not
		-- In case the user used t͡ʃ explicitly
		strfind(text, "t͡ʃ" .. stress_c .."*##$")
	local first_word_self_gemination = strfind(text, "^##" .. stress_c .. "*")
	-- Expand single-character affricates to their tied form; for a doubled affricate only the first character
	-- of the tied form is kept before the divider.
	text = strsub(text, "()(" .. charsep_c .. "*%.?)(*)", function(affricate1, divider, affricate2)
		local full_affricate = full_affricates

		if affricate2 ~= "" then
			return substr(full_affricate, 1, 1) .. divider .. full_affricate
		end

		return full_affricate .. divider
	end)


	-- Gemination support (disabled for now; how to implement it will be decided later).

	-- local last_word_ends_in_primary_stressed_vowel = strfind(text, "ˈ##$")
	-- Last word is multisyllabic if it has a syllable marker in it. This should not happen across word boundaries
	-- (spaces) including ⁀, marking where two words were joined by syntactic gemination.
	-- local last_word_is_multisyllabic = strfind(text, "%.*$")
	-- local auto_cogemination = last_word_ends_in_primary_stressed_vowel and last_word_is_multisyllabic
	-- local last_word_ends_in_vowel = strfind(text, V .. stress_c .. "*" .. "##$")
	-- local last_word_ends_in_consonant = strfind(text, C .. "##$")
	-- local auto_final_self_gemination = last_word_self_gemination
	-- local auto_initial_self_gemination = first_word_self_gemination
	-- Now that ⁀ has served its purpose, convert to a regular tie ‿.
	text = strsub(text, "⁀", "‿")

	-- Stress marks.
	-- Move IPA stress marks to the beginning of the syllable.
	text = strsubrep(text, "()(*)(" .. stress_c .. ")", "%1%3%2")
	-- Suppress syllable mark before IPA stress indicator.
	text = strsub(text, "%.(" .. stress_c .. ")", "%1")
	-- Make all primary stresses but the last one in a given word be secondary. May be fed by the first rule above.
	text = strsubrep(text, "ˈ(+)ˈ", "ˌ%1ˈ")

	-- Remove # symbols at word/text boundaries and recompose.
	text = strsub(text, "#", "")
	text = strnfc(text)

	return {{strhtml(text)}}
end


-- Return the number of syllables of a phonemic representation, which should have syllable dividers in it but no
-- hyphens. The count is the number of "." dividers (after turning word boundaries and word-internal stress
-- marks into dividers) plus one.
local function get_num_syl_from_phonemic(phonemic)
	-- Maybe we should just count vowels instead of the below code.
	phonemic = strsub(phonemic, "|", " ") -- remove IPA foot boundaries
	local words
	if not phonemic:find(" ") then
		words = {phonemic}
	else
		-- The capture makes the split keep the runs of spaces, so words sit at odd indices.
		words = strsplit(phonemic, "( +)")
	end
	for i, word in ipairs(words) do
		if (i % 2) == 1 then -- an actual word, not a separator
			-- IPA stress marks are syllable divisions if between characters; otherwise just remove.
			-- (The stress class had been lost from both patterns, leaving "(.)(.)" and an empty pattern.)
			word = strsub(word, "(.)[ˈˌ](.)", "%1.%2")
			word = strsub(word, "[ˈˌ]", "")
			-- Store into the list slot; assigning to `words` would replace the list and break concat().
			words[i] = word
		else
			-- Convert spaces and word-separating hyphens into syllable divisions.
			words[i] = "."
		end
	end
	phonemic = concat(words)
	-- Delete everything except the dividers and count what is left.
	return strlen(strsub(phonemic, "[^.]", "")) + 1
end


-- Syllabify a single word based on its spelling. The text should have extraneous characters (e.g. initial or final *) removed.
local function syllabify_word_from_spelling(text)
	-- NOTE: In all of the following, we have to be careful to allow for apostrophes between letters and for capital
	-- letters in the middle of words, as in ], ], ], ],
	-- ], etc.
	local TEMP_I = u(0xFFF2)
	local TEMP_I_CAPS = u(0xFFF3)
	local TEMP_U = u(0xFFF4)
	local TEMP_U_CAPS = u(0xFFF5)
	local TEMP_Y = u(0xFFF6)
	local TEMP_Y_CAPS = u(0xFFF7)
	local TEMP_G = u(0xFFF8)
	local TEMP_G_CAPS = u(0xFFF9)
	-- Change user-specified . into SYLDIV so we don't shuffle it around when dividing into syllables.
	text = text:gsub("%.", SYLDIV)
	-- We propagate underscore this far specifically so we can distinguish g_n (]) from gn.
	-- g_n should end up as g.n but gn should end up as .gn.
	local g_to_temp_g = { = TEMP_G,  = TEMP_G_CAPS}
	text = strsub(text, "()('?)_('?)", function (g, sep, n) return g_to_temp_g .. sep .. n end)
	-- Now remove underscores before any further processing.
	text = text:gsub("_", "")
	-- i, u, y between vowels -> consonant-like substitutions:
	-- With i: ], ], ], ], etc.
	-- With u: ], ], ], ], ], ], etc.
	-- With y: ], ], ], ], ], etc. ] needs special
	-- handling.
	-- Also with h, as in ], ], etc.
	-- With h not dividing diphthongs: ], ], ], ], ], ], etc.
	-- But in the common sequence -Ciuo- (], ], ], ], ],
	-- ], ], ], etc.), both i and u are glides. In the sequence -quiV-
	-- (], ], etc.), both u and i are glides, and probably also in -guiV-, but not in other -CuiV-
	-- sequences such as ], ], ], ], ], etc.). Special cases are
	-- French-origin words like ], ], ]; it's unlikely we can handle these
	-- correctly automatically.
	--
	-- We handle these cases as follows:
	-- 1. q+TEMP_U etc. replace sequences of qu and gu with consonant-type codes. This allows us to distinguish
	--    -quiV-/-guiV- from other -CuiV-.
	-- 2. We convert i in -ViV- sequences to consonant-type TEMP_I, but similarly for u in -VuV- sequences only if the
	--    first V isn't i, so -CiuV- remains with two vowels. The syllabification algorithm below will not divide iu
	--    or uV unless in each case the first vowel is stressed, so -CiuV- remains in a single syllable.
	-- 3. As soon as we convert i to TEMP_I, we undo the u -> TEMP_U change for -quiV-/-guiV-, before u -> TEMP_U in
	--    -VuV- sequences.
	local u_to_temp_u = { = TEMP_U,  = TEMP_U_CAPS}
	text = strsub(text, "()()('?" .. V .. ")", function(qg, u, v) return qg .. u_to_temp_u .. v end)
	local i_to_temp_i = { = TEMP_I,  = TEMP_I_CAPS,  = TEMP_Y,  = TEMP_Y_CAPS}
	text = strsubrep(text, "(" .. V .. accent_c .. "*?)()(" .. V .. ")",
			function(v1, iy, v2) return v1 .. i_to_temp_i .. v2 end)
	text = text:gsub(TEMP_U, "u")
	text = text:gsub(TEMP_U_CAPS, "U")
	text = strsubrep(text, "(" .. V_NOT_I .. accent_c .. "*?)()(" .. V .. ")",
			function(v1, u, v2) return v1 .. u_to_temp_u .. v2 end)
	-- Divide VCV as V.CV; but don't divide if C == h, e.g. ] should be ahi.mè.
	text = strsubrep(text, "(" .. V .. accent_c .. "*'?)(" .. C_NOT_H .. "'?" .. V .. ")", "%1.%2")
	text = strsubrep(text, "(" .. V .. accent_c .. "*'?" .. C .. C_OR_TIE .. "*)(" .. C .. "'?" .. V .. ")", "%1.%2")
	-- Examples in Olivetti like ], ], ], ], ], ]
	-- divide as .Ch. Exceptions are ], ], ], ] but the latter
	-- three seem questionable as the pronunciation puts the first consonant in the following syllable and makes the h
	-- silent.
	text = strsub(text, "(" .. C_NOT_H .. "'?)%.()", ".%1%2")
	-- gn represents a single sound so it should not be divided.
	text = strsub(text, "()%.()", ".%1%2")
	-- Existing hyphenations of ], ], ], ] all divide as .tl,
	-- and none divide as t.l. No examples of -dl- but it should be the same per
	-- http://www.italianlanguageguide.com/pronunciation/syllabication.asp.
	text = strsub(text, "('?)%.()", ".%1%2")
	-- Italian appears to divide sCV as .sCV e.g. pé.sca for ], and similarly for sCh, sCl, sCr. Exceptions are
	-- ss, sr, sz and possibly others. We are careful not to move across s in ], ], etc.
	text = strsub(text, "()('?)%.(" .. C_NOT_SRZ .. ")", "%1.%2%3")
	-- Several existing hyphenations divide .pn and .ps and Olivetti agrees. We do this after moving across s so that
	-- dispnea is divided dis.pnea. We are careful not to move across p in ]. Olivetti has tec.no.lo.gì.a for
	-- ], showing that cn divides as c.n, and clàc.son, fuc.sì.na, ric.siò for ], ],
	-- ], showing that cs divides as c.s.
	text = strsub(text, "()('?)%.()", "%1.%2%3")
	-- Any aeoö, or stressed iuüy, should be syllabically divided from a following aeoö or stressed iuüy.
	-- A stressed vowel might be followed by another accent such as LINEUNDER (which we put after the acute/grave in
	-- decompose()).
	text = strsubrep(text, "(" .. accent_c .. "*'?)(?'?)", "%1.%2")
	text = strsubrep(text, "(" .. accent_c .. "*'?)(?'?" .. V .. quality_c .. ")", "%1.%2")
	text = strsub(text, "(" .. quality_c .. accent_c .. "*'?)(?'?)", "%1.%2")
	text = strsubrep(text, "(" .. quality_c .. accent_c .. "*'?)(?'?" .. V .. quality_c .. ")", "%1.%2")
	-- We divide ii as i.i (]), but not iy or yi, which should hopefully cause ] to be handled
	-- correctly as ke.fiy.yah. Only example with Cyi is ], which may be exceptional.
	text = strsubrep(text, "(" .. accent_c .. "*'?)(?'?)", "%1.%2")
	text = strsubrep(text, "(" .. accent_c .. "*'?)(?'?)", "%1.%2")
	
	text = text:gsub("%.", sepsil)
	text = text:gsub(SYLDIV, sepsil)
	text = text:gsub(TEMP_I, "i")
	text = text:gsub(TEMP_I_CAPS, "I")
	text = text:gsub(TEMP_U, "u")
	text = text:gsub(TEMP_U_CAPS, "U")
	text = text:gsub(TEMP_Y, "y")
	text = text:gsub(TEMP_Y_CAPS, "Y")
	text = text:gsub(TEMP_G, "g")
	text = text:gsub(TEMP_G_CAPS, "G")
	return text
end


-- Syllabify text based on its spelling. The text should have extraneous characters (e.g. initial *) removed.
--
-- @param text  spelling to syllabify; it is passed through decompose() first
-- @return the text with syllable and word boundaries rendered as `sepsil`
local function syllabify_from_spelling(text)
	text = decompose(text)
	-- Convert spaces and word-separating hyphens into syllable divisions.
	-- The list alternates: odd slots are words, even slots are the separators between them.
	local words = split_but_rejoin_affixes(text)
	for i, word in ipairs(words) do
		-- Write back into slot i. Assigning to `words` itself would replace the whole list with a string
		-- and make the concat() below fail.
		if (i % 2) == 0 then -- a separator
			words[i] = WORDDIV
		else
			words[i] = syllabify_word_from_spelling(word)
		end
	end
	text = concat(words)

	-- Convert word divisions into periods, but first into spaces so we can call remove_secondary_stress().
	-- We have to call remove_secondary_stress() after syllabification so we correctly syllabify words like
	-- bìobibliografìa.
	text = text:gsub(WORDDIV, " ")
	text = remove_secondary_stress(text)
	text = text:gsub(" ", sepsil)
	return text
end

-- Normalize the canon_respelling field of the structure produced by show_IPA_full() so that the result can
-- (a) be fed to syllabify_from_spelling() to obtain the syllabification used for the hyphenation output, and
-- (b) be processed further to decide, by comparison with the page name, whether hyphenation should be generated
-- at all. NOTE: the caller must already have run canon_respelling through decompose().
--
-- @param respelling  decomposed canonical respelling
-- @return the respelling converted back to the original spelling conventions
local function normalize_for_syllabification(respelling)
	-- Blank out, in this order: IPA foot boundaries (|), a leading hyphen (suffix) and a trailing hyphen (prefix).
	-- Each chained call only sees the string result of the previous gsub; the final count is dropped by the
	-- single-variable assignment.
	local cleaned = respelling:gsub("|", " "):gsub("^%-", " "):gsub("%-$", " ")
	cleaned = strstrip(cleaned)
	-- Map the respelling conventions back onto the original spelling.
	local original = convert_respelling_to_original(cleaned)
	return original
end


-- Decide whether the output of normalize_for_syllabification() corresponds to the page name. When it does,
-- hyphenation output is auto-generated from the respelling.
--
-- @param text      normalized respelling
-- @param pagename  title of the page
-- @return true when one of the progressively looser comparisons below succeeds, false otherwise
local function spelling_normalized_for_syllabification_matches_pagename(text, pagename)
	local candidate = decompose(text)
	local target = decompose(pagename)
	candidate = remove_secondary_stress(candidate)
	candidate = candidate:gsub("_", "")
	if candidate ~= target then
		-- Retry without periods; this covers page names that themselves carry a non-final accent.
		candidate = candidate:gsub("%.", "")
		if candidate ~= target then
			candidate = remove_non_final_accents(candidate)
			-- A monosyllabic word in the page name may or may not be written with an accent, so additionally try
			-- the pronunciation with final monosyllabic accents removed. The page name is deliberately left
			-- untouched: stripping both sides would let pronunciation rè match page name ré, or vice versa.
			return candidate == target or remove_final_monosyllabic_accents(candidate) == target
		end
	end
	return true
end


-- Build the list of hyphenation objects from the structure returned by show_IPA_full(). The list is meant to be
-- used as the `hyphs` field of the data object handed to format_hyphenation().
--
-- @param ipa_full_output  structure with a `terms` list; each term may carry `raw`, `canon_respelling` and
--                         `qualifiers`
-- @param pagename         page title the respellings are compared against
-- @return list of tables with fields `syllabification`, `hyph` and `qualifiers`
local function generate_hyphenation_from_phonemic_output(ipa_full_output, pagename)
	local results = {}
	for _, term in ipairs(ipa_full_output.terms) do
		local spelling
		-- Raw pronunciations never get a hyphenation; otherwise every word needs a vowel and the normalized
		-- respelling has to match the page name.
		local hyphenable = false
		if not term.raw then
			spelling = normalize_for_syllabification(term.canon_respelling)
			hyphenable = all_words_have_vowels(spelling)
				and spelling_normalized_for_syllabification_matches_pagename(spelling, pagename)
		end
		if hyphenable then
			local syll = syllabify_from_spelling(spelling)
			-- Skip syllabifications that were already recorded for an earlier term.
			local duplicate = false
			for k = 1, #results do
				if results[k].syllabification == syll then
					duplicate = true
					break
				end
			end
			if not duplicate then
				-- Pronunciation qualifiers accompany a hyphenation only when more than one pronunciation was
				-- given, and only on newly added hyphenations. See generate_rhymes_from_phonemic_output().
				local quals = nil
				if #ipa_full_output.terms > 1 and term.qualifiers and #term.qualifiers > 0 then
					quals = term.qualifiers
				end
				local parts = strsplit(syll, "%.")
				insert(results, {syllabification = syll, hyph = parts, qualifiers = quals})
			end
		end
	end
	return results
end

-- Determine the accentuation type of a word from its syllabified pronunciation.
--
-- @param w  pronunciation string carrying the stress marks; any non-string value yields nil
-- @return accentuation label ("doble", "monosílaba", "aguda", "llana", "esdrújula" or "sobreesdrújula") followed
--         by the syllable count; nil when `w` is not a string
-- Raises an error when no syllable contains `primary_stress`.
local function determinar_acentuacion(w)
	if type(w) ~= "string" then
		return nil
	end
	local silabas = {}
	-- NOTE(review): this pattern looks truncated (a character class seems to be missing before "+");
	-- confirm against the upstream module.
	for s in strmatchit(w, "+") do
		insert(silabas, s)
	end
	local L = #silabas
	-- Words of four or more syllables ending in "men" + "te" are reported as "doble". The comparison has to
	-- look at the last two entries of the list: comparing the list itself with a string is always false.
	if L >= 4 and silabas[L - 1] == "men" and silabas[L] == "te" then
		return "doble", L
	elseif L == 1 then
		return "monosílaba", L
	else
		-- Walk the syllables in order; i is the 1-based position of the current one.
		-- NOTE(review): the tail of this pattern looks truncated as well; confirm against upstream.
		local i = 1
		for silaba in strmatchit(w, SEPARADORES_SILABICOS..'*'.."+") do
			if strfind(silaba, primary_stress) then
				-- idx counts how many syllables follow the stressed one.
				local idx = L - i
				if idx == 0 then
					return "aguda", L
				elseif idx == 1 then
					return "llana", L
				elseif idx == 2 then
					return "esdrújula", L
				else
					return "sobreesdrújula", L
				end
			end
			i = i + 1
		end
		error("Se esperaba que la pronunciación de la palabra hubiera sido generada con las marcas de acentuación")
	end
end


-- External entry point: receives the page title and the template arguments, and returns `args`.
-- NOTE(review): several statements below test or assign bare `args`, `rims`, `ls` and `ac` where an indexed
-- field would be expected (e.g. `args = titulo`, `#args < 1 and #args < 1`, `rims = true`). The subscripts
-- appear to have been lost from this copy of the module -- confirm against the upstream source.
function export.procesar_pron_args(titulo, args)	
	-- Fall back to the page title when nothing was supplied.
	if #args < 1 then
		args = titulo
	end

	-- Pronunciations are only generated when none were supplied.
	-- NOTE(review): both operands of this condition are identical as written; presumably two different fields.
	if #args < 1 and #args < 1 then
		-- Single-character titles in the ranges a-z / A-Z take their value from pron_abc (defined outside this
		-- chunk).
		if #titulo == 1 then
			if titulo >= "a" and titulo <= "z" then
				args = pron_abc
				args = args
			elseif titulo >= "A" and titulo <= "Z" then
				args = pron_abc
				args = args
			end
		end
		local rims = {}
		local A = #args
		local j = 1 -- index of the help entry
		local k = 1 -- number of pronunciations inserted (at most 9)
		while k <= 9 and j <= A do
			local fono = generar_pron(args)
			
			-- Keep what follows the last primary_stress mark, then drop everything before the first match of V.
			-- Presumably this is the rhyme of the pronunciation -- TODO confirm.
			local rim = fono
			rim = strsub(rim, "^.*"..primary_stress.."(.-)$", "%1")
			rim = strsub(rim, ".-".."("..V..".*"..")".."$", "%1")
			rims = true
			
			-- One insertion per element of fono, stopping once the limit of 9 is exceeded.
			for i,_ in ipairs(fono) do
				insert(args, fono)
				k = k + 1
				if k > 9 then
					break
				end
			end
			j = j + 1
		end	
		local tiene_espacios = strfind(titulo, " ")

		-- Syllabification and accentuation are only computed for titles without spaces.
		if not tiene_espacios then
			if not args then
				args = syllabify_from_spelling(normalize_for_syllabification(args))
			end
			
			-- ls and ac are iterated with pairs() below, so they look like sets of distinct syllable counts and
			-- accentuation labels -- NOTE(review): the keyed assignments seem to be missing here.
			local ac, ls = {}, {}
			for _,f in ipairs(args) do
				local ace, lon = determinar_acentuacion(f)
				ls = true
				ac = true
			end
			for lon,_ in pairs(ls) do
				insert(args, lon)
			end
			for ace,_ in pairs(ac) do
				insert(args, ace)
			end
		end	
	end

	return args

end

return export
Módulo:generar-pron/it

Separar Módulo:generar-pron/it en sílabas

Listado de errores ortográficos de Módulo:generar-pron/it

Seguidamente te presentamos una lista con los errores ortográficos más usuales, con el fin de que los tomes en consideración y sepas cómo no cometerlos. Sin más preámbulos, aquí tienes el listado de errores ortográficos de Módulo:generar-pron/it

Enciclo

Wikious

Sapientia

Scientia

Boobota

Anandapedia

Sagapedia

Wikithot