Hola, siéntete bienvenido o bienvenida, lo más probable es que hayas llegado hasta esta página web tratando de hallar la definición de Módulo:generar-pron/pt. En esta web no solo te será posible hallar el total de las acepciones reconocidas por el diccionario para la palabra Módulo:generar-pron/pt, sino que además te explicaremos su etimología, sus peculiaridades y podrás saber el modo en que se dice la palabra Módulo:generar-pron/pt en singular y en plural. Todo aquello que es necesario conocer referente a la palabra Módulo:generar-pron/pt aquí lo tienes. La definición de la palabra Módulo:generar-pron/pt te ayudará a ser más concreto e idóneo en el momento de conversar o formular tus textos. Saber la definición de Módulo:generar-pron/pt, conjuntamente con las definiciones de otros términos, enriquece nuestro léxico y nos suministra mayores y mejores recursos lingüísticos.
La documentación para este módulo puede ser creada en Módulo:generar-pron/pt/doc
--[=[
Author: Benwing
Introducido en es.wikt por Tmagc
Issues concerning South Brazil pronunciation:
(Rather than a unified South Brazil pronunciation, it would be better to turn it into a Rio Grande do Sul/Gaúcho pronunciation.)
1. Should all 'ẽ' (not just word-final) be rendered as ? We have several existing examples, e.g.
/de.zẽ.ba.ˈla.do/|/de.zẽj̃.ba.ˈla.do/ for ], /ˌde.zẽj̃.has.ˈkɐ̃.so/ for ],
/ẽj̃.baw.sa.ˈma(ɻ)/ for ], for ], /ẽ.ʁus.tiɾ/| for
].
2. Coda 'r': Mostly /ɻ/ but sometimes given as both /ɾ/ and /ɻ/ or occasionally /r/.
3. Raising of unstressed 'e' and 'o', and 'des-': Mostly not present but sometimes yes, e.g. ]
/kla.ɾe.ˈa(ɾ)/|/kla.ɾe.ˈa(ɻ)/|/kla.ɾi.ˈa(ɾ)/|/kla.ɾi.ˈa(ɻ)/|/kla.ˈɾja(ɾ)/|/kla.ˈɾja(ɻ)/, ]
, ] /ko.ˈzi.dʊ/|/ku.ˈzi.dʊ/, ] /dez.blo.ke.ˈa(ɾ)/|/dez.blo.ˈkja(ɾ)/|/d͡ʒiz-/|/-(ɻ)/,
] /des.fe.ˈɾi(ɾ)/|/des.fe.ˈɾi(ɻ)/|/des.fi.ˈɾi(ɾ)/|/des.fi.ˈɾi(ɻ)/, ] /dez.hes.pej.ˈta(ɾ)/|/d͡ʒiz-/|/-(ɻ)/,
] /des.tɾu.ˈi.do/|/d͡ʒis.tɾu.ˈi.do/.
4. Epenthetic 'i': Less common? E.g. ] given as just /ˌd͡ʒiɡ.ni.ˈda.de/, but ] as /ˈd͡ʒiɡ.no/|/ˈd͡ʒi.ɡi.no/.
5. Secondary stress: Often given two syllables before the stress. I have left it out unless it seems stable and in a prefix, e.g.
] /e.ˌlej.to.ˈɾa.do/, ] /e.ˌle.tɾi.ˈzɐ̃.te/, ] /ˌe.le.ˈva.do/, ] /e.ˌle.va.ˈdoɻ/,
] /ˌẽ.ɡa.ˌha.fa.ˈmẽ.to/.
6. Initial em-: Mostly given as /ẽ-/ or /ẽj̃-/, but sometimes /ĩ-/ as in ] /ĩ.pa.ɾe.ˈda(ɾ)/|/ĩ.pa.ɾe.ˈda(ɻ)/
or both as in ] /ẽ.kaj.ˈʃa(ɾ)/|/ĩ.kaj.ˈʃa(ɻ)/|/ẽ.kaj.ˈʃa(ɾ)/|/ĩ.kaj.ˈʃa(ɻ)/.
7. -nh-: ] given as , but is /j̃/ actually characteristic of this accent or is it /ɲ/?
8. Other inconsistencies: e.g. ] /ẽ.ʁus.tiɾ/|, with coda /ʃ/ and strong (usually ).
] /ab.ʁoˈɡa(ɻ)/|/ab.hoˈɡa(ɻ)/|/ab.χoˈɡa(ɻ)/|/ab.ɦoˈɡa(ɻ)/ with all possible strong r's.
9. Nasal vowels: Almost always as elsewhere, but occasionally e.g. ] /ĩn.tẽnˈde(r)/, ] /a.ˈʒen.te/|/a.ˈʒẽ.te/.
10. Nasal diphthongs: -ão sometimes claims to be /ɐ̃õ/, sometimes /ɐ̃w̃/. ] listed both.
]=]
local export = {}
local unpack = unpack or table.unpack
local insert = table.insert
local concat = table.concat
local sort = table.sort
local m_table = require("Módulo:tabla")
local m_str = require("Módulo:String")
local u = m_str.char
local strfind = m_str.find
local strsubn = m_str.gsub
local strsubb = m_str.gsubb
local strsubrep = m_str.gsub_rep
local strmatch = m_str.match
local strmatchit = m_str.gmatch
local strsplit = m_str.split
local strstrip = m_str.strip
local strexplode = m_str.explode_utf8
local strlower = m_str.lower
local strlen = m_str.len
local substr = m_str.sub
local strnfd = m_str.toNFD
local strnfc = m_str.toNFC
local strhtml = m_str.encode_html
-- Single-result variant of strsubn(): performs the same substitution but hands back only the first return value,
-- dropping any further ones.
local function strsub(term, foo, bar)
	-- The extra parentheses truncate the call to exactly one value.
	return (strsubn(term, foo, bar))
end
-- Punctuation characters, presumably meant to be used as a pattern character set.
-- NOTE(review): both literals end in "]" with no matching "[", so the opening of the set (and possibly some of its
-- first members) appears to be missing in this copy -- TODO confirm against the canonical module text.
local PUNTUACION = "%{%}¡!¿?.,;:–—]"
-- Same characters as PUNTUACION followed by straight/curly quotation marks, guillemets, apostrophe and acute accent.
local PUNTUACION_EXTRA = "%{%}¡!¿?.,;:–—\"“”„‟‘’«»»«‹››‹'´]"
-- Diacritics, each produced from its code point with u().
local AC = u(0x0301) -- U+0301 (acute)
local GR = u(0x0300) -- U+0300 (grave) = open vowel quality without stress in Portugal only
local MACRON = u(0x0304) -- U+0304 (macron) = closed vowel quality without stress in Portugal only
local CFLEX = u(0x0302) -- U+0302 (circumflex)
local TILDE = u(0x0303) -- U+0303 (tilde)
local DIA = u(0x0308) -- U+0308 (diaeresis)
local CEDILLA = u(0x0327) -- U+0327 (cedilla)
local DOTOVER = u(0x0307) -- U+0307 (dot over)
-- DOTUNDER indicates an explicitly unstressed syllable; useful when accompanied by a quality marker (acute or
-- circumflex), or by itself with a/e/o, where it defaults to acute (except in the following circumstances, where it
-- defaults to circumflex: (1) in the diphthongs ei/eu/oi/ou; (2) in a nasal vowel).
local DOTUNDER = u(0x0323) -- U+0323 (dot under)
-- LINEUNDER indicates an explicit secondary stress; normally not necessary as primary stress is converted to secondary
-- stress if another primary stress follows, but can be used e.g. after a primary stress; can be accompanied by a
-- quality marker (acute or circumflex) with a/e/o; if not, defaults to acute (except in the same circumstances where
-- dot under defaults to circumflex).
local LINEUNDER = u(0x0331) -- U+0331 (line under)
-- TEMP1 is a scratch marker with several temporary uses: it marks where a syllable division should not happen,
-- temporarily substitutes for comma+space, and temporarily substitutes for #.
local TEMP1 = u(0xFFF0)
local SYLDIV = u(0xFFF1) -- used to represent a user-specified syllable divider (.) so we won't change it
local PSEUDOCONS = u(0xFFF2) -- pseudo-consonant at the edge of prefixes ending in a vowel and suffixes beginning with a vowel
local PREFIX_MARKER = u(0xFFF3) -- marker indicating a prefix so we can convert primary to secondary accents
-- IPA stress marks.
local primary_stress = "ˈ"
local secondary_stress = "ˌ"
-- Both IPA stress marks together. Note that `stress` and `stress_c` are declared a second time further down in this
-- file with different contents; the value concatenated into `separadores_silabicos` below is this first one.
local stress = "ˈˌ"
-- NOTE(review): empty in this copy; going by the `_c` naming used throughout the file this was presumably a bracketed
-- character set built from `stress` -- TODO confirm against the canonical module text.
local stress_c = ""
-- Character used as the syllable separator ("sepsil").
local sepsil = "-"
-- Members of the syllable-separator set: an escaped literal dot, `sepsil`, SYLDIV and the two IPA stress marks.
local separadores_silabicos = "%."..sepsil..SYLDIV..stress
-- NOTE(review): empty in this copy; presumably the bracketed character set built from `separadores_silabicos` --
-- TODO confirm.
local SEPARADORES_SILABICOS = ""
-- Since we convert all symbols at the beginning and decompose accented characters (except for ç and ü), we can later
-- use capital and/or accented letters to represent additional distinctions, typically in cases where we want to
-- remember the source of a given phoneme. By convention we use capital letters, optionally with accents.
-- Specifically:
-- * A/E/O represent written a/e/o where we don't yet know the vowel quality. Towards the beginning, we convert all
-- written a/e/o to A/E/O and later convert them to their final qualities (which might include /a/ /e/ /o/, so we
-- can't use those symbols directly for this purpose).
-- * Ẽ stands for a word-initial Brazilian sound that can be pronounced either /ẽ/ (in careful speech) or /ĩ/ (in
-- natural speech) and originates from en- or em- before a consonant. We distinguish this from written in-/im-,
-- which can be only /ĩ/, and written ehn-/ehm- (or similar), which can be only /ẽ/.
-- * I is used to represent epenthetic i in Brazilian variants (which should not affect stress assignment but is
-- otherwise treated as a normal sound), and Ɨ represents deleted epenthetic i (which still palatalizes /t/ and /d/).
-- I is also used to represent Brazil e or i from initial esC-, and Portugal (i) from initial esC-.
-- * Ì is used to represent either i. in hiatus or /j/ in Brazil; likewise for Ù representing u. in hiatus or /w/.
-- * Ɔ (capital version of ɔ) stands for a Portugal sound that can be pronounced either /o/ or /ɔ/ (depending on the
-- speaker), before syllable-final /l/.
-- * Ú is used word-finally after i to represent either .u in hiatus or /w/ in Brazil.
-- * L is used word-finally in Portugal to split words ending in -le into two pronuns due to the differing pronunciation
-- of /l/ in the two cases (coda or not).
--
-- NOTE(review): every "" literal in this group (V, NV_NOT_SPACING_CFLEX, FRONTV, W and all the `*_c` names) is empty
-- in this copy. Their names, their trailing comments and the way they are concatenated into patterns elsewhere in the
-- file suggest that each one was a bracketed character set, in most cases built from the plain string declared just
-- before it -- TODO restore from the canonical module text; do not rely on the empty values.
-- All symbols treated as vowels, including the capital/accented placeholders described above.
local vowel = "aɐeɛiɨoɔuüAEẼIƗÌOƆÙÚ"
local V = ""
local NV_NOT_SPACING_CFLEX = ""
-- High front vowels and the glide y.
local high_front_vocalic = "iIƗÌy"
-- All front vowels: the mid/central ones listed here plus everything in `high_front_vocalic`.
local front_vocalic = "eɛɨẼ" .. high_front_vocalic
local FRONTV = ""
local glide = "yw"
local W = "" -- glide
-- IPA primary and secondary stress marks.
local ipa_stress = "ˈˌ"
local ipa_stress_c = ""
-- Quality diacritics that also imply primary stress (acute, circumflex).
local primary_quality = AC .. CFLEX
local primary_quality_c = ""
-- All vowel-quality diacritics.
local quality = AC .. CFLEX .. GR .. MACRON
local quality_c = ""
-- All stress-related marks: the three stress diacritics plus the IPA stress marks. This redeclares (shadows) the
-- `stress` / `stress_c` locals declared earlier in the file.
local stress = LINEUNDER .. DOTOVER .. DOTUNDER .. ipa_stress
local stress_c = ""
-- Stress-related marks other than primary stress (DOTOVER is not part of this string).
local non_primary_stress = LINEUNDER .. DOTUNDER .. "ˌ"
local non_primary_stress_c = ""
-- Every mark that can follow a vowel: quality, stress and nasalization (tilde).
local accent = quality .. stress .. TILDE
local accent_c = ""
-- NOTE(review): every "" literal in this group (the `*_c` names, C, C_OR_SYL_TRANSP, H_OR_SYL_TRANSP,
-- H_GLIDE_OR_SYL_TRANSP, C_NOT_H_OR_GLIDE, C_OR_WORD_BOUNDARY) is empty in this copy. Judging by their names and
-- comments they were presumably bracketed (possibly negated) character sets built from the strings declared around
-- them -- TODO restore from the canonical module text; do not rely on the empty values.
--
-- Any component separator that should be "transparent" (i.e. ignored) during syllabification processes. This should
-- include a subset of the component_sep characters, currently + and * (which ++ is converted into).
local syl_transp_component_sep = "+*"
local syl_transp_component_sep_c = ""
-- Any character that should be "transparent" (i.e. ignored) during syllabification processes. This includes the
-- syllable-transparent component separators + and ++ (converted into *) as well as the tie character, which originates
-- from an apostrophe.
local syl_transp = syl_transp_component_sep .. "‿"
local syl_transp_c = ""
-- Zero or more syllable-transparent characters; used during syllabification.
local STC = syl_transp_c .. "*"
-- Component separators that are not transparent to syllabification. Includes colon (:), hyphen (-) and double hyphen
-- (--), which is converted internally to @.
local non_syl_transp_component_sep = ":@%-"
local non_syl_transp_component_sep_c = ""
-- "component_sep" means any symbol that may separate word components (not including #, which is added at a certain
-- point next to certain word components so that the adjacent characters are treated as if they are at word boundaries).
local component_sep = syl_transp_component_sep .. non_syl_transp_component_sep
local component_sep_c = ""
local word_or_component_sep_c = ""
-- Syllable divider (auto-inserted or user-specified).
local syldiv = "." .. SYLDIV
local syldiv_c = ""
-- "charsep" means any symbol that may separate the individual characters that make up a word, and which should be
-- ignored for e.g. consonant-consonant assimilation processes. This currently includes accents and syllable dividers.
local charsep = accent .. syldiv
local charsep_c = ""
-- Characters that may divide words, other than the tie (‿), which is transparent to syllabification.
local non_syl_transp_word_divider = " #"
-- All characters that may divide words.
local word_divider = non_syl_transp_word_divider .. "‿"
-- "wordsep_not_syl_transp" means the same as "wordsep" below but excludes syllable-transparent characters. It is used
-- in other collections of symbols (particularly when negated, so as to include syllable-transparent characters but
-- otherwise exclude word separators) rather than by itself.
local wordsep_not_syl_transp = charsep .. non_syl_transp_word_divider .. non_syl_transp_component_sep
-- "wordsep" means any symbol that may separate the individual characters that make up a word or may separate words or
-- components, and which should be ignored for e.g. consonant-consonant assimilation processes that operate across
-- words. This currently includes everything in "charsep" and "component_sep" plus symbols that may divide words.
local wordsep = wordsep_not_syl_transp .. syl_transp
local wordsep_c = ""
local C = "" -- consonant
-- consonant or syllable-transparent component separator
local C_OR_SYL_TRANSP = ""
-- NOTE(review): no comment in the original; from the names, presumably "h or syllable-transparent character" and
-- "h, glide or syllable-transparent character" respectively -- confirm.
local H_OR_SYL_TRANSP = ""
local H_GLIDE_OR_SYL_TRANSP = ""
local C_NOT_H_OR_GLIDE = "" -- consonant other than h, w or y
local C_OR_WORD_BOUNDARY = "" -- consonant or word boundary
local voiced_cons = "bdglʎmnɲŋrɾʁvzjʒʤ" -- voiced sound
-- Set of words that are unstressed and undergo vowel reduction in both Brazil and Portugal.
local unstressed_words = m_table.listToSet({
	-- Definite articles.
	"o", "os",
	-- Unstressed object pronouns.
	"me", "te", "se", "lhe", "lhes", "nos", "vos",
	-- Object pronouns combined with articles; see
	-- https://en.wikipedia.org/wiki/Personal_pronouns_in_Portuguese#Contractions_between_clitic_pronouns
	"mo", "mos", "to", "tos", "lho", "lhos",
	-- Allomorphs of articles after certain consonants ("nos" is already listed with the object pronouns).
	"lo", "los", "no",
	-- Allomorph of an object pronoun before other pronouns.
	"vo",
	-- Subordinating conjunction.
	"que",
	-- Coordinating conjunction.
	"e",
	-- Basic prepositions and their combinations with articles ("no" and "nos" are already listed above).
	"de", "do", "dos", "por",
	-- Preposition + article combinations.
	-- FIXME: Portugal pronun for pelos, pela, pelas given as if spelled pêlos, etc. with stress, but differently for
	-- pelo. I am assuming the Portugal pronuns with stress are wrong.
	"pelo", "pelos", "pela", "pelas",
})
-- Set of words that are unstressed with vowel reduction in Portugal only (going by the table name, they keep a full
-- vowel in Brazil).
local unstressed_full_vowel_words_brazil = m_table.listToSet({
	-- Definite articles.
	"a", "as",
	-- Object pronouns combined with articles; see
	-- https://en.wikipedia.org/wiki/Personal_pronouns_in_Portuguese#Contractions_between_clitic_pronouns
	"ma", "mas", "ta", "tas", "lha", "lhas",
	-- Allomorphs of articles after certain consonants.
	"la", "las", "na", "nas",
	-- Basic preposition combined with articles ("na" and "nas" are already listed above).
	"da", "das",
	-- Preposition; for the verb form use "pára" instead. (The coordinating conjunction "mas" is already listed
	-- above.)
	"para", "pra",
})
-- Set of words that are unstressed but do not undergo vowel reduction.
local unstressed_full_vowel_words = m_table.listToSet({
	-- Single-syllable indefinite articles.
	"um", "uns",
	-- Single-syllable possessives.
	"meu", "teu", "seu", "meus", "teus", "seus",
	-- Coordinating conjunction.
	"ou",
	-- Basic prepositions and their combinations with articles.
	"ao", "aos",
	-- à and às have to be spelled out as base letter + GR: by the time these entries are matched, all text has been
	-- converted to canonical decomposed Unicode form, and typing "à"/"às" literally would not help even with
	-- decomposed input because page contents are automatically recomposed when saved.
	"a" .. GR, "a" .. GR .. "s",
	-- Other prepositions.
	"em", "com",
})
-- Special-case pronunciations for certain unstressed words with irregular pronunciations. The left side is the
-- original spelling after DOTUNDER or DOTOVER has been added; which diacritic gets added depends on whether the word
-- has vowel reduction (DOTOVER) or no vowel reduction (DOTUNDER). The right side is the respelling. Grave-accented
-- letters must be written as base letter .. GR because the text being matched is in decomposed Unicode form.
--
-- NOTE(review): the key expression of every entry (the "left side" described above) is missing in this copy, which
-- makes the table constructor syntactically invalid -- TODO restore the keys from the canonical module text.
-- NOTE(review): the identifier looks like a mangled `unstressed_pronunciation_substitution`; do not rename it without
-- checking every place that refers to it.
local unstressed_notaunciation_substitution = {
= "a" .. DOTUNDER .. "u",
= "a" .. DOTUNDER .. "us",
= "a" .. DOTUNDER,
= "a" .. DOTUNDER .. "s",
= "pu" .. DOTOVER .. "r",
}
-- Dialects and subdialects.
-- NOTE(review): in both tables below the key of every entry is missing in this copy (only `= true` survives), so the
-- constructors are syntactically invalid. Presumably each key was a style code string such as the "npt" that
-- one_term_ipa compares `style` against -- TODO restore the keys from the canonical module text.
--
-- Membership set of the Brazilian styles (four entries).
local br_styles = {
= true,
= true,
= true,
= true,
}
-- Membership set of the Portugal styles (four entries).
local pt_styles = {
= true,
= true,
= true,
= true,
}
-- Full display name (in Spanish) of each dialect.
-- NOTE(review): the key of every entry is missing in this copy, so the constructor is syntactically invalid. There are
-- eight names, matching the 4 + 4 entries of br_styles and pt_styles, so the keys were presumably the same style
-- codes -- TODO restore from the canonical module text.
local nombre_completo = {
= "brasilero",
= "carioca",
= "paulista",
= "gaúcho",
= "europeo",
= "extremeño",
= "alentejano/algarvio",
= "portuense/transmontano"
}
-- The alphabet: the Portuguese name of each of the 26 letters a-z, in order. Every entry of `pron_abc` is a
-- one-element table holding that name.
local pron_abc = {}
for posicion, nombre_letra in ipairs({
	"a", "bê", "cê", "dê", "e", "efe", "gê", "agá", "i",
	"jota", "cá", "ele", "eme", "ene", "o", "pê", "quê", "erre", "esse", "tê", "u",
	"vê", "dáblio", "chis", "ípsilon", "zê",
}) do
	pron_abc[posicion] = { nombre_letra }
end
-- Reorder the diacritics (accent marks) in `text` according to a canonical order. Specifically, there can conceivably
-- be up to three accents on a vowel: a quality mark (acute/circumflex/grave/macron); a mark indicating secondary stress
-- (lineunder), tertiary stress (dotunder; i.e. no stress but no vowel reduction) or forced vowel reduction (dotover);
-- and a nasalization mark (tilde). Order them as follows: quality - stress - nasalization.
--
-- Parameters:
--   text: the respelling to normalize.
-- Returns: `text` with every run of accents reordered and immediately repeated accents collapsed.
-- Raises (via error()): if two quality marks or two stress marks end up next to each other, or if a circumflex,
-- grave or macron follows anything other than a/e/o.
local function reorder_accents(text)
	-- Sort rank of each diacritic: quality (1) < stress (2) < nasalization (3). Built once per call instead of once
	-- per accent run.
	local accent_order = {
		[AC] = 1,
		[CFLEX] = 1,
		[GR] = 1,
		[MACRON] = 1,
		[LINEUNDER] = 2,
		[DOTOVER] = 2,
		[DOTUNDER] = 2,
		[TILDE] = 3,
	}
	local function reorder_accent_string(accentstr)
		local accents = strexplode(accentstr)
		sort(accents, function(ac1, ac2)
			-- `accent` also contains the IPA stress marks (through `stress`), which have no entry above; rank any
			-- such mark with the stress diacritics so the comparison never involves nil.
			return (accent_order[ac1] or 2) < (accent_order[ac2] or 2)
		end)
		return concat(accents)
	end
	text = strsub(text, "(" .. accent_c .. "+)", reorder_accent_string)
	-- Remove duplicate accents.
	text = strsubrep(text, "(" .. accent_c .. ")%1", "%1")
	-- Make sure we don't have more than one of a given class.
	if strfind(text, quality_c .. quality_c) then
		error("Two different quality diacritics cannot occur together")
	end
	if strfind(text, stress_c .. stress_c) then
		error("Two different stress diacritics cannot occur together")
	end
	-- Only a/e/o can receive a circumflex, grave or macron: look for one of those marks directly after any other
	-- character.
	if strfind(text, "[^aeo][" .. CFLEX .. GR .. MACRON .. "]") then
		error("Only a/e/o can be followed by circumflex, grave or macron")
	end
	return text
end
-- Generate partial IPA for a single preprocessed term respelling `text` in the specified `style` ('gbr', 'rio', etc.;
-- see all_style_descs above). If `phonetic` is given, generate phonetic output, otherwise phonemic output. `err` is a
-- function of one argument (an error string) and should throw an error if called. This function is a subfunction of
-- `IPA` and cannot really be used by itself, because it generates output containing special symbols that need to be
-- postprocessed into multiple outputs (and in addition some other final postprocessing needs to happen, e.g. to get
-- stress marks in the right place). The function `IPA` is available be called externally.
local function one_term_ipa(text, style, phonetic)
local brazil = br_styles
local portugal = pt_styles
-- Initial x -> /ʃ/: ], ], ], etc.
text = strsub(text, "(" .. word_or_component_sep_c .. ")x", "%1ʃ")
-- Final x -> /ks/ (], ], ], ], ], ], etc.), but for now we map to
-- X because later on we open unstressed vowels before final x.
text = strsub(text, "x(" .. word_or_component_sep_c .. ")", "X%1")
-- x after certain dipthongs (ai, ei, oi, ou) and after -en- should be /ʃ/. Other diphthongs before x are rare
-- and mostly learned and we need to force explicit respelling.
text = strsub(text, "(()" .. charsep_c .. "*()" .. charsep_c .. "*)x",
function(all, a, b)
local ab = a .. b
-- ], ], ], ], ], etc.
if ab == "ai" or ab == "ei" or ab == "oi" or ab == "ou" or ab == "en" then
return all .. "ʃ"
else
return all .. "x"
end
end)
-- -exC- should be pronounced like -esC- in Brazil but -eisC- in Portugal. Cf. excelente, experiência, têxtil,
-- êxtase. Not with other vowels (cf. ], ], ]).
-- FIXME: Maybe this applies only to Lisbon and environs?
text = strsub(text, "(e" .. accent_c .. "*)x(" .. C .. ")", function(v, c)
if brazil then
return v .. "s" .. c
elseif c == "s" then
return v .. "isç"
else
return v .. "is" .. c
end
end)
if strfind(text, "x") then
--error("x must be respelled z, sh, cs, ss or similar")
-- En lugar de tirar error, asumo la pronunciación más probable https://rioandlearn.com/x-in-portuguese/
-- 1. Tras au, es ss (auxiliar) --> esto lo necesito al principio
text = strsub(text, "()x", "%1ss")
-- 2. Después de un diptongo o de "en" es sh (enxada, faixa)
text = strsub(text, "(".."?"..")".."x", "%1sh")
text = strsub(text, "(n)x", "%1sh")
-- 3. Entre vocales es z (exilio, exodo)
text = strsub(text, "("..""..accent_c.."?"..")".."x".."()", "%1z%2")
-- 4. Antes de p o de t es s (experiencia, texto)
text = strsub(text, "x()", "s%1")
-- 5. Antes de c es sc (exceçao)
text = strsub(text, "xc", "sc")
-- 6. Los que quedan, asumo que mapean a ks (torax, ortodoxo, etc.)
text = strsub(text, "x", "ks")
end
-- combinations with h; needs to precede handling of c and s, and needs to precede syllabification so that
-- the consonant isn't divided from the following h.
if style == "npt" then
-- In Northern Portugal the affricate tch is kept
text = strsub(text, "ch", "tʃ")
end
-- Else
text = strsub(text, "()h", {="ʃ", ="ʃ", ="ɲ", ="ʎ" })
-- remove initial <h>
text = strsub(text, "(" .. word_or_component_sep_c .. ")h()", "%1%2")
-- Betacism
if style == "npt" then
-- In Northern Portugal, <v> is realized as <b>
text = strsub(text, "v", "b")
end
-- c, g, q
-- This should precede syllabification especially so that the latter isn't confused by gu, qu, gü, qü
-- also, c -> ç before front vowel ensures that cc e.g. in ], ] isn't reduced to single c.
text = strsub(text, "c(" .. FRONTV .. ")", "ç%1")
text = strsub(text, "g(" .. FRONTV .. ")", "j%1")
text = strsub(text, "gu(" .. FRONTV .. ")", "g%1")
-- ], ], ], ], ], ]
text = strsub(text, "ng()", brazil and "n%1" or "ngh%1")
text = strsub(text, "qu(" .. FRONTV .. ")", "k%1")
text = strsub(text, "ü", "u") -- ], ], ], ], etc.
text = strsub(text, "()u(" .. V .. ")", "%1w%2") -- ], ], etc.
text = strsub(text, "", "k") -- ], ], ], etc.
-- y -> i between non-vowels, cf. ] /i.ta.ma.ɾa.ˈt(ʃ)i/, ] respelled 'Sýdjney' or similar
-- /ˈsid͡ʒ.nej/ (Brazilian). Most words with y need respelling in any case, but this may help.
text = strsub(text, "(" .. C_OR_WORD_BOUNDARY .. ")y(" .. accent_c .. "*" .. C_OR_WORD_BOUNDARY .. ")", "%1i%2")
-- Reduce double letters to single, except for rr, mm, nn and ss, which map to special single sounds. Do this
-- before syllabification so double letters don't get divided across syllables. The case of cci, cce is handled
-- above. nn always maps to /n/ and mm to /m/ and can be used to force a coda /n/ or /m/. As a result,
-- ] will need respelling 'comnôsco', 'cõnôsco' or 'con.nôsco', and ] will similarly
-- need respelling e.g. as 'comum.mente' or 'comũmente'. Examples of words with double letters (Brazilian
-- pronunciation):
-- * ] no respelling needed /ˈa.kɾɐ/;
-- * ] respelled 'Aléppo' /aˈlɛ.pu/;
-- * ] respelled 'bâfferh' /ˈbɐ.feʁ/;
-- * ] respelled 'chéddarh' /ˈʃɛ.daʁ/;
-- * ] respelled 'Ranna' /ˈʁɐ̃.nɐ/;
-- * ] respelled 'djézz' /ˈd͡ʒɛs/;
-- * ] respelled 'Minnessôta' /mi.neˈso.tɐ/;
-- * ] respelled 'nutélla' /nuˈtɛ.lɐ/;
-- * ] respeled 'shópping' /ˈʃɔ.pĩ/ or 'shóppem' /ˈʃɔ.pẽj̃/;
-- * ] respelled 'Sto̱wnn.rrendj' /ˌstownˈʁẽd͡ʒ/;
-- * ] no respelling needed /juˈnɐ̃/.
--
-- Note that further processing of r and s happens after syllabification and stress assignment, because we need
-- e.g. to know the distinction between final -s and -z to assign the stress properly.
text = strsub(text, "rr", "ʁ")
text = strsub(text, "nn", "N")
text = strsub(text, "mm", "M")
-- Deleted epenthetic /i/ should prevent preceding /m/, /n/ from being converted into nasalization.
text = strsub(text, "mƗ", "MƗ")
text = strsub(text, "nƗ", "NƗ")
-- Will map later to /s/; need to special case to support spellings like 'nóss' (= nós, plural of nó).
text = strsub(text, "ss", "S")
text = strsub(text, "(" .. C .. ")%1", "%1")
-- muit- is special and contains nasalization. Do before palatalization of t/d so ] works.
text = strsub(text, "(" .. word_or_component_sep_c .. "mu" .. stress_c .. "*)(it)", "%1" .. TILDE .. "%2")
-- Palatalize t/d + Ɨ -> affricates in Brazil. Use special unitary symbols, which we later convert to regular affricate
-- symbols, so we can distinguish palatalized d from written dj. We only do Ɨ now so we can delete it; we do another
-- palatalization round towards the end after raising e -> i.
local palatalize_td = { = "ʧ", = "ʤ"}
if brazil then
text = strsub(text, "()(" .. word_or_component_sep_c .. "*Ɨ)",
function(td, high_vocalic) return palatalize_td .. high_vocalic end)
-- Now delete the symbol for deleted epenthetic /i/; it still triggers palatalization of t and d.
text = strsub(text, "Ɨ", "")
end
-- Divide words into syllables.
-- First, change user-specified . into a special character so we won't move it around. We need to keep this
-- going forward until after we place the stress, so we can correctly handle initial i- + vowel, as in ],
-- ] and ]. We need to divide ] as 'i.a' but ] as 'ia.te' and ] as 'Ia.un.dé'.
-- In the former case, the stress goes on i but in the latter cases not; so we always divide <ia> as 'i.a',
-- and then after stress assignment remove the syllable divider if the <i> isn't stressed. The tricky thing is
-- that we want to allow the user to override this by explicitly adding a . between the <i> and <a>. So we need
-- to keep the distinction between user-specified . and auto-determined . until after stress assignment.
text = strsub(text, "%.", SYLDIV)
-- We have various characters indicating divisions between word components where we want to treat the components
-- more or less like separate words (e.g. -mente, -zinho/-zinha). Some such "characters" are digraphs, which we
-- convert internally to single characters to simplify the code. Here, -- separates off -mente/-zinho/-zinha and
-- ++ separates off prefixes. We want to ignore at least + and ++ (converted to *) for syllabification purposes.
text = strsub(text, "%-%-", "@")
text = strsub(text, "%+%+", "*")
-- Respell ] as 'homemzinho' so it is stressed correctly.
text = strsub(text, "n(" .. SYLDIV .. "?ziɲos?" .. word_or_component_sep_c .. ")", "m%1")
-- Divide before the last consonant (possibly followed by a glide). We then move the syllable division marker
-- leftwards over clusters that can form onsets. Note that syllable-transparent component separators will always
-- be (and will continue to be) to the left of syllable dividers rather than to the right, so we don't need to
-- check for the latter situation.
text = strsubrep(text, "(" .. V .. accent_c .. "*" .. C_OR_SYL_TRANSP .. "-)(" .. C .. H_GLIDE_OR_SYL_TRANSP .. "*" .. V .. ")", "%1.%2")
text = strsub(text, "(" .. H_OR_SYL_TRANSP .. "*)%.()", ".%1%2")
if portugal then
-- "Improper" clusters of non-sibiliant-obstruent + obstruent (pt, bt, bd, dk, kt; ps, bs, bv, bʒ, tz, dv, ks;
-- ft), non-sibiliant-obstruent + nasal (pn, bn, tm, tn, dm, dn, gm, gn), nasal + nasal (mn) are syllabified in
-- Portugal as .pt, .bv, .mn, etc. Note ʃ.t, ʃ.p, ʃ.k, etc. But in Brazil, all of these divide between the
-- consonants (p.t, b.v, ʃ.t, s.p, etc.). Particular case: ] divides as a.brr in Portugal but ab.rr
-- in Brazil.
text = strsub(text, "(" .. H_OR_SYL_TRANSP .. "*)%.()", ".%1%2")
text = strsub(text, "(" .. H_OR_SYL_TRANSP .. "*)%.()", ".%1%2")
else
-- /tʃ/, /dʒ/ are normally single sounds, but adj- in ], ] etc. should be 'ad.j'
text = strsub(text, "(t" .. STC .. ")%.(ʃ)", ".%1%2")
text = strsub(text, "(d" .. STC .. ")%.(j)", ".%1%2")
text = strsub(text, "(" .. word_or_component_sep_c .. "a" .. STC .. ")%.(d" .. STC .. ")(j)", "%1%2.%3")
end
-- All vowels should be separated from adjacent vowels by a syllable division except
-- (1) aeo + unstressed i/u, (], ], ], ], ], ]), except when
-- followed by nh or m/n/r/l + (non-vowel or word end), e.g. Bom.ba.im, ra.i.nha, Co.im.bra, sa.ir, but Jai.me,
-- a.mai.nar, bai.le, ai.ro.so, quei.mar, bei.ra;
-- (2) iu(s), ui(s) at end of word, e.g. fui, Rui, a.zuis, pa.riu, viu, sa.iu;
-- (3) ão, ãe, õe.
--
-- The easiest way to handle this is to put a special symbol between vowels that should not have a syllable
-- division between them.
--
-- First, put a syllable divider between ., as in ], ], ], ],
-- ], ]. Note that in cases like ], ], ], ], ], ],
-- where a vowel follows the m/n/l/r, there will already be a syllable division between i.m, u.n, etc., which will
-- block the following substitution.
text = strsub(text, "(" .. accent_c .. "*" .. STC .. ")(" .. STC .. ")", "%1.%2")
-- Also put a syllable divider between ..ɲ coming from 'nh' (], ]).
text = strsub(text, "(" .. accent_c .. "*" .. STC .. ")(" .. STC .. "%.ɲ)", "%1.%2")
-- Prevent syllable division between final -ui(s), -iu(s). This should precede the following rule that prevents
-- syllable division between ai etc., so that ] "he left" gets divided as sa.iu.
-- It doesn't make sense to have STC in the middle of a diphthong here.
text = strsub(text, "(u" .. accent_c .. "*)(is?" .. word_or_component_sep_c .. ")", "%1" .. TEMP1 .. "%2")
text = strsub(text, "(i" .. accent_c .. "*)(us?" .. word_or_component_sep_c .. ")", "%1" .. TEMP1 .. "%2")
-- Prevent syllable division between ai, ou, etc. unless either the second vowel is accented ]) or there's
-- a TEMP1 marker already after the second vowel (which will occur e.g. in ] divided as 'sa.iu').
text = strsubrep(text, "(" .. accent_c .. "*)()", "%1" .. TEMP1 .. "%2")
-- Prevent syllable division between nasal diphthongs unless somehow the second vowel is accented.
text = strsubrep(text, "(a" .. accent_c .. "*" .. TILDE .. ")()", "%1" .. TEMP1 .. "%2")
text = strsubrep(text, "(o" .. accent_c .. "*" .. TILDE .. ")(e)", "%1" .. TEMP1 .. "%2")
text = strsubrep(text, "(u" .. accent_c .. "*" .. TILDE .. ")(i)", "%1" .. TEMP1 .. "%2")
-- All other sequences of vowels get divided.
text = strsubrep(text, "(" .. V .. accent_c .. "*" .. STC .. ")(" .. V .. ")", "%1.%2")
-- Remove the marker preventing syllable division.
text = strsub(text, TEMP1, "")
-- An acute or circumflex not followed by a stress marker has primary stress, so indicate it.
text = strsubrep(text, "(" .. V .. quality_c .. ")()", "%1ˈ%2")
-- Line-under indicates secondary stress.
text = strsub(text, LINEUNDER, "ˌ")
-- Add primary stress to the word if not already present.
--
-- `word` is a single word component that has already been divided into syllables. Returns `word` unchanged when it
-- already contains ˈ, or when the syllable picked by the default rule already carries a stress mark; otherwise returns
-- the word with ˈ inserted after the chosen vowel (and any quality mark on it).
--
-- NOTE(review): several expressions in this body look truncated in this copy: the split pattern "()" is an empty
-- capture; `syllables` (the whole list) is passed to strsubb/strfind/strsub and reassigned where the comments talk
-- about one particular syllable; and the ending patterns "?$", "m$" and "ns$" appear to start mid-expression.
-- Presumably bracketed index expressions and character sets were lost -- TODO restore from the canonical module text
-- before relying on this function.
local function accent_word(word)
-- Check if stress already marked. We check first for primary stress before checking for tilde in case both
-- primary stress and tilde occur, e.g. in a word respelled 'cõnôsco'.
if strfind(word, "ˈ") then
return word
end
-- Preserve the syllable divider, which may be auto-added or user-specified. Because the dividers are kept, the
-- loops below step through the list two entries at a time.
local syllables = strsplit(word, "()")
-- Check for nasal vowel marked with tilde and without non-primary stress; assign stress to the last such
-- syllable in case there's more than one tilde. Note, this can happen in the part before -mente and before -zinho.
for i = #syllables, 1, -2 do -- -2 because of the syllable dividers; see above.
local changed
syllables, changed = strsubb(syllables, "(" .. V .. quality_c .. "*)" .. TILDE, "%1ˈ" .. TILDE)
if changed then
return concat(syllables)
end
end
-- Apply the default stress rule.
local sylno
-- Prefixes ending in a vowel such as pseudo- have a PSEUDOCONS after the final vowel, but we don't want that to
-- interfere in the stress-assignment algorithm.
if #syllables > 1 and (strfind(word, "?$") or strfind(word, "m$") or strfind(word, "ns$")) then
-- Stress the last syllable but one. The -2 is because of the syllable dividers; see above.
sylno = #syllables - 2
else
-- Otherwise stress the last syllable.
sylno = #syllables
end
-- Don't put stress on epenthetic i; instead, we stress the preceding syllable, as if epenthetic i weren't
-- there.
while sylno > 1 and strfind(syllables, "I") do
sylno = sylno - 2
end
-- It is (vaguely) possible that we have a one-syllable word beginning with a complex cluster such as gn-
-- followed by a normally unstressed ending such as -em. In this case, we want the ending to be stressed.
while sylno < #syllables and strfind(syllables, "I") do
sylno = sylno + 2
end
-- If we are on a syllable without a vowel (can happen if it's the last syllable in a non-final component of a
-- word, when using a component separator that is transparent to stress, such as in a word respelled
-- 'rapaz+inho'), stress the syllable to the left.
while sylno > 1 and not strfind(syllables, V) do
sylno = sylno - 2
end
if strfind(syllables, stress_c) then
-- Don't do anything if stress mark already present. (Since we check for primary stress above, this check
-- specifically affects non-primary stress.)
return word
end
-- Add stress mark after first vowel (and any quality mark).
syllables = strsub(syllables, "^(.-" .. V .. quality_c .. "*)", "%1ˈ")
return concat(syllables)
end
-- Split the text into words and the words into components so we can correctly add stress to components without it.
local words = strsplit(text, " ")
for j, word_with_boundary_markers in ipairs(words) do
-- Prefixes have a PREFIX_MARKER after the # at the end of the prefix; split it off.
local begin_marker, word, end_marker = strmatch(word_with_boundary_markers, "^(#*)(.-)(*)$")
-- Words ends in -mente, -zinho(s) or -zinha(s); add primary stress to the preceding portion as if stressed
-- (e.g. ] -> 'agitádamente') unless already stressed (e.g. ] respelled
-- 'rápidamente'). The primary stress will be converted to secondary stress further below. Essentially, we
-- rip the word apart into two words ('mente'/'zinho' and the preceding portion) and
-- stress each one independently. Note that the effect of adding a primary stress will also be to cause
-- an error if stressed 'e' or 'o' is not properly marked as é/ê or ó/ô; cf. ], which must
-- be respelled 'cértamente', and ], which must be respelled 'posteriôrmente', just as
-- with ] and ]. To prevent this happening, you can add an accent to -mente or
-- -zinho, e.g. ] respelled 'dormênte', ] respelled 'vizínho'.
if strfind(word, syldiv_c .. "men%.te$") then
word = strsub(word, syldiv_c .. "(men%.te)$", "@%1")
else
word = strsub(word, syldiv_c .. "(zi%.ɲs?)$", "@%1")
end
-- Split on components; preserve the component divider.
local components = strsplit(word, "(" .. component_sep_c .. syldiv_c .. "*)")
for k = 1, #components, 2 do -- 2 because of the component dividers.
-- Don't add stress to components followed by ++ (converted to *).
if k == #components or not strfind(components, "%*") then
components = accent_word(components)
end
end
-- Reconstruct the word.
words = begin_marker .. concat(components, "") .. end_marker
end
-- Reconstruct the text from the words.
text = concat(words, " ")
-- Add word boundaries around component separators. We add them on both sides of - and -- (converted to @), which
-- behave mostly like a true word separator, but only on the right side of other component separators (which
-- corresponds to the beginning of the word following the separator). Note that some component separators (+ and ++
-- ) are transparent to syllable boundaries, meaning that there may be a syllable divider directly
-- to the right of the component separator. To simplify the code below, we put the word boundary marker on the outside
-- of the syllable boundary marker.
text = strsub(text, "(" .. syldiv_c .. "?)", "#%1#")
text = strsub(text, "(" .. syldiv_c .. "?)", "%1#")
-- I has served its purpose (not considered when accenting).
text = strsub(text, "I", "i")
-- Remove hiatus between initial <i> and following vowel (]) unless the <i> is stressed (]) or the
-- user explicitly added a . (converted to SYLDIV above).
text = strsub(text, "#i%.(" .. V .. ")", "#y%1")
if brazil then
-- In Brazil, hiatuses involving i. or u. have two possibilities (full vowel or glide); represent using Ì. and Ù.,
-- which we later convert appropriately. Do this before eliminating SYLDIV so the user can force a hiatus using a
-- period.
local hiatus_to_optional_glide = { = "Ì", = "Ù"}
text = strsub(text, "(" .. C_OR_WORD_BOUNDARY .. ")()(%." .. V .. ")",
function(before, hiatus, after) return before .. hiatus_to_optional_glide .. after end)
-- In Brazil, hiatuses of the form í.o (e.g. ] "river", ]; but not ] "I laugh") have two
-- possibilities (i.u or iw); represent using Ú, which we later convert appropriately. Do this before eliminating
-- SYLDIV so the user can force a hiatus using a period, as in ] "I laugh" respelled 'ri.o'.
text = strsub(text, "(i" .. ipa_stress_c .. "%.)o(s?#)", "%1Ú%2")
else
-- Outside of Brazil, e.i -> a.i, e.g. ], ], ] respelled 'prote.inúrio'. But seems
-- not to happen in rei- (], ], ], etc.). Note, it does occur in ],
-- which needs respelling.
text = strsub(text, "(#re" .. syldiv_c .. ")(i)", "%1" .. TEMP1 .. "%2")
text = strsub(text, "e(" .. syldiv_c .. "i)", "a%1")
text = strsub(text, TEMP1, "")
-- Outside of Brazil, hiatuses involving 'e./i.' or 'o./u.' after obstruent + l/r preceding a vowel have two
-- possibilities (full vowel or glide), as in ], ], etc. Represent using Ì. and Ù., which
-- we later convert appropriately. Do this before eliminating SYLDIV so the user can force a hiatus using a
-- period.
local hiatus_to_optional_glide = { = "Ì", = "Ì", = "Ù", = "Ù"}
text = strsub(text, "(" .. H_OR_SYL_TRANSP .. "*)()(%." .. V .. ")",
function(before, hiatus, after) return before .. hiatus_to_optional_glide .. after end)
-- Outside of Brazil, remove hiatus more generally whenever 'e./i.' or 'o./u.' precedes a vowel. Do this before
-- eliminating SYLDIV so the user can force hiatus using a period.
local hiatus_to_glide = { = "y", = "y", = "w", = "w"}
text = strsub(text, "(" .. C_OR_WORD_BOUNDARY .. ")(%.)(" .. V .. ")",
function(before, hiatus, after) return before .. hiatus_to_glide .. after end)
end
-- Convert user-specified syllable division back to period. See comment above when we add SYLDIV.
text = strsub(text, SYLDIV, ".")
-- Vowel quality handling. First convert all a -> A, e -> E, o -> O. We will then convert A -> a/ɐ, E -> e/ɛ/ɨ,
-- O -> o/ɔ/u depending on accent marks and context. Ultimately all vowels will be one of the nine qualities
-- aɐeɛiɨoɔu and following each vowel will either be nothing (no stress), an IPA primary stress mark (ˈ) or an
-- IPA secondary stress mark (ˌ), in turn possibly followed by a tilde (nasalization). After doing everything
-- that depends on the position of stress, we will move the IPA stress marks to the beginning of the syllable.
text = strsub(text, "", { = "A", = "E", = "O"})
text = strsub(text, DOTOVER, "") -- eliminate DOTOVER; it served its purpose of preventing stress
-- Nasal vowel handling.
-- Final unstressed -am (in third-person plural verbs) pronounced like unstressed -ão.
text = strsub(text, "Am#", "A" .. TILDE .. "O#")
if portugal then
-- In Portugal, final -n is really /n/, and preceding unstressed e/o are open (], ], ];
-- ], ], ]).
text = strsub(text, "n#", "N#")
text = strsub(text, "()(N#)", "%1" .. AC .. "%2")
end
if brazil then
-- In Brazil, ] is pronounced like 'põe'.
text = strsub(text, "(Oˈ" .. TILDE .. ")(Em#)", "%1E#")
else
-- In Portugal, circumflex accent on final -em (], ], etc.) indicates a special double nasal diphthong
-- pronunciation.
text = strsub(text, "E" .. CFLEX .. "ˈm#", "E" .. CFLEX .. "ˈ" .. TILDE .. "y" .. TILDE .. ".E" .. CFLEX .. "m#")
-- In Portugal, ] is pronounced like 'põeem'.
text = strsub(text, "(Oˈ" .. TILDE .. ")(Em#)", "%1E.%2")
end
-- Acute accent on final -em (], ]) and final -ens (]) does not indicate an open
-- pronunciation.
text = strsub(text, "E" .. AC .. "(ˈs?#)", "E" .. CFLEX .. "%1")
-- Vowel + m/n within a syllable gets converted to tilde.
text = strsub(text, "(" .. V .. quality_c .. "*" .. stress_c .. "*)", "%1" .. TILDE)
-- Non-high vowel without quality mark + tilde needs to get the circumflex (possibly fed by the previous change).
text = strsub(text, "()(" .. stress_c .. "*)" .. TILDE, "%1" .. CFLEX .. "%2" .. TILDE)
-- Primary-stressed vowel without quality mark + m/n/nh across syllable boundary gets a circumflex, cf. ],
-- ], ] (excluding Northern Portugal)
if style == "npt" then
-- Northern Portugal keeps the open "a" vowels
text = strsub(text, "(A)(ˈ%.)", "%1" .. AC .. "%2")
end
text = strsub(text, "(" .. V .. ")(ˈ%.)", "%1" .. CFLEX .. "%2")
if brazil then
if style ~= "sbr" then -- Seems this happens less or not at all in South Brazil.
-- Primary-stressed vowel + m/n across syllable boundary gets nasalized in Brazil, cf. ], ].
text = strsub(text, "(" .. V .. quality_c .. "*)(ˈ%.)", "%1" .. TILDE .. "%2")
end
-- All vowels before nh (always across syllable boundary) get circumflexed and nasalized in Brazil,
-- cf. ]. I *think* the circumflexing but not nasalizing happens in South Brazil.
text = strsub(text, "(" .. V .. stress_c .. "*)(%.ɲ)", "%1" .. CFLEX .. "%2")
if style ~= "sbr" then -- I *think* this doesn't apply to South Brazil; need to verify.
text = strsub(text, "(" .. V .. quality_c .. "*" .. stress_c .. "*)(%.ɲ)", "%1" .. TILDE .. "%2")
end
-- Convert initial unstressed em-/en- before consonant to special symbol /Ẽ/, which later on is converted
-- to /e/ (careful pronunciation) or /i/ (natural pronunciation).
text = strsub(text, "(#E" .. CFLEX .. TILDE ..")(%." .. C ..")", "#Ẽ" .. TILDE .. "%2")
-- Same in ] standing alone (which will have a DOTUNDER in it), and in ].
text = strsub(text, "(#E" .. CFLEX .. DOTUNDER .. "?" .. TILDE ..")(#)", "#Ẽ" .. TILDE .. "%2")
end
-- Nasal diphthongs.
local nasal_termination_to_glide = { = "y", = "w"}
-- In ãe, ão, the second letter represents a glide.
text = strsub(text, "(A" .. CFLEX .. stress_c .. "*" .. TILDE .. ")()",
function(v1, v2) return v1 .. nasal_termination_to_glide .. TILDE end)
-- Likewise for õe.
text = strsub(text, "(O" .. CFLEX .. stress_c .. "*" .. TILDE .. ")E", "%1y" .. TILDE)
-- Likewise for ũi (generated above from muit-).
text = strsub(text, "(u" .. stress_c .. "*" .. TILDE .. ")i", "%1y" .. TILDE)
-- Final -em and -ens (stressed or not) pronounced /ẽj̃(s)/. (Later converted to /ɐ̃j̃(s)/ in Portugal.)
text = strsub(text, "(E" .. CFLEX .. stress_c .. "*" .. TILDE .. ")(s?#)", "%1y" .. TILDE .. "%2")
-- Oral diphthongs.
-- ei, eu, oi, ou -> êi, êu, ôi, ôu
text = strsub(text, "()(" .. stress_c .. "*)", "%1" .. CFLEX .. "%2")
-- ai, au -> ái, áu
text = strsub(text, "(A)(" .. stress_c .. "*)", "%1" .. AC .. "%2")
-- Convert A/E/O as appropriate when followed by a secondary or tertiary stress marker. If a quality is given,
-- it takes precedence; otherwise, act as if an acute accent were given.
text = strsub(text, "()(" .. non_primary_stress_c .. ")", "%1" .. AC .. "%2")
-- Stressed o in -dor, -dor, -sor (], ], ], ], etc.) and feminines and plurals
-- is closed /o/.
text = strsub(text, "()O(ˈr#)", "%1o%2")
text = strsub(text, "()O(ˈ%.rs?#)", "%1o%2")
-- Stressed o in -oso is closed /o/.
text = strsub(text, "O(ˈ%.sO#)", "o%1")
-- Stressed o in -osa, -osos, -osas is open /ɔ/.
text = strsub(text, "O(ˈ%.ss?#)", "ɔ%1")
-- Unstressed syllables.
-- Before final <x>, unstressed a/e/o are open, e.g. ], ], ].
text = strsub(text, "()(X)", "%1" .. AC .. "%2")
-- Capital X has served its purpose, so replace it.
text = strsub(text, "X", "kç")
if brazil then
if style ~= "sbr" then
-- Final unstressed -e(s), -o(s) -> /i/ /u/ (including before -mente)
local brazil_final_vowel = { = "i", = "u"}
text = strsub(text, "()(s?#)", function(v, after) return brazil_final_vowel .. after end)
-- Word-final unstressed -a(s) -> /ɐ/ (not before -mente)
text = strsub(text, "A(s?#)", function(after) return "ɐ" .. after end)
-- Word-final unstressed -ar -> /ɐr/ (e.g. ])
text = strsub(text, "A(r#)", function(after) return "ɐ" .. after end)
end
-- Initial unmarked unstressed non-nasal e- + -sC- -> /i/ or /e/ (], ]). To defeat this,
-- explicitly mark the <e> e.g. as <ệ> or <eh>. We reuse the special symbol /I/ for this purpose, which later
-- on is converted to /i/ or /e/. In South Brazil, however, the raised /i/ variant doesn't seem to exist.
if not strfind(text, "#Es.ç") then
text = strsub(text, "#E(s" .. C .. "*%.)", style == "sbr" and "#e%1" or "#I%1")
end
-- Remaining unstressed a, e, o without quality mark -> /a/ /e/ /o/.
local brazil_unstressed_vowel = { = "a", = "e", = "o"}
text = strsub(text, "()()",
function(v, after) return brazil_unstressed_vowel .. after end)
end
if portugal then
-- In Portugal, final unstressed -r opens preceding a/e/o (], ], ], ]
-- respelled 'ínter:...').
text = strsub(text, "()(r" .. word_or_component_sep_c .. ")", "%1" .. AC .. "%2")
-- In Portugal, unstressed a/e/o before coda l takes on an open quality. Note that any /l/ directly after a
-- vowel must be a coda /l/ because otherwise there would be a syllable boundary marker.
text = strsub(text, "()l", function(v)
-- The symbol Ɔ is later converted to /o/ or /ɔ/.
local vowel_to_before_l = { = "a", = "ɛ", = "Ɔ"}
return vowel_to_before_l .. "l"
end)
-- Unstressed 'ie' -> /jɛ/
text = strsub(text, "yE()", "yɛ%1")
-- Initial unmarked unstressed non-nasal e- + -sC- (seemingly also after des-, see Infopédia ]) ->
-- temporary symbol I (later changed to /(i)/, except after a vowel, in which case it is deleted). Note that /s/
-- directly after a vowel must be a coda /s/ because otherwise there would be a syllable boundary marker.
text = strsub(text, "#Es", "#Is")
-- Initial unmarked unstressed non-nasal e- -> /i/, including after des-.
text = strsub(text, "#E()", "#i%1")
-- Initial unmarked unstressed non-nasal o- -> /ɔ/ if another vowel follows (not 'o', 'os' by themselves).
text = strsub(text, "(#O)(.-#)", function(o, rest)
if strfind(rest, "^") and strfind(rest, V) then
return "#ɔ" .. rest
else
return o .. rest
end
end)
-- All other unmarked unstressed non-nasal e, o, a -> /ɨ/ /u/ /ɐ/
local portugal_unstressed_vowel = { = "ɐ", = "ɨ", = "u"}
text = strsub(text, "()()",
function(v, after) return portugal_unstressed_vowel .. after end)
end
-- Remaining vowels.
-- All remaining a -> /a/ (should always be stressed).
text = strsub(text, "A()", "a%1")
-- Ignore quality markers on i, u; only one quality.
text = strsub(text, "()" .. quality_c, "%1")
-- Convert a/e/o + quality marker appropriately.
local vowel_quality = {
= "a", = "ɐ",
= "ɛ", = "e",
= "ɔ", = "o",
}
text = strsub(text, "(" .. quality_c .. ")", vowel_quality)
-- Stressed o in hiatus (], ], ], etc.) is closed /o/.
text = strsub(text, "O(ˈ%." .. V .. ")", "o%1")
-- Stressed closed /o/ in Northern Portugal in hiatus has a following optional /w/.
if style=="npt" then
text = strsub(text, "(oˈ%.)(" .. V .. ")", "%1(w)%2")
end
-- Instead of raising an error (which is what should happen), assume the vowel takes an acute accent ´ (the most likely case).
if strfind(text, "") then
--error("Stressed e or o not occurring nasalized or in a diphthong must be marked for quality using é/ê or ó/ô")
text = strsub(text, "()", "%1" .. AC)
text = strsub(text, "(" .. quality_c .. ")", vowel_quality)
end
-- Finally, eliminate DOTUNDER, now that we have done all vowel reductions.
text = strsub(text, DOTUNDER, "")
if brazil then
-- Epenthesize /(j)/ in ], ], ], ], ], ], ], etc. Note, this only
-- triggers at actual word boundaries (not before -mente), and not on nasal vowels or diphthongs. To defeat this
-- (e.g. in plurals), respell using 'ss' or 'hs'.
text = strsub(text, "(" .. V .. "ˈ)(#)", "%1Y%2")
-- Also should happen at least before + (cf. ] respelled 'rapaz+inho', ] respelled
-- 'vóz+inha').
text = strsub(text, "(" .. V .. "ˈ)(%.?%+)", "%1Y%2")
-- But should not happen after /i/.
text = strsub(text, "iˈY", "iˈ")
end
-- 'S' here represents earlier ss. Word-finally it is used to prevent epenthesis of (j) and should behave
-- like 's'. Elsewhere (between vowels) it should behave like 'ç'.
text = strsub(text, "S#", "s#")
text = strsub(text, "S", "ç")
-- s, z
-- s in trans + V -> z: ], ]
text = strsub(text, "(trɐ" .. stress_c .. "*" .. TILDE .. ".)s(" .. V .. ")", "%1z%2")
-- word final z -> s
text = strsub(text, "z#", "s#")
-- s is voiced between vowels (not nasalized) or between vowel and voiced consonant, including across word
-- boundaries; may be fed by previous rule. We have to split this into two rules because /s/ should not be voiced
-- between nasal vowel and another vowel (]) but should be voiced between nasal vowel and a voiced
-- consonant (]). Note that almost all occurrences of nasal vowel + s + voiced consonant are in
-- trans- which potentially could be handled above, but there may be others, e.g. ].
text = strsub(text, "(" .. V .. stress_c .. "*Y?%.?)s(" .. wordsep_c .. "*h?)", "%1z%2")
text = strsub(text, "(" .. V .. accent_c .. "*Y?%.?)s(" .. wordsep_c .. "*h?)", "%1z%2")
-- z before voiceless consonant, e.g. ]; c and q already removed
text = strsub(text, "z(" .. wordsep_c .. "*)", "s%1")
if portugal or style == "rio" then
-- In Portugal and Rio de Janeiro; s/z before consonant (including across word boundaries) or end of utterance -> ʃ/ʒ;
-- but not word-initially (e.g. ]).
local shibilant = { = "ʃ", = "j"}
text = strsub(text, "()(##)", function(sz, after) return shibilant .. after end)
-- s/z are maintained word-initially but not following : or similar component boundary (] respelled
-- 'antrópò:scopia'). To implement this, insert TEMP1 directly before the s/z we want to preserve, then check for this
-- TEMP1 not being present when converting to shibilant, then remove TEMP1.
text = strsub(text, "(#)()", "%1" .. TEMP1 .. "%2")
text = strsubrep(text, "()()(" .. wordsep_c .. "*" .. C_NOT_H_OR_GLIDE .. ")",
function(before, sz, after) return before .. shibilant .. after end)
text = strsub(text, TEMP1, "")
end
text = strsub(text, "ç", "s")
text = strsub(text, "j", "ʒ")
-- Reduce identical sibilants/shibilants, including across word boundaries.
text = strsub(text, "()(" .. wordsep_c .. "*)(%1)", "%2%1")
if style == "rio" then
-- Also reduce shibilant + sibilant (], ] ]); not in Portugal, but in Portugal we later
-- generate two outputs in this case, either /ʃs/ and /ʒz/ (careful pronunciation) or /ʃ/ and /ʒ/ (natural
-- pronunciation). Note that the reduction of /ʃs/ to /ʃ/ in Portugal is different from the reduction of the
-- same to /s/ in Brazil.
text = strsub(text, "ʃ(" .. wordsep_c .. "*s)", "%1")
text = strsub(text, "ʒ(" .. wordsep_c .. "*z)", "%1")
end
-- N/M from double n/m
text = strsub(text, "", { = "n", = "m"})
if portugal then
--Drop unneeded 'h' in 'rh' respellings for European Portuguese
text = strsub(text, "rh","r")
if style=="spt" then
--Paragoge of i in word-final r/l in Southern Portugal
text = strsub(text, "ˈr#", "ˈ.ri#")
text = strsub(text, "ˈl#", "ˈ.li#")
end
end
-- r
-- Double rr -> ʁ already handled above.
-- Initial r or l/n/s/z + r -> strong r (ʁ).
text = strsub(text, "(%.?)r", "%1ʁ")
if brazil then
-- Word-final r before vowel in verbs is /(ɾ)/.
text = strsub(text, "(ˈ)r(#" .. wordsep_c .. "*h?" .. V .. ")", "%1(ɾ)%2")
-- Coda r before vowel is /ɾ/.
text = strsub(text, "r(" .. wordsep_c .. "*h?" .. V .. ")", "ɾ%1")
end
-- Word-final r in Brazil in verbs (not ]) is usually dropped. Use a spelling like 'marh' for ]
-- to prevent this. Make sure not to do this before -mente/-zinho (], ]).
if brazil then
text = strsub(text, "(ˈ)r(#)",
"%1(" .. (style == "sp" and "ɾ" or style == "sbr" and "ɻ" or "ʁ") .. ")%2")
if style ~= "sp" then
-- Coda r in Southern Brazil is [ɻ], otherwise outside of São Paulo is /ʁ/.
text = strsub(text, "r(" .. C .. "*)", (style == "sbr" and "ɻ" or "ʁ") .. "%1")
end
end
-- All other r -> /ɾ/.
text = strsub(text, "r", "ɾ")
if brazil and phonetic then
-- "Strong" ʁ before voiced consonant is [ɦ] in much of Brazil, [ʁ] in Rio. Use R as a temporary symbol.
text = strsub(text, "ʁ(" .. wordsep_c .. "*)", style == "rio" and "R%1" or "ɦ%1")
-- Other "strong" ʁ is [h] in much of Brazil, [χ] in Rio. Use H because later we remove all <h>.
text = strsub(text, "ʁ", style == "rio" and "χ" or "H")
text = strsub(text, "R", "ʁ")
end
-- Nasal Diphthong <ẽi> and <ɛi>
if portugal then
-- In Portugal, always lower e -> ɐ before j when nasalized.
text = strsub(text, "e(" .. accent_c .. TILDE .. "*%.?y)", "ɐ%1")
-- In Portugal, lower ɛ -> e before i in <ɛi> (the open pronunciation is dated)
text = strsub(text, "ɛ(" .. accent_c .. "*i)", "e%1")
end
-- Diphthong <ei> (if unchanged, it shall yield /ej/; this should be the case for African pronunciations)
if brazil then
-- In Brazil, add optional /j/ in <eir>, <eij>, <eig> and <eix> (as in ], ], ] and
-- ]).
text = strsub(text, "(e" .. accent_c .. "*)i(%.)", "%1(j)%2")
-- ] and ]).] -- This was added by an IP, see
-- ]; this seems non-standard to me. If we are to include it, it should
-- not be done this way, but as two separate outputs with the one lacking the /j/ marked with a qualifier such
-- as "non-standard"; compare the way the initial enC- is handled (near the end of export.IPA()), where there
-- are two outputs, with /ẽC-/ marked as "careful pronunciation" and /ĩC-/ marked as "natural pronunciation".
-- (Benwing2)
-- text = strsub(text, "(a" .. accent_c .. "*)i(%.ʃ)", "%1(j)%2")
elseif style == "spt" then
-- In Southern Portugal, <ei> monophthongizes to <e>
text = strsub(text, "(e" .. accent_c .. "*)i", "%1")
elseif style == "gpt" then
-- In general Portugal, lower e -> ɐ before i in <ei>.
text = strsub(text, "e(" .. accent_c .. "*i)", "ɐ%1")
-- In general Portugal, lower e -> ɐ before j
text = strsub(text, "e(" .. accent_c .. "*%.?y)", "ɐ%1")
-- In general Portugal, lower e -> ɐ(j) before other palatals.
text = strsub(text, "e(" .. stress_c .. "*)(%.?(" .. V .. "))", "ɐ%1(j)%2")
end
-- Diphthong <ou> (if unchanged, it shall yield /o(w)/)
if style == "spt" then
-- In Southern Portugal, <ou> always monophthongizes to <o>
text = strsub(text, "(o" .. accent_c .. "*)u", "%1")
elseif style == "npt" then
-- In Northern Portugal (and Galicia), <ou> is kept
text = strsub(text, "(o" .. accent_c .. "*)u", "%1w")
end
-- Else optional /w/ in <ou>
text = strsub(text, "(o" .. accent_c .. "*)u", "%1(w)")
-- Stop consonants.
if brazil then
-- Palatalize t/d + i/y -> affricates in Brazil.
text = strsub(text, "()(" .. word_or_component_sep_c .. "*)",
function(td, high_vocalic) return palatalize_td .. high_vocalic end)
elseif phonetic then
-- Fricativize voiced stops in Portugal when not utterance-initial or after a nasal; also not in /ld/.
-- Easiest way to do this is to convert all voiced stops to fricative and then back to stop in the
-- appropriate contexts.
local fricativize_stop = { = "β", = "ð", = "ɣ" }
local occlude_fricative = { = "b", = "d", = "g" }
text = strsub(text, "", fricativize_stop)
text = strsub(text, "##()", function(bdg) return "##" .. occlude_fricative end)
text = strsub(text, "(" .. TILDE .. wordsep_c .. "*)()", function(before, bdg) return before .. occlude_fricative end)
text = strsub(text, "(l" .. wordsep_c .. "*)ð", "%1d")
end
-- Glides and l. ou -> o(w) must precede coda l -> w in Brazil, because <ol> /ow/ cannot be reduced to /o/.
-- ou -> o(w) before conversion of remaining diphthongs to vowel-glide combinations so <ow> can be used to
-- indicate a non-reducible glide.
-- Handle coda /l/.
if brazil then
-- Coda l -> /w/ in Brazil.
text = strsub(text, "l(" .. C .. "*)", "w%1")
elseif phonetic then
-- Coda l -> [ɫ] in Portugal (and Rio Grande do Sul, according to Cunha-Cintra)
text = strsub(text, "l(" .. C .. "*)", "ɫ%1")
end
text = strsub(text, "y", "j")
if brazil then
text = strsub(text, "Y", "(j)") -- epenthesized in ], ], etc.
else
-- 'I' in Portugal represents word-initial (i) before sC, except after /i/ (e.g. ]), in which
-- case it is elided. In the latter case, we need to elide the word/component separators, otherwise we end up
-- with an extra syllable divider: /ˌɐ̃.ti.ʃˈta.ti.ku/ instead of correct /ˌɐ̃.tiʃˈta.ti.ku/.
text = strsub(text, "(i" .. accent_c .. "*)" .. word_or_component_sep_c .. "*#I", "%1")
text = strsub(text, "I", "(i)")
end
local vowel_termination_to_glide = brazil and phonetic and
{ = "ɪ̯", = "ɪ̯", = "ʊ̯", = "ʊ̯"} or
{ = "j", = "j", = "w", = "w"}
-- i/u as second part of diphthong becomes glide.
text = strsub(text, "(" .. V .. accent_c .. "*" .. "%(?)()",
function(v1, v2) return v1 .. vowel_termination_to_glide end)
-- nh
if brazil and phonetic and style ~= "sbr" then
-- ] pronounced ; nasalization of previous vowel handled above. But initial nh- e.g. ],
-- ], ] is . I *think* this doesn't happen in South Brazil. We do have the phonetic
-- representation given for ], but this is the only such case and may be a mistake.
text = strsub(text, "()ɲ", "%1j" .. TILDE)
end
if portugal then
-- Suppress final -ɨ before a vowel
text = strsub(text, "ɨ##(" .. V .. ")", "‿%1")
--Make optional utterance-finally
-- text = strsub(text, "ɨ##", "(ɨ)##")
-- (ɨ) after l when suppressed should convert to coda ɫ, so split it later into two pronuns.
-- text = strsub(text, "l%(ɨ%)##", "L##")
-- (ɨ) after r when suppressed should also convert to coda
-- text = strsub(text, "ɾ%(ɨ%)##", "R##")
end
text = strsub(text, "g", "ɡ") -- U+0261 LATIN SMALL LETTER SCRIPT G
text = strsub(text, "", { = "t͡ʃ", = "d͡ʒ"})
text = strsub(text, "tʃ", "t͡ʃ")
text = strsub(text, "dʒ", "d͡ʒ")
text = strsub(text, "h", "")
text = strsub(text, "H", "h")
return text
end
-- Normalize a raw respelling before it is split into words: lowercase it, decompose accents (keeping ç and ü as
-- single characters), turn fragment-delimiting punctuation into IPA foot boundaries ("|"), drop the remaining
-- punctuation, replace hyphens with spaces and collapse redundant bars and whitespace. Returns the cleaned string.
-- NOTE(review): the replacement table below has no visible keys and two pattern arguments look truncated ("" and
-- "+"); presumably bracketed text was lost from this copy -- verify against the upstream module before relying on it.
local function normalizar(texto)
texto = strlower(texto)
-- decompose everything but ç and ü
texto = strnfd(texto)
texto = strsub(texto, ".", {
= "ç",
= "ü",
})
texto = reorder_accents(texto)
texto = strsubrep(texto, PUNTUACION, " | ") -- convert whatever delimits fragments into IPA foot boundaries |
texto = strsubrep(texto, PUNTUACION_EXTRA, "") -- remove any punctuation that is still left
texto = strsubrep(texto, "", " ") -- hyphens become spaces (austro-húngaro, franco-italiano)
texto = strsubrep(texto, "%s*|%s*|%s*", " | ") -- finally, remove the surplus bars and spaces
texto = strsubrep(texto, "%s+", " ")
texto = strstrip(texto, "+")
return texto
end
-- Generate the IPA for a single term respelling `text` in the specified `style` ('gbr', 'rio', etc.; see
-- all_style_descs above). Return value is a list of objects of the following form:
-- { phonemic = STRING, phonetic = STRING, qualifiers = {STRING, ...} }
-- Note that the returned qualifiers are only those generated automatically as a result of certain characteristics of
-- the respelling, e.g. in Brazil initial em-/en- + consonant has two outputs, one labeled "careful pronunciation" and
-- the other "natural pronunciation". User-specified qualifiers are added at the end by the caller of IPA(), and
-- prepended to the auto-generated qualifiers.
local function generar_pron(text)
text = normalizar(text)
local words_br, words_pt = strsplit(text, "(+)"), strsplit(text, "(+)")
-- Prepare one dialect's copy of the split respelling for IPA conversion and return it as a single string.
-- `words` is a list produced by strsplit() in the caller; the loop visits every other entry (step 2), presumably
-- because words alternate with the separators captured by the split pattern -- TODO confirm.
-- `brazil` / `portugal` are booleans selecting the dialect-specific treatment of unstressed monosyllables; the
-- caller passes (true, false) for the Brazilian copy and (false, true) for the Portuguese copy.
-- NOTE(review): several expressions below look like they lost a bracketed subscript or character class in this copy
-- (e.g. `words` compared with a string, `local word = words`, the empty pattern in `strsub(x, "", "")`); verify
-- against the upstream module.
local function procesar_palabras(words, brazil, portugal)
-- Return whether the word at index `i` is a prefix (see the two cases described below).
local function word_is_prefix(i)
-- Check for prefixes, either a final prefix (followed by "-" separator, then a blank word, then no more
-- words) or a non-final prefix (followed by "- " separator).
return i == #words - 2 and words == "-" and words == "" or i < #words and words == "- "
end
for i = 1, #words, 2 do
local word = words
-- Make prefixes unstressed with vowel reduction unless they have an explicit stress marker;
-- likewise for certain monosyllabic words (e.g. ], ], ], etc.; also ], ], etc.
-- in Portugal) without stress marks.
if word_is_prefix(i) and not strfind(words, accent_c) or unstressed_words or
portugal and unstressed_full_vowel_words_brazil then
-- add DOTOVER to the last vowel not the first one, or we will mess up 'que' by
-- adding the DOTOVER after the 'u'
-- (the greedy ".*" in the pattern is what selects the last vowel)
word = strsub(word, "^(.*" .. V .. quality_c .. "*)", "%1" .. DOTOVER)
end
-- Make certain monosyllabic words (e.g. ], ]; also ], ], etc. in Brazil)
-- without stress marks be unstressed without vowel reduction.
if unstressed_full_vowel_words or brazil and unstressed_full_vowel_words_brazil then
-- add DOTUNDER to the first vowel not the last one, or we will mess up 'meu' by
-- adding the DOTUNDER after the 'u'; add after a quality marker for à, às
-- (the lazy ".-" in the pattern is what selects the first vowel)
word = strsub(word, "^(.-" .. V .. quality_c .. "*)", "%1" .. DOTUNDER)
end
-- Some unstressed words need special pronunciation.
word = unstressed_notaunciation_substitution or word
-- Store the processed word back so that concat() below sees it.
words = word
end
-- Join words and separators back into one string.
local x = concat(words)
-- Now eliminate word-final question mark and exclamation point (converted to foot boundary above when word-medial).
x = strsub(x, "", "")
-- Apostrophe becomes tie (e.g. in ]).
x = strsub(x, "'", "‿")
-- User-specified # as in i# (= i. or y) and u# (= u. or w) becomes TEMP1 so we can add # for word boundaries.
x = strsub(x, "#", TEMP1)
-- Put # at word beginning and end and double ## at text/foot boundary beginning/end.
x = strsub(x, " | ", "# | #")
x = "##" .. strsub(x, " ", "# #") .. "##"
-- Eliminate hyphens indicating prefixes/suffixes; but preserve a marker indicating prefixes, so we can later
-- convert primary to secondary stress.
-- Prefix ending in a vowel (plus any charsep_c characters): insert PSEUDOCONS before the "-#".
x = strsub(x, "(" .. V .. charsep_c .. "*)(%-#)", "%1" .. PSEUDOCONS .. "%2")
-- Every prefix-final "-#" becomes "#" followed by PREFIX_MARKER.
x = strsub(x, "%-#", "#" .. PREFIX_MARKER)
-- Suffix beginning with a vowel: replace the leading hyphen with PSEUDOCONS.
x = strsub(x, "#%-(" .. V .. ")", "#" .. PSEUDOCONS .. "%1")
-- Any other suffix-initial hyphen is simply dropped.
x = strsub(x, "#%-", "#")
return x
end
local br, pt = procesar_palabras(words_br, true, false), procesar_palabras(words_pt, false, true)
--[=[
local variants
-- Map over each element in `variants`. If `from` is found in the element, replace the element with two elements, one
-- obtained by replacing `from` with `to1` and the other by replacing `from` with `to2`. If `to2` is nil, only one
-- element replaces the original element.
local function flatmap_and_sub_pre(from, to1, qual1, to2, qual2)
variants = flatmap(variants, function(item)
if strfind(item.respelling, from) then
local retval = {
{
respelling = strsub(item.respelling, from, to1),
qualifiers = combine_qualifiers(item.qualifiers, qual1),
}
}
if to2 then
insert(retval,
{
respelling = strsub(item.respelling, from, to2),
qualifiers = combine_qualifiers(item.qualifiers, qual2),
}
)
end
return retval
else
return {item}
end
end)
end
]=]--
-- Remove grave accents and macrons, which have special meaning only for Portugal. Do this before handling o^
-- and similar so we can write áutò^:... and have it correctly give 'autò-' in Portugal but 'áutu-,áuto-' in
-- Brazil.
br = strsub(br, "", "")
-- Convert grave accents and macrons to explicit dot-under + quality marker.
local grave_macron_to_quality = {
= AC,
= CFLEX,
}
pt = strsub(pt, "", function(acc) return grave_macron_to_quality .. DOTUNDER end)
-- ê*/ô* -> é/ó and é*/ó* -> ê/ô (reverse accents)
pt = strsub(pt, "()()%*", function(eo, acc)
return eo .. (acc == CFLEX and AC or CFLEX) end)
-- Treat vowel after des- as word-initial, as in Brazil.
--pt = strsub(pt, "(" .. word_or_component_sep_c .. ")des%^+", "%1des++")
-- Remove i*, i^ and i^^ not followed by a vowel (i.e. Brazilian epenthetic i), but not i^ and i^^ followed or
-- preceded by a vowel (which has a totally different meaning, i.e. i or y in Brazil).
-- Also remove all remaining ^.
--pt = strsub(pt, "i%^+(" .. V .. ")", "i%1")
--pt = strsub(pt, "(" .. V .. ")i%^+", "%1i")
--pt = strsub(pt, "i?+", "")
--[=[
if brazil then
-- Handle i^ and i^^ before a vowel = /i/ or /j/.
flatmap_and_sub_pre("i%^%^(" .. V .. ")", "y%1", nil, "i.%1", nil)
flatmap_and_sub_pre("i%^(" .. V .. ")", "i.%1", nil, "y%1", nil)
-- Handle i^ and i^^ after a vowel = /i/ or /j/; mostly useful for ui^
flatmap_and_sub_pre("(" .. V .. ")i%^%^", "%1y", nil, "%1.i", nil)
flatmap_and_sub_pre("(" .. V .. ")i%^", "%1.i", nil, "%1y", nil)
-- Handle i^ and i^^ not before a vowel = optional epenthetic /i/.
if style == "sbr" then
-- Epenthetic /i/ seems less common in South Brazil. Make i^^ not epenthesize (but still palatalize /t/ and /d/),
-- and i^ epenthesize but not as the first option.
flatmap_and_sub_pre("i%^%^(" .. NV_NOT_SPACING_CFLEX .. ")", "Ɨ%1", nil)
flatmap_and_sub_pre("i%^(" .. NV_NOT_SPACING_CFLEX .. ")", "Ɨ%1", nil, "I%1", nil)
else
flatmap_and_sub_pre("i%^%^(" .. NV_NOT_SPACING_CFLEX .. ")", "Ɨ%1", nil, "I%1", nil)
flatmap_and_sub_pre("i%^(" .. NV_NOT_SPACING_CFLEX .. ")", "I%1", nil, "Ɨ%1", nil)
end
-- Handle i* = epenthetic /i/.
flatmap_and_sub_pre("i%*", "I", nil)
-- Handle u^ and u^^ = /u/ or /w/.
flatmap_and_sub_pre("u%^%^", "w", nil, "u.", nil)
flatmap_and_sub_pre("u%^", "u.", nil, "w", nil)
if style == "sbr" then
-- The raised variant apparently does not occur in South Brazil.
flatmap_and_sub_pre("()%^+", "%1", nil)
else
-- Handle e^ and e^^ = /e/ or /i/; handle o^ and o^^ = /o/ or /u/.
-- Do e^ and o^ together so we get only two outputs, not four, if they cooccur.
-- Likewise for e^^ and o^^.
flatmap_and_sub_pre("()%^%^",
function(eo) return eo == "e" and "i" or "u" end, nil,
"%1", nil)
flatmap_and_sub_pre("()%^",
"%1", nil,
function(eo) return eo == "e" and "i" or "u" end, nil)
end
-- Handle ê*/ô*/é*/ó* = same as without asterisk.
flatmap_and_sub_pre("()%*", "%1", nil)
-- Handle des^ at beginning of word or component = des++ or dis++, and des^^ = opposite order. But apparently
-- not in South Brazil, where the raised variant doesn't occur.
if style == "sbr" then
flatmap_and_sub_pre("(" .. word_or_component_sep_c .. ")des%^+", "%1des++", nil)
else
flatmap_and_sub_pre("(" .. word_or_component_sep_c .. ")des%^%^", "%1dis++", nil, "%1des++", nil)
flatmap_and_sub_pre("(" .. word_or_component_sep_c .. ")des%^", "%1des++", nil, "%1dis++", nil)
end
for _, variant in ipairs(variants) do
if strfind(variant.respelling, "") then
error(("* or ^ remains after applying all known replacements involving these characters (result is '%s')"):format(variant.respelling))
end
end
end
]=]--
--[=[
-- Replace i# and u# sequences (above we replaced # with TEMP1).
flatmap_and_sub_pre("i" .. TEMP1, "i.", nil, "y", {"faster pronunciation"})
flatmap_and_sub_pre("u" .. TEMP1, "u.", nil, "w", {"faster pronunciation"})
]=]--
-- Generate the pronunciation(s) of `s` for one regional style.
--
-- Parameters:
--   s        - passed unchanged to one_term_ipa().
--   variante - style code, passed unchanged to one_term_ipa(); the call sites below use
--              "gbr", "rio", "sp", "sbr", "gpt", "cpt", "spt" and "npt".
--   fone     - when truthy the phonetic substitutions are applied (e.g. "I" -> "e", "Ɔ" -> "ɔ"); otherwise the
--              phonemic ones ("I" -> "i", "Ɔ" -> "o"). Also passed unchanged to one_term_ipa().
--
-- Returns two parallel lists: the labels ("normal", "rápido", "lento") and the corresponding finalized strings.
-- When all three variants are identical the label list is empty and only one string is returned.
--
-- NOTE(review): this copy of the function appears to have lost every bracketed span ([...]) during extraction:
-- table indexing, table-constructor keys and pattern character classes are missing. The affected lines are
-- flagged individually below and must be restored from the upstream module before this code can run.
local function generar(s, variante, fone)
local f = one_term_ipa(s, variante, fone)
-- NOTE(review): presumably these were indexed by `variante` (e.g. br_styles[variante]); as written, `brazil` is
-- the whole br_styles value, so the test below does not depend on the style -- confirm against upstream.
local brazil = br_styles
-- `portugal` is not read again in this function.
local portugal = pt_styles
local normal, rapido, lento
if brazil then
-- Convert Ẽ from initial ] as a word by itself to either /ẽj̃/ and /ĩ/.
-- The careful ("lento") variant gets the diphthong; the "normal" variant gets the raised vowel.
lento = strsub(f, "Ẽ" .. TILDE .. "#", "e" .. TILDE .. "j" .. TILDE .. "#")
normal = strsub(f, "Ẽ" .. TILDE .. "#", "i" .. TILDE .. "#")
-- Convert Ẽ from initial em-/en- + consonant to either /ẽ/ and /ĩ/.
--flatmap_and_sub_post("Ẽ", "e", {"careful pronunciation"}, "i", {"natural pronunciation"})
lento = strsub(lento, "Ẽ", "e")
normal = strsub(normal, "Ẽ", "i")
--flatmap_and_sub_post("I", "i", nil, "e", nil) at this step it is not clear to me whether it should be e or i (I assume "e" because that would be the phonetic form??)
-- The "I" placeholder resolves identically in both variants; only `fone` selects the output.
if fone then
lento = strsub(lento, "I", "e")
normal = strsub(normal, "I", "e")
else
lento = strsub(lento, "I", "i")
normal = strsub(normal, "I", "i")
end
-- Convert Ú resulting from stressed final '-io(s)'.
--flatmap_and_sub_post("%.Ú", ".u", nil, {"w", "ʊ̯"}, nil) -- same remark as above
-- With `fone` the preceding syllable divider is dropped and Ú becomes a non-syllabic glide; otherwise the
-- divider is kept and Ú becomes a plain "u".
if fone then
lento = strsub(lento, "%.Ú", "ʊ̯")
normal = strsub(normal, "%.Ú", "ʊ̯")
else
lento = strsub(lento, "%.Ú", ".u")
normal = strsub(normal, "%.Ú", ".u")
end
else -- Portugal
-- ʃ + s and ʒ + z across an optional word separator: the careful variant keeps both sounds (the replacement
-- reproduces the match), the normal variant collapses them into a single ʃ/ʒ placed after the separator.
lento = strsub(f, "ʃ(" .. wordsep_c .. "*)s", "ʃ%1s")
normal = strsub(f, "ʃ(" .. wordsep_c .. "*)s", "%1ʃ")
lento = strsub(lento, "ʒ(" .. wordsep_c .. "*)z", "ʒ%1z")
normal = strsub(normal, "ʒ(" .. wordsep_c .. "*)z", "%1ʒ")
-- The "Ɔ" placeholder resolves identically in both variants; only `fone` selects the output.
if fone then
lento = strsub(lento, "Ɔ", "ɔ")
normal = strsub(normal, "Ɔ", "ɔ")
else
lento = strsub(lento, "Ɔ", "o")
normal = strsub(normal, "Ɔ", "o")
end
--flatmap_and_sub_post("ʃ(" .. wordsep_c .. "*)s",
-- "ʃ%1s", {"careful pronunciation"}, "%1ʃ", {"natural pronunciation"})
--flatmap_and_sub_post("ʒ(" .. wordsep_c .. "*)z",
-- "ʒ%1z", {"careful pronunciation"}, "%1ʒ", {"natural pronunciation"})
--flatmap_and_sub_post("Ɔ", "o", nil, "ɔ", nil)
-- Split (ɨ) after l or r into two pronuns, one with ɨ and the other without it (with one fewer syllables and coda).
--flatmap_and_sub_post("%.(-)L#", ".%1lɨ#", nil, {"%1l#", "%1ɫ#"}, nil)
--flatmap_and_sub_post("%.(-)R#", ".%1ɾɨ#", nil, {"%1ɾ#", "%1ɾ#"}, nil)
end
-- Expand the hiatus placeholders followed by a syllable divider: the fast variant (derived from `normal`) turns
-- them into glides and drops the divider; the other two keep a full vowel plus divider.
-- NOTE(review): the capture "()" is an empty position capture here, so `iu` can never equal "Ì" and the second
-- alternative is always chosen. The comment below mentions "Ì." and "Ù.", so the capture was presumably a
-- character class over those two letters that was lost in extraction -- confirm against upstream.
rapido = strsub(normal, "()%.", function(iu) return iu == "Ì" and "j" or "w" end)
normal = strsub(normal, "()%.", function(iu) return iu == "Ì" and "i." or "u." end)
lento = strsub(lento, "()%.", function(iu) return iu == "Ì" and "i." or "u." end)
-- Final changes to the generated IPA to produce what's shown to the user. We used to do this at the end of
-- one_term_ipa() but the stuff below needs to happen after the expansion of Ì. and Ù. in Brazil to either i./u.
-- or j/w, because the latter transformation involves removing a syllable boundary, which will cause a stress mark
-- on the following syllable to retract to the beginning of the newly combined syllable. To avoid lots of hassle,
-- we postpone this stress mark movement till now.
-- Takes one variant string `a` and returns it after stress/divider clean-up, strnfc() and strhtml().
local function finalize_ipa(a)
-- Convert Brazil i/u in hiatus to ɪ/ʊ in the phonetic representation. This needs to happen after handling of
-- Ì. and Ù., which feeds this change.
if brazil and fone then
-- NOTE(review): the table keys, the character class in the pattern and the table index in the callback are
-- missing (extraction damage); as written this constructor is not valid Lua. Per the comment above the
-- mapping is presumably i -> ɪ and u -> ʊ -- confirm against upstream.
local phonetic_hiatus_iu_to_actual = { = "ɪ", = "ʊ"}
a = strsub(a, "()(%." .. V .. ")", function(iu, after) return phonetic_hiatus_iu_to_actual .. after end)
end
-- Stress marks and syllable dividers.
-- Component separators that aren't transparent to syllabification need to be made into syllable dividers.
a = strsub(a, non_syl_transp_component_sep_c, ".")
-- IPA stress marks in components followed by + should be removed.
-- NOTE(review): "(*%+)" has lost the character class that "*" quantified -- restore from upstream.
a = strsub(a, ipa_stress_c .. "(*%+)", "%1")
-- Component separators that are transparent to syllabification need to be removed now, before moving IPA stress marks
-- to the beginning of the syllable, so they don't interfere in this process.
a = strsub(a, syl_transp_component_sep_c .. "#?", "")
-- Move IPA stress marks to the beginning of the syllable.
-- NOTE(review): both leading captures have lost their character classes -- restore from upstream.
a = strsubrep(a, "()(*)(" .. ipa_stress_c .. ")", "%1%3%2")
-- Suppress syllable divider before IPA stress indicator.
a = strsub(a, "%.(#?" .. ipa_stress_c .. ")", "%1")
-- Make all primary stresses but the last one in a given word be secondary. May be fed by the first rule above.
-- NOTE(review): "(+)" has lost its character class -- restore from upstream.
a = strsubrep(a, "ˈ(+)ˈ", "ˌ%1ˈ")
-- Make primary stresses in prefixes become secondary.
-- NOTE(review): "(*#" has lost its character class -- restore from upstream.
a = strsubrep(a, "ˈ(*#" .. PREFIX_MARKER .. ")", "ˌ%1")
-- Remove # symbols at word/text boundaries, as well as _ (which forces separate interpretation), pseudo-consonant
-- markers (at edges of some prefixes/suffixes), and prefix markers, and recompose.
-- NOTE(review): the pattern is empty here, so nothing described above is actually removed; the character class
-- listing those symbols was presumably lost in extraction -- restore from upstream.
a = strsub(a, "", "")
a = strnfc(a)
return strhtml(a)
end
normal = finalize_ipa(normal)
rapido = finalize_ipa(rapido)
lento = finalize_ipa(lento)
-- Only report the variants that actually differ from "normal"; labels and strings stay index-aligned.
if normal == rapido and normal == lento then
return {}, {normal}
elseif normal == rapido then
return {"normal", "lento"}, {normal, lento}
elseif normal == lento then
return {"normal", "rápido"}, {normal, rapido}
else
return {"normal", "rápido", "lento"}, {normal, rapido, lento}
end
end
local _, fono_ = generar(br, "gbr", false)
local fono = fono_
local gbr_nota, gbr_fone = generar(br, "gbr", true)
local rio_nota, rio_fone = generar(br, "rio", true)
local sp_nota, sp_fone = generar(br, "sp", true)
local sbr_nota, sbr_fone = generar(br, "sbr", true)
local gpt_nota, gpt_fone = generar(br, "gpt", true)
local cpt_nota, cpt_fone = generar(br, "cpt", true)
local spt_nota, spt_fone = generar(br, "spt", true)
local npt_nota, npt_fone = generar(br, "npt", true)
local pron = {{nombre_completo}}
local nota, fone = {gbr_nota}, {gbr_fone}
local son_iguales = m_table.deepEquals
if not son_iguales(gbr_fone, rio_fone) then
insert(pron, {nombre_completo})
insert(nota, rio_nota)
insert(fone, rio_fone)
end
if not son_iguales(gbr_fone, sp_fone) then
insert(pron, {nombre_completo})
insert(nota, sp_nota)
insert(fone, sp_fone)
end
if not son_iguales(gbr_fone, sbr_fone) then
insert(pron, {nombre_completo})
insert(nota, sbr_nota)
insert(fone, sbr_fone)
end
insert(pron, {nombre_completo})
insert(nota, gpt_nota)
insert(fone, gpt_fone)
if not son_iguales(gpt_fone, cpt_fone) then
insert(pron, {nombre_completo})
insert(nota, cpt_nota)
insert(fone, cpt_fone)
end
if not son_iguales(gpt_fone, spt_fone) then
insert(pron, {nombre_completo})
insert(nota, spt_nota)
insert(fone, spt_fone)
end
if not son_iguales(gpt_fone, npt_fone) then
insert(pron, {nombre_completo})
insert(nota, npt_nota)
insert(fone, npt_fone)
end
return pron, fone, nota, fono
end
-- Determine the accentuation type of a word from its syllabified pronunciation.
--
-- Parameter:
--   w - pronunciation string; it is split into syllables with the patterns below and searched for
--       `primary_stress`.
--
-- Returns nil when `w` is not a string. Otherwise returns two values: the accentuation label ("doble",
-- "monosílaba", "aguda", "llana", "esdrújula" or "sobreesdrújula") and the syllable count L.
-- Raises an error when there is more than one syllable and none of them contains `primary_stress`.
--
-- NOTE(review): bracketed spans ([...]) appear to have been stripped from this copy: the two syllable patterns
-- have lost a character class and the two `silabas` comparisons have lost their indexes. See the notes below.
local function determinar_acentuacion(w)
if type(w) ~= "string" then
return nil
end
local silabas = {}
-- NOTE(review): the pattern "+" has lost the character class that "+" quantified (presumably "anything that is
-- not a syllable separator") -- restore from upstream.
for s in strmatchit(w, "+") do
insert(silabas, s)
end
local L = #silabas
-- `sufijo` is never read in this function.
local sufijo = nil
-- NOTE(review): `silabas` is a table, so comparing it with a string is always false and "doble" can never be
-- returned as written. Given the L >= 4 guard, the comparisons presumably targeted the last two syllables
-- (silabas[L-1] and silabas[L]) -- confirm against upstream.
if L >= 4 and silabas == "men" and silabas == "te" then
return "doble", L
elseif L == 1 then
return "monosílaba", L
else
-- Walk the syllables again, this time keeping any leading separators attached to each syllable, and
-- classify by the distance of the stressed syllable from the end of the word.
local i = 1
-- NOTE(review): the trailing "+" has lost its character class here as well.
for silaba in strmatchit(w, SEPARADORES_SILABICOS..'*'.."+") do
if strfind(silaba, primary_stress) then
-- idx = number of syllables that follow the stressed one.
local idx = L - i
if idx == 0 then
return "aguda", L
elseif idx == 1 then
return "llana", L
elseif idx == 2 then
return "esdrújula", L
else
return "sobreesdrújula", L
end
-- Unreachable: every branch above returns.
break
end
i = i + 1
end
error("Se esperaba que la pronunciación de la palabra hubiera sido generada con las marcas de acentuación")
end
end
-- Fill in the pronunciation arguments for a page, generating them from the title when none were supplied.
--
-- Parameters:
--   titulo - page title; used as the fallback input and checked for spaces.
--   args   - argument table; it is updated and returned.
--
-- Returns `args`.
--
-- NOTE(review): every table index ([...]) appears to have been stripped from this copy, so all reads and writes
-- below show a bare `args` / `titulo` / `pron_abc`. As written the function rebinds `args` repeatedly instead of
-- filling separate fields. The original field names cannot be recovered from this chunk -- restore them from
-- upstream before relying on any of the notes below.
function export.procesar_pron_args(titulo, args)
-- Fall back to the page title when no input was given (target field stripped, see note above).
if #args < 1 then
args = titulo
end
local fono
-- Only generate when two argument lists are both empty (both field names stripped; as written the two
-- operands are the same test).
if #args < 1 and #args < 1 then
-- Single-character titles in a-z / A-Z take their input from `pron_abc` instead.
if #titulo == 1 then
if titulo >= "a" and titulo <= "z" then
args = pron_abc
args = args
elseif titulo >= "A" and titulo <= "Z" then
args = pron_abc
args = args
end
end
-- generar_pron() yields four values; the first three are stored in `args` (field names stripped) and the
-- fourth is kept locally as `fono`.
args, args, args, fono = generar_pron(args)
local tiene_espacios = strfind(titulo, " ")
-- Derive a rhyme-like string from `fono`: keep what follows the last `primary_stress` (the leading ".*" is
-- greedy), then drop everything before the first match of `V` in that remainder.
local rim = fono
rim = strsub(rim, "^.*"..primary_stress.."(.-)$", "%1")
args = strsub(rim, ".-".."("..V..".*"..")".."$", "%1")
-- Accentuation type and syllable count are only computed for titles without spaces.
if not tiene_espacios then
args, args = determinar_acentuacion(fono)
end
end
return args
end
return export
Existe la posibilidad de que, además de todo aquello que ahora ya sabes en referencia a la palabra Módulo:generar-pron/pt, también te enseñemos la manera de separarla en sílabas, si deseas aprender a dividir Módulo:generar-pron/pt en sílabas.
Más abajo puedes ir al link que te lleva a una lista con los errores ortográficos más corrientes, de forma que los tengas en cuenta y sepas el modo de no incurrir en ellos. Sin más que agregar, aquí tienes el listado de errores ortográficos de Módulo:generar-pron/pt.