-- Module: generar-pron/ru
--
-- Documentation for this module can be created at Módulo:generar-pron/ru/doc
-- Tomado de en.wikt, implementado por Tmagc

local remove_grave_accents_from_phonetic_respelling = true -- Anatoli's desired value

local m_ru_translit = require("Módulo:translit/ru")

local export = {}

local insert = table.insert
local concat = table.concat
local remove = table.remove

local m_str = require("Módulo:String")
local u = m_str.char
local strfind = m_str.find
local strmatchit = m_str.gmatch
local strsubn = m_str.gsub
local strsubrep = m_str.gsub_rep
local strsplit = m_str.split
local strstrip = m_str.strip
local strlower = m_str.lower
local substr = m_str.sub
local strlen = m_str.len
local strnfd = m_str.toNFD
local strnfc = m_str.toNFC
local strhtml = m_str.encode_html

-- Single-result wrapper around strsubn(): hands back only the first value
-- that strsubn() returns and drops every value after it.
local function strsub(term, foo, bar)
	-- the extra parentheses truncate the call to exactly one result
	return (strsubn(term, foo, bar))
end


-- Build a lookup set from a list.
-- @param t sequence of items (traversed with ipairs)
-- @return table mapping every item of `t` to `true`, so membership can be
--   tested with `set[item]`; an empty list yields an empty table
local function list_to_set(t)
	local set = {}
	for _, item in ipairs(t) do
		-- index by the item; assigning to `set` itself would replace the
		-- table with the boolean `true`
		set[item] = true
	end
	return set
end

-- Character-class patterns matching punctuation. Each must open with "[" to
-- form a set; the originals only had the closing "]".
-- NOTE(review): the leading "[%(%)%[%]" (parentheses and square brackets) is a
-- reconstruction of text that appears to have been lost from this copy of the
-- file -- confirm against the live module before relying on it.
local PUNTUACION = "[%(%)%[%]%{%}¡!¿?.,;:–—]"
local PUNTUACION_EXTRA = "[%(%)%[%]%{%}¡!¿?.,;:–—\"“”„‟‘’«»»«‹››‹'´]"

-- Constants corresponding to the ru.common helper code.

-- Diacritics, each produced by passing a Unicode code point to u()
-- (presumably yielding the one-character string for that code point).
local AC = u(0x0301) -- combining acute accent
local GR = u(0x0300) -- combining grave accent
local CFLEX = u(0x0302) -- combining circumflex
local BREVE = u(0x0306) -- combining breve
local DIA = u(0x0308) -- combining diaeresis
local CARON = u(0x030C) -- combining caron
local OGONEK = u(0x0328) -- combining ogonek
local DUBGR = u(0x030F) -- combining double grave
local DOTABOVE = u(0x0307) -- combining dot above
local DOTBELOW = u(0x0323) -- combining dot below

-- Internal placeholder characters substituted into text during processing.
local PSEUDOVOWEL = u(0xFFF1) -- pseudovowel placeholder
local PSEUDOCONS = u(0xFFF2) -- pseudoconsonant placeholder
local TEMPCFLEX = u(0xFFF3) -- placeholder to be converted to a circumflex
local TEMPSUB = u(0xFFF4) -- miscellaneous temporary placeholder

-- Character inventories used to build patterns. Unless stated otherwise each
-- value is a bare list of characters meant to be wrapped in "[...]" by the
-- code that uses it.

-- any accent
local accent = AC .. GR .. DIA .. BREVE .. CARON .. OGONEK
-- pattern for any optional accent(s): zero or more characters of `accent`
-- (the set brackets are required; a bare "*" is not a usable pattern)
local opt_accent = "[" .. accent .. "]*"
-- any composed Cyrillic vowel with grave accent
local composed_grave_vowel = "ѐЀѝЍ"
-- any Cyrillic vowel except ёЁ
local vowel_no_jo = "аеиоуяэыюіѣѵАЕИОУЯЭЫЮІѢѴ" .. PSEUDOVOWEL .. composed_grave_vowel
-- any Cyrillic vowel, including ёЁ
local vowel = vowel_no_jo .. "ёЁ"
-- any vowel in transliteration
local tr_vowel = "aeěɛiouyAEĚƐIOUY" .. PSEUDOVOWEL
-- any consonant in transliteration, omitting soft/hard sign
local tr_cons_no_sign = "bcčdfghjklmnpqrsštvwxzžBCČDFGHJKLMNPQRSŠTVWXZŽ" .. PSEUDOCONS
-- any consonant in transliteration, including soft/hard sign
local tr_cons = tr_cons_no_sign .. "ʹʺ"
-- pattern for any consonant in transliteration, including soft/hard sign,
-- optionally followed by any accent: one character of `tr_cons` followed by
-- `opt_accent`
local tr_cons_acc_re = "[" .. tr_cons .. "]" .. opt_accent
-- any Cyrillic consonant except sibilants and ц
local cons_except_sib_c = "бдфгйклмнпрствхзьъБДФГЙКЛМНПРСТВХЗЬЪ" .. PSEUDOCONS
-- Cyrillic sibilant consonants
local sib = "шщчжШЩЧЖ"
-- Cyrillic sibilant consonants and ц
local sib_c = sib .. "цЦ"
-- any Cyrillic consonant
local cons = cons_except_sib_c .. sib_c
-- Cyrillic velar consonants
local velar = "кгхКГХ"
-- uppercase Cyrillic letters (vowels as well as consonants)
local uppercase = "АЕИОУЯЭЫЁЮІѢѴБДФГЙКЛМНПРСТВХЗЬЪШЩЧЖЦ"

-- Maps a decomposed sequence (base letter followed by one combining mark) back
-- to the single composed character that the rest of the module treats as a
-- unit. Every key is the base letter of its value concatenated with the
-- combining mark that value carries.
local recomposer = {
	-- Cyrillic letters
	["е" .. DIA] = "ё",
	["Е" .. DIA] = "Ё",
	["и" .. BREVE] = "й",
	["И" .. BREVE] = "Й",
	["і" .. DIA] = "ї",
	["І" .. DIA] = "Ї",
	-- Latin letters
	["c" .. CARON] = "č",
	["C" .. CARON] = "Č",
	["e" .. CARON] = "ě",
	["E" .. CARON] = "Ě",
	["o" .. CARON] = "ǒ",
	["O" .. CARON] = "Ǒ",
	["o" .. OGONEK] = "ǫ",
	["O" .. OGONEK] = "Ǫ",
	["s" .. CARON] = "š",
	["S" .. CARON] = "Š",
	["z" .. CARON] = "ž",
	["Z" .. CARON] = "Ž",
	-- used in ru-pron:
	["ж" .. BREVE] = "ӂ", -- used in ru-pron
	["Ж" .. BREVE] = "Ӂ",
	["j" .. CFLEX] = "ĵ",
	["J" .. CFLEX] = "Ĵ",
	["j" .. CARON] = "ǰ",
	-- no composed uppercase equivalent of J-caron
	["ʒ" .. CARON] = "ǯ",
	["Ʒ" .. CARON] = "Ǯ",
}


-- Decompose acute, grave, etc. on letters (esp. Latin) into individual
-- character + combining accent. But recompose Cyrillic and Latin characters
-- that we want to treat as units and get caught in the crossfire. We mostly
-- want acute and grave decomposed; perhaps should just explicitly decompose
-- those and no others.
-- @param text string to normalize
-- @return the string after strnfd(), with the pairs listed in `recomposer`
--   turned back into their composed characters
local function decompose(text)
	text = strnfd(text)
	-- Match "any character + one combining mark" so the two-character keys of
	-- `recomposer` can actually be looked up (a lone "." never matches them).
	-- The set lists every mark that occurs in a `recomposer` key.
	-- NOTE(review): relies on strsub() leaving a match untouched when the
	-- table has no entry for it, as string.gsub does -- confirm for
	-- Módulo:String.
	text = strsub(text, ".[" .. BREVE .. DIA .. CARON .. CFLEX .. OGONEK .. "]", recomposer)
	return text
end


-- Replacement table for stripping grave accents: the combining grave itself is
-- deleted, and each composed Cyrillic vowel with grave (the characters of
-- `composed_grave_vowel`) maps to its plain vowel.
local grave_deaccenter = {
	[GR] = "", -- grave accent
	["ѐ"] = "е", -- composed Cyrillic chars w/grave accent
	["Ѐ"] = "Е",
	["ѝ"] = "и",
	["Ѝ"] = "И",
}
-- Remove grave accents; don't affect acute or composed diaeresis in ёЁ or
-- uncomposed diaeresis in -ѣ̈- (as in plural сѣ̈дла of сѣдло́).
-- NOTE: Translit must already be decomposed! See comment at top.
-- @param word Cyrillic text
-- @param tr optional transliteration of `word`
-- @return `word` without grave accents, and `tr` without grave accents (nil
--   when no `tr` was given)
local function remove_grave_accents(word, tr)
	-- one character from {combining grave, composed grave vowels}, replaced
	-- through the lookup table above
	local ru_removed = strsubn(word, "[" .. GR .. composed_grave_vowel .. "]", grave_deaccenter)
	if not tr then
		return ru_removed, nil
	end
	-- parentheses keep only the substituted string from strsubn()
	return ru_removed, (strsubn(tr, GR, ""))
end

-- local test_new_ru_pron_module = false
-- If enabled, do new code for final -е; else, the old way
local new_final_e_code = true
-- If enabled, do special case for final -е not before a pause
local final_e_non_pausal = false

-- vowel symbols of the internal (pre-IPA) representation
local vow = 'aeiouyɛəäạëöü'
-- the same plus the IPA-only vowel symbols
local ipa_vow = vow .. 'ɐɪʊɨæɵʉ'
-- `vowels` matches exactly one vowel; `vowels_c` is the capturing form. Both
-- are concatenated into larger patterns that then append quantifiers or
-- captures, so each has to be a complete set.
local vowels, vowels_c = '[' .. vow .. ']', '([' .. vow .. '])'
-- No need to include DUBGR here because we rewrite it to CFLEX very early
local acc = AC .. GR .. CFLEX .. DOTABOVE .. DOTBELOW
-- matches exactly one accent mark from `acc`
local accents = '[' .. acc .. ']'
-- matches one stress mark.
-- NOTE(review): taken to be acute or grave (primary/secondary stress); the
-- set contents are missing from this copy of the file -- confirm.
local stress_accents = '[' .. AC .. GR .. ']'

-- Set (built with list_to_set) of consonant clusters treated as "permanent"
-- syllable onsets. The syllabification step looks clusters up here when
-- deciding where to place a syllable boundary.
local perm_syl_onset = list_to_set({
	'spr', 'str', 'skr', 'spl', 'skl',
	-- FIXME, do we want sc?
	'sp', 'st', 'sk', 'sf', 'sx', 'sc',
	'pr', 'br', 'tr', 'dr', 'kr', 'gr', 'fr', 'vr', 'xr',
	'pl', 'bl', 'kl', 'gl', 'fl', 'vl', 'xl',
	-- FIXME, do we want the following? If so, do we want vn?
	'ml', 'mn',
	-- FIXME, dž is now converted to ĝž, which will have a syllable
	-- boundary in between
	'šč', 'dž',
})

-- FIXME: Consider changing ӂ internally to ʑ to match ɕ (it is used externally
-- in e.g. дроӂӂи (pronunciation spelling of дрожжи)

-- Internal consonant symbol -> IPA.
-- NOTE(review): the keys were missing from this copy of the file and have been
-- restored from the internal symbols used by `devoicing`/`voicing` and the
-- IPA values given here -- confirm against the live module.
local translit_conv = {
	['c'] = 't͡s', ['č'] = 't͡ɕ', ['ĉ'] = 't͡ʂ',
	['g'] = 'ɡ', ['ĝ'] = 'd͡ʐ',
	['ĵ'] = 'd͡z', ['ǰ'] = 'd͡ʑ', ['ӂ'] = 'ʑ',
	['š'] = 'ʂ', ['ž'] = 'ʐ'
}

-- Palatalized variants of the two dental affricates.
local translit_conv_j = {
	['c'] = 't͡sʲ',
	['ĵ'] = 'd͡zʲ'
}

-- Table of allophones, keyed by internal vowel symbol. Each entry is a list
-- of three values:
-- (1) the stressed value; (2) the value immediately before primary or
-- secondary stress; (3) the value elsewhere.
-- NOTE(review): the keys were missing from this copy of the file; they have
-- been restored as the thirteen vowel symbols of `vow`, matched to the rows by
-- their stressed value -- confirm against the live module.
local allophones = {
	['a'] = { 'a', 'ɐ', 'ə' },
	['e'] = { 'e', 'ɪ', 'ɪ' },
	['i'] = { 'i', 'ɪ', 'ɪ' },
	['o'] = { 'o', 'ɐ', 'ə' },
	['u'] = { 'u', 'ʊ', 'ʊ' },
	['y'] = { 'ɨ', 'ɨ', 'ɨ' },
	['ɛ'] = { 'ɛ', 'ɨ', 'ɨ' },
	['ä'] = { 'a', 'ɪ', 'ɪ' },
	['ạ'] = { 'a', 'ɐ', 'ə' },
	['ë'] = { 'e', 'ɪ', 'ɪ' },
	['ö'] = { 'ɵ', 'ɪ', 'ɪ' },
	['ü'] = { 'u', 'ʊ', 'ʊ' },
	['ə'] = { 'ə', 'ə', 'ə' },
}

-- Voiced obstruent -> voiceless counterpart (internal symbols). The keys are
-- exactly the values of the `voicing` table, pair for pair.
local devoicing = {
	['b'] = 'p', ['d'] = 't', ['g'] = 'k',
	['z'] = 's', ['v'] = 'f',
	['ž'] = 'š', ['ɣ'] = 'x',
	['ĵ'] = 'c', ['ǰ'] = 'č', ['ĝ'] = 'ĉ',
	['ӂ'] = 'ɕ',
}

-- Voiceless obstruent -> voiced counterpart (internal symbols). The keys are
-- exactly the values of the `devoicing` table, pair for pair.
local voicing = {
	['p'] = 'b', ['t'] = 'd', ['k'] = 'g',
	['s'] = 'z', ['f'] = 'v',
	['š'] = 'ž', ['c'] = 'ĵ', ['č'] = 'ǰ', ['ĉ'] = 'ĝ',
	['x'] = 'ɣ', ['ɕ'] = 'ӂ'
}

-- Plain vowel -> iotated (palatal) vowel symbol, applied to a vowel that
-- follows j. Each key is the base letter of its value.
local iotating = {
	['a'] = 'ä',
	['e'] = 'ë',
	['o'] = 'ö',
	['u'] = 'ü'
}

-- Retraction of е and и (internal e, i) to their retracted symbols, used after
-- the always-hard consonants ц, ш, ж.
local retracting = {
	['e'] = 'ɛ',
	['i'] = 'y',
}

-- Vowel -> fronted variant.
-- NOTE(review): the keys were missing from this copy of the file. 'a' -> 'æ'
-- follows from the value; the two rows yielding 'ʉ' are taken to be the
-- stressed and reduced forms of u ('u' and 'ʊ', per the values in
-- `allophones`) -- confirm against the live module.
local fronting = {
	['a'] = 'æ',
	['u'] = 'ʉ',
	['ʊ'] = 'ʉ',
}

-- Spoken name of each letter, as a one-element list, keyed by the letter.
-- Both the lowercase and the uppercase form of a letter get their own entry
-- with the same name.
-- NOTE(review): the keys were missing from this copy of the file; each
-- original pair of identical rows is taken to be the lowercase/uppercase forms
-- of the letter that the name describes -- confirm against the live module.
local pron_abc = {}
for _, letter in ipairs({
	{"а", "А", "а"},
	{"б", "Б", "бэ"},
	{"в", "В", "вэ"},
	{"г", "Г", "гэ"},
	{"д", "Д", "дэ"},
	{"е", "Е", "е"},
	{"ё", "Ё", "ё"},
	{"ж", "Ж", "жэ"},
	{"з", "З", "зэ"},
	{"и", "И", "и"},
	{"й", "Й", "и краткое"},
	{"к", "К", "ка"},
	{"л", "Л", "эл"},
	{"м", "М", "эм"},
	{"н", "Н", "эн"},
	{"о", "О", "о"},
	{"п", "П", "пэ"},
	{"р", "Р", "эр"},
	{"с", "С", "эс"},
	{"т", "Т", "тэ"},
	{"у", "У", "у"},
	{"ф", "Ф", "эф"},
	{"х", "Х", "ха"},
	{"ц", "Ц", "це"},
	{"ч", "Ч", "че"},
	{"ш", "Ш", "ша"},
	{"щ", "Щ", "ща"},
	{"ъ", "Ъ", "твёрдый знак"},
	{"ы", "Ы", "ы"},
	{"ь", "Ь", "мягкий знак"},
	{"э", "Э", "э"},
	{"ю", "Ю", "ю"},
	{"я", "Я", "я"},
	-- old (pre-reform) letters
	{"і", "І", "і десятеричное"},
	{"ѣ", "Ѣ", "ять"},
	{"ѳ", "Ѳ", "ѳита"},
	{"ѵ", "Ѵ", "ижица"},
	{"ѕ", "Ѕ", "ѕѣлѡ"},
	{"ѯ", "Ѯ", "ѯи"},
	{"ѱ", "Ѱ", "ѱи"},
	{"ѡ", "Ѡ", "ѡмега"},
	{"ѫ", "Ѫ", "юсъ большой"},
	{"ѧ", "Ѧ", "юсъ малый"},
	{"ѭ", "Ѭ", "юсъ большой іотированный"},
	{"ѩ", "Ѩ", "юсъ малый іотированный"},
}) do
	-- a separate list per key, as in a literal table
	pron_abc[letter[1]] = {letter[3]}
	pron_abc[letter[2]] = {letter[3]}
end


-- Prefixes that we recognize specially when they end in a geminated
-- consonant. The first element is the result after applying voicing/devoicing,
-- gemination and other changes. The second element is the original spelling,
-- so that we don't overmatch and get cases like Поттер. We check for these
-- prefixes at the beginning of words and also preceded by ne-, po- and nepo-.
-- The third element should be true if the prefix produces a particular sound
-- (its symbol is missing from this copy of the comment) when assimilated
-- to a following ж, otherwise omitted. We use this as part of the
-- implementation of automatic ӂӂ pronunciation, which shouldn't happen at
-- prefix boundaries.
--
-- Both string elements are used as patterns: the calling code prepends '⁀'
-- and passes them to strfind().
--
-- NOTE(review): several entries look truncated -- e.g. {'ː', 'v'}, the entry
-- with an empty second element, and the two identical 'oː' keys for 'ot' and
-- 'ob'. Presumably a bracketed character class was lost from each pattern in
-- this copy of the file; restore them from the live module before use.
local geminate_pref = {
	--'abː', --'adː',
	{'beː', 'be', true},
	--'braomː',
	{'ː', 'v'},
	{'voː', 'vo', true},
	{'iː', 'i', true},
	--'^inː',
	{'kontrː', 'kontr'},
	{'superː', 'super'},
	{'tranː', 'trans', true},
	{'naː', 'nad'},
	{'niː', 'ni', true},
	{'oː', 'ot'}, --'^omː',
	{'oː', 'ob'},
	{'obeː', 'obe', true},
    {'poː', 'pod'},
	{'preː', 'pred'}, --'^paszː', '^pozː',
	{'raː', 'ra', true},
	{'ː', '', true},
	{'meː', 'mež', true},
	{'če?reː', 'če?re', true},
	-- certain double prefixes involving ra-
	{'predraː', 'predra', true},
	{'bezraː', 'bezra', true},
	{'naraː', 'nara', true},
	{'vraː', 'vra', true},
	{'doraː', 'dora', true},
	-- '^sverxː', '^subː', '^tröxː', '^četyröxː',
}

-- Long-affricate replacement for a prefix-final т/д plus the following s or z.
local sztab = { s='cs', z='ĵz' }

-- Replacement callback for the word-initial от-/под- substitutions.
-- @param pre the captured prefix up to, but not including, its final t/d
-- @param sz the captured following sibilant, 's' or 'z'
-- @return `pre` followed by the long-affricate sequence for `sz`
local function ot_pod_sz(pre, sz)
	-- look the sibilant up; concatenating the table itself raises an error
	return pre .. sztab[sz]
end

-- Ad-hoc phonetic substitutions to apply. Each entry is a two-element list,
-- the two arguments to 'strsub()'. These are applied in order, and are
-- carefully ordered to work correctly; don't reorder them unless you know
-- what you're doing. This is called fairly early on, after transliterating,
-- splitting on words, adding ⁀ at the beginning and end of all words, and
-- applying a few other changes. It mostly implements various sorts of
-- assimilations.
--
-- The first element of an entry is a pattern; the second is either a
-- replacement string (with %1, %2 ... back-references) or a function that
-- receives the captures.
--
-- NOTE(review): many patterns and comments below look truncated -- empty
-- captures '()', quantifiers with nothing to quantify ('(ʹ?+)s'), rules that
-- replace a string with itself ('nɕ' -> 'nɕ'), and comments with a gap where a
-- sound list belongs. Presumably the bracketed character classes were lost in
-- this copy of the file; restore them from the live module before use.
local phonetic_subs = {
	{'h', 'ɣ'},

	{'šč', 'ɕː'}, -- conversion of šč to geminate

	-- the following group is ordered before changes that affect ts
	{'nsk', 'n(t)sk'},
	{'ssk', 'sck'},
	-- -дцат- (in numerals) has optionally-geminated дц; if unstressed,
	-- pronounced as -дцыт-
	{'dca(' .. accents .. '?)t', function(accent)
		if accent == '' then
			return 'c(c)yt'
		else
			return 'c(c)a' .. accent .. 't'
		end
	end
	},

	-- Add / before цз, чж sequences (Chinese words) and assimilate чж
	{'cz', '/cz'},
	{'čž', '/ĝž'},

	-- main changes for affricate assimilation of  + sibilant, including ts;
	-- we either convert to "short" variants t͡s, d͡z, etc. or to "long" variants
	-- t͡ss, d͡zz, etc.
	-- 1. т с, д з across word boundary, also т/с, д/з with explicitly written
	--    slash, use long variants.
	{'(ʹ?+)s', 'c%1s'},
	{'(ʹ?+)z', 'ĵ%1z'},
	-- 2. тс, дз + vowel use long variants.
	{'(ʹ?)s(j?' .. vowels .. ')', 'c%1s%2'},
	{'(ʹ?)z(j?' .. vowels .. ')', 'ĵ%1z%2'},
	-- 3. тьс, дьз use long variants.
	{'ʹs', 'cʹs'},
	{'ʹz', 'ĵʹz'},
	-- 4. word-initial от-, под- use long variants because there is
	--    a morpheme boundary.
	{'(⁀o' .. accents .. '?)t()', ot_pod_sz},
	{'(⁀po' .. accents .. '?)d()', ot_pod_sz},
	-- 5. other тс, дз use short variants.
	{'s', 'c'},
	{'z', 'ĵ'},
	-- 6. тш, дж always use long variants (FIXME, may change)
	{'(ʹ?*)š', 'ĉ%1š'},
	{'(ʹ?*)ž', 'ĝ%1ž'},
	-- 7. soften palatalized hard hushing affricates resulting from the previous
	{'ĉʹ', 'č'},
	{'ĝʹ', 'ǰ'},

	-- changes that generate ɕː and ɕč through assimilation:
	-- зч and жч become ɕː, as does сч at the beginning of a word and in the
	-- sequence счёт when not following  (подсчёт); else сч becomes ɕč
	-- (отсчи́тываться), as щч always does (рассчитáть written ращчита́ть)
	{'sč', 'čɕː'},
	{'ɕːč', 'ɕč'},
	{'č', 'ɕː'},
	{'ɕː?', 'ɕː'},
	{'⁀sč', '⁀ɕː'},
	{'sč(j?' .. accents .. '?)t', 'ɕː%1t'},
	{'sč', 'ɕč'},

	-- misc. changes for assimilation of  + sibilants and affricates
	{'c', 'sc'},
	{'()()', '%1%2'},
	-- дц, тц, дч, тч + vowel always remain geminated, so mark this with ˑ;
	-- if not followed by a vowel, as in e.g. путч, use normal gemination
	-- (it will normally be degeminated)
	{'()(' .. vowels .. ')', '%1ˑ%2'},
	{'()', '%1%1'},
	-- the following is ordered before the next one, which applies assimilation
	-- of  to щ (including across word boundaries)
	{'nɕ', 'nɕ'},
	--  and ь before soft affricates , including across word
	-- boundaries; note that the common sequence сч has already been handled
	{'ʹ?(*)', 'ɕ%1'},
	-- reduction of too many ɕ's, which can happen from the previous
	{'ɕɕː', 'ɕː'},
	-- assimilation before  and ь before щ
	{'ʹ?(*)ɕ', 'č%1ɕ'},
	-- assimilation of  and ь before 
	{'(*)š', 'š%1š'},
	{'(*)ž', 'ž%1ž'},
	{'ʹ(*)š', 'ɕ%1š'},
	{'ʹ(*)ž', 'ӂ%1ž'},
	-- assimilation of ь before с (in imperatives esp. before ся)
	{'ʹs()', 'sˑ%1'},
	-- assimilation of ь before т (e.g. in imperatives esp. before те)
	{'ʹt()', 'tˑ%1'},

	-- optional palatalization of palatalized labials before another consonant
	-- in  (esp. in imperatives before -те, -ся)
	-- FIXME, perhaps we should either generalize this or restrict it only
	-- to imperatives
	{'()ʹ()', '%1(ʹ)%2'},

	{'sverxi', 'sverxy'},
	{'stʹd', 'zd'},
	-- this will often become degeminated
	{'tʹd', 'dd'},

	-- loss of consonants in certain clusters
	{'()g', '%1g'},
	{'zdn', 'zn'},
	{'lnc', 'nc'},
	{'t(li' .. accents .. '?v)', 's%1'},
	{'tn', 'sn'},
	{'lvstv', 'lstv'},

	-- initial unstressed э -> и; should precede backing of /i/ in close juncture
	{'⁀ɛ()', '⁀i%1'},
	-- unstressed э after a vowel -> и; repeated to handle the unlikely case
	-- where two ээ occur in a row; FIXME, this is a type of ikanye, and we
	-- mostly implement ikanye later on using the chart in 'allophones', so
	-- it would be nice to merge these two cases, but I can't think of an
	-- obvious way to do it
	{'(' .. vowels .. accents .. '?)ɛ()', '%1i%2'},
	{'(' .. vowels .. accents .. '?)ɛ()', '%1i%2'},
	-- backing of /i/ after hard consonants in close juncture
	{'()⁀‿⁀i', '%1⁀‿⁀y'},
}

-- Consonant clusters subject to assimilative palatalization, as two sets
-- built with list_to_set: `compulsory` and `optional`. Each listed cluster
-- ends in an already-palatalized consonant (ʲ).
local cons_assim_palatal = {
	-- assimilation of tn, dn, sn, zn, st, zd, nč, nɕ is handled specially
	compulsory = list_to_set({'ntʲ', 'ndʲ', 'xkʲ',
	    'csʲ', 'ĵzʲ', 'ncʲ', 'nĵʲ'}),
	optional = list_to_set({'slʲ', 'zlʲ', 'nsʲ', 'nzʲ',
		'mpʲ', 'mbʲ', 'mfʲ', 'fmʲ'})
}

-- words which will be treated as accentless (i.e. their vowels will be
-- reduced), and which will liaise with a preceding or following word;
-- this will not happen if the words have an accent mark, cf.
-- по́ небу vs. по не́бу, etc.
-- The words are given in transliteration; each class is a set built with
-- list_to_set.
local accentless = {
	-- class 'pre': particles that join with a following word
	pre = list_to_set({'bez', 'bliz', 'v', 'vo', 'da', 'do',
       'za', 'iz', 'iz-pod', 'iz-za', 'izo', 'k', 'ko', 'mež',
       'na', 'nad', 'nado', 'ne', 'ni', 'ob', 'obo', 'ot', 'oto',
       'pered', 'peredo', 'po', 'pod', 'podo', 'pred', 'predo', 'pri', 'pro',
       's', 'so', 'u', 'čerez'}),
	-- class 'prespace': particles that join with a following word, but only
	--   if a space (not a hyphen) separates them; hyphens are used here
	--   to spell out letters, e.g. а-эн-бэ́ for АНБ (NSA = National Security
	--   Agency) or о-а-э́ for ОАЭ (UAE = United Arab Emirates)
	prespace = list_to_set({'a', 'o'}),
	-- class 'post': particles that join with a preceding word
	post = list_to_set({'by', 'b', 'ž', 'že', 'li', 'libo', 'lʹ', 'ka',
	   'nibudʹ', 'tka'}),
	-- class 'posthyphen': particles that join with a preceding word, but only
	--   if a hyphen (not a space) separates them
	posthyphen = list_to_set({'to'}),
}

-- Pronunciation of final unstressed -е, depending on the part of speech and
--   exact ending. Also used for pronunciation of -ться in imperatives vs.
--   infinitives.
--
-- Endings:
--   oe = -ое
--   ve = any other vowel plus -е (FIXME, may have to split out -ее)
--   je = -ье
--   softpaired = soft paired consonant + -е
--   hardsib = hard sibilant (ц, ш, ж) + -е
--   softsib = soft sibilant (ч, щ) + -е
--   tsjapal = whether final -ться is palatalized: 'n' in the default entry,
--     'y' for imperatives
--
-- Parts of speech:
--   def = default used in absence of pos
--   n/noun = neuter noun in the nominative/accusative singular (but not ending
--     in adjectival -ое or -ее; those should be considered as adjectives)
--   pre = prepositional case singular
--   dat = dative case singular (treated same as prepositional case singular)
--   voc = vocative case (currently treated as 'mid')
--   nnp = noun nominative plural in -е (гра́ждане, боя́ре, армя́не); not
--     adjectival plurals in -ие or -ые, including adjectival nouns
--     (да́нные, а́вторские)
--   inv = invariable noun or other word (currently treated as 'mid')
--   a/adj = adjective or adjectival noun (typically either neuter in -ое or
--     -ее, or plural in -ие, -ые, or -ье, or short neuter in unpaired
--     sibilant + -е)
--   c/com = comparative (typically either in -ее or sibilant + -е)
--   adv = adverb
--   p = preposition (treated same as adverb)
--   v/vb/verb = finite verbal form (usually 2nd-plural in -те), but not
--     imperatives (use pos=imp) and not participle forms, which should be
--     treated as adjectives
--   pro = pronoun (кое-, какие-, ваше, сколькие)
--   num = number (двое, трое, обе, четыре; currently treated as 'mid')
--   pref = prefix (treated as 'high' because integral part of word)
--   hi/high = force high values (i, or y after a hard sibilant)
--   mid = force mid values (e, or y after a hard sibilant)
--   lo/low/schwa = force low, really schwa, values (ə)
--   s/sust/snp, prep, impv, alto, med/medio, bajo = further aliases defined
--     in the table below
--
-- Possible values (internal vowel symbols):
--   1. ə, e, i after a vowel or soft consonant
--   2. ə or y after a hard sibilant
--
-- If a part of speech doesn't have an entry for a given type of ending,
--   it receives the default value. If a part of speech's entry is a string,
--   it's an alias for another way of specifying the same part of speech
--   (e.g. n=noun).
local pos_properties = {
	def={oe='ə', ve='e', je='e', softpaired='e', hardsib='y', softsib='e', tsjapal='n'},
	noun={oe='ə', ve='e', je='e', softpaired='e', hardsib='ə', softsib='e'},
	n='noun',
	s='noun',
	sust='noun',
	pre={oe='e', ve='e', softpaired='e', hardsib='y', softsib='e'},
	prep='pre',
	dat='pre',
	voc='mid',
	nnp={softpaired='e'}, -- FIXME, not sure about this
	snp='nnp',
	inv='mid', --FIXME, not sure about this (e.g. вице-, кофе)
	adj={oe='ə', ve='e', je='ə'}, -- FIXME: Not sure about -ее, e.g. neut adj си́нее; FIXME, not sure about short neuter adj, e.g. похо́же from похо́жий, дорогосто́яще from дорогосто́ящий, should this be treated as neuter noun?
	a='adj',
	com={ve='e', hardsib='y', softsib='e'},
	c='com',
	adv={softpaired='e', hardsib='y', softsib='e'},
	p='adv', --FIXME, not sure about prepositions
	verb={softpaired='e'},
	v='verb',
	vb='verb',
	-- Imperatives like other verbs except that final -ться is palatalized
	imp={softpaired='e', tsjapal='y'},
	impv='imp',
	pro={oe='i', ve='i'}, --FIXME, not sure about ваше, сколькие, какие-, кое-
	num='mid', --FIXME, not sure about обе
	pref='high',
	-- forced values
	high={oe='i', ve='i', je='i', softpaired='i', hardsib='y', softsib='i'},
	alto='high',
	hi='high',
	mid={oe='e', ve='e', je='e', softpaired='e', hardsib='y', softsib='e'},
	med = 'mid',
	medio = 'mid',
	low={oe='ə', ve='ə', je='ə', softpaired='ə', hardsib='ə', softsib='ə'},
	bajo = 'low',
	lo='low',
	schwa='low'
}

-- Remove accents that we don't want to appear in the phonetic respelling.
-- (Porter's note: purpose unclear; this used to live in the old main
-- function.)
-- @param text respelling to clean up
-- @param remove_grave when truthy, grave accents are stripped as well
-- @return the cleaned text
local function phon_respelling(text, remove_grave)
	-- An empty pattern removes nothing, so strip an explicit set of marks.
	-- NOTE(review): the set (circumflex, double grave, dot above, dot below)
	-- is a reconstruction -- its contents are missing from this copy of the
	-- file; confirm against the live module.
	text = strsub(text, '[' .. CFLEX .. DUBGR .. DOTABOVE .. DOTBELOW .. ']', '')
	-- Remove grave accents from annotations but maybe not from phonetic respelling
	if remove_grave then
		-- only the first return value (the de-accented text) is kept
		text = remove_grave_accents(text)
	end
	return text
end

-- Convert normalized spelling into actual pronunciation. Return value is a
-- list of one or more valid pronunciations. "Normalized" means that various
-- normalization transformations have been applied, e.g.
-- (1) text is transliterated and accents decomposed;
-- (2) ‿ is added where appropriate to join clitics to normally-stressed words;
-- (3) ⁀ is added at the beginning and end of all words;
-- (4) primary or tertiary stress may have been added to single-syllable words
--     as appropriate;
-- (5) punctuation is removed and replaced with spaces and/or IPA foot
--     boundaries;
-- (6) etc.
-- Note that normalization does *not* implement assimilations, conversion of
-- vowels or consonants to their IPA equivalents, or other intra-word changes.
local function ru_ipa_main(text, adj, gem, pos)
	-- save original word spelling before respellings, (de)voicing changes,
	-- geminate changes, etc. for implementation of geminate_pref
	local orig_word = strsplit(text, " ", true)
	local word

	-- remove any apostrophes, since any still present at this stage
	-- are purely cosmetic (e.g. in foreign names)
	-- any apostrophes in the input that are standing in for hard signs
	-- should have already been dealt with by the transliteration
	-- module
	text = strsub(text, '', '')

	-- insert or remove /j/ before  so that palatal versions of these
	-- vowels are always preceded by /j/ and non-palatal versions never are
	-- (do this before the change below adding tertiary stress to final
	-- palatal о):
	-- (1) Non-palatal  after always-hard шж (e.g. in брошю́ра, жю́ри)
	--     despite the spelling (FIXME, should this also affect ?)
	text = strsub(text, '()j()', '%2%3')
	-- (2) Palatal  after always-soft щчӂ and voiced variant ǰ (NOTE:
	--     this happens before the change šč -> ɕː in phonetic_subs)
	text = strsub(text, '()()', '%1j%2')
	-- (3) ьо is pronounced as ьйо, i.e. like (possibly unstressed) ьё, e.g.
	--     in Асунсьо́н
	text = strsub(text, 'ʹo', 'ʹjo')

	-- add tertiary stress to some final -о (this needs to be done before
	-- eliminating dot-above, after adding ⁀, after adding /j/ before palatal о):
	-- (1) after vowels, e.g. То́кио
	text = strsub(text, '(' .. vowels .. accents .. '?o)⁀', '%1' .. CFLEX .. '⁀')
	-- (2) when palatal, e.g. ра́нчо, га́учо, ма́чо, Ога́йо
	text = strsub(text, 'jo⁀', 'jo' .. CFLEX .. '⁀')

	-- eliminate dot-above, which has served its purpose of preventing any
	-- sort of stress (needs to be done after adding tertiary stress to
	-- final -о)
	text = strsub(text, DOTABOVE, '')
	-- eliminate dot-below (needs to be done after changes above that insert
	-- j before  after always-soft щчӂ)
	text = strsub(text, 'ja' .. DOTBELOW, 'jạ')
	if strfind(text, DOTBELOW) then
		error("Dot-below accent can only be placed on я or palatal а")
	end

	text = adj and strsub(text, '(.́?)go(' .. AC .. '?)⁀', '%1vo%2⁀') or text
	text = adj and strsub(text, '(.́?)go(' .. AC .. '?)sja⁀', '%1vo%2sja⁀') or text

	local function fetch_pos_property(i, ending)
		local thispos = pos or 'def'
		local chart = pos_properties
		while type(chart) == "string" do -- handle aliases
			chart = pos_properties
		end
		assert(type(chart) == "table")
		local sb = chart or pos_properties
		assert(sb)
		return sb
	end

	-- Pos-specific handling of final -ться: palatalized if pos=imp, else not
	-- (infinitives). If we have multiple parts of speech, we need to be
	-- trickier, splitting by word.
	local function final_tsja_processing(pron, i)
		local tsjapal = fetch_pos_property(i, 'tsjapal')
		if tsjapal == 'n' then
			-- FIXME!!! Should these also pay attention to grave accents?
			pron = strsub(pron, '́tʹ?sja⁀', '́cca⁀')
			pron = strsub(pron, '()tʹ?sja⁀', '%1ca⁀')
		end
		return pron
	end
	
	--split by word and process each word
	word = strsplit(text, " ", true)
	for i = 1, #word do
		word = final_tsja_processing(word, i)
	end
	text = concat(word, " ")

	--phonetic substitutions of various sorts
	for _, phonsub in ipairs(phonetic_subs) do
		text = strsub(text, phonsub, phonsub)
	end

	--voicing, devoicing
	--NOTE: v before an obstruent assimilates in voicing and triggers voicing
	--assimilation of a preceding consonant; neither happens before a sonorant
	--1. absolutely final devoicing
	text = strsub(text, '()(ʹ?⁀)$', function(a, b)
		return devoicing .. b end)
	--2. word-final devoicing before another word
	text = strsub(text, '()(ʹ?⁀ ⁀)', function(a, b)
		return devoicing .. b end)
	--3. voicing/devoicing assimilation; repeat to handle recursive assimilation
	while true do
		local new_text = strsub(text, '()(*)', function(a, b)
			return devoicing .. b end)
		new_text = strsub(new_text, '()(*v?*)', function(a, b)
			return voicing .. b end)
		if new_text == text then
			break
		end
		text = new_text
	end

	--re-notate orthographic geminate consonants
	text = strsub(text, '()' .. '%1', '%1ː')
	text = strsub(text, '()' .. '%(%1%)', '%1(ː)')

	--rewrite iotated vowels
	text = strsub(text, '(j*)()', function(a, b)
		return a .. iotating end)
	-- eliminate j after consonant and before iotated vowel (including
	-- semi-reduced ạ)
	text = strsub(text, '(/?)j()', '%1%2')

	--split by word and process each word
	word = strsplit(text, " ", true)

	for i = 1, #word do
		local pron = word

		-- Check for gemination at prefix boundaries; if so, convert the
		-- regular gemination symbol ː to a special symbol ˑ that indicates
		-- we always preserve the gemination unless gem=n. We look for
		-- certain sequences at the beginning of a word, but make sure that
		-- the original spelling is appropriate as well (see comment above
		-- for geminate_pref).
		if strfind(pron, 'ː') then -- optimize by only doing when gemination present
			local orig_pron = orig_word
			local deac = strsub(pron, accents, '')
			local orig_deac = strsub(orig_pron, accents, '')
			-- the following two are optimizations to reduce the number of regex
			-- checks in the majority of cases with words not beginning with ne-
			-- or po-.
			local is_ne = strfind(orig_deac, '⁀ne')
			local is_po = strfind(orig_deac, '⁀po')
			for _, gempref in ipairs(geminate_pref) do
				local newspell = gempref
				local oldspell = gempref
				-- FIXME! The rsub below will be incorrect if there is
				-- gemination in a joined preposition or particle
				if strfind(orig_deac, '⁀' .. oldspell) and strfind(deac, '⁀' .. newspell) or
					is_po and strfind(orig_deac, '⁀po' .. oldspell) and strfind(deac, '⁀po' .. newspell) or
					is_ne and strfind(orig_deac, '⁀ne' .. oldspell) and strfind(deac, '⁀ne' .. newspell) or
					is_ne and strfind(orig_deac, '⁀nepo' .. oldspell) and strfind(deac, '⁀nepo' .. newspell) then
					pron = strsub(pron, '(⁀*)ː', '%1ˑ')
				end
			end
		end

		--degemination, optional gemination
		local thisgem = gem or 'o'
		if thisgem == 'y' then
			-- leave geminates alone, convert ˑ to regular gemination; ˑ is a
			-- special gemination symbol used at prefix boundaries that we
			-- remove only when gem=n, else we convert it to regular gemination
			pron = strsub(pron, 'ˑ', 'ː')
		elseif thisgem == 'o' then
			-- make geminates optional, except for ɕӂ, also ignore left paren
			-- in (ː) sequence
			pron = strsub(pron, '()', '%1(ː)')
		elseif thisgem == 'n' then
			-- remove gemination, except for ɕӂ
			pron = strsub(pron, '()', '%1')
		else
			-- degeminate l's
			pron = strsub(pron, '(l)ː', '%1')
			-- preserve gemination between vowels immediately after the stress,
			-- special gemination symbol ˑ also remains, ɕӂ remain geminated,
			-- žn remain geminated between vowels even not immediately after
			-- the stress, n becomes optionally geminated when after but not
			-- immediately after the stress, ssk and zsk remain geminated
			-- immediately after the stress, else degeminate; we signal that
			-- gemination should remain by converting to special symbol ˑ,
			-- then removing remaining ː not after ɕӂ and left paren; do
			-- various subs repeatedly in case of multiple geminations in a word
			-- 1. immediately after the stress
			pron = strsubrep(pron, '(' .. vowels .. stress_accents .. ')ː(' .. vowels .. ')', '%1ˑ%2')
			-- 2. remaining geminate n after the stress between vowels
			pron = strsubrep(pron, '(' .. stress_accents .. '.-' .. vowels .. accents .. '?n)ː(' .. vowels .. ')', '%1(ː)%2')
			-- 3. remaining ž and n between vowels
			pron = strsubrep(pron, '(' .. vowels .. accents .. '?)ː(' .. vowels .. ')', '%1ˑ%2')
			-- 4. ž word initially before vowels (жжение, жжём, etc.)
			pron = strsubrep(pron, '(⁀ž)ː(' .. vowels .. ')', '%1ˑ%2')
			-- 5. ssk (and zsk, already normalized) immediately after the stress
			pron = strsub(pron, '(' .. vowels .. stress_accents .. '*s)ː(k)', '%1ˑ%2')
			-- 6. eliminate remaining gemination, except for ɕː and ӂː
			pron = strsub(pron, '()ː', '%1')
			-- 7. convert special gemination symbol ˑ to regular gemination
			pron = strsub(pron, 'ˑ', 'ː')
		end

		-- handle soft and hard signs, assimilative palatalization
		-- 1. insert j before i when required
		pron = strsub(pron, 'ʹi', 'ʹji')
		-- 2. insert glottal stop after hard sign if required
		pron = strsub(pron, 'ʺ()', 'ʔ%1')
		-- 3. (ь) indicating optional palatalization
		pron = strsub(pron, '%(ʹ%)', '⁽ʲ⁾')
		-- 4. assimilative palatalization of consonants when followed by
		--    front vowels or soft sign
		pron = strsub(pron, '()(*)', '%1ʲ%2')
		pron = strsub(pron, '()(*)', '%1ʲ%2')
		-- 5. remove hard and soft signs
		pron = strsub(pron, "", "")

		-- reduction of unstressed word-final -я, -е; but special-case
		-- unstressed не, же. Final -я always becomes ; final -е may
		-- become , ,  or  depending on the part of speech and
		-- the preceding consonants/vowels.
		pron = strsub(pron, '⁀', 'ə⁀')
		pron = strsub(pron, '⁀nʲe⁀', '⁀nʲi⁀')
		pron = strsub(pron, '⁀že⁀', '⁀žy⁀')
		-- function to fetch the appropriate value for ending and part of
		-- speech, handling aliases and defaults and converting 'e' to 'ê'
		-- so that the unstressed  sound is preserved
		local function fetch_e_sub(ending)
			local sub = fetch_pos_property(i, ending)
			if sub == 'e' then
				-- add TEMPCFLEX (which will be converted to CFLEX) to preserve
				-- the unstressed  sound, which will otherwise be converted
				-- to ; we do this instead of adding CFLEX directly because
				-- we later convert some instances of the resulting 'e' to
				-- 'i', and we don't want to do this when the user explicitly
				-- wrote a Cyrillic е with a circumflex on it. [NOTE that
				-- formerly applied when we added CFLEX directly: DO NOT
				-- use ê here directly because it's a single composed char,
				-- when we need the e and accent to be separate.]
				return 'e' .. TEMPCFLEX
			else
				return sub
			end
		end
		if new_final_e_code then
			-- as requested by Atitarev, final unstressed -ɛ should be unreduced
			pron = strsub(pron, 'ɛ⁀', 'ɛ' .. TEMPCFLEX .. '⁀')
			-- handle substitutions in two parts, one for vowel+j+e sequences
			-- and the other for cons+e sequences
			pron = strsub(pron, vowels_c .. '(' .. accents .. '?j)ë⁀', function(v, ac)
				 local ty = v == 'o' and 'oe' or 've'
				 return v .. ac .. fetch_e_sub(ty) .. '⁀'
			end)
			-- consonant may palatalized, geminated or optional-geminated
			pron = strsub(pron, '(.)(ʲ?*)⁀', function(ch, mod)
				 local ty = ch == 'j' and 'je' or
					strfind(ch, '') and 'hardsib' or
					strfind(ch, '') and 'softsib' or
					'softpaired'
				 return ch ..mod .. fetch_e_sub(ty) .. '⁀'
			end)
			if final_e_non_pausal then
				-- final  should become  when not followed by pause or
				-- end of utterance (in other words, followed by space plus
				-- anything but a pause symbol, or followed by tie bar).
				pron = strsub(pron, 'e' .. TEMPCFLEX .. '⁀‿', 'i⁀‿')
				if i < #word and word ~= '⁀|⁀' then
					pron = strsub(pron, 'e' .. TEMPCFLEX .. '⁀$', 'i⁀')
				end
			end
			-- now convert TEMPCFLEX to CFLEX; we use TEMPCFLEX so the previous
			-- two regexps won't affect cases where the user explicitly wrote
			-- a circumflex
			pron = strsub(pron, TEMPCFLEX, CFLEX)
		else
			-- Do the old way, which mostly converts final -е to schwa, but
			-- has highly broken retraction code for vowel +  + е (but
			-- not with accent on vowel!) before it that causes final -е in
			-- this circumstance to become , and a special hack for кое-.
			pron = strsub(pron, vowels_c .. '(*)', '%1%2ɛ')
			pron = strsub(pron, '⁀ko(' .. stress_accents .. ')jë⁀', '⁀ko%1ji⁀')
			pron = strsub(pron, '⁀', 'ə⁀')
		end

		-- retraction of е and и after цшж
		pron = strsub(pron, '(*)()', function(a, b)
			return a .. retracting end)

		--syllabify, inserting @ at syllable boundaries
		--1. insert @ after each vowel
		pron = strsub(pron, '(' .. vowels .. accents .. '?)', '%1@')
		--2. eliminate word-final @
		pron = strsub(pron, '@+⁀$', '⁀')
		--3. move @ forward directly before any ‿⁀, as long as at least
		--   one consonant follows that; we will move it across ‿⁀ later
		pron = strsub(pron, '@(*)(+)', '%1@%2')
		--4. in a consonant cluster, move @ forward so it's before the
		--   last consonant
		pron = strsub(pron, '@(*)(ʲ?*‿?)', '%1@%2')
		--5. move @ backward if in the middle of a "permanent onset" cluster,
		--   e.g. sk, str, that comes before a vowel, putting the @ before
		--   the permanent onset cluster
		pron = strsub(pron, '(?)(_*)()(_*)@()(ʲ?**)', function(a, aund, b, bund, c, d)
			if perm_syl_onset or c == 'j' and strfind(b, '') then
				return '@' .. a .. aund .. b .. bund .. c .. d
			elseif perm_syl_onset then
				return a .. aund .. '@' .. b .. bund .. c .. d
			end end)
		--6. if / is present (explicit syllable boundary), remove any @
		--   (automatic boundary) and convert / to @
		if strfind(pron, '/') then
			pron = strsub(pron, '+', function(x)
				if strfind(x, '/') then
					x = strsub(x, '@', '')
					x = strsub(x, '/', '@')
				end
				return x
			end)
		end
		--7. remove @ followed by a final consonant cluster
		pron = strsub(pron, '@(+⁀)$', '%1')
		--8. remove @ preceded by an initial consonant cluster (should only
		--   happen when / is inserted by user or in цз, чж sequences)
		pron = strsub(pron, '^(⁀+)@', '%1')
		--9. make sure @ isn't directly before linking ‿⁀
		pron = strsub(pron, '@(+)', '%1@')

		-- handle word-initial unstressed o and a; note, vowels always
		-- followed by at least one char because of word-final ⁀
		-- do after syllabification because syllabification doesn't know
		-- about ɐ as a vowel
		pron = strsub(pron, '^⁀()', '⁀ɐ%1')

		--split by syllable
		local syllable = strsplit(pron, '@', true)

		--create set of 1-based syllable indexes of stressed syllables
		--(acute, grave, circumflex)
		local stress = {}
		for j = 1, #syllable do
			if strfind(syllable, stress_accents) then
				stress = "real"
			elseif strfind(syllable, CFLEX) then
				stress = "cflex"
			end
		end

		-- iterate syllable by syllable to handle stress marks, vowel allophony
		local syl_conv = {}
		for j = 1, #syllable do
			local syl = syllable

			local alnum

			--vowel allophony
			if stress then
				-- convert acute/grave/circumflex accent to appropriate
				-- IPA marker of primary/secondary/unmarked stress
				alnum = 1
				syl = strsub(syl, '(.*)́', 'ˈ%1')
				syl = strsub(syl, '(.*)̀', 'ˌ%1')
				syl = strsub(syl, CFLEX, '')
			elseif stress == "real" then
				-- special-casing written а immediately before the stress,
				-- but only for primary/secondary stress, not circumflex
				alnum = 2
			else
				alnum = 3
			end
			syl = strsub(syl, vowels_c, function(a)
				if a ~= '' then
					return allophones
				end end)
			syl_conv = syl
		end

		pron = concat(syl_conv, "")

		-- Optional (j) before ɪ, which is always unstressed; not following
		-- consonant across a joined word boundary
		pron = strsub(pron, '(⁀‿⁀)jɪ', '%1' .. TEMPSUB .. 'ɪ')
		pron = strsub(pron, '⁀jɪ', '⁀(j)ɪ')
		pron = strsub(pron, '()jɪ', "%1(j)ɪ")
		pron = strsub(pron, TEMPSUB, 'j')

		--consonant assimilative palatalization of tn/dn/sn/zn, depending on
		--whether  precedes
		pron = strsub(pron, '(?)(*)(*nʲ)', function(a, b, c)
			if a == '' then
				return a .. b .. 'ʲ' .. c
			else
				return a .. b .. '⁽ʲ⁾' .. c
			end end)

		--consonant assimilative palatalization of st/zd, depending on
		--whether  precedes
		pron = strsub(pron, '(?)(?)(*ʲ)', function(a, b, c)
			if a == '' then
				return a .. b .. 'ʲ' .. c
			else
				return a .. b .. '⁽ʲ⁾' .. c
			end end)

		--general consonant assimilative palatalization
		pron = strsubrep(pron, '()(*)(ʲ)', function(a, b, c)
			if cons_assim_palatal then
				return a .. 'ʲ' .. b .. c
			elseif cons_assim_palatal then
				return a .. '⁽ʲ⁾' .. b .. c
			else
				return a .. b .. c
			end end)

		-- further assimilation before alveolopalatals
		pron = strsub(pron, 'n(*)()', 'nʲ%1%2')

		-- optional palatal assimilation of вп, вб only word-initially
		pron = strsub(pron, '⁀(*)(*ʲ)', '⁀%1⁽ʲ⁾%2')

		-- optional palatal assimilation of бв but not in обв-
		pron = strsub(pron, 'b(*vʲ)', 'b⁽ʲ⁾%1')
		if strfind(word, '⁀o' .. accents .. '?bv') then
			-- ə in case of a word with a preceding preposition
			pron = strsub(pron, '⁀(**)b⁽ʲ⁾(*vʲ)', '⁀%1b%2')
		end

		-- palatalized labials before /j/ should be optionally palatalized
		pron = strsub(pron, '()ʲ(*j)', '%1⁽ʲ⁾%2')

		-- Word-final -лся (normally in past verb forms) should have optional
		-- palatalization. Need to rewrite as -лсьа to defeat this.
		-- FIXME: Should we move this to phonetic_subs?
		if strfind(word, 'ls⁀') then
			pron = strsub(pron, 'lsʲə⁀', 'ls⁽ʲ⁾ə⁀')
		end

		word = pron
	end

	text = concat(word, " ")

	-- Front a and u between soft consonants. If between a soft and
	-- optionally soft consonant (should only occur in that order, shouldn't
	-- ever have a or u preceded by optionally soft consonant),
	-- split the result into two. We only split into two even if there
	-- happen to be multiple optionally fronted a's and u's to avoid
	-- excessive numbers of possibilities (and it simplifies the code).
	-- 1. First, temporarily add soft symbol to inherently soft consonants.
	text = strsub(text, '()', '%1ʲ')
	-- 2. Handle case of  between two soft consonants
	text = strsubrep(text, '(ʲ*)()(?.ʲ)', function(a, b, c)
		return a .. fronting .. c end)
	-- 3. Handle  between soft consonant and optional j, which is still fronted
	text = strsubrep(text, '(ʲ*)()(?%(jʲ%))', function(a, b, c)
			return a .. fronting .. c end)
	-- 4. Handle case of  between soft and optionally soft consonant
	if strfind(text, 'ʲ*?.⁽ʲ⁾') then
		local opt_hard = strsub(text, '(ʲ*)()(?.)⁽ʲ⁾', '%1%2%3')
		local opt_soft = strsub(text, '(ʲ*)()(?.)⁽ʲ⁾', function(a, b, c)
			return a .. fronting .. c .. 'ʲ' end)
		text = { opt_hard, opt_soft }
	else
		text = { text }
	end

	for i, pronunciation in ipairs(text) do
		-- 5. Undo addition of soft symbol to inherently soft consonants.
		pronunciation = strsub(pronunciation, '()ʲ', '%1')

		-- convert special symbols to IPA
		pronunciation = strsub(pronunciation, 'ʲ', translit_conv_j)
		pronunciation = strsub(pronunciation, '', translit_conv)

		-- Assimilation involving hiatus of ɐ and ə
		pronunciation = strsub(pronunciation, 'ə(*)', 'ɐ%1ɐ')

		-- Use ɫ for dark l
		pronunciation = strsub(pronunciation, 'l()', 'ɫ%1')

		-- eliminate ⁀ symbol at word boundaries
		-- eliminate _ symbol that prevents assimilations
		-- eliminate pseudoconsonant at beginning of suffixes or end of prefixes
		text = strsub(pronunciation, '', '')
	end

	return text
end

--- Normalize one input text into the word-delimited, transliterated form
-- that the rest of this module consumes.
--
-- Steps visible in the body: lowercase the text, map ASCII accent shortcuts
-- to combining marks, transliterate via m_ru_translit.tr_after_fixes and
-- decompose, turn punctuation into " | " separators, build the gem/pos
-- tables, add stress marks to stressable single-syllable words, join
-- accentless words to their neighbours with ‿, and wrap every word in ⁀.
--
-- NOTE(review): several patterns and index expressions in this function look
-- truncated in this copy (empty patterns such as "", expressions such as
-- `accentless]`, comparisons of `word` itself against strings), as if
-- bracketed text had been lost. Confirm against the canonical module before
-- relying on the exact behaviour described by the code below.
--
-- @param t     text to normalize
-- @param gem_  value inserted into the returned gem table for every chunk
--              other than "|"
-- @param pos_  value inserted into the returned pos table for every chunk
--              other than "|"
-- @return the normalized text, the gem table and the pos table
local function normalizar(t, gem_, pos_)
	t = strlower(t)
	
	-- NOTE(review): the pattern is empty as written; judging by the error
	-- message it is presumably meant to detect non-Cyrillic input -- confirm.
	if strfind(t, "") then
		error("El título o la ayuda deben estar en CIRILICO!")
	end
	--[[
	if strfind(t, "ч") then
		track("?")
	end
	if strfind(t, "ч") then
		track("??")
	end
	if strfind(t, CFLEX) then
		track("circun")
	end
	if strfind(t, DUBGR) then
		track("doble tilde?")
	end
	]]--

	-- ASCII shortcuts for combining marks; note that a double grave (typed or
	-- produced from ``) ends up as a circumflex because of the last line
	t = strsub(t, "``", DUBGR)
	t = strsub(t, "`", GR)
	t = strsub(t, "@", DOTABOVE)
	t = strsub(t, "%^", CFLEX)
	t = strsub(t, DUBGR, CFLEX)

	-- translit doesn't always convert э to ɛ (depends on whether a consonant
	-- precedes), so do it ourselves before translit
	t = strsub(t, 'э', 'ɛ')
	-- vowel + йе should have double jj, but the translit module will translit
	-- it the same as vowel + е, so do it ourselves before translit
	-- NOTE(review): both capture groups look incomplete as written -- confirm.
	t = strsub(t, '(' .. opt_accent .. ')й()',
		'%1йй%2')
	-- transliterate and decompose Latin vowels with accents, recomposing
	-- certain key combinations; don't include accent on monosyllabic ё, so
	-- that we end up without an accent on such words. NOTE: Not clear we
	-- need to be decomposing like this any more, although it is still
	-- useful if the user supplies Latin text, which we allow (although
	-- undocumented).
	t = decompose(m_ru_translit.tr_after_fixes(t))

	-- handle old ě (e.g. сѣдло́), ǒ (e.g. сѣ̈дла) and ǫ (e.g. ея̈)
	t = t:gsub("ě", "e")
		:gsub("ǒ", "o")
		:gsub("ǫ", "o")
	-- handle sequences of accents (esp from ё with secondary/tertiary stress)
	t = strsub(t, accents .. '+(' .. accents .. ')', '%1')
	
	t = strsubrep(t, PUNTUACION, " | ") -- turn whatever delimits fragments into IPA foot boundaries |
	t = strsubrep(t, PUNTUACION_EXTRA, "") -- drop any punctuation that is still left
	
	-- we need to insert a pseudoconsonant when the term is a prefix or suffix so that the pronunciation is generated correctly
	-- NOTE(review): as written these two patterns match at the start/end of
	-- every text, not only next to a hyphen -- confirm the intended pattern.
	t = strsub(t, '^%s*', PSEUDOCONS)
	t = strsub(t, '%s*$', PSEUDOCONS)
	
	t = strsubrep(t, "", " ") -- hyphens become spaces (austro-húngaro, franco-italiano); NOTE(review): pattern is empty as written
	t = strsubrep(t, "%s*|%s*|%s*", " | ") -- finally, remove the surplus bars and spaces
    t = strsubrep(t, "%s+", " ")
	t = strstrip(t, "+")
	
	-- Build the gem/pos tables: a "|" chunk gets the fixed values "o"/"def",
	-- any other chunk gets the caller-supplied gem_/pos_. k is incremented
	-- for every non-"|" chunk but is not read anywhere in this function.
	local gem, pos = {}, {}
	local k = 1
	for p in strmatchit(t, "+") do
		if p == "|" then
			insert(gem, "o")
			insert(pos, "def")
		else
			insert(gem, gem_)
			insert(pos, pos_)
			k = k + 1
		end
	end
	
	-- Add primary stress to single-syllable words preceded or followed by
	-- unstressed particle or preposition. Add "tertiary" stress to remaining
	-- single-syllable words that aren't a particle, preposition, prefix or
	-- suffix and don't already bear an accent (including force-reduction
	-- accents, i.e. dot-above/dot-below); "tertiary stress" means a vowel is
	-- treated as stressed for the purposes of vowel reduction but isn't
	-- marked with a primary or secondary stress marker; we repurpose a
	-- circumflex for this purpose. We need to preserve the distinction
	-- between spaces and hyphens because (1) we only recognize certain
	-- post-accentless particles following a hyphen (to distinguish e.g.
	-- 'то' from '-то'); (2) we only recognize certain pre-accentless
	-- particles preceding a space (to distinguish particles 'о' and 'а' from
	-- spelled letters о and а, which should not be reduced); and (3) we
	-- recognize hyphens for the purpose of marking unstressed prefixes and
	-- suffixes.
	-- NOTE(review): the loops below compare and assign `word` itself where an
	-- element of the split list is presumably intended -- confirm.
	local word = strsplit(t, "(+)")
	for i = 1, #word do
		-- check for single-syllable words that need a stress; they must meet
		-- the following conditions:
		-- 1. must not be an accentless word, which is any of the following:
		--         1a. in the "pre" class, or
		if not word == "|" and not (accentless] or
				-- 1b. in the "prespace" class if followed by space and another word, or
				i < #word - 1 and accentless] and word == " " or
				-- 1c. in the "post" class if preceded by another word and
				--     not followed by a hyphen (this is because words like
				--     ка and же are also used for spelling initialisms), or
				i > 2 and accentless] and word ~= "-" or
				-- 1d. in the "posthyphen" class preceded by a hyphen and another word
				--     (and not followed by a hyphen, see 1c);
				i > 2 and accentless] and word == "-" and word ~= "-") and
		-- 2. must be one syllable;
			strlen(strsub(word, '', '')) == 1 and
		-- 3. must not have any accents (including dot-above, forcing reduction);
			not strfind(word, accents) and
		-- 4. must not be a prefix or suffix, identified by a preceding or trailing hyphen, i.e. one of the following:
		--         4a. utterance-initial preceded by a hyphen, or
			not (i == 3 and word == "-" and word == "" or
			    -- 4b. non-utterance-initial preceded by a hyphen, or
				i >= 3 and word == " -" or
			    -- 4c. utterance-final followed by a hyphen, or
				i == #word - 2 and word == "-" and word == "" or
			    -- 4d. non-utterance-final followed by a hyphen;
				i <= #word - 2 and word == "- ") then

		-- OK, we have a stressable single-syllable word; either add primary
		-- or tertiary stress:
		-- 1. add primary stress if preceded or followed by an accentless word,
			if (i > 2 and accentless] or
				i > 2 and word == " " and accentless] or
				i < #word - 1 and accentless] and word ~= "-" or
				i < #word - 1 and word == "-" and accentless] and word ~= "-") then
				word = strsub(word, vowels_c, '%1' .. AC)
		-- 2. else add tertiary stress
			else
				word = strsub(word, vowels_c, '%1' .. CFLEX)
			end
		end
	end
	
	-- make unaccented prepositions and particles liaise with the following or
	-- preceding word; in the process, fix up number of elements in gem/pos
	-- tables so there's a single element for the combined word
	local real_word_index = 0
	for i = 1, #word do
		-- odd positions that are not "|" count as real words for the purpose
		-- of indexing into gem/pos
		if (i % 2) == 1 and word ~= "|" then
			real_word_index = real_word_index + 1
		end
		if i < #word - 1 and (accentless] or accentless] and word == " ") and
			-- don't add ‿ onto the end of a prefix; a prefix is a word followed by a hyphen that is in turn
			-- followed by a space or end of terms; note that ends of terms after a hyphen are marked by a blank
			-- string due to the way capturing_split() works
			not (word == "-" and (word == " " or word == "" and i == #word - 2)) then
			word = '‿'
			remove(gem, real_word_index)
			remove(pos, real_word_index)
		elseif i > 2 and (accentless] and word ~= "-" or
				accentless] and word == "-" and word ~= "-") then
			word = '‿'
			-- for unaccented words that liaise with the preceding word,
			-- remove the gemination spec corresponding to the unaccented word
			-- because the gemination in question is almost certainly in the
			-- preceding word, but remove the POS spec corresponding to the
			-- preceding word because it's the final -е of the unaccented word
			-- that the POS will refer to
			remove(gem, real_word_index)
			remove(pos, real_word_index - 1)
		end
	end
	
	t = concat(word, "")
	
	-- add a ⁀ at the beginning and end of every word and at close juncture
	-- boundaries; we will remove this later but it makes it easier to do
	-- word-beginning and word-end rsubs
	t = strsub(t, ' ', '⁀ ⁀')
	t = strstrip(t)
	t = '⁀' .. t .. '⁀'
	t = strsub(t, '‿', '⁀‿⁀')
	
	return t, gem, pos
end

--- Produce every pronunciation variant for one input text.
--
-- The text is normalized with normalizar(), optionally split into two
-- spelling variants when a zž/žž sequence is present (plain and palatalized,
-- the latter notated internally as ӂӂ), and each variant is converted with
-- ru_ipa_main(). Every resulting pronunciation is HTML-encoded.
--
-- @param text            input text
-- @param adj             passed unchanged to ru_ipa_main
-- @param gem             passed to normalizar; the table it returns is then
--                        handed to ru_ipa_main
-- @param pos             passed to normalizar; the table it returns is then
--                        handed to ru_ipa_main
-- @param zhpal           'n' disables the palatalized zž/žž variant; 'y'
--                        skips the prefix-boundary protection; any other
--                        value applies the default behaviour
-- @param is_transformed  when falsy, text is first run through
--                        m_ru_translit.apply_tr_fixes
-- @return a one-element table whose only item is the list of pronunciations
local function generar_pron(text, adj, gem, pos, zhpal, is_transformed)
	if not is_transformed then
		-- only the second return value (the transformed text) is used here
		local _, transformed_text = m_ru_translit.apply_tr_fixes(text)
		text = transformed_text
	end

	text, gem, pos = normalizar(text, gem, pos)

	-- At this point, the spelling has been normalized. Now we need to handle
	-- any pronunciation-spelling variants (particularly, handling зж and жж,
	-- which have both non-palatalized and palatalized variants except at
	-- prefix boundaries) and convert each variant to IPA.

	local alltext

	-- If zž or žž occur not at a prefix boundary, then generate two variants,
	-- the first non-palatalized and the second palatalized (potentially with
	-- nearby vowels affected appropriately for the palatalization
	-- difference). But don't do this if zhpal=n.
	if zhpal == 'n' or not strfind(text, 'ž') then
		-- speed up the majority of cases where ž doesn't occur; any zž/žž
		-- sequence necessarily contains ž, so this shortcut loses nothing
		alltext = {text}
	else
		-- First, go through and mark all prefix boundaries where a ž directly
		-- follows the prefix by inserting a ˑ between prefix and ž. This
		-- prevents us from generating the palatalized variant (notated
		-- internally as ӂӂ). Don't do this if zhpal=y, which defeats this
		-- check.
		if zhpal ~= 'y' then
			for _, gempref in ipairs(geminate_pref) do
				-- NOTE(review): both locals are assigned the whole entry as
				-- written; they presumably should each read one field of
				-- gempref (subscripts appear to be missing) -- confirm
				-- against the definition of geminate_pref.
				local origspell = gempref
				local is_zh = gempref
				if is_zh then
					-- allow all vowels to have accents following them
					origspell = strsub(origspell, vowels_c, '%1' .. accents .. '?')
					text = strsub(text, '(⁀' .. origspell .. ')ž', '%1ˑž')
					text = strsub(text, '(⁀po' .. origspell .. ')ž', '%1ˑž')
					text = strsub(text, '(⁀ne' .. origspell .. ')ž', '%1ˑž')
					text = strsub(text, '(⁀nepo' .. origspell .. ')ž', '%1ˑž')
				end
			end
		end
		-- Then, if zž or žž are present (which will exclude prefix boundaries
		-- because a ˑ marker will intervene), generate the two possibilities,
		-- else generate only one. The pattern must match the two-character
		-- sequence: matching a bare ž would turn every single ž into ӂӂ.
		-- (Assumes strfind/strsub accept non-ASCII characters inside a
		-- character class -- TODO confirm for Módulo:String.)
		local variants
		if strfind(text, '[zž]ž') then
			variants = {text, strsub(text, '[zž]ž', 'ӂӂ')}
		else
			variants = {text}
		end
		-- Finally, remove the ˑ marker.
		alltext = {}
		for _, variant in ipairs(variants) do
			insert(alltext, strsub(variant, 'ˑ', ''))
		end
	end

	-- Now generate the pronunciation(s) for each of the spelling variants
	-- we generate above. (In some cases there are multiple pronunciation
	-- variants generated, e.g. in the sequence palatalized consonant + a/u +
	-- optionally palatalized consonant.)
	local allpron = {}
	for _, t in ipairs(alltext) do
		local thispron = ru_ipa_main(t, adj, gem, pos)
		for _, pron in ipairs(thispron) do
			insert(allpron, strhtml(pron))
		end
	end

	return {allpron}
end

--- Exported entry point: fill in pronunciation data from a page title and
-- the caller's argument table.
--
-- Visible flow: fall back to the title when #args < 1, parse ";"-separated
-- flags, run the text through m_ru_translit.apply_tr_fixes and
-- tr_after_fixes, call generar_pron, and finally reduce a pronunciation to
-- the part after the last ˈ of its last word.
--
-- NOTE(review): many expressions in this function look truncated in this
-- copy (`pron_abc]`, repeated `args = ...` assignments, `#args < 1 and
-- #args < 1`, `args and args`), as if table subscripts had been lost.
-- Confirm against the canonical module before relying on the details.
--
-- @param titulo  page title; used as the fallback text and checked for
--                whitespace
-- @param args    argument table; reassigned in several places as written
-- @return args
function export.procesar_pron_args(titulo, args)
	local tit = titulo
	local vino_ayuda, x

	-- vino_ayuda records that the caller supplied something (#args >= 1)
	if #args < 1 then
		args = tit
	else
		vino_ayuda = true
	end

	if #args < 1 and #args < 1 then
		x = pron_abc]
		if x then
			args = x
			args = x
		end

		local A = #args
		local j = 1 -- index into the help (respelling) arguments
		local k = 1 -- number of pronunciations inserted (at most 9)
		while k <= 9 and j <= A do
			-- per-iteration options collected from the ";"-separated flags
			local gem, cg, zhpal, adjno, shtono = {}, {}, {}, false, false
			local flags = args and strsplit(args, ";") or {}
			for _,flag in ipairs(flags) do
				if flag == "gemsí" or flag == "gemsi" or flag == "gems" then
					insert(gem, "y")
				elseif flag == "gemno" or flag == "gemn" then
					insert(gem, "n")
				elseif flag == "gemop" then
					insert(gem, "o")
				elseif flag == "zhpalno" or flag == "zhpaln" then
					insert(zhpal, "n")
				elseif flag == "zhpalsí" or flag == "zhpalsi" or flag == "zhpals" then
					insert(zhpal, "y")
				elseif flag == "adjno" or flag == "adjn" then
					adjno = true
				elseif flag == "shtono" or flag == "shton" then
					shtono = true
				elseif pos_properties then
					-- NOTE(review): presumably meant to test whether flag is
					-- a key of pos_properties -- confirm
					insert(cg , flag)
				end
			end
			local origtext, transformed_text = m_ru_translit.apply_tr_fixes(args,
			adjno, shtono)
			args =  m_ru_translit.tr_after_fixes(transformed_text)
			if vino_ayuda then
				args = {origtext}
			end
			-- the text is already transformed, hence the trailing `true`
			local fone = generar_pron(transformed_text, nil, gem, cg, zhpal, true)
			for i,_ in ipairs(fone) do
				insert(args, fone)
				k = k + 1
				if k > 9 then
					break
				end
			end
			j = j + 1
		end
	end

	-- tiene_espacios is computed but not read anywhere in the visible code
	local tiene_espacios = strfind(tit, "%s")
	if args and args then
		local rim = strsub(args, ".*%s(+)$", "%1") -- keep only the last word
		-- keep what follows the last ˈ
		rim = strsub(rim, "^.*ˈ(.-)$", "%1")
		args = strsub(rim, ".-".."(.*"..")".."$", "%1")
	end

	return args
end


return export
Módulo:generar-pron/ru

Separar Módulo:generar-pron/ru en sílabas

Listado de errores ortográficos de Módulo:generar-pron/ru

Aquí puedes ir al link que te lleva a una lista con los principales errores ortográficos, de forma que estés atento y sepas el modo de no incurrir en ellos. Sin más demora, aquí tienes el listado de errores ortográficos de Módulo:generar-pron/ru

Enciclo

Wikious

Sapientia

Scientia

Boobota

Anandapedia

Sagapedia

Wikithot