local export = {}
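-- ɟ is used internally as a placeholder for consonantal "y" (and "ll" in the
-- Latin American variant); export.show expands it to ɟ͡ʝ just before returning.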
--- Converts a Spanish word from its spelling to an IPA transcription string.
--
-- The word goes through a fixed sequence of mw.ustring.gsub rewrites:
-- "y"/"x"/"c"/"g" handling, letter-to-phoneme mapping, trill detection,
-- syllable division, diphthongs, stress placement, per-syllable clean-up,
-- secondary stress and, when `phonetic` is truthy, allophone rules.
--
-- NOTE(review): in this copy many patterns, table constructors and index
-- expressions look truncated (V, W and C are empty strings, several gsub
-- patterns are "" or contain only empty "()" captures, table entries have no
-- key, and `syllables`/`word.args` are used without an index). It looks like
-- bracketed text was lost; confirm against the canonical module before
-- relying on any rule below.
--
-- @param word string to transcribe, or a table carrying an `args` field
--        (presumably a Scribunto frame; verify against callers). When nil, the
--        current page title is used instead.
-- @param LatinAmerica truthy selects the Latin American outcomes: "s" rather
--        than "θ" for soft "c", "ɟ" rather than "ʎ" for "ll", and "z" kept
--        (later turned into "s") rather than "θ".
-- @param phonetic truthy enables nasalisation and the phonetic
--        (allophone-level) rules.
-- @param do_debug when equal to 'yes', the intermediate snapshots collected
--        in `debug` are appended to the returned string.
-- @return the transcription string.
function export.show(word, LatinAmerica, phonetic, do_debug)
-- Snapshots of `word` taken between stages. This local shadows Lua's
-- standard `debug` library inside the function.
local debug = {}
if type(word) == 'table' then
-- NOTE(review): both assignments read the whole `args` table, so `word`
-- would still be a table when lowercased below; presumably specific
-- argument indexes were intended here. Confirm.
do_debug = word.args
word = word.args
end
-- Only referenced by a commented-out mw.log call further down.
local orig_word = word
-- Fall back to the current page title when no word was supplied.
word = mw.ustring.lower(word or mw.title.getCurrentTitle().text)
-- NOTE(review): empty pattern and empty replacement; presumably this once
-- stripped a set of unwanted characters. Confirm.
word = mw.ustring.gsub(word, "", "")
table.insert(debug, word)
-- Pattern fragments spliced into the rules below.
-- NOTE(review): all three are empty strings here; presumably character
-- classes for vowels, a second class (W) and consonants. Confirm.
local V = "" -- vowel
local W = ""
local C = "" -- consonant
--determining whether "y" is a consonant or a vowel + diphthongs, "-mente" suffix
word = mw.ustring.gsub(word, "y(" .. C .. ")", "i%1")
word = mw.ustring.gsub(word, "y(" .. V .. ")", "ɟ%1") -- not the real sound
word = mw.ustring.gsub(word, "hi(" .. V .. ")", "ɟ%1")
-- Word-final "y" becomes the placeholder "ï", turned back into "i" at the end.
word = mw.ustring.gsub(word, "y$", "ï")
-- Give the "-mente" suffix a written accent so the stress stage marks it.
word = mw.ustring.gsub(word, "mente$", "ménte")
--x
word = mw.ustring.gsub(word, "x", "ks")
--"c" & "g" before "i" and "e" and all that stuff
-- NOTE(review): the "()" groups below are empty in this copy (as written,
-- "()" is a position capture); presumably they held the following vowel.
word = mw.ustring.gsub(word, "c()", (LatinAmerica and 's' or 'θ') .. "%1")
word = mw.ustring.gsub(word, "gü()", "ɡw%1")
word = mw.ustring.gsub(word, "ü", "")
word = mw.ustring.gsub(word, "gu()", "ɡ%1")
word = mw.ustring.gsub(word, "g()", "x%1")
table.insert(debug, word)
--alphabet-to-phoneme
-- "qu" is first reduced to "c"; the replacement table below lists 'k' among
-- its values (its keys are missing in this copy).
word = mw.ustring.gsub(word, "qu", "c")
word = mw.ustring.gsub(word, "ch", "t͡ʃ")
word = mw.ustring.gsub(word, '',
--='ɡ': U+0067 LATIN SMALL LETTER G → U+0261 LATIN SMALL LETTER SCRIPT G
{='k', ='ɡ', ='x', ='ɲ', ='ɾ', ='b', ='ʃ'})
-- trill in #r, lr, nr, rr
-- Counts callback invocations so the first match can be told apart.
local match_count = 0
word = mw.ustring.gsub(
word,
'(.?)ɾ(.?)',
function (before, after)
match_count = match_count + 1
-- mw.log(word, before, after)
-- `and` binds tighter than `or`: the condition is
-- (first match with nothing before) or before is 'l' or before is 'n'
-- or (after is non-empty and found in the consonant list).
-- `after` is passed to string.match as a pattern, not as plain text.
if match_count == 1 and before == '' or before == 'l' or before == 'n'
or after ~= '' and ('bdfɡklʎmnɲpstxzʃɟ'):match(after) then
return before .. 'r' .. after
elseif before == 'ɾ' then
-- Two taps in a row (from "rr") collapse into one trill.
return 'r' .. after
elseif after == 'ɾ' then
return before .. 'r'
end
-- Falling through returns nil, so gsub keeps the matched text unchanged.
end)
word = mw.ustring.gsub(word, 'n()', 'm%1')
word = mw.ustring.gsub(word, 'll', LatinAmerica and 'ɟ' or 'ʎ')
word = mw.ustring.gsub(word, 'z', LatinAmerica and 'z' or 'θ') -- not the real LatAm sound
table.insert(debug, word)
--syllable division
-- Each rule runs twice; presumably because one gsub pass cannot reuse a
-- character that already took part in the previous match. Confirm.
for _ = 1, 2 do
word = mw.ustring.gsub(word,
"(" .. V .. ")(" .. C .. W .. "?" .. V .. ")",
"%1.%2")
end
for _ = 1, 2 do
word = mw.ustring.gsub(word,
"(" .. V .. C .. ")(" .. C .. V .. ")",
"%1.%2")
end
for _ = 1, 2 do
word = mw.ustring.gsub(word,
"(" .. V .. C .. ")(" .. C .. C .. V .. ")",
"%1.%2")
end
-- Adjustments to the boundaries inserted above.
-- NOTE(review): the capture groups in these patterns are empty in this copy.
word = mw.ustring.gsub(word, "()%.()", ".%1%2")
-- Moves an "s" from the start of a syllable to the end of the previous one.
word = mw.ustring.gsub(word, "(" .. C .. ")%.s(" .. C .. ")", "%1s.%2")
word = mw.ustring.gsub(word, "()()", "%1.%2")
word = mw.ustring.gsub(word, "()()", "%1.%2")
word = mw.ustring.gsub(word, "()()", "%1.%2")
table.insert(debug, word)
--diphthongs
-- "i"/"u", optionally followed by "h", become the glides "j"/"w".
word = mw.ustring.gsub(word, 'ih?()', 'j%1')
word = mw.ustring.gsub(word, 'uh?()', 'w%1')
table.insert(debug, word)
--accentuation
-- Split on the "." boundaries; a stress mark "ˈ" is then prefixed.
-- NOTE(review): `syllables` is used without an index below, and the find
-- patterns are empty; presumably individual syllables were addressed and
-- the patterns tested for accented vowels / word endings. Confirm.
local syllables = mw.text.split(word, "%.")
if mw.ustring.find(word, "") then
for i = 1, #syllables do
if mw.ustring.find(syllables, "") then
syllables = "ˈ"..syllables
end
end
else
if mw.ustring.find(word, "$") then
syllables = "ˈ" .. syllables
else
if #syllables > 1 then
syllables = "ˈ" .. syllables
end
end
end
table.insert(debug, word)
--syllables nasalized if ending with "n", voiceless consonants in syllable-final position to voiced
-- Lookup tables handed to gsub as replacement maps (keys missing in this copy).
local remove_accent = { = 'a', = 'e', = 'i', = 'o', = 'u'}
local nasalize = { = 'ã', = 'ẽ', = 'ĩ', = 'õ', = 'ũ' }
for i = 1, #syllables do
syllables = mw.ustring.gsub(syllables, '', remove_accent)
-- Nasalisation is applied only when `phonetic` is truthy.
if phonetic and mw.ustring.find(syllables, '' .. C .. '?$') then
syllables = mw.ustring.gsub(syllables, '', nasalize)
end
syllables = mw.ustring.gsub(syllables, '$', { = 'b', = 'd', = 'ɡ' })
end
-- Join the pieces back together with no separator.
word = table.concat(syllables)
--real sound of LatAm Z
word = mw.ustring.gsub(word, 'z', 's')
--secondary stress
-- When several "ˈ" marks are present, the earlier ones are rewritten to "ˌ".
word = mw.ustring.gsub(word, 'ˈ(.+)ˈ', 'ˌ%1ˈ')
word = mw.ustring.gsub(word, 'ˈ(.+)ˌ', 'ˌ%1ˌ')
word = mw.ustring.gsub(word, 'ˌ(.+)ˈ(.+)ˈ', 'ˌ%1ˌ%2ˈ')
--phonetic transcription
if phonetic then
--θ, s, f before voiced consonants
-- NOTE(review): `voiced` and `r` are not referenced by any pattern visible
-- in this copy; presumably they were spliced into the patterns below.
local voiced = 'mnɲbdɟɡʎ'
local r = 'ɾr'
local tovoiced = {
= 'θ̬',
= 'z',
= 'v',
}
-- gsub callback: returns the voiced counterpart followed by the captured
-- following text. NOTE(review): `tovoiced` is concatenated without an index
-- here; presumably it was looked up by `sound`.
local function voice(sound, following)
return tovoiced..following
end
word = mw.ustring.gsub(word, '()(?)', voice)
word = mw.ustring.gsub(word, '(f)(?)', voice)
local stop_to_fricative = {='β', ='ð', ='ʝ', ='ɣ'}
local fricative_to_stop = {='b', ='d', ='ɟ', ='ɡ'}
--lots of allophones going on
-- First map through stop_to_fricative, then turn selected fricatives back
-- into stops in the callback below.
word = mw.ustring.gsub(word, '', stop_to_fricative)
word = mw.ustring.gsub(
word,
'()(?)()',
function (pos, stress, fricative)
-- Matching the character before the fricative in the pattern
-- doesn't work because sometimes there are two fricatives in
-- a row.
-- `word` still holds the string as it was before this gsub call, since the
-- assignment only happens once gsub returns. `before` is false when the
-- match starts at position 1.
local before = pos > 1 and mw.ustring.sub(word, pos - 1, pos - 1)
-- mw.log(orig_word, before, stress, fricative)
-- Stop when nothing precedes, or after a nasal (ɣ, β), or after a nasal or
-- lateral (ð, ʝ). `before` is passed to string.find as a pattern.
if not before or (fricative == 'ɣ' or fricative == 'β') and ('mnɲ'):find(before)
or (fricative == 'ð' or fricative == 'ʝ') and ('lʎmnɲ'):find(before) then
return stress .. fricative_to_stop
end -- else no change
end)
word = mw.ustring.gsub(word, '', {='t̪', ='d̪'})
--nasal assimilation before consonants
-- Variants of "n" named after the place of articulation they assimilate to.
local labiodental, dentialveolar, dental, alveolopalatal, palatal, velar =
'ɱ', 'n̪', 'n̟', 'nʲ', 'ɲ', 'ŋ'
local nasal_assimilation = {
= labiodental,
= dentialveolar, = dentialveolar,
= dental,
= alveolopalatal,
= palatal, = palatal,
= velar, = velar, = velar,
}
word = mw.ustring.gsub(
word,
'n(?)(.)',
function (stress, following)
-- Falls back to a plain 'n' when the left operand of `or` is falsy.
return (nasal_assimilation or 'n') .. stress .. following
end)
--lateral assimilation before consonants
word = mw.ustring.gsub(
word,
'l(?)(.)',
function (stress, following)
-- Default is an unmodified 'l'; only the cases below change it.
local l = 'l'
if following == 't' or following == 'd' then -- dentialveolar
l = 'l̪'
elseif following == 'θ' then -- dental
l = 'l̟'
elseif following == 'ʃ' then -- alveolopalatal
l = 'lʲ'
end
return l .. stress .. following
end)
--semivowels
-- Appends the combining mark U+032F to whatever the pattern captures.
word = mw.ustring.gsub(word, '()', '%1̯')
word = mw.ustring.gsub(word, '()', '%1̯')
end
table.insert(debug, word)
-- Final clean-up: drop "h" and expand the internal placeholders.
word = mw.ustring.gsub(word, 'h', '') --silent "h"
word = mw.ustring.gsub(word, 'ɟ', 'ɟ͡ʝ') --fake "y" to real "y"
word = mw.ustring.gsub(word, 'ï', 'i') --fake "y$" to real "y$"
-- With do_debug == 'yes' the collected snapshots are appended, unseparated.
if do_debug == 'yes' then
return word .. table.concat(debug, "")
else
return word
end
end
--- Entry point for the Latin American transcription.
-- Hands `frame` to export.show with the LatinAmerica flag set; the phonetic
-- and do_debug arguments are not passed.
-- @param frame forwarded unchanged as the `word` argument of export.show.
-- @return whatever export.show returns.
function export.LatinAmerica(frame)
	local latin_america = true
	return export.show(frame, latin_america)
end
--- Entry point for the phonetic transcription without the Latin American flag.
-- Hands `frame` to export.show with LatinAmerica = false and phonetic = true;
-- the do_debug argument is not passed.
-- @param frame forwarded unchanged as the `word` argument of export.show.
-- @return whatever export.show returns.
function export.phonetic(frame)
	local latin_america, phonetic = false, true
	return export.show(frame, latin_america, phonetic)
end
--- Entry point for the phonetic Latin American transcription.
-- Hands `frame` to export.show with both the LatinAmerica and phonetic flags
-- set; the do_debug argument is not passed.
-- @param frame forwarded unchanged as the `word` argument of export.show.
-- @return whatever export.show returns.
function export.phoneticLatinAmerica(frame)
	local latin_america, phonetic = true, true
	return export.show(frame, latin_america, phonetic)
end
return export