local export = {}
local m_izh = require("Module:izh")
local m_IPA = require("Module:IPA")
local gsub_lookahead = require("Module:gsub lookahead")
local lang = m_izh.lang
local U = mw.ustring.char
--- <<< DATA START >>> ---
-- IPA length marks (long and half-long).
local LONG = "ː"
local SEMILONG = "ˑ"
-- IPA primary and secondary stress marks.
local STRESS_PRIMARY = "ˈ"
local STRESS_SECONDARY = "ˌ"
-- Combining diacritics: U+0308 (diaeresis), U+032F (inverted breve below),
-- U+0361 (double inverted breve, used below as the affricate tie) and
-- U+0306 (breve).
local FRONTAL = U(0x0308)
local NONSYLLABIC = U(0x032F)
local TIE = U(0x0361)
local VERYSHORT = U(0x0306)
-- IPA palatalization modifier letter.
local PALATAL = "ʲ"
local IPA_VOWELS = "ɑeiouyæøɨə"
-- Private Use Area code points used as internal placeholders while the text
-- is being processed.
local AUTO_STRESS = U(0xEEEE)
local VIRTUAL_BREAK = U(0xEEEC)
local J_PALATALIZE = U(0xEEEA)
local REALLY_JUST_PALATAL = U(0xEEE8)
local VIRTUAL_BREAK_UNGEMINATE = U(0xEEE6)
-- Consonant inventories taken from Module:izh and extended with extra symbols.
local IPA_CONSONANTS = m_izh.consonants .. "ɫʃʒ"
local IPA_CONSONANTS_GEMINATABLE = m_izh.consonants_geminatable .. "ɫ" .. "z" .. "ž"
-- ASCII marker characters matched in the text by the functions below.
local PALATALIZE = "'"
local UNGEMINATE = "/"
-- Pattern fragments concatenated into larger Lua patterns.
-- NOTE(review): as written these hold only the quantifier characters; if a
-- character class was meant to precede them it is not visible here — confirm.
local ANY_DIACRITICS = "*"
local SOME_DIACRITICS = "+"
--- <<< DATA END >>> ---
--- <<< COMMON START >>> ---
-- Splits `word` into a sequence of syllable strings and returns that table.
-- keep_sep_symbols: when `true`, a separator character is kept as the start of
-- the next syllable; when a string, it is kept only if `keep_sep_symbols:find`
-- matches that character; for any other value the separator is dropped.
-- NOTE(review): several patterns below look truncated in this copy (e.g.
-- `consonant` is "" and one branch tests just "^") — confirm against the
-- canonical module before relying on the branch descriptions.
local function split_syllables(word, keep_sep_symbols)
local consonant = ""
local consonant_diacritic = ""
local vowel = m_izh.vowel
local consonants_geminatable = IPA_CONSONANTS_GEMINATABLE
local sep_symbols = m_izh.sep_symbols .. VIRTUAL_BREAK
local vowel_sequences = m_izh.vowel_sequences
-- res collects finished syllables; syllable is the one being built.
local res = {}
local syllable = ""
local pos = 1
-- true once the syllable being built already contains its vowel
local found_vowel = false
-- NOTE(review): #word is a byte length, while pos is used as a character
-- index for mw.ustring.sub/find — confirm this is intended for non-ASCII input.
while pos <= #word do
if mw.ustring.find(mw.ustring.lower(word), "^" .. consonant .. consonant_diacritic .. "**" .. vowel, pos) then
-- CV: end current syllable if we have found a vowel
if found_vowel then
if #syllable > 0 then table.insert(res, syllable) end
found_vowel = false
syllable = ""
end
syllable = syllable .. mw.ustring.sub(word, pos, pos)
pos = pos + 1
elseif mw.ustring.find(mw.ustring.lower(word), "^" .. consonant, pos) then
if mw.ustring.find(mw.ustring.lower(word), "^" .. consonant .. TIE .. consonant, pos) then
-- /t͡s/
-- the three characters (consonant, tie, consonant) start a new syllable
if found_vowel and #syllable > 0 then table.insert(res, syllable) end
syllable = mw.ustring.sub(word, pos, pos + 2)
pos = pos + 3
found_vowel = false
else
-- C: continue
syllable = syllable .. mw.ustring.sub(word, pos, pos)
pos = pos + 1
end
elseif mw.ustring.find(mw.ustring.lower(word), "^" .. vowel, pos) then
if found_vowel then
-- already found a vowel, end current syllable
if #syllable > 0 then
table.insert(res, syllable)
end
syllable = ""
end
found_vowel = true
-- check for diphthongs or long vowels
local seq_ok = false
local search_from = mw.ustring.gsub(mw.ustring.lower(mw.ustring.sub(word, pos)), "", "")
for k, v in pairs(vowel_sequences) do
if mw.ustring.find(search_from, "^" .. v) then
seq_ok = true
break
end
end
if seq_ok then
-- consume the whole matched vowel run as one nucleus
local total = mw.ustring.len(select(3, mw.ustring.find(mw.ustring.lower(word), "^(" .. vowel .. "*" .. vowel .. ")", pos)))
syllable = syllable .. mw.ustring.sub(word, pos, pos + total - 1)
pos = pos + total
else
syllable = syllable .. mw.ustring.sub(word, pos, pos)
pos = pos + 1
end
elseif mw.ustring.find(mw.ustring.lower(word), "^", pos) then
-- separates syllables
if #syllable > 0 then
table.insert(res, syllable)
end
local sepchar = mw.ustring.sub(word, pos, pos)
-- the next syllable starts with the separator only if the caller asked to keep it
syllable = (keep_sep_symbols == true or (type(keep_sep_symbols) == "string" and keep_sep_symbols:find(mw.ustring.sub(word, pos, pos)))) and sepchar or ""
pos = pos + 1
found_vowel = false
else
-- ?: continue
syllable = syllable .. mw.ustring.sub(word, pos, pos)
pos = pos + 1
end
end
-- flush the trailing syllable
if #syllable > 0 then
table.insert(res, syllable)
end
return res
end
-- Earliest substitution pass used by the dialect functions: rewrites "ts" as
-- the tied affricate "t͡s", and replaces an UNGEMINATE marker standing between
-- a vowel and "i" with VIRTUAL_BREAK_UNGEMINATE. Returns the rewritten text.
-- NOTE(review): the leading `()` in the second pattern is an empty (position)
-- capture as written; it looks truncated — confirm.
local function zeroth_round_of_common_replacements(text)
text = mw.ustring.gsub(text, "ts", "t͡s")
text = mw.ustring.gsub(text, "()(" .. m_izh.vowel .. ")" .. UNGEMINATE .. "i", "%1%2" .. VIRTUAL_BREAK_UNGEMINATE .. "i")
return text
end
-- Applies two table-driven substitutions to `text` and returns the result:
-- one on matches starting with "n" (producing "ŋk" / "ŋg") and one mapping
-- single matches to IPA symbols ("ɑ", "æ", "ø", "ʋ", "ɨ") or ".".
-- NOTE(review): the table keys and parts of the patterns are not visible in
-- this copy — confirm against the canonical module.
local function first_round_of_common_replacements(text)
text = mw.ustring.gsub(text, "n", {
= "ŋk",
= "ŋg",
})
text = mw.ustring.gsub(text, "", {
= "ɑ",
= "æ",
= "ø",
= "ʋ",
= "ɨ",
= ".",
-- = STRESS_SECONDARY,
})
return text
end
-- Later substitution pass; returns the rewritten text.
-- text: the string being transcribed.
-- do_palatal_repls: when truthy, also runs the table-driven replacement of
-- matches followed by PALATAL (producing "ɲ", "ɕ", "ʑ").
-- NOTE(review): table keys and capture contents are not visible in this copy
-- — confirm against the canonical module.
local function second_round_of_common_replacements(text, do_palatal_repls)
text = mw.ustring.gsub(text, "", {
= "t͡ʃ",
= "ʃ",
= "ʒ"
})
-- "h" before the captured context becomes "x"
text = mw.ustring.gsub(text, "h()", "x%1")
if do_palatal_repls then
text = mw.ustring.gsub(text, "()" .. PALATAL, {
= "ɲ",
= "ɕ",
= "ʑ"
})
end
-- a palatalized "ɫ" is written with plain "l"
text = mw.ustring.gsub(text, "ɫ" .. PALATAL, "l" .. PALATAL)
-- replace ASCII "g" with the IPA letter "ɡ"
text = mw.ustring.gsub(text, "g", "ɡ")
return text
end
-- Rewrites every "<filter>j" sequence in `text` as the captured match
-- followed by PALATAL. When the character after "j" is not the PALATALIZE
-- marker, LONG is inserted as well.
-- text: string to process.
-- filter: Lua pattern fragment placed inside the first capture group.
-- Returns the results of mw.ustring.gsub unchanged (new text, then match count).
local function automatic_palatalization(text, filter)
	local pattern = "(" .. filter .. ")j(.?)"
	local function palatalize(consonant, following)
		-- "" is truthy in Lua, so this and/or chain is a safe conditional
		local length = (following == PALATALIZE) and "" or LONG
		return consonant .. PALATAL .. length .. following
	end
	return mw.ustring.gsub(text, pattern, palatalize)
end
-- Converts explicit PALATALIZE markers in `text` into the IPA PALATAL
-- modifier and returns the result.
-- NOTE(review): the `()` captures below are empty as written and look
-- truncated — confirm against the canonical module.
local function manual_palatalization(text)
if mw.ustring.find(text, PALATALIZE) then
text = mw.ustring.gsub(text, "()" .. PALATALIZE, "%1" .. PALATAL)
-- drop any markers that were not converted above
text = mw.ustring.gsub(text, PALATALIZE, "")
-- collapse a doubled PALATAL into one
text = mw.ustring.gsub(text, PALATAL .. PALATAL, PALATAL)
end
-- copies PALATAL onto a "t" that precedes a palatalized "t"
text = mw.ustring.gsub(text, "(t)()(t" .. PALATAL .. ")", "%1" .. PALATAL .. "%2%3")
return text
end
-- Pattern strings iterated by long_vowels_and_diphthongs; each match gets
-- NONSYLLABIC appended there.
-- NOTE(review): each entry is a single vowel letter as written; a leading
-- character class may be missing from this copy — confirm.
local IPA_diphthongs = {
"i",
"u",
"y"
}
-- Marks doubled matches with LONG and appends NONSYLLABIC after every match of
-- the IPA_diphthongs patterns (each pattern is first modified to allow an
-- optional VERYSHORT before its last item). Returns the rewritten text.
-- NOTE(review): the `()` capture in the first pattern is empty as written and
-- looks truncated — confirm.
local function long_vowels_and_diphthongs(text)
text = mw.ustring.gsub(text, "()%1", "%1" .. LONG)
for _, diphthong in ipairs(IPA_diphthongs) do
local mod_diphthong
-- patterns ending in "]" need the optional VERYSHORT inserted before the
-- final bracketed item rather than before the final character
if mw.ustring.find(diphthong, "%]$") then
mod_diphthong = mw.ustring.gsub(diphthong, "(.)(%]-%])", "%1" .. VERYSHORT .. "?%2")
-- NOTE(review): this second assignment starts again from `diphthong`, so the
-- result of the line above is discarded — confirm intent.
mod_diphthong = mw.ustring.gsub(diphthong, "(%]-%])(%]-%])", "%1" .. VERYSHORT .. "?%2")
else
mod_diphthong = mw.ustring.sub(diphthong, 1, -2) .. VERYSHORT .. "?" .. mw.ustring.sub(diphthong, -1, -1)
end
text = mw.ustring.gsub(text, "(" .. mod_diphthong .. ")", "%1" .. NONSYLLABIC)
end
return text
end
-- Collapses a doubled letter into that letter followed by LONG, then moves
-- any LONG that directly precedes PALATAL to after it.
-- text: string to process.
-- Returns the rewritten string (a single value).
local function long_consonants(text)
	local lengthened = mw.ustring.gsub(text, "(%a)%1", "%1" .. LONG)
	-- the parentheses drop gsub's match count
	return (mw.ustring.gsub(lengthened, LONG .. PALATAL, PALATAL .. LONG))
end
-- Rewrites "n" plus a captured context as "m" plus that context, then "n"
-- plus a captured context as "ŋ" plus that context; returns the result.
-- NOTE(review): both patterns read "n(*)" as written, so the contexts that
-- distinguish the two rules are not visible in this copy — confirm.
local function standard_sandhi(text)
text = mw.ustring.gsub(text, "n(*)", "m%1")
text = mw.ustring.gsub(text, "n(*)", "ŋ%1")
return text
end
-- Finalizes stress marking and returns the NFC-normalized result.
-- AUTO_STRESS placeholders become "-", a "." directly after "-" is dropped,
-- every "-" becomes STRESS_SECONDARY, and STRESS_PRIMARY is put at the start
-- of the text and after every space.
local function add_primary_stress(text)
	local marked = mw.ustring.gsub(text, AUTO_STRESS, "-")
	marked = mw.ustring.gsub(marked, "-%.", "-")
	marked = mw.ustring.gsub(marked, "-", STRESS_SECONDARY)
	local per_word = mw.ustring.gsub(marked, " ", " " .. STRESS_PRIMARY)
	return mw.ustring.toNFC(STRESS_PRIMARY .. per_word)
end
-- Returns the result of matching `syllable` against a start-anchored pattern;
-- callers use it as a truthy/falsy "is stressed" test.
-- NOTE(review): the pattern is just "^" as written; the stress characters it
-- should test for are not visible in this copy — confirm.
local function is_stressed_syllable(syllable)
return mw.ustring.find(syllable, "^")
end
-- Walks `syllables` (a sequence of syllable strings), skipping the last one,
-- and counts unstressed syllables since the last stressed one; when the count
-- reaches 2 it prefixes AUTO_STRESS. Returns nothing.
-- NOTE(review): as written the inner lines test and assign `syllables` itself
-- rather than an element; the subscripts look missing from this copy — confirm.
local function add_secondary_stress(syllables)
-- number of unstressed syllables since the last stressed one
local distance = 0
for index, syllable in ipairs(syllables) do
-- the final syllable is never processed
if index == #syllables then break end
-- the first syllable always counts as stressed
local stressed = index == 1 or is_stressed_syllable(syllable)
if stressed then
distance = 0
else
distance = distance + 1
if distance == 2 then
distance = 0
if not is_stressed_syllable(syllables) then
syllables = AUTO_STRESS .. syllable
end
end
end
end
end
-- If `text` matches the guard pattern, strips the matched characters, turns
-- VIRTUAL_BREAK_UNGEMINATE back into UNGEMINATE, re-splits the text into
-- syllables (keeping separators), re-applies secondary stress and rejoins.
-- Returns the (possibly unchanged) text.
-- NOTE(review): the guard and strip patterns are "" as written; the characters
-- they target are not visible in this copy — confirm.
local function clean_virtual_break(text)
if mw.ustring.find(text, "") then
local cleaned = mw.ustring.gsub(mw.ustring.gsub(text, "", ""), VIRTUAL_BREAK_UNGEMINATE, UNGEMINATE)
local syllables = split_syllables(cleaned, true)
add_secondary_stress(syllables)
text = table.concat(syllables)
end
return text
end
-- Removes matches of a pattern from `text`; returns the values of
-- mw.ustring.gsub directly (new text, then match count).
-- NOTE(review): the pattern is "" as written; the characters it removes are
-- not visible in this copy — confirm.
local function clean_ungeminate(text)
return mw.ustring.gsub(text, "", "")
end
-- Walks `syllables` and, for an unstressed syllable that follows a stressed
-- syllable matching the `try_to_geminate` pattern, lengthens its initial
-- consonant group using `diacritic` when that group is followed by two vowels.
-- syllables: sequence of syllable strings (intended to be modified in place).
-- diacritic: string substituted for the moved consonant group (callers pass
-- LONG or SEMILONG).
-- NOTE(review): element subscripts and character classes look missing in this
-- copy (e.g. `syllables = syllables .. cg`) — confirm against the canonical module.
local function do_gemination(syllables, diacritic)
-- set by the previous iteration: whether the previous syllable permits gemination
local try_to_geminate = false
for index, syllable in ipairs(syllables) do
local stressed = index == 1 or is_stressed_syllable(syllable)
if try_to_geminate and not stressed then
-- check if the initial consonant in this syllable is followed by two vowels
local rest = syllable .. (syllables or "")
if mw.ustring.find(rest, "^*" .. m_izh.vowel .. m_izh.vowel) then
-- CVCVV -> CVC:VV
local cg = select(3, mw.ustring.find(syllable, "^(*)"))
syllables = syllables .. cg
syllables = mw.ustring.gsub(syllable, "^" .. cg, diacritic)
end
end
try_to_geminate = stressed and mw.ustring.find(syllable, "^?*" .. m_izh.vowel .. "$")
end
end
-- REDUCED is the combining sequence U+0325 (ring below) + U+0306 (breve).
local REDUCED = U(0x0325) .. U(0x0306)
-- Lookup table whose values are "o", "ø", "u" and "y" followed by REDUCED; the
-- reduction functions below consult it for word-final vowels.
-- NOTE(review): the table keys are not visible in this copy — confirm.
local reduce_final_vowel = {
= "o" .. REDUCED,
= "ø" .. REDUCED,
= "u" .. REDUCED,
= "y" .. REDUCED,
}
-- Returns "ə" when no `filter` is given or when `letter` matches the pattern;
-- otherwise returns `letter` followed by VERYSHORT.
-- NOTE(review): `filter` is only tested for presence as written, and the find
-- pattern is ""; the filter-based character class looks missing — confirm.
local function to_schwa(letter, filter)
return (not filter or mw.ustring.find(letter, "")) and "ə" or letter .. VERYSHORT
end
-- Returns an iterator function over `syllables`. Each call returns a fresh
-- table `r` holding the next group of entries (a new group starts where a
-- "^%s" match is found), and returns nothing once the index has passed
-- #syllables, which ends a generic `for` loop.
-- NOTE(review): element subscripts look missing in this copy (the calls
-- operate on `syllables` itself) — confirm against the canonical module.
local function split_syllables_by_words(syllables)
-- index of the first entry of the next group; persists between calls
local i = 1
return function()
local r = {}
local e = i
if e <= #syllables then
-- leading whitespace is stripped from the first entry of the group
table.insert(r, (mw.ustring.gsub(syllables, "^%s+", "")))
e = e + 1
while e <= #syllables and not mw.ustring.find(syllables, "^%s") do
table.insert(r, syllables)
e = e + 1
end
i = e
return r
end
end
end
-- Runs `fn` once per word on the syllables in `out_syllables`, then re-inserts
-- each word's syllables into `out_syllables`.
-- out_syllables: sequence of syllable strings; it is first copied and cleared.
-- fn: function called with one word's syllable table.
-- The first syllable of every word after the first is re-inserted with a
-- leading space.
-- NOTE(review): the copy/clear loop assigns to the tables themselves as
-- written; the `[k]` subscripts look missing in this copy — confirm.
local function do_by_word_syllables(out_syllables, fn)
local old_syllables = {}
for k, v in pairs(out_syllables) do
old_syllables = v
out_syllables = nil
end
-- false only while the first word is being handled
local next_word = false
for syllables in split_syllables_by_words(old_syllables) do
fn(syllables)
for i, syllable in ipairs(syllables) do
if next_word and i == 1 then
table.insert(out_syllables, " " .. syllable)
else
table.insert(out_syllables, syllable)
end
end
next_word = true
end
end
-- Returns a falsy value when `syllable` is nil/false; otherwise the result of
-- matching it against a start-anchored pattern containing TIE.
-- NOTE(review): as written the pattern is "^" followed directly by TIE; the
-- surrounding consonant classes are not visible in this copy — confirm.
local function begins_with_affricate(syllable)
return syllable and mw.ustring.find(syllable, "^" .. TIE)
end
-- Shared driver for vowel reduction. Walks `syllables`, tracking stress and
-- the length of the previous syllable, and for each syllable that qualifies
-- rewrites its vowel nucleus and the text after it via `replacement`.
-- syllables: sequence of syllable strings.
-- replacement: function(nucleus, coda, index) returning the replacement text.
-- NOTE(review): element subscripts and some character classes look missing in
-- this copy (e.g. `local previous_syllable = syllables`) — confirm.
local function do_reduction_internal(syllables, replacement)
local prev_was_stressed = false
local prev_was_long = false
local syllables_since_last_stressed = 0
for index, syllable in ipairs(syllables) do
-- the first syllable always counts as stressed
local stressed = index == 1 or is_stressed_syllable(syllable)
local final = index == #syllables
if stressed then
syllables_since_last_stressed = 0
else
syllables_since_last_stressed = syllables_since_last_stressed + 1
end
prev_was_long = prev_was_long or begins_with_affricate(syllable)
if mw.ustring.find(syllable, "^j'") and prev_was_long then
-- hack. /Cj'/ is one consonant.
local previous_syllable = syllables
if mw.ustring.find(previous_syllable, m_izh.vowel .. "") then
prev_was_long = mw.ustring.find(previous_syllable, m_izh.vowel .. m_izh.vowel) or mw.ustring.find(previous_syllable, m_izh.vowel .. "")
end
end
-- reduce an unstressed syllable after a long stressed one, or a final
-- syllable that is far enough from the stress or follows a long syllable
if not stressed and ((prev_was_stressed and prev_was_long) or (index > 1 and final and (syllables_since_last_stressed > 1 or prev_was_long))) then
syllables = mw.ustring.gsub(syllable, "(" .. m_izh.vowel .. "+)(.*)", function (nucleus, coda) return replacement(nucleus, coda, index) end)
end
-- reduce the next syllable only if the current syllable is stressed and not short
prev_was_stressed = stressed
prev_was_long = mw.ustring.find(syllable, m_izh.vowel .. "")
end
end
-- Applies narrow-transcription vowel reduction to one word by passing a
-- replacement callback to do_reduction_internal. The callback receives the
-- vowel nucleus, the text after it (`coda`) and the syllable index, and
-- returns the reduced form.
-- NOTE(review): table lookups and subscripts look missing in this copy (e.g.
-- `reduce_final_vowel or ""`, `local next_syllable = syllables`) — confirm.
local function do_reduction_word(syllables)
do_reduction_internal(syllables, function (nucleus, coda, index)
local final = index == #syllables
local never_open = false
-- a doubled vowel is shortened to its first character
if mw.ustring.find(nucleus, "(" .. m_izh.vowel .. ")%1") then
return mw.ustring.sub(nucleus, 1, 1) .. coda
end
-- a two-vowel nucleus: a second element other than "i" is moved into the
-- coda; with "i" the syllable is never treated as open
if mw.ustring.find(nucleus, m_izh.vowel .. m_izh.vowel) then
if mw.ustring.sub(nucleus, 2) ~= "i" then
coda = mw.ustring.sub(nucleus, 2) .. coda
else
never_open = true
end
nucleus = mw.ustring.sub(nucleus, 1, 1)
end
local open = #coda == 0 and not never_open
if final then
if open then
-- reduced, but simply drop it
return (reduce_final_vowel or "") or coda
else
if coda == "" and reduce_final_vowel then
-- /oi/, /ui/, /yi/, /øi/
return reduce_final_vowel
end
local reduced
if nucleus == "e" then
reduced = "e"
else
reduced = to_schwa(nucleus, "aä")
end
return reduced .. coda
end
else
-- non-final syllable: the outcome depends on the following syllable
local next_syllable = syllables
local next_syllable_starts_with_vowel = mw.ustring.find(next_syllable, "^?%.?" .. m_izh.vowel)
local next_syllable_stressed = is_stressed_syllable(next_syllable)
local next_syllable_open = not (mw.ustring.find(next_syllable, "$") or begins_with_affricate(syllables))
if next_syllable_starts_with_vowel then
return nucleus .. coda
elseif next_syllable_stressed then
return to_schwa(nucleus, "aäe") .. coda
elseif next_syllable_open then
return to_schwa(nucleus) .. coda
else
return to_schwa(nucleus, "aäe") .. coda
end
end
end)
end
-- Rhyme-level helper for one word: when the final syllable and the one before
-- it satisfy the match conditions, merges them using a replacement chosen by
-- the final vowel ("a" or "ä") and removes the final entry.
-- NOTE(review): subscripts, table keys and patterns look missing in this copy
-- (e.g. `({ = "e", = "o", = "o" })`, pattern "^$") — confirm against the
-- canonical module.
local function do_coalesce_rhyme_word(syllables)
local vowel = mw.ustring.match(syllables, "^$")
if mw.ustring.match(syllables, "^()$") and #syllables > 1 and not is_stressed_syllable(syllables .. syllables) then
local replacement
local prefinal = mw.ustring.sub(syllables, -1)
if vowel == "a" then
replacement = ({ = "e", = "o", = "o" })
elseif vowel == "ä" then
replacement = ({ = "e", = "ö", = "ö" })
end
if replacement then
-- a leading AUTO_STRESS placeholder is stripped from the merged syllable
syllables = mw.ustring.gsub(mw.ustring.sub(syllables, 1, -2) .. replacement .. replacement, "^" .. AUTO_STRESS, "")
syllables = nil
end
end
end
-- Rhyme-level counterpart of do_reduction_word for one word: passes a
-- callback to do_reduction_internal that leaves most nuclei unchanged and
-- only treats a final open syllable specially.
-- NOTE(review): the `reduce_final_vowel` lookup looks truncated in this copy
-- (the table itself is tested) — confirm.
local function do_reduction_rhyme_word(syllables)
do_reduction_internal(syllables, function (nucleus, coda, index)
local final = index == #syllables
-- doubled vowels are kept as they are
if mw.ustring.find(nucleus, "(" .. m_izh.vowel .. ")%1") then
return nucleus .. coda
end
-- two-vowel nucleus: a second element other than "i" is moved to the coda;
-- with "i" the syllable is returned unchanged
if mw.ustring.find(nucleus, m_izh.vowel .. m_izh.vowel) then
if mw.ustring.sub(nucleus, 2) ~= "i" then
coda = mw.ustring.sub(nucleus, 2) .. coda
else
return nucleus .. coda
end
nucleus = mw.ustring.sub(nucleus, 1, 1)
end
local open = #coda == 0
if final and open then
-- reduced, but simply drop it
return (reduce_final_vowel and nucleus or "") or coda
else
return nucleus .. coda
end
end)
end
-- For one word: runs do_reduction_word on a copy of the syllables and, if the
-- reduced form no longer ends in a vowel, folds the result back into
-- `syllables` (adding REALLY_JUST_PALATAL when the original ended in "i").
-- Returns early for single-syllable words or when the word does not match the
-- final-vowel pattern.
-- NOTE(review): element subscripts look missing in this copy (e.g.
-- `mw.ustring.sub(syllables, -1, -1)`, `syllables = nil`) — confirm.
local function do_final_vowel_dropping_word(syllables)
if #syllables == 1 or not mw.ustring.find(table.concat(syllables, ""), "" .. m_izh.vowel .. "$") then return end
local final = mw.ustring.sub(syllables, -1, -1)
if reduce_final_vowel then return end
-- work on a shallow copy so the reduction can be inspected before committing
local reduced = {}
for _, syllable in ipairs(syllables) do
table.insert(reduced, syllable)
end
do_reduction_word(reduced)
if not mw.ustring.find(reduced, m_izh.vowel .. "$") then
local leftovers = ""
if mw.ustring.find(syllables, "i$") then
leftovers = REALLY_JUST_PALATAL
end
syllables = mw.ustring.gsub(syllables .. reduced .. leftovers, "^" .. AUTO_STRESS, "")
syllables = nil
end
end
-- Applies do_reduction_word to each word of `syllables` via
-- do_by_word_syllables. Returns nothing.
local function do_reduction(syllables)
do_by_word_syllables(syllables, do_reduction_word)
end
-- Applies do_reduction_rhyme_word to each word of `syllables` via
-- do_by_word_syllables. Returns nothing.
local function do_reduction_rhyme(syllables)
do_by_word_syllables(syllables, do_reduction_rhyme_word)
end
-- Applies do_coalesce_rhyme_word to each word of `syllables` via
-- do_by_word_syllables. Returns nothing.
local function do_coalesce_rhyme(syllables)
do_by_word_syllables(syllables, do_coalesce_rhyme_word)
end
-- Applies do_final_vowel_dropping_word to each word of `syllables` via
-- do_by_word_syllables. Returns nothing.
local function do_final_vowel_dropping(syllables)
do_by_word_syllables(syllables, do_final_vowel_dropping_word)
end
-- Narrow-transcription handling of "l": decides for each "l" in `text`
-- whether it becomes the velarized "ɫ" or a palatalized "l" (written as "l"
-- plus PALATALIZE), based on the neighbouring sounds. Returns the rewritten
-- text; text without "l", or already containing "l" .. PALATALIZE, is
-- returned unchanged.
-- NOTE(review): table keys, subscripts and character classes look missing
-- throughout this copy (e.g. `env_tags`, `l_conv] or "l"`) — confirm against
-- the canonical module before relying on the step descriptions.
local function do_narrow_l(text)
-- failsafe
if not mw.ustring.find(text, "l") then return text end
if mw.ustring.find(text, "l" .. PALATALIZE) then return text end
local velar_l = "ɫ"
-- private-use placeholder for a palatal l; converted to "l" .. PALATALIZE at the end
local palatal_l = U(0xEEEF)
-- step 1: "l" directly before a consonant is decided from the preceding context
text = mw.ustring.gsub(text, "()l(" .. m_izh.consonant .. ")", function (before, after)
if after == "l" or after == "j" then
return before .. "l" .. after
elseif mw.ustring.find(before, "") then
return before .. velar_l .. after
else
return before .. palatal_l .. after
end
end)
local length = mw.ustring.len(text)
-- step 2: record the position of every remaining "l"
local l_indexes = {}
local i = 1
local env = {}
while true do
local index = mw.ustring.find(text, "l", i)
if index == nil then break end
table.insert(l_indexes, index)
i = index + 1
end
local env_tags = {
= "a", = "a", = "a", = "i", = "j",
= "ä", = "ä", = "ä", = "e", = "_",
= "_", = "a"
}
-- step 3: scan the text and build, for each "l", a tag from the environment
-- before it and the environment after it
local cleaned = mw.ustring.gsub(text, "", "") .. " "
local env_index = 1
local current_env = "_"
local current_env_before = "_"
local backburner, backburner_count = {}, 0
for c in mw.ustring.gmatch(cleaned, ".") do
if c == "l" then
env = current_env_before
backburner_count = backburner_count + 1
backburner = env_index
env_index = env_index + 1
else
current_env = env_tags or "_"
-- complete the tags of the "l"s waiting for their following environment
for i = 1, backburner_count do
local back_index = backburner
env = env .. current_env
end
backburner_count = 0
if current_env ~= "j" then
current_env_before = current_env
end
end
end
-- NOTE(review): `old_text` is assigned without `local`, so it becomes a
-- global variable — confirm and localize.
old_text = text
text = ""
i = 1
local l_conv = {
= palatal_l, = palatal_l, = palatal_l,
= palatal_l, = palatal_l, = palatal_l,
= palatal_l, = palatal_l, = palatal_l,
= palatal_l, = palatal_l,
= velar_l, = velar_l, = velar_l,
= velar_l, = velar_l, = velar_l
}
-- step 4: rebuild the text, replacing each recorded "l" by its converted form
for env_index, l_index in ipairs(l_indexes) do
text = text .. mw.ustring.sub(old_text, i, l_index - 1) .. (l_conv] or "l")
i = l_index + 1
end
text = text .. mw.ustring.sub(old_text, i, length)
-- step 5: turn the palatal placeholder into "l" .. PALATALIZE (one marker for a doubled l)
text = mw.ustring.gsub(text, palatal_l .. palatal_l, "ll" .. PALATALIZE)
text = mw.ustring.gsub(text, palatal_l, "l" .. PALATALIZE)
return text
end
-- Lookup tables consulted by do_additional_reduction when two adjacent
-- syllables are merged; the values are the replacement vowels.
-- NOTE(review): the table keys are not visible in this copy — confirm.
local reduce_a_diphthong = {
= "e", = "e",
= "o", = "ö",
= "o", = "ö",
}
local reduce_e_diphthong = {
= "o", = "ö",
}
-- Merges certain pairs of adjacent unstressed syllables near the last
-- stressed syllable, using reduce_a_diphthong / reduce_e_diphthong, and then
-- compacts the list to remove the syllables emptied by the merge.
-- NOTE(review): element subscripts, table lookups and character classes look
-- missing in this copy (e.g. `syllables = ""`, `syllables = syllables`) —
-- confirm against the canonical module.
local function do_additional_reduction(syllables)
-- /VA/ (V != A) never in the same syllable
local last_stressed = 1
for i = 1, #syllables - 1 do
if i == 1 or is_stressed_syllable(syllables) then
last_stressed = i
else
local nucleus = mw.ustring.match(syllables, m_izh.vowel .. "+")
-- only syllables at most two positions after the last stressed one qualify
if i - last_stressed <= 2 and nucleus then
-- keep only nuclei consisting of a single vowel (optionally after UNGEMINATE)
nucleus = select(3, mw.ustring.find(nucleus, "^" .. UNGEMINATE .. "?(" .. m_izh.vowel .. ")$"))
if nucleus then
local next_syllable_onset, next_syllable_onset_end, consequent = mw.ustring.find(syllables, "^" .. UNGEMINATE .. "?()")
if next_syllable_onset then
if mw.ustring.find(consequent, "") and reduce_a_diphthong then
syllables = mw.ustring.gsub(syllables, nucleus, reduce_a_diphthong .. reduce_a_diphthong) .. mw.ustring.sub(syllables, next_syllable_onset_end + 1)
syllables = ""
elseif consequent == "e" and reduce_e_diphthong then
syllables = mw.ustring.gsub(syllables, nucleus, reduce_e_diphthong .. reduce_e_diphthong) .. mw.ustring.sub(syllables, next_syllable_onset_end + 1)
syllables = ""
end
end
end
end
end
end
-- remove empty syllables
-- i reads, j writes; entries from j onward are cleared afterwards
local i, j = 1, 1
while i <= #syllables do
if mw.ustring.len(syllables) > 0 then
syllables = syllables
j = j + 1
end
i = i + 1
end
while j < i do
syllables = nil
j = j + 1
end
end
-- Splits `consonant` into a base part and trailing diacritics with
-- mw.ustring.match, and returns the mapped form with the diacritics appended.
-- map: table passed by callers (the `voice` / `semivoice` tables).
-- NOTE(review): as written `map` is concatenated directly; the `[consonant]`
-- lookup and the capture contents look missing in this copy — confirm.
local function pass_diacritics_through(map, consonant)
local consonant, diacritics = mw.ustring.match(consonant, "()(?)")
return map .. diacritics
end
-- Characters treated as voiced consonants.
local voiced_consonants = "jlɫmnŋrvʋ"
-- All characters treated as voiced: the IPA vowels, the vowels from
-- Module:izh, and the voiced consonants above.
local voiced_sounds = IPA_VOWELS .. m_izh.vowels .. voiced_consonants
-- Applies voicing and semivoicing of k/p/t/s/š to `text`, as described by the
-- rule comments inside, and returns the result.
-- NOTE(review): table keys and character classes look missing throughout this
-- copy (e.g. `{ = "p", = "t", ... }`, `vowel = ""`) — confirm against the
-- canonical module.
local function do_voicing(text)
-- first normalize to the voiceless letters
text = mw.ustring.gsub(text, "", { = "p", = "t", = "k", = "s", ="š" })
local voice = { = "g", = "b", = "d", = "z", = "ž" }
local semivoice = { = "g̊", = "b̥", = "d̥", = "z̥", = "ž̥" }
local consonants_to_voice = "?"
local vowel = ""
-- k/p/t/s/š is semivoiced if it follows a voiced sound and is followed by a short vowel or a voiced consonant
text = gsub_lookahead(text, "(" .. ANY_DIACRITICS .. PALATAL .. "??)(" .. consonants_to_voice .. ")(" .. ANY_DIACRITICS .. ".?)",
function (before, consonant, after)
-- two vowels after the consonant: leave the consonant unchanged
if mw.ustring.find(after, vowel .. ANY_DIACRITICS .. vowel) then
return before .. consonant, after
else
return before .. pass_diacritics_through(semivoice, consonant), after
end
end)
-- k/p/t/s/š is semivoiced if it follows a voiced sound and is not followed by anything
text = mw.ustring.gsub(text, "(" .. ANY_DIACRITICS .. PALATAL .. "??)(" .. consonants_to_voice .. ")$",
function (before, consonant)
return before .. pass_diacritics_through(semivoice, consonant)
end)
-- k/p/t/s/š is voiced if it follows a voiced sound and the next sound in the next word is a voiced sound
-- k/p/t/s/š is semivoiced if it follows a voiced sound and the next sound in the next word is not a voiced sound
text = gsub_lookahead(text, "(" .. ANY_DIACRITICS .. "?)(" .. consonants_to_voice .. ")(+)(.)",
function (before, consonant, space, after)
if mw.ustring.find(after, "^") then
return before .. pass_diacritics_through(voice, consonant) .. space, after
else
return before .. consonant .. space, after
end
end)
-- devoice word-initial
text = mw.ustring.gsub(text, "^()?", { = "p", = "t", = "k", = "s", ="š" })
return text
end
-- Narrow-transcription voicing used by IPA_alalaukaa: voices consonants in
-- the contexts described by the comments inside and returns the text.
-- NOTE(review): table keys and character classes look missing in this copy
-- (e.g. `voiced_pre` starts with "") — confirm against the canonical module.
local function do_phonetic_alalaukaa_voicing(text)
local voice = { = "g", = "b", = "d", = "z", = "ž" }
-- pattern prefix for the required preceding context: optional diacritics,
-- optional LONG and optional PALATAL
local voiced_pre = "" .. ANY_DIACRITICS .. LONG .. "?" .. PALATAL .. "?"
-- k, p, t, s get voiced before j, l, r, v, if preceded by a voiced sound
text = mw.ustring.gsub(text, "(" .. voiced_pre .. ")()(" .. PALATALIZE .. "?)",
function (before, consonant, after)
-- no voicing before "j" followed by the PALATALIZE marker
if after == "j" .. PALATALIZE then
return before .. consonant .. after
end
return before .. pass_diacritics_through(voice, consonant) .. after
end)
-- word-final s, t get voiced
text = mw.ustring.gsub(text, "(" .. voiced_pre .. ")s$", "%1z")
text = mw.ustring.gsub(text, "(" .. voiced_pre .. ")s(?)", "%1z%2")
text = mw.ustring.gsub(text, "(" .. voiced_pre .. ")t$", "%1d")
text = mw.ustring.gsub(text, "(" .. voiced_pre .. ")t(?)", "%1d%2")
return text
end
-- Replaces vowels in `text` through gsub_lookahead, choosing between the long
-- and short replacement depending on whether the following character is LONG.
-- vowels_find: string of vowels to look for (callers pass "eoø").
-- vowels_short / vowels_long: replacement tables passed by the callers.
-- NOTE(review): as written `vowels_find` is unused and the tables are returned
-- whole; the `[" .. vowels_find .. "]` class and the `[vowel]` lookups look
-- missing in this copy — confirm.
local function do_vowel_replacements(text, vowels_find, vowels_short, vowels_long)
return gsub_lookahead(text, "()(.?)",
function (vowel, post)
if post == LONG then
return vowels_long, post
else
return vowels_short, post
end
end
)
end
-- Normalizes palatalization marks in `text` and returns the result.
-- NOTE(review): the two `()` captures in the last pattern are empty as
-- written and look truncated — confirm.
local function cleanup_palatal(text)
-- turn the internal placeholder into a real PALATAL mark
text = mw.ustring.gsub(text, REALLY_JUST_PALATAL, PALATAL)
-- PALATAL goes before LONG
text = mw.ustring.gsub(text, LONG .. PALATAL, PALATAL .. LONG)
-- collapse runs of PALATAL into one
text = mw.ustring.gsub(text, PALATAL .. "+", PALATAL)
-- copies PALATAL onto an earlier occurrence of the same captured text
text = mw.ustring.gsub(text, "()()%1" .. PALATAL, "%1" .. PALATAL .. "%2%1" .. PALATAL)
return text
end
--- <<< COMMON END >>> ---
--- <<< DIALECTS START >>> ---
-- narrow_level 0 = broad, 1 = rhyme, 2 = narrow
-- Ala-Laukaa
-- Converts `text` into an IPA transcription for the Ala-Laukaa dialect.
-- text: input string (make_IPAs passes a lowercased form).
-- narrow_level: 0 = broad, 1 = rhyme, 2 = narrow; higher levels enable more
-- of the phonetic processing below.
-- Returns the string produced by add_primary_stress.
-- NOTE(review): table keys and character classes look missing in this copy
-- (e.g. `{ = "", = PALATALIZE }`, `automatic_palatalization(text, "")`) —
-- confirm against the canonical module.
local function IPA_alalaukaa(text, narrow_level)
if narrow_level <= 1 then
text = mw.ustring.gsub(text, "j?" .. PALATALIZE, { = "", = PALATALIZE })
end
text = mw.ustring.gsub(text, "()h", "%1")
-- this dialect treats VIRTUAL_BREAK_UNGEMINATE as a plain VIRTUAL_BREAK
text = mw.ustring.gsub(zeroth_round_of_common_replacements(text), VIRTUAL_BREAK_UNGEMINATE, VIRTUAL_BREAK)
if narrow_level > 0 then
if narrow_level > 1 then
text = do_narrow_l(text)
-- every remaining plain "l" gets a PALATALIZE marker; doubled ones share one
text = mw.ustring.gsub(mw.ustring.gsub(text, "l", "l" .. PALATALIZE), "l" .. PALATALIZE .. "l" .. PALATALIZE, "ll" .. PALATALIZE)
text = mw.ustring.gsub(text, PALATALIZE .. PALATALIZE, PALATALIZE)
text = mw.ustring.gsub(text, "l" .. PALATALIZE .. "j", "lj")
text = do_phonetic_alalaukaa_voicing(text)
end
-- protect "j" + PALATALIZE as one placeholder during syllable processing
text = mw.ustring.gsub(text, "j" .. PALATALIZE, J_PALATALIZE)
local syllables = split_syllables(text, true)
add_secondary_stress(syllables)
if narrow_level > 1 then
do_final_vowel_dropping(syllables)
do_gemination(syllables, LONG)
do_additional_reduction(syllables)
do_reduction(syllables)
elseif narrow_level == 1 then
do_final_vowel_dropping(syllables)
do_coalesce_rhyme(syllables)
do_reduction_rhyme(syllables)
end
text = table.concat(syllables)
-- restore the protected sequence
text = mw.ustring.gsub(text, J_PALATALIZE, "j" .. PALATALIZE)
if narrow_level > 1 then
text = automatic_palatalization(text, "") -- palatalization
text = mw.ustring.gsub(text, "h()", "x%1")
end
text = clean_virtual_break(text)
end
text = clean_ungeminate(text)
text = mw.ustring.gsub(text, "j" .. PALATALIZE, PALATALIZE)
text = manual_palatalization(text)
text = first_round_of_common_replacements(text)
text = long_vowels_and_diphthongs(text)
text = long_consonants(text)
text = second_round_of_common_replacements(text, narrow_level > 1)
if narrow_level > 1 then
local vowels_short = { = "e̞", = "o̞", = "ø̞" }
local vowels_long = { = "e", = "o", = "ø" }
text = do_vowel_replacements(text, "eoø", vowels_short, vowels_long)
text = mw.ustring.gsub(text, "", { = "s̠", = "z̠" })
text = standard_sandhi(text)
text = mw.ustring.gsub(text, "(" .. ANY_DIACRITICS .. PALATAL .. "?)j%f", "%1i")
text = mw.ustring.gsub(text, "(" .. ANY_DIACRITICS .. PALATAL .. "?)j$", "%1i")
end
-- "j" at the end of the text, before a stress mark, or before a space
-- becomes a non-syllabic "i"
text = mw.ustring.gsub(text, "(" .. ANY_DIACRITICS .. ")j$", "%1i" .. NONSYLLABIC)
text = mw.ustring.gsub(text, "(" .. ANY_DIACRITICS .. ")j(" .. STRESS_PRIMARY .. "?" .. STRESS_SECONDARY .. "?)", "%1i" .. NONSYLLABIC .. "%2")
text = mw.ustring.gsub(text, "(" .. ANY_DIACRITICS .. ")j ", "%1i" .. NONSYLLABIC .. " ")
text = cleanup_palatal(text)
return add_primary_stress(text)
end
-- Soikkola
-- Converts `text` into an IPA transcription for the Soikkola dialect.
-- text: input string (make_IPAs passes a lowercased form).
-- narrow_level: 0 = broad, 1 = rhyme, 2 = narrow; higher levels enable more
-- of the phonetic processing below.
-- Returns the string produced by add_primary_stress.
-- NOTE(review): table keys and character classes look missing in this copy
-- (e.g. `{ = "e̞", ... }`, `"h()"`) — confirm against the canonical module.
local function IPA_soikkola(text, narrow_level)
text = zeroth_round_of_common_replacements(text)
if narrow_level > 0 then
if narrow_level > 1 then
text = do_narrow_l(text)
text = mw.ustring.gsub(text, "h()", "x%1")
end
-- protect "j" + PALATALIZE as one placeholder during syllable processing
text = mw.ustring.gsub(text, "j" .. PALATALIZE, J_PALATALIZE)
local syllables = split_syllables(text, true)
add_secondary_stress(syllables)
if narrow_level > 1 then
do_gemination(syllables, SEMILONG)
end
text = table.concat(syllables)
text = mw.ustring.gsub(text, VIRTUAL_BREAK_UNGEMINATE, VIRTUAL_BREAK)
text = mw.ustring.gsub(text, J_PALATALIZE, "j" .. PALATALIZE)
end
-- PALATALIZE is kept after "l", replaces a preceding "j", and is dropped
-- after any other character
text = mw.ustring.gsub(text, "(.)" .. PALATALIZE,
function (preceding)
if preceding == "l" then
return preceding .. PALATALIZE
elseif preceding == "j" then
return PALATALIZE
else
return preceding
end
end)
text = manual_palatalization(text)
if narrow_level > 1 then text = do_voicing(text) end
if narrow_level > 0 then text = clean_virtual_break(text) end
text = first_round_of_common_replacements(text)
text = clean_ungeminate(text)
text = long_vowels_and_diphthongs(text)
text = long_consonants(text)
text = second_round_of_common_replacements(text, narrow_level > 1)
if narrow_level > 1 then
local vowels_short = { = "e̞", = "o̞", = "ø̞" }
local vowels_long = { = "e̝", = "o̝", = "ø̝" }
text = do_vowel_replacements(text, "eoø", vowels_short, vowels_long)
-- "s" -> "ʃ" and "z" -> "ʒ" at the start of the text and after the captured context
text = mw.ustring.gsub(mw.ustring.gsub(text, "^s", "ʃ"), "()s", "%1ʃ")
text = mw.ustring.gsub(mw.ustring.gsub(text, "^z", "ʒ"), "()z", "%1ʒ")
text = standard_sandhi(text)
end
-- "j" at the end of the text, before a stress mark, or before a space
-- becomes a non-syllabic "i"
text = mw.ustring.gsub(text, "(" .. ANY_DIACRITICS .. ")j$", "%1i" .. NONSYLLABIC)
text = mw.ustring.gsub(text, "(" .. ANY_DIACRITICS .. ")j(" .. STRESS_PRIMARY .. "?" .. STRESS_SECONDARY .. "?)", "%1i" .. NONSYLLABIC .. "%2")
text = mw.ustring.gsub(text, "(" .. ANY_DIACRITICS .. ")j ", "%1i" .. NONSYLLABIC .. " ")
return add_primary_stress(text)
end
-- Hevaha
-- Converts `text` into an IPA transcription for the Hevaha dialect by running
-- IPA_soikkola and then post-processing its output. Returns the final string.
-- NOTE(review): the `()` captures in the two narrow-level patterns are empty
-- as written and look truncated — confirm.
local function IPA_hevaha(text, narrow_level)
text = IPA_soikkola(text, narrow_level)
-- half-long marks become full length marks
text = mw.ustring.gsub(text, "ˑ", "ː")
if narrow_level > 1 then
-- removes U+0325 (combining ring below) between the two captured contexts
text = mw.ustring.gsub(text, "()" .. U(0x0325) .. "()", "%1%2")
-- removes U+030A (combining ring above) from "ɡ" before the captured context
text = mw.ustring.gsub(text, "ɡ" .. U(0x030A) .. "()", "ɡ%1")
end
return text
end
-- Ylä-Laukaa
-- Placeholder for the Ylä-Laukaa dialect: always raises an error. Its entry in
-- the `varieties` table is commented out.
local function IPA_ylalaukaa(text, narrow_level)
error("Ylä-Laukaa not implemented") -- TODO
end
--- <<< DIALECTS END >>> ---
--- <<< INTERFACE START >>> ---
-- Replaces every ASCII "g" in `ipa` with the IPA letter "ɡ".
-- Returns both results of mw.ustring.gsub: the new string and the number of
-- replacements.
local function cleanup_IPA(ipa)
	local cleaned, replacements = mw.ustring.gsub(ipa, "g", "ɡ")
	return cleaned, replacements
end
-- Removes characters matching `no_hyph_symbols` from `text` before
-- hyphenation. Returns both results of mw.ustring.gsub (new string, count).
-- NOTE(review): `no_hyph_symbols` is "" as written; the characters it should
-- list are not visible in this copy — confirm.
local function cleanup_for_hyphenate(text)
local no_hyph_symbols = ""
return mw.ustring.gsub(text, no_hyph_symbols, "")
end
-- Variant of cleanup_for_hyphenate used by hyphenate_matches when comparing a
-- respelling with the page title. Returns both results of mw.ustring.gsub.
-- NOTE(review): `no_hyph_symbols` is "" as written, so any difference from
-- cleanup_for_hyphenate is not visible in this copy — confirm.
local function cleanup_for_hyphenate_int(text)
local no_hyph_symbols = ""
return mw.ustring.gsub(text, no_hyph_symbols, "")
end
-- Drops a word-final "i" after the captured context so that spellings with
-- and without it compare equal. Returns a single string (the parentheses
-- discard gsub's count).
-- NOTE(review): the `()` capture is empty as written; the vowel class implied
-- by the comment below is not visible in this copy — confirm.
local function cleanup_for_hyphenate_final(sp)
-- allow final /oi/, /ui/, /yi/, /øi/ for <o>, <u>, <y>, <ö>
return (mw.ustring.gsub(sp, "()i$", "%1"))
end
-- Adapts the respelling `sp` to the page `title` for hyphenation: drops a
-- final "i" that the title lacks, and copies the title's letter case onto the
-- respelling. Returns the adapted string.
-- NOTE(review): `local next_letter = letters` assigns the whole table as
-- written; the `[letter_index]` subscript looks missing in this copy — confirm.
local function match_spelling_with_title_for_hyphenation(sp, title)
if mw.ustring.find(sp, "i$") and not mw.ustring.find(title, "i$") then
sp = mw.ustring.gsub(sp, "i$", "")
end
-- an all-lowercase title needs no case matching
if mw.ustring.lower(title) == title then
return mw.ustring.lower(sp)
else
-- find letters in title
local letters = {}
for letter in mw.ustring.gmatch(title, "%a") do
table.insert(letters, letter)
end
-- walk the respelling; whenever its next letter equals the next title letter
-- ignoring case, take the title's version
local respelled = ""
local letter_index = 1
for character in mw.ustring.gmatch(sp, ".") do
if mw.ustring.match(character, "%a") then
local next_letter = letters
if mw.ustring.lower(next_letter) == mw.ustring.lower(character) then
respelled = respelled .. next_letter
letter_index = letter_index + 1
else
respelled = respelled .. character
end
else
respelled = respelled .. character
end
end
return respelled
end
end
-- Tells whether the respelling `sp` spells the same word as `title`.
-- The respelling is cleaned with cleanup_for_hyphenate_int and has its "."
-- characters removed; both sides are then lowercased and passed through
-- cleanup_for_hyphenate_final before being compared.
-- Returns a boolean.
local function hyphenate_matches(sp, title)
	local respelling = mw.ustring.gsub(cleanup_for_hyphenate_int(sp), "%.", "")
	local from_spelling = cleanup_for_hyphenate_final(mw.ustring.lower(respelling))
	local from_title = cleanup_for_hyphenate_final(mw.ustring.lower(title))
	return from_spelling == from_title
end
-- Splits `text` into syllables for hyphenation.
-- text: spelling to hyphenate.
-- Returns the sequence of syllables produced by split_syllables, with
-- separator symbols dropped.
local function hyphenate(text)
	-- cleanup_for_hyphenate returns gsub's (string, count) pair; keep only the
	-- string so the count is not forwarded as split_syllables' second
	-- argument (keep_sep_symbols).
	local cleaned = cleanup_for_hyphenate(text)
	return split_syllables(cleaned)
end
-- Converts length-marked consonants back to spelling: a captured consonant
-- (with optional PALATALIZE) followed by LONG becomes the doubled consonant
-- plus the marker, except "j", which becomes "ij". Returns both results of
-- mw.ustring.gsub (new string, count).
-- NOTE(review): the first `()` capture is empty as written; the consonant
-- class looks missing in this copy — confirm.
local function spell_long_consonants(text)
return mw.ustring.gsub(text, "()" .. "(" .. PALATALIZE .. "?)" .. LONG,
function (c, p) return c == "j" and "ij" or c .. c .. p end)
end
-- Derives a rhyme string from `tuple.rhyme`: removes primary stress marks,
-- cuts the text after the position found by the STRESS_SECONDARY pattern, and
-- returns the part starting at the first match of the second pattern, or nil
-- when that pattern does not match.
-- NOTE(review): the second pattern is "" and the first looks truncated in
-- this copy — confirm against the canonical module.
local function generate_rhyme(tuple)
local text = tuple.rhyme
text = mw.ustring.gsub(cleanup_IPA(text), STRESS_PRIMARY, "")
local index = mw.ustring.find(text, STRESS_SECONDARY .. "*$")
if index ~= nil then text = mw.ustring.sub(text, index + 1) end
index = mw.ustring.find(text, "")
if index == nil then return nil end
return mw.ustring.sub(text, index)
end
-- Builds the transcriptions of every form for one variety.
-- fn: dialect function called as fn(form, narrow_level) with levels 0, 1, 2.
-- forms: sequence of spellings; each is lowercased first. A leading "-" marks
-- a suffix and a trailing "-" a prefix; the hyphen is removed before
-- transcription and re-attached to the broad and narrow results, and such
-- forms get no rhyme.
-- variety: value stored as the only element of `varieties` in the result.
-- Returns { forms = { {broad=, rhyme=, narrow=}, ... }, varieties = { variety } }.
local function make_IPAs(fn, forms, variety)
	local transcriptions = {}
	for _, raw_form in ipairs(forms) do
		local form = mw.ustring.lower(raw_form)
		local is_suffix = mw.ustring.find(form, "^%-")
		local is_prefix = mw.ustring.find(form, "%-$")
		if is_suffix then form = mw.ustring.gsub(form, "^%-", "") end
		if is_prefix then form = mw.ustring.gsub(form, "%-$", "") end

		local broad = fn(form, 0)
		local rhyme = fn(form, 1)
		local narrow = fn(form, 2)

		-- affixes never get a rhyme
		if is_prefix or is_suffix then
			rhyme = nil
		end
		if is_prefix then
			broad = broad .. "-"
			narrow = narrow .. "-"
		end
		if is_suffix then
			-- a suffix does not carry the leading primary stress mark
			local unstress = "^" .. STRESS_PRIMARY
			broad = "-" .. mw.ustring.gsub(broad, unstress, "")
			narrow = "-" .. mw.ustring.gsub(narrow, unstress, "")
		end

		transcriptions[#transcriptions + 1] = { broad = broad, rhyme = rhyme, narrow = narrow }
	end
	return {
		forms = transcriptions,
		varieties = { variety }
	}
end
-- Formats one variety's transcriptions as a wikitext list item.
-- tuple: table with `forms` and `varieties`, as returned by make_IPAs.
-- title: accepted but not used in the visible body.
-- has_spaces: passed to Module:IPA as `no_count`.
-- Returns "* " followed by the accent qualifier and the formatted IPA items.
-- NOTE(review): the second item's `pron` is "" as written; the narrow
-- transcription it should hold looks missing in this copy — confirm.
local function format_IPAs(tuple, title, has_spaces)
local dialects = require("Module:accent qualifier").format_qualifiers(lang, tuple.varieties)
local p = {}
for _, form in ipairs(tuple.forms) do
table.insert(p, {pron = "/" .. cleanup_IPA(form.broad) .. "/"})
table.insert(p, {pron = ""})
end
return "* " .. dialects .. " " .. m_IPA.format_IPA_full { lang = lang, items = p, no_count = has_spaces }
end
-- Resolves a list-valued template argument.
-- param: sequence of values given by the user (may be nil or empty).
-- fallback: value returned when `param` is nil or empty.
-- allow_dash: when falsy, a list consisting of the single item "-" means
-- "explicitly nothing" and yields an empty table.
-- Returns `fallback`, an empty table, or `param` itself.
local function get_arg_list(param, fallback, allow_dash)
	if not param or #param == 0 then return fallback end
	-- compare the single element, not the list table itself, with "-"
	if not allow_dash and #param == 1 and param[1] == "-" then return {} end
	return param
end
-- Supported varieties. Each entry is unpacked by the callers as
-- { parameter code, display name, IPA function, optional flag }; when the
-- flag is false, export.show falls back to the default spellings if the
-- variety has no forms of its own.
local varieties = {
{"A", "Ala-Laukaa", IPA_alalaukaa, false},
{"S", "Soikkola", IPA_soikkola, false},
{"H", "Hevaha", IPA_hevaha, true},
-- {"Y", "Ylä-Laukaa", IPA_ylalaukaa, true},
}
-- rhymes only for these varieties
-- Set-like table consulted by allow_rhyme_for_varieties.
-- NOTE(review): the two keys are not visible in this copy — confirm.
local varieties_with_rhymes = {
= true,
= true
}
-- Looks up a variety by its parameter code.
-- variety_code: the code stored as the first element of a `varieties` entry
-- (e.g. "A").
-- Returns the matching entry { code, name, IPA function, optional flag }.
-- Raises an error when no entry has that code.
local function get_variety(variety_code)
	for _, variety in ipairs(varieties) do
		-- the code is the first element of the tuple; comparing the tuple
		-- table itself with the code could never match
		if variety[1] == variety_code then
			return variety
		end
	end
	-- tostring keeps the message readable even for a nil code
	error("Unrecognized variety code: " .. tostring(variety_code))
end
-- Public wrapper around the local get_variety: returns the `varieties` entry
-- for `variety_code`, or raises an error for an unrecognized code.
function export.get_variety(variety_code)
return get_variety(variety_code)
end
-- Tells whether rhymes may be generated for a list of varieties.
-- varieties: sequence of variety identifiers (make_IPAs stores the variety
-- it was given in its result's `varieties` field). This parameter shadows
-- the file-level `varieties` table.
-- Returns true if at least one of them is a key of varieties_with_rhymes,
-- false otherwise (including for an empty list).
local function allow_rhyme_for_varieties(varieties)
	for _, variety in ipairs(varieties) do
		-- look up this particular variety; testing the table itself would be
		-- true for every non-empty list
		if varieties_with_rhymes[variety] then
			return true
		end
	end
	return false
end
-- Generates the transcriptions of a single form for one variety.
-- form: spelling to transcribe.
-- variety_code: code accepted by get_variety.
-- transcription: optional selector applied to the result.
-- NOTE(review): `result = result` is a no-op as written and `.forms` is
-- returned whole; the subscripts (likely the first form and the
-- `transcription` key) look missing in this copy — confirm.
function export.generate_one(form, variety_code, transcription)
local param, name, fn = unpack(get_variety(variety_code))
local result = make_IPAs(fn, {form}, name).forms
if transcription then result = result end
return result
end
-- Generates the transcriptions of several forms for one variety.
-- forms: sequence of spellings to transcribe.
-- variety_code: code accepted by get_variety.
-- transcription: optional selector applied to each result entry.
-- Returns the `forms` list built by make_IPAs (possibly narrowed by
-- `transcription`).
-- NOTE(review): inside the loop `result = form` replaces the whole list as
-- written; the `[i]` / `[transcription]` subscripts look missing in this copy
-- — confirm.
function export.generate_multiple(forms, variety_code, transcription)
local param, name, fn = unpack(get_variety(variety_code))
local result = make_IPAs(fn, forms, name).forms
if transcription then
for i, form in ipairs(result) do
result = form
end
end
return result
end
-- Template entry point. Reads the parent frame's arguments through
-- Module:parameters, builds the IPA lines for every variety, and adds rhyme
-- and hyphenation lines when they can be derived.
-- frame: the Scribunto frame object.
-- Returns the result lines joined with newlines, followed by the formatted
-- categories.
-- NOTE(review): parameter names, table keys and subscripts look missing
-- throughout this copy (e.g. the `params` keys, `title = args or title`,
-- `{ = h }`) — confirm against the canonical module.
function export.show(frame)
local title = mw.title.getCurrentTitle().text
local hyphenation = nil
local rhymes = nil
local categories = {}
local params = {
= { list = true },
= { list = true }, -- Ala-Laukaa
= { list = true }, -- Soikkola
= { list = true }, -- Hevaha
= { list = true }, -- Ylä-Laukaa
= {}, -- for debugging or demonstration only
}
local args = require("Module:parameters").process(frame:getParent().args, params)
title = args or title
-- default spellings: fall back to the lowercased title
local spellings = get_arg_list(args, { mw.ustring.lower(title) }, true)
-- one make_IPAs result per variety that has forms
local IPAs = {}
for _, variety in ipairs(varieties) do
local param, name, fn, optional = unpack(variety)
-- optional varieties have no fallback, so they are skipped unless forms are given
local forms = get_arg_list(args, not optional and spellings or nil, true)
if forms then
table.insert(IPAs, make_IPAs(fn, forms, name))
end
end
local results = {}
local has_spaces = mw.ustring.find(title, " ")
-- hyphenation is only derived for titles without spaces
if not hyphenation then
hyphenation = {}
if not has_spaces then
local sp = spellings
if not hyphenate_matches(sp, title) then
-- try to geminate
local syllables = split_syllables(sp, true)
do_gemination(syllables, LONG)
sp = spell_long_consonants(clean_ungeminate(table.concat(syllables)))
end
if hyphenate_matches(sp, title) then
table.insert(hyphenation, hyphenate(match_spelling_with_title_for_hyphenation(sp, title)))
end
end
end
-- rhymes are only derived for titles without spaces
if not rhymes then
rhymes = {}
if not has_spaces then
-- intended to avoid adding the same rhyme twice
local found_rhymes = {}
for _, tuple in ipairs(IPAs) do
if allow_rhyme_for_varieties(tuple.varieties) then
for _, form in ipairs(tuple.forms) do
if form.rhyme then
local rhyme = generate_rhyme(form)
if not found_rhymes then
found_rhymes = true
table.insert(rhymes, rhyme)
end
end
end
end
end
end
end
for _, tuple in ipairs(IPAs) do
table.insert(results, format_IPAs(tuple, title, has_spaces))
end
if #rhymes > 0 then
local sylkeys = {}
local sylcounts = {}
-- get all possible syllable counts from syllabifications
for i, h in ipairs(hyphenation) do
local hl = #h
if hl > 0 and not sylkeys then
table.insert(sylcounts, hl)
sylkeys = true
end
end
local rhymeobjs = {}
for _, rhyme in ipairs(rhymes) do
table.insert(rhymeobjs, {rhyme = rhyme})
end
table.insert(results, "* " .. require("Module:rhymes").format_rhymes(
{ lang = lang, rhymes = rhymeobjs, num_syl = sylcounts }))
end
if #hyphenation > 0 then
local hyphs = {}
for i, h in ipairs(hyphenation) do
table.insert(hyphs, { = h })
end
table.insert(results, "* " .. require("Module:hyphenation").format_hyphenations(
{ lang = lang, hyphs = hyphs, caption = "Hyphenation" }))
end
return table.concat(results, "\n") .. require("Module:utilities").format_categories(categories, lang)
end
--- <<< INTERFACE END >>> ---
return export