-- Table of functions exported by this module (returned at the end of the file).
local export = {}
-- Companion module; its `syllabify` function is used by add_secondary_stress.
local m_fi_p = require("Module:fi-pronunciation")
-- Language code passed to Module:languages when the result is formatted.
local langcode = "fi"
-- Short aliases for the Unicode-aware Scribunto string functions.
local gsub = mw.ustring.gsub
local U = mw.ustring.char
-- Marks inserted into transcriptions, built from their code points.
-- U+0361: placed between the two vowels of a diphthong (handle_diphthongs).
local diphthong = U(0x361)
-- U+1DFC: placed between adjacent vowels that are not joined as a diphthong (handle_diphthongs).
local hiatus = U(0x1DFC)
-- U+02FE: appended after a plosive that is doubled before a following word part (UPA_wordparts).
local unreleased = U(0x2FE)
-- U+0330: added after a vowel by the nasalization rule in apply_post_fixes_narrow.
local nasalized = U(0x330)
-- Maps a spelled letter sequence to the phoneme string that replaces it.
-- UPA_word picks the longest key that is a prefix of the remaining input.
-- NOTE(review): every entry below starts with `=` and has no key, so this
-- table constructor does not parse as written. The bracketed keys appear to
-- have been lost from this copy of the file; restore them from the canonical
-- source before using the module.
local letters_phonemes = {
= "o",
= "ü",
= "k",
= "ks",
= "ts",
= "ŋŋ",
= "ŋk",
= "kv",
= "ˣ",
= "₍",
= "₍",
}
local lookahead = 3 -- how many unstressed syllables at most in a single unit, thus max consecutive unstressed syllables
-- Length mark that replaces the second letter of a doubled vowel.
local long = "̄"
-- Each plural name below is a bare set of characters. The matching singular
-- name wraps that set in [...] so it can be concatenated into a pattern as a
-- single-character class. An empty string here would make every pattern
-- built from these names match degenerately (e.g. "()" becomes a position
-- capture and find(s, "") always succeeds).
-- NOTE(review): the bracket wrappers were reconstructed from the paired
-- names and from how the classes are used below; confirm against the
-- canonical source.
local vowels = "aeiouüäö"
local vowel = "[" .. vowels .. "]"
local consonants = "kptgbdfˀsnmŋlrhvšžrjɦχ"
local consonant = "[" .. consonants .. "]"
-- Diacritic sets are nested: each one extends the previous set.
local diacriticsvv = long .. "̝̞̠̟̪́" .. unreleased
local diacriticsv = diacriticsvv .. diphthong .. nasalized
local diacritics = diacriticsv .. hiatus
local diacritic = "[" .. diacritics .. "]"
-- Extra letters that can occur in spelling. The spelled classes are matched
-- against raw syllables in is_light_syllable, so they combine the phoneme
-- set with the spelling-only letters.
local spelled_consonants = "cvwxz"
local spelled_consonant = "[" .. consonants .. spelled_consonants .. "]"
local spelled_vowels = "y"
local spelled_vowel = "[" .. vowels .. spelled_vowels .. "]"
local tertiary = "ˌ" -- "tertiary stress", a weaker secondary stress (either rhythmic or in some compound words). is there a better way to represent this?
-- Exposed so that other modules can use the same symbol.
export.tertiary = tertiary
-- Pattern fragment for a stress symbol; used as "^" .. stress_indicator and
-- inside a starred capture.
-- NOTE(review): this and the four stress_* fragments below are empty strings
-- in this copy. They are concatenated into patterns as if each were a
-- character class, so their contents appear to have been lost; restore them
-- from the canonical source.
local stress_indicator = ""
-- Plosive letters; UPA_wordparts adds the `unreleased` mark after one of these.
local plosives = "kptbdg"
-- When true, UPA_wordparts rewrites the stress marks at the end ("ˈ" to "·", "ˌ" to ":").
local use_UPA_stress = true
local stress_p = ""
local stress_s = ""
local stress_pd = ""
-- stress_sd is not referenced anywhere in the visible code.
local stress_sd = ""
-- Letters that stand for themselves: this adds letters_phonemes["a"] = "a",
-- letters_phonemes["e"] = "e", etc.
for letter in mw.ustring.gmatch("aeiouäödhfjklmnprstuv", ".") do
	-- Index by the letter. A plain `letters_phonemes = letter` would replace
	-- the whole table with a string and break the lookup in UPA_word.
	letters_phonemes[letter] = letter
end
--[[ This regex finds the diphthongs in the UPA transcription,
so that the correct tie diacritic can be added. ]]
-- Each entry is a pattern with two captures: the first and the second vowel
-- of the diphthong. handle_diphthongs inserts the tie between the captures.
-- NOTE(review): in diphthongs_i and diphthongs_u the first capture is an
-- empty "()", which in Lua patterns is a position capture (it yields a
-- number, not a vowel). The set of permitted first vowels appears to have
-- been lost from this copy; restore it from the canonical source.
-- /_i/ diphthongs can appear in any syllable
local diphthongs_i = {
"()(i)"
}
-- /_U/ diphthongs can appear in the initial syllable or later open syllables (no consonantal coda)
local diphthongs_u = {
"()(u)",
"()(ü)",
}
-- rising diphthongs can only appear in the initial syllable (of a word, compound word part, etc.)
local diphthongs_rising = {
"(u)(o)",
"(i)(e)",
"(ü)(ö)",
}
-- Runs the clean-up substitutions shared by the broad and the narrow
-- transcription. Takes the transcription string `p` and returns the
-- rewritten string.
local function apply_post_fixes(p)
	-- { pattern, replacement } pairs, applied in this order.
	local rules = {
		-- initial <gn> is /gn/
		{ "ˈŋn", "ˈɡn" },
		-- ŋ is short before consonant (by default)
		{ "ŋŋ(" .. consonant .. ")", "ŋ%1" },
		-- dissimilation of vowels by sandhi
		{ "(" .. vowel .. diacritic .. "*" .. long .. "?)(" .. stress_s .. ")%1", "%1₍%2%1" },
	}
	local fixed = p
	for _, rule in ipairs(rules) do
		-- Assign to a single variable so the match count is dropped.
		fixed = gsub(fixed, rule[1], rule[2])
	end
	return fixed
end
-- Applies the substitutions that are specific to the narrow transcription.
-- Takes the transcription string `p` and returns the rewritten string. The
-- substitutions run in sequence, each on the output of the previous one.
-- NOTE(review): several patterns below contain an empty "()" or "(?)" where
-- the surrounding comments describe a set of letters (nasals, labials,
-- front vowels). Those character classes appear to have been lost from this
-- copy; the affected lines are marked individually.
local function apply_post_fixes_narrow(p)
-- long j, v after i, u diphthong
p = mw.ustring.gsub(p, "("..diphthong.."i)j("..vowel..")", "%1j("..long..")%2")
-- /ʋ/ after /u/ usually realized as /w/ (see Suomi, Toivanen and Ylitalo 2008, p. )
-- NOTE(review): the page number is missing from the citation above.
p = mw.ustring.gsub(p, "("..diphthong.."u)v("..vowel..")", "%1w(w)%2")
-- cleanup
p = mw.ustring.gsub(p, "("..stress_s..")%.", "%1")
-- tautosyllabic nasals nasalize vowels between them (see Suomi, Toivanen and Ylitalo 2008, p. 22)
-- NOTE(review): "(?)" and "()" here presumably held nasal classes; as
-- written, "()" is a position capture, so n1 would be a number.
p = mw.ustring.gsub(p, "(?)("..vowel..")("..diacritic.."*)()(.?)", function (n0, nv, nvd, n1, anchor)
-- this cannot be simplified to "(.?)" => "(?)", otherwise a vowel after would match
-- When the character after the match is a vowel, nothing is returned, so
-- gsub leaves the matched text unchanged.
if not mw.ustring.find(anchor, vowel) then
return n0 .. nv .. nasalized .. nvd .. n1 .. anchor
end
end)
-- sandhi: nm > mm, np > mp, nb > mb, nk > ŋk, ng > ŋg
-- The first three rules apply across whitespace and add "‿"; the last three
-- apply without intervening whitespace before the optional stress mark.
-- NOTE(review): the final "()" in each pattern presumably held the class of
-- triggering consonants, and the two patterns that start directly with
-- "%s-(" or "(" presumably began with a class too; as written, the first
-- two patterns of each group are identical.
p = mw.ustring.gsub(p, "n%s-("..stress_pd.."?%s*)()", "ŋ‿%1%2")
p = mw.ustring.gsub(p, "n%s-("..stress_pd.."?%s*)()", "m‿%1%2")
p = mw.ustring.gsub(p, "%s-("..stress_pd.."?%s*)()", "ᴍ͔‿%1%2")
p = mw.ustring.gsub(p, "n("..stress_pd.."?%s*)()", "ŋ%1%2")
p = mw.ustring.gsub(p, "n("..stress_pd.."?%s*)()", "m%1%2")
p = mw.ustring.gsub(p, "("..stress_pd.."?%s*)()", "ᴍ͔%1%2")
-- handle potentially long consonants over secondary stresses
-- An optional doubled consonant written "(C)" is moved to the other side of
-- the stress mark matched by stress_s.
p = mw.ustring.gsub(p, "("..stress_s..")("..consonant..diacritic.."*)%(%2%)", "(%2)%1%2")
p = mw.ustring.gsub(p, "("..consonant..diacritic.."*)%(%1%)("..stress_s..")", "%2%1(%1)")
p = mw.ustring.gsub(p, "(ŋ"..diacritic.."*)"..tertiary.."ɡ", "%1"..tertiary.."ŋ")
-- allophone before front vowels (see Suomi, Toivanen and Ylitalo 2008, p. 27)
-- NOTE(review): "()" presumably held the front-vowel class; as written it
-- is a position capture and matches after every "k".
p = mw.ustring.gsub(p, "k()", "k̟%1")
return p
end
-- Tests whether a spelled syllable matches the "light" shape: an optional
-- spelled consonant followed by one spelled vowel, optionally followed by
-- "(", "*" and ")" markers, with nothing else after it. The syllable is
-- lower-cased before matching.
-- Returns the result of mw.ustring.find (truthy on a match, nil otherwise).
-- NOTE(review): the pattern begins with "^?"; an optional character class
-- (presumably leading symbols to skip) appears to have been lost between
-- "^" and "?" in this copy.
function export.is_light_syllable(syllable)
return mw.ustring.find(mw.ustring.lower(syllable), "^?" .. spelled_consonant .. "?" .. spelled_vowel .. "%(?%*?%)?$")
end
-- Reports whether a heavy (non-light) syllable occurs within the lookahead
-- window.
-- hyph: array of syllable strings.
-- start: index of the first syllable to inspect.
-- The window covers at most `lookahead` syllables after `start` and never
-- includes the last syllable of the word.
-- Returns true if any inspected syllable is not light, false otherwise
-- (including when the window is empty).
function export.has_later_heavy_syllable(hyph, start)
	local stop = math.min(start + lookahead, #hyph - 1)
	for index = start, stop do
		-- Test the syllable at this position; passing the whole `hyph`
		-- table would hand a table to mw.ustring.lower.
		if not export.is_light_syllable(hyph[index]) then
			return true
		end
	end
	return false
end
-- applied *before* UPA conversion
-- Marks rhythmic stress in a spelled word.
-- word: the spelled term, possibly containing "-" between word parts.
-- Returns two values:
--   1. the word with the tertiary stress symbol inserted before each
--      syllable that receives rhythmic stress;
--   2. a table keyed by word-part number (parts are separated by "-" or by an
--      inserted stress) whose value is true when that part was created by an
--      inserted stress rather than by a "-" already in the input.
local function add_secondary_stress(word)
	-- keep_sep_symbols = true
	local hyph = m_fi_p.syllabify(word, true)
	local res = ""
	local last_index = #hyph
	-- Whether the previous syllable ended up stressed. Declared local so it
	-- does not leak into the global environment.
	local prev_stress
	-- find stressed syllables and add secondary stress before each syllable
	for index, syllable in ipairs(hyph) do
		local stressed = false
		-- NOTE(review): the pattern "^" presumably ended in a class of
		-- separator symbols that was lost from this copy; as written it
		-- matches every syllable.
		local has_symbol = mw.ustring.find(syllable, "^")
		if has_symbol then
			-- check if symbol indicates stress
			stressed = mw.ustring.find(syllable, "^" .. stress_indicator)
			has_symbol = stressed
		end
		if not stressed then
			if index == 1 then
				stressed = true
			elseif not prev_stress and index < last_index then
				-- shift stress if current syllable light and a heavy syllable occurs later (except as the last syllable)
				stressed = index == last_index - 1 or not export.is_light_syllable(syllable) or not export.has_later_heavy_syllable(hyph, index + 1)
			end
		end
		-- check if next syllable already stressed
		-- if is, do not stress this syllable
		if stressed and index < last_index then
			-- Look at the next syllable only, not at the whole table.
			stressed = stressed and not mw.ustring.find(hyph[index + 1], "^" .. stress_indicator)
		end
		if index > 1 and stressed and not has_symbol then
			-- "-$" is a temporary marker; it is replaced by the tertiary
			-- stress symbol below.
			res = res .. "-$"
		end
		res = res .. syllable
		prev_stress = stressed
	end
	local noninitial = {}
	local index = 1
	-- Every "-" starts a new word part. A "-" followed by the "$" marker was
	-- inserted above and becomes the tertiary stress symbol; any other "-"
	-- is kept as it is.
	res = mw.ustring.gsub(res, "-(%$?)",
		function (dollar)
			index = index + 1
			noninitial[index] = #dollar > 0
			return #dollar > 0 and tertiary or "-"
		end)
	return res, noninitial
end
-- Inserts the diphthong tie or the hiatus mark between adjacent vowels.
-- UPA: the transcription string to process.
-- strict_initial: when truthy, rising and /_U/ diphthongs are recognized as
--   "initial" only at the very start of the string; otherwise after any run
--   of stress indicators.
-- Returns the rewritten string.
local function handle_diphthongs(UPA, strict_initial)
-- /_i/ diphthongs are tied wherever they occur.
for _, diphthong_regex in pairs(diphthongs_i) do
UPA = mw.ustring.gsub(UPA, diphthong_regex, "%1" .. diphthong .. "%2")
end
local only_initial = "(" .. stress_indicator .. "*)"
if strict_initial then
-- NOTE(review): "(*)" presumably contained a character class before the
-- "*" that was lost from this copy.
only_initial = "^(*)"
end
for _, diphthong_regex in pairs(diphthongs_rising) do
-- initial syllables
UPA = mw.ustring.gsub(UPA, only_initial .. diphthong_regex, "%1%2" .. diphthong .. "%3")
end
for _, diphthong_regex in pairs(diphthongs_u) do
-- initial syllables
UPA = mw.ustring.gsub(UPA, only_initial .. diphthong_regex, "%1%2" .. diphthong .. "%3")
-- Replacement callback for the two gsub calls below: v1 and v2 are the two
-- vowels, `after` is the text that follows them.
local open_noninitial =
function(v1, v2, after)
if mw.ustring.find(after, "^" .. consonant .. diacritic .. "*" .. vowel) then
-- consonant after diphthong
-- must be followed by vowel so that it's part of the
-- following syllable, else it's in this syllable
-- and thus this syllable is closed
return v1 .. diphthong .. v2 .. after
elseif mw.ustring.find(after, "^" .. consonant) then
-- consonant after diphthong
-- must be in this syllable
return v1 .. hiatus .. v2 .. after
end
-- no consonant after diphthong => open
return v1 .. diphthong .. v2 .. after
end
-- open non-initial syllables
UPA = mw.ustring.gsub(UPA, diphthong_regex .. "(.+)", open_noninitial)
UPA = mw.ustring.gsub(UPA, diphthong_regex .. "($)", open_noninitial)
end
-- Vowel pairs that were not tied above get the hiatus mark between them.
-- NOTE(review): as written both captures are `vowel*`, which can match the
-- empty string; a diacritic class after each `vowel` appears to have been
-- lost (diacriticsvv and diacriticsv are defined but not otherwise used in
-- a pattern). Confirm against the canonical source.
UPA = mw.ustring.gsub(UPA, "(" .. vowel .. "*)(" .. vowel .. "*)", "%1" .. hiatus .. "%2")
return UPA
end
-- Converts one word part from spelling to a UPA phoneme string.
-- term: the spelled word part.
-- is_narrow: when truthy, also applies the narrow-transcription rules for
--   /h/, collapses doubled vowels into vowel + length mark and, if
--   has_initial is truthy, runs handle_diphthongs in strict-initial mode.
-- has_initial: truthy when this part begins with an initial syllable.
-- Returns the converted string.
local function UPA_word(term, is_narrow, has_initial)
	local rest = term
	local phonemes = {}
	while mw.ustring.len(rest) > 0 do
		-- Find the longest string of letters that matches a recognised sequence in the list
		local longestmatch = ""
		for letter, phoneme in pairs(letters_phonemes) do
			if mw.ustring.sub(rest, 1, mw.ustring.len(letter)) == letter and mw.ustring.len(letter) > mw.ustring.len(longestmatch) then
				longestmatch = letter
			end
		end
		-- Convert the string to UPA
		if mw.ustring.len(longestmatch) > 0 then
			-- Insert the phoneme for the matched letters, not the lookup
			-- table itself (table.concat would fail on a table element).
			table.insert(phonemes, letters_phonemes[longestmatch])
			rest = mw.ustring.sub(rest, mw.ustring.len(longestmatch) + 1)
		else
			-- Unknown character: copy it through unchanged.
			table.insert(phonemes, mw.ustring.sub(rest, 1, 1))
			rest = mw.ustring.sub(rest, 2)
		end
	end
	local result = table.concat(phonemes)
	if is_narrow then
		-- articulation of h (Suomi, Toivanen & Ylitalo 2008, p. 28)
		result = mw.ustring.gsub(result, "(.?)h(.?)",
			function (before, after)
				local h
				if after ~= "" and after ~= "h" then
					-- Membership tests use plain find (4th argument true):
					-- `before`/`after` are arbitrary characters from the
					-- input, and one such as "." or "(" would otherwise be
					-- interpreted as a pattern.
					if before ~= "" and vowels:find(before, 1, true) then
						if consonants:find(after, 1, true) then
							-- vihma, yhtiö
							if before == "i" or before == "ü" then
								h = "h́"
							-- mahti, kohme, tuhka
							elseif before == "a" or before == "o" or before == "u" then
								h = "χ"
							end
						-- maha
						elseif vowels:find(after, 1, true) then
							h = "ɦ"
						end
					end
				end
				-- Returning nothing leaves the matched text unchanged.
				if h then
					return before .. h .. after
				end
			end)
		-- double letter replacement and diphthongs must be handled earlier here
		result = mw.ustring.gsub(result, "(" .. vowel .. ")%1", "%1" .. long)
		if has_initial then
			result = handle_diphthongs(result, true)
		end
	end
	return result
end
-- Transcribes a whole term, which may consist of several word parts.
-- term: the spelled term. "-" separates word parts, "/" marks a part that
--   always takes secondary stress, a leading "-" marks a suffix and a
--   trailing "-" marks a prefix.
-- is_narrow: truthy for the narrow transcription, falsy for the broad one.
-- Returns the transcription string.
function export.UPA_wordparts(term, is_narrow)
	term = mw.ustring.lower(term)
	local notinitial = {} -- true if the component is not an initial component
	local hyphenstress = "ˌ" -- secondary by default
	local is_prefix = false
	local is_suffix = false
	if mw.ustring.find(term, "%/") then
		hyphenstress = tertiary -- tertiary if we have slashes
	end
	if is_narrow then
		term, notinitial = add_secondary_stress(term)
	end
	local found
	term, found = mw.ustring.gsub(term, "^%-+", "")
	is_suffix = found > 0
	term, found = mw.ustring.gsub(term, "%-+$", "")
	is_prefix = found > 0
	-- make sure we keep slashes to figure out if secondary or tertiary
	term = mw.ustring.gsub(term, "%/", "-%1")
	local wordparts = mw.text.split(term, "-", true)
	for key, val in ipairs(wordparts) do
		local stress = key > 1 and hyphenstress or "ˈ"
		local part = val
		if mw.ustring.find(part, "^%/") then
			stress = "ˌ" -- always secondary
			part = part:sub(2)
		end
		-- Store the result back into this slot and look up this part's own
		-- flag; assigning to `wordparts` itself would replace the table that
		-- is being iterated.
		wordparts[key] = stress .. UPA_word(part, is_narrow, not notinitial[key])
	end
	-- Local, so the result does not leak into the global environment.
	local UPA = table.concat(wordparts, "")
	if is_narrow then
		-- handle * in narrow transcription
		-- post: optional ")" plus whitespace and an optional stress mark;
		-- after: the next character with its diacritics;
		-- potential_consonant: that next character alone ("" at the end).
		UPA = mw.ustring.gsub(UPA, "ˣ(%)?%s*"..stress_p.."?)((.?)" .. diacritic .. "*)",
			function (post, after, potential_consonant)
				if potential_consonant == "" then
					if mw.ustring.find(post, "^%)") then
						return "ˀ" .. post .. after
					else
						return post .. "(ˀ)" .. after
					end
				-- Plain find (4th argument true): the captured character
				-- may be one that is magic in a pattern, such as "." or "(".
				elseif consonants:find(potential_consonant, 1, true) then
					if #post > 0 then
						local amark = ""
						if plosives:find(mw.ustring.sub(after, 1, 1), 1, true) then
							amark = unreleased
						end
						return after .. amark .. post .. after
					else
						return post .. after .. after
					end
				else
					return post .. "ˀ" .. after
				end
			end)
	else
		-- Replace double letters (vowels or consonants) with single letter plus length sign.
		UPA = gsub(UPA, "(" .. vowel .. ")%1", "%1" .. long)
		UPA = handle_diphthongs(UPA, false)
	end
	UPA = apply_post_fixes(UPA)
	if is_narrow then
		UPA = apply_post_fixes_narrow(UPA)
	end
	if is_prefix then
		UPA = UPA .. "-"
	end
	if is_suffix then
		UPA = "-" .. UPA
	end
	if use_UPA_stress then
		-- NOTE(review): the two patterns below contain empty "()" groups
		-- and bare "-" / "+" where character classes appear to have been
		-- lost from this copy; they are kept as found. Restore them from
		-- the canonical source.
		UPA = mw.ustring.gsub(UPA, "()(-)(+)", "%2%3%1")
		UPA = mw.ustring.gsub(UPA, "()" .. hiatus, "%1")
		-- Replace the stress marks with their UPA counterparts.
		UPA = mw.ustring.gsub(UPA, "ˈ", "·")
		UPA = mw.ustring.gsub(UPA, "ˌ", ":")
	end
	return UPA
end
-- Entry point: produces the formatted broad and narrow UPA transcriptions.
-- term: either the spelled term as a string, or a frame object, in which
--   case the first positional argument of the parent frame is used.
--   A missing term defaults to the current page title; "*" means the page
--   title followed by "*".
-- Returns whatever format_UPA_full returns for the two transcriptions.
function export.UPA(term)
	if type(term) == "table" then
		-- Take the first positional argument. Using the args table itself
		-- would make the nil check below unreachable and pass a table on to
		-- UPA_wordparts.
		term = term:getParent().args[1]
	end
	local title = mw.title.getCurrentTitle().text
	if not term then
		term = title
	elseif term == "*" then
		term = title .. "*"
	end
	--local no_count = mw.ustring.match(term, " ")
	-- Locals, so the results do not leak into the global environment.
	local UPA_narrow = export.UPA_wordparts(term, true)
	local UPA = export.UPA_wordparts(term, false)
	return require("Module:User:Surjection/UPA").format_UPA_full(require("Module:languages").getByCode(langcode), {{pron = UPA, phonetic = false}, {pron = UPA_narrow, phonetic = true}})
end
return export