-- Documentation for this module is at /dok (edit) and /test.
-- Used only via {{ipa}}.
-- based on https://en.wiktionary.org/wiki/Module:fi-IPA
-- and https://en.wiktionary.org/wiki/Module:fi-hyphenation
-- this module must NOT be "updated" by blindly copying from those pages
-- authors: "Surjection" et al
-- modification for non-en wiktionaries: "Taylor 49"
local export = {}
-- Orthographic vowel letters, and a pattern class that matches exactly one of them.
-- The class must not be an empty string: generate_hyphenation builds
-- "^" .. coonsonant .. woowel from it, and with empty classes that pattern
-- matches at every position.
local woowels = "aeiouyåäö"
local woowel = "[" .. woowels .. "]"
-- Orthographic consonant letters (plus "ʔ" and "*"), and the matching one-character class.
local coonsonants = "bcdfghjklmnpqrstvwxzšžʔ*"
local coonsonant = "[" .. coonsonants .. "]"
-- orthographic symbols that signify separation of syllables
local sep_symbols = "-'’./ "
-- these signify that the next syllable is an "initial" syllable in a word
-- all symbols from here should also be in sep_symbols
local stressed_symbols = "-/ "
-- diphthongs and long woowels
-- Every entry is a pattern for a TWO-letter sequence: generate_hyphenation
-- consumes two characters whenever one of them matches, so a one-letter entry
-- would swallow whatever follows a lone vowel.
-- NOTE(review): the first-vowel classes of the _i/_u/_y entries were missing in
-- this copy and are restored as the standard Finnish diphthong sets
-- (ai ei oi ui yi äi öi / au ou eu iu / äy öy ey iy) - confirm against the wiki source.
-- in initial syllables
local woowel_sequences_initial = {
    "[aeouyäö]i",
    "[aoei]u",
    "[äöei]y",
    "uo",
    "ie",
    "yö",
    "aa", "ee", "ii", "oo", "uu", "yy", "ää", "öö"
}
-- in non-initial syllables
-- further, diphthongs ending _u or _y are diphthongs only
-- in non-initial syllables if the syllable is open
local woowel_sequences_noninitial = {
    "[aeouyäö]i",
    "aa", "ee", "ii", "oo", "uu", "yy", "ää", "öö"
}
-- in non-initial *open* syllables, in addition to above
local woowel_sequences_noninitial_open = {
    "[aoei]u",
    "[äöei]y"
}
-- Splits `word` into orthographic syllables and returns them as an array of strings.
--   word: the term to hyphenate; matching is done on its lowercased form, the
--         returned pieces are cut from the original string
--   keep_sep_symbols: `true` keeps every separator symbol at the start of the
--         syllable that follows it; a string keeps only the separators that
--         occur in that string; any other value drops separators
--   allow_diphthongs_everywhere is only for backwards compatibility, assume false
function generate_hyphenation(word, keep_sep_symbols, allow_diphthongs_everywhere)
    local res = {}
    local syllable = ""
    local pos = 1
    local found_woowel = false
    local initial_syllable = true
    -- every test below is case-insensitive; lowercase once instead of per test
    local lowered = mw.ustring.lower(word)
    -- pos counts characters, not bytes; fall back to the byte length if the
    -- character count is unavailable
    local length = mw.ustring.len(word) or #word
    -- The bracketed classes were missing from these patterns (a bare "^"
    -- matches everything, which made the final "?: continue" branch unreachable
    -- and marked every syllable after a separator as initial).
    local sep_class = "^[" .. sep_symbols .. "]"
    local stressed_class = "^[" .. stressed_symbols .. "]"
    -- What may follow a _u/_y diphthong for its syllable to count as open:
    -- something that is not a consonant, or a consonant that starts the next
    -- syllable because a vowel follows it.
    -- NOTE(review): the two original alternatives had lost their classes and
    -- were both reduced to the same no-op; this is the reading that matches
    -- the "open syllable" comments on the sequence tables - confirm. End of
    -- word is not treated as open here.
    local open_tails = {
        "[^" .. coonsonants .. "]",
        "[" .. coonsonants .. "][" .. woowels .. "]",
    }

    while pos <= length do
        local char = mw.ustring.sub(word, pos, pos)
        if mw.ustring.find(lowered, "^" .. coonsonant .. woowel, pos) then
            -- CV: end current syllable if we have found a woowel
            if found_woowel then
                -- (`syllable` is always a string, hence always truthy in Lua,
                -- so the push is unconditional)
                table.insert(res, syllable)
                initial_syllable = false
                found_woowel = false
                syllable = ""
            end
            syllable = syllable .. char
            pos = pos + 1
        elseif mw.ustring.find(lowered, "^" .. coonsonant, pos) then
            -- C: continue
            syllable = syllable .. char
            pos = pos + 1
        elseif mw.ustring.find(lowered, "^" .. woowel, pos) then
            if found_woowel then
                -- already found a woowel, end current syllable
                table.insert(res, syllable)
                initial_syllable = false
                syllable = ""
            end
            found_woowel = true
            -- check for diphthongs or long woowels
            local woowel_sequences = (allow_diphthongs_everywhere or initial_syllable) and woowel_sequences_initial or woowel_sequences_noninitial
            local seq_ok = false
            for _, seq in ipairs(woowel_sequences) do
                if mw.ustring.find(lowered, "^" .. seq, pos) then
                    seq_ok = true
                    break
                end
            end
            if not seq_ok and not initial_syllable then
                for _, seq in ipairs(woowel_sequences_noninitial_open) do
                    for _, tail in ipairs(open_tails) do
                        if mw.ustring.find(lowered, "^" .. seq .. tail, pos) then
                            seq_ok = true
                            break
                        end
                    end
                    if seq_ok then
                        break
                    end
                end
            end
            if seq_ok then
                -- a two-letter sequence stays together in one syllable
                syllable = syllable .. mw.ustring.sub(word, pos, pos + 1)
                pos = pos + 2
            else
                syllable = syllable .. char
                pos = pos + 1
            end
        elseif mw.ustring.find(lowered, sep_class, pos) then
            -- separates syllables
            table.insert(res, syllable)
            initial_syllable = mw.ustring.find(char, stressed_class)
            -- plain find: separators such as "." are magic in Lua patterns
            local keep = keep_sep_symbols == true
                or (type(keep_sep_symbols) == "string" and keep_sep_symbols:find(char, 1, true))
            syllable = keep and char or ""
            pos = pos + 1
            found_woowel = false
        else
            -- ?: continue
            syllable = syllable .. char
            pos = pos + 1
        end
    end
    table.insert(res, syllable)
    return res
end--function generate_hyphenation
-- shorthands for the ustring functions used further down
local U = mw.ustring.char
local gsub = mw.ustring.gsub
-- IPA marks: two combining characters built from their code points, and the length sign
local unreleased = U(794) -- U+031A
local nonsyllabic = U(815) -- U+032F, inverted breve below
local long = "ː"
-- Letter sequence -> phoneme table. IPA_word walks its input and, at each
-- position, uses the longest key of this table that prefixes the rest.
-- NOTE(review): every entry below has lost its key (`= "ɑ"` where
-- `["..."] = "ɑ"` is expected), so the table does not parse as written. Some
-- keys are not recoverable from the values alone - restore them from the
-- module's wiki source.
local letters_phonemes = {
= "ɑ",
= "æ",
= "ø",
= "o",
= "ɡ",
= "k",
= "ʋ",
= "ʃ",
= "ʒ",
= "ks",
= "ts",
= "ŋː",
= "ŋk",
= "ŋkː",
= "kʋ",
= "ˣ",
= ".",
}
local lookahead = 3 -- how many unstressed syllables at most in a single unit, thus max consecutive unstressed syllables
-- IPA vowels / consonants / diacritics, each with a one-character pattern class.
-- The classes must not be empty strings: they are spliced into patterns such as
-- "^" .. consonant .. diacritic .. "*" .. vowel.
local vowels = "ɑeiouyæø"
local vowel = "[" .. vowels .. "]"
local consonants = "kptɡgbdfʔsnmŋlrhʋʃʒrjçɦx"
local consonant = "[" .. consonants .. "]"
local diacritics = "̝̞̠̪"
local diacritic = "[" .. diacritics .. "]"
-- Classes for *spelled* (pre-conversion) syllables, used by is_light_syllable.
-- NOTE(review): assumed to extend the IPA sets above with the letters that
-- only occur in spelling ("cšvwxzž" / "aäö"); those extra letters alone would
-- not cover ordinary consonants or vowels - confirm against the wiki source.
local spelled_consonants = "cšvwxzž"
local spelled_consonant = "[" .. consonants .. spelled_consonants .. "]"
local spelled_vowels = "aäö"
local spelled_vowel = "[" .. vowels .. spelled_vowels .. "]"
local tertiary = "ˌ" -- "tertiary stress", a weaker secondary stress (either rhythmic or in some compound words). is there a better way to represent this?
-- NOTE(review): stress_indicator, stress_p and stress_s are empty strings here,
-- yet stress_indicator and stress_p are concatenated into patterns as if they
-- were one-character classes ("^" .. stress_indicator, stress_p .. "?").
-- Their contents appear to have been lost from this copy and cannot be
-- determined from the code below - restore them from the module's wiki source.
local stress_indicator = ""
-- plosives: IPA_wordparts appends the "unreleased" mark after these when
-- handling "ˣ" across a space or stress mark
local plosives = "kptbdɡ"
local stress_p = ""
-- stress_s is not referenced by any intact code in this file
local stress_s = ""
-- Narrow-transcription refinements, applied by IPA_word as pattern -> replacement.
-- Each key is the plain letter that its value decorates with a diacritic
-- (the keys were missing in this copy, which left the table unparseable).
local replacements_narrow = {
    ["e"] = "e̞",
    ["ø"] = "ø̞",
    ["o"] = "o̞",
    ["t"] = "t̪",
    ["s"] = "s̠"
}
-- This adds letters_phonemes["e"] = "e", letters_phonemes["i"] = "i", etc.
-- The entry must be stored under the letter; assigning to letters_phonemes
-- itself would replace the whole table with a one-letter string.
for letter in mw.ustring.gmatch("eiouydhfjklmnprstu", ".") do
    letters_phonemes[letter] = letter
end
--[[ These patterns find the diphthongs in the IPA transcription,
so that the nonsyllabic diacritic can be added after them.
Each pattern must cover both vowels of the diphthong: a bare "i" would mark
every single i as nonsyllabic.
NOTE(review): the first-vowel classes of the _i/_u/_y entries were missing in
this copy and are restored as the IPA counterparts of the standard Finnish
diphthong sets - confirm against the wiki source. ]]
-- /_i/ diphthongs can appear in any syllable
local diphthongs_i = {
    "[ɑeouyæø]i"
}
-- /_U/ diphthongs can appear in the initial syllable or later open syllables (no consonantal coda)
local diphthongs_u = {
    "[ɑoei]u",
    "[æøei]y",
}
-- rising diphthongs can only appear in the initial syllable (of a word, compound word part, etc.)
local diphthongs_rising = {
    "uo",
    "ie",
    "yø",
}
-- Pattern -> replacement pairs that IPA_wordparts applies to the finished
-- transcription with mw.ustring.gsub, iterating with pairs() (so in no
-- particular order).
-- NOTE(review): every key (the pattern side) is missing in this copy, so the
-- table does not parse as written. The patterns cannot be recovered from the
-- replacements alone - restore them from the module's wiki source.
local post_fixes = {
= "ts̠", -- t is alveolar in /ts/
= "n̪t̪", -- n is dental in /nt/
= "ˈɡn", -- initial <gn> is /gn/
-- ŋ is short before consonant (by default)
= "ŋ%1",
-- dissimilation of vowels by sandhi
= "%1%2(ʔ)%1"
}
-- Pattern -> replacement pairs that IPA_wordparts applies, after post_fixes,
-- to narrow transcriptions only (again via pairs(), so unordered).
-- NOTE(review): the keys are missing or cut down to fragments such as `)"]`
-- in this copy, so the table does not parse as written. Restore the patterns
-- from the module's wiki source; they cannot be rebuilt from what is left.
local post_fixes_narrow = {
-- long j, v after i, u diphthong
= "%1j("..long..")%2",
-- /ʋ/ after /u/ usually realized as /w/
-- (see Suomi, Toivanen and Ylitalo 2008)
= "%1w("..long..")%2",
-- cleanup
= "%1",
-- sandhi: nm > mm, np > mp, nb > mb, nk > ŋk, ng > ŋg
= "m" .. long,
)"] = "m%1",
)"] = "ŋ%1%2",
)"] = "m%1%2",
-- handle potentially long consonants over secondary stresses
= "(%2)%1%2",
= "%2%1("..long..")",
= "%1"..tertiary.."ŋ"
}
-- Tells whether a spelled syllable is light: at most one consonant followed by
-- exactly one vowel letter, optionally preceded by one separator symbol.
-- Returns a truthy match position for light syllables, false or nil otherwise.
-- NOTE(review): the optional leading class was missing in this copy (the
-- pattern began with "^?", which demands a literal question mark). It is
-- restored as the separator symbols, because the syllables tested here come
-- from generate_hyphenation(word, true), which keeps separators at the start
-- of a syllable - confirm against the wiki source.
function is_light_syllable(syllable)
    return mw.ustring.len(syllable) < 4
        and mw.ustring.find(mw.ustring.lower(syllable),
            "^[" .. sep_symbols .. "]?" .. spelled_consonant .. "?" .. spelled_vowel .. "$")
end
-- Tells whether a heavy (non-light) syllable occurs among hyph[start] and the
-- following syllables, looking at most `lookahead` syllables further and never
-- at the last syllable of the word.
--   hyph: array of syllables
--   start: index of the first syllable to inspect
function has_later_heavy_syllable(hyph, start)
    local stop = math.min(start + lookahead, #hyph - 1)
    for index = start, stop do
        -- test the syllable at this index, not the whole array
        if not is_light_syllable(hyph[index]) then
            return true
        end
    end
    return false
end
-- applied *before* IPA conversion
-- Hyphenates `word`, decides which non-initial syllables carry rhythmic stress
-- and writes the `tertiary` mark in front of them.
-- Returns two values: the marked-up word, and a table that holds, for each
-- hyphen met in the marked-up word (counted from 2), whether that hyphen was
-- one inserted here (true) or one that was already part of the input (false).
local function add_secondary_stress(word)
    local hyph = generate_hyphenation(word, true)
    local res = ""
    local last_index = #hyph
    -- loop state; kept local so nothing leaks into the global table
    -- (a second, write-only global `last_stressed` was dropped)
    local prev_stress = false
    -- NOTE(review): the class after "^" was missing in this copy, which made
    -- has_symbol true for every syllable; it is restored as the separator
    -- symbols that generate_hyphenation leaves at the start of a syllable - confirm.
    local sep_class = "^[" .. sep_symbols .. "]"

    -- find stressed syllables and add secondary stress before each syllable
    for index, syllable in ipairs(hyph) do
        local stressed = false
        local has_symbol = mw.ustring.find(syllable, sep_class)
        if has_symbol then
            -- check if symbol indicates stress
            stressed = mw.ustring.find(syllable, "^" .. stress_indicator)
            has_symbol = stressed
        end
        if not stressed then
            if index == 1 then
                stressed = true
            elseif not prev_stress and index < last_index then
                -- shift stress if current syllable light and a heavy syllable occurs later (except as the last syllable)
                stressed = index == last_index - 1 or not is_light_syllable(syllable) or not has_later_heavy_syllable(hyph, index + 1)
            end
        end
        -- check if next syllable already stressed
        -- if is, do not stress this syllable
        if stressed and index < last_index then
            -- look at the NEXT syllable, not at the whole array
            stressed = stressed and not mw.ustring.find(hyph[index + 1], "^" .. stress_indicator)
        end
        if index > 1 and stressed and not has_symbol then
            -- "-$" is a temporary marker, resolved by the gsub below
            res = res .. "-$"
        end
        res = res .. syllable
        prev_stress = stressed
    end

    local noninitial = {}
    local index = 1
    -- A hyphen followed by "$" is one of the markers added above and becomes
    -- the tertiary stress mark; any other hyphen is left alone. The "$" has
    -- to be escaped, it is magic in Lua patterns.
    res = mw.ustring.gsub(res, "%-(%$?)",
        function (dollar)
            index = index + 1
            -- record per hyphen; assigning to `noninitial` itself would
            -- replace the table with a boolean
            noninitial[index] = #dollar > 0
            return #dollar > 0 and tertiary or "-"
        end)
    return res, noninitial
end
-- Appends the nonsyllabic mark after diphthongs in an IPA string and returns
-- the result.
--   IPA: the transcription to process
--   strict_initial: when truthy, "initial syllable" diphthongs are only looked
--       for at the very start of the string (anchored with "^"); otherwise the
--       prefix is built from stress_indicator
-- diphthongs_i patterns are marked wherever they match; diphthongs_rising and
-- diphthongs_u patterns are marked behind the "initial" prefix; diphthongs_u
-- patterns are additionally passed through open_noninitial, which decides from
-- the text after the match whether the syllable is open.
-- NOTE(review): the two only_initial patterns below end in a bare "*"
-- (stress_indicator .. "*" and "^*"); a character class seems to be missing
-- in front of each "*". As written the "*" is matched literally - restore the
-- class from the module's wiki source.
local function handle_diphthongs(IPA, strict_initial)
-- Add nonsyllabic diacritic after last vowel of diphthong.
for _, diphthong_regex in pairs(diphthongs_i) do
IPA = mw.ustring.gsub(IPA, diphthong_regex, "%0" .. nonsyllabic)
end
local only_initial = stress_indicator .. "*"
if strict_initial then
only_initial = "^*"
end
for _, diphthong_regex in pairs(diphthongs_rising) do
-- initial syllables
IPA = mw.ustring.gsub(IPA, only_initial .. diphthong_regex, "%0" .. nonsyllabic)
end
for _, diphthong_regex in pairs(diphthongs_u) do
-- initial syllables
IPA = mw.ustring.gsub(IPA, only_initial .. diphthong_regex, "%0" .. nonsyllabic)
-- gsub callback: receives the matched diphthong and the text captured after it
local open_noninitial =
function(diphthong, after)
if mw.ustring.find(after, "^" .. consonant .. diacritic .. "*" .. vowel) then
-- consonant after diphthong
-- must be followed by vowel so that it's part of the
-- following syllable, else it's in this syllable
-- and thus this syllable is closed
return diphthong .. nonsyllabic .. after
elseif mw.ustring.find(after, "^" .. consonant) then
-- consonant after diphthong
-- must be in this syllable
return diphthong .. after
end
-- no consonant after diphthong => open
return diphthong .. nonsyllabic .. after
end
-- open non-initial syllables
-- first form: diphthong followed by at least one more character
IPA = mw.ustring.gsub(IPA, "(" .. diphthong_regex .. ")(.+)", open_noninitial)
-- second form: diphthong at the very end of the string
IPA = mw.ustring.gsub(IPA, "(" .. diphthong_regex .. ")($)", open_noninitial)
end
return IPA
end
-- Converts one word part from spelling to IPA.
--   term: the (lowercased) spelling of the part
--   is_narrow: when truthy, additionally applies the narrow-transcription
--       steps (articulation of h, length marks, replacements_narrow)
--   has_initial: in narrow mode, when truthy, diphthongs are marked with
--       handle_diphthongs in strict-initial mode
-- Returns the transcription as a string.
local function IPA_word(term, is_narrow, has_initial)
    local rest = term
    local phonemes = {}

    while mw.ustring.len(rest) > 0 do
        -- Find the longest string of letters that matches a recognised sequence in the list
        local longestmatch = ""
        for letter in pairs(letters_phonemes) do
            if mw.ustring.sub(rest, 1, mw.ustring.len(letter)) == letter and mw.ustring.len(letter) > mw.ustring.len(longestmatch) then
                longestmatch = letter
            end
        end

        -- Convert the string to IPA
        if mw.ustring.len(longestmatch) > 0 then
            -- insert the phoneme for the match, not the lookup table itself
            table.insert(phonemes, letters_phonemes[longestmatch])
            rest = mw.ustring.sub(rest, mw.ustring.len(longestmatch) + 1)
        else
            -- unknown character: copy it through unchanged
            table.insert(phonemes, mw.ustring.sub(rest, 1, 1))
            rest = mw.ustring.sub(rest, 2)
        end
    end

    local result = table.concat(phonemes)

    if is_narrow then
        -- articulation of h
        result = mw.ustring.gsub(result, "(.?)h(.?)",
            function (before, after)
                local h
                -- Plain finds: `before`/`after` are arbitrary input
                -- characters, and "." or "(" must not act as a pattern.
                if after ~= "" and before ~= "" and vowels:find(before, 1, true) then
                    if consonants:find(after, 1, true) then
                        -- vihma, yhtiö
                        if before == "i" or before == "y" then
                            h = "ç"
                        -- mahti, kohme, tuhka
                        elseif before == "ɑ" or before == "o" or before == "u" then
                            h = "x"
                        end
                    -- maha
                    elseif vowels:find(after, 1, true) then
                        h = "ɦ"
                    end
                end
                if h then
                    return before .. h .. after
                end
                -- returning nothing makes gsub keep the original text
            end)

        -- double letter replacement and diphthongs must be handled earlier here
        result = mw.ustring.gsub(result, "(%a)%1", "%1" .. long)
        if has_initial then
            result = handle_diphthongs(result, true)
        end

        for letter, phoneme in pairs(replacements_narrow) do
            result = mw.ustring.gsub(result, letter, phoneme)
        end
    end

    return result
end
-- Transcribes a whole term (possibly a compound written with "-" or "/")
-- into IPA.
--   term: the spelling; a leading hyphen marks a suffix and a trailing hyphen
--         a prefix, and the hyphen is put back around the result
--   is_narrow: truthy for a narrow transcription, falsy for a broad one
-- Returns the transcription without the surrounding slashes or brackets.
function IPA_wordparts(term, is_narrow)
    term = mw.ustring.lower(term)
    local notinitial = {} -- true if the component is not an initial component
    local hyphenstress = "ˌ" -- secondary by default
    local is_prefix = false
    local is_suffix = false

    if mw.ustring.find(term, "%/") then
        hyphenstress = tertiary -- tertiary if we have slashes
    end

    if is_narrow then
        term, notinitial = add_secondary_stress(term)
    end

    local found
    term, found = mw.ustring.gsub(term, "^%-+", "")
    is_suffix = found > 0
    term, found = mw.ustring.gsub(term, "%-+$", "")
    is_prefix = found > 0

    -- make sure we keep slashes to figure out if secondary or tertiary
    term = mw.ustring.gsub(term, "%/", "-%1")
    local wordparts = mw.text.split(term, "-", true)

    for key, val in ipairs(wordparts) do
        local stress = key > 1 and hyphenstress or "ˈ"
        local part = val
        if mw.ustring.find(part, "^%/") then
            stress = "ˌ" -- always secondary
            part = part:sub(2)
        end
        -- Replace this part in place and consult its own notinitial flag;
        -- without the indexes the list itself was overwritten by a string.
        wordparts[key] = stress .. IPA_word(part, is_narrow, not notinitial[key])
    end

    -- local: the result must not leak into the global table
    local IPA = table.concat(wordparts, "")

    if is_narrow then
        -- handle * in narrow transcription
        IPA = mw.ustring.gsub(IPA, "ˣ(%s*)("..stress_p.."?)((.?)" .. diacritic .. "*)",
            function (space, stress, after, potential_consonant)
                if potential_consonant == "" then
                    return space .. stress .. "(ʔ)" .. after
                -- plain finds: the character comes from the input and must
                -- not be interpreted as a pattern
                elseif consonants:find(potential_consonant, 1, true) then
                    if #space > 0 or #stress > 0 then
                        local amark = ""
                        if plosives:find(mw.ustring.sub(after, 1, 1), 1, true) then
                            amark = unreleased
                        end
                        return after .. amark .. space .. stress .. after
                    else
                        return space .. after .. long
                    end
                else
                    return space .. stress .. "ʔ" .. after
                end
            end)
    else
        -- Replace double letters (vowels or consonants) with single letter plus length sign.
        IPA = gsub(IPA, "(%a)%1", "%1" .. long)
        IPA = handle_diphthongs(IPA, false)
    end

    for letter, phoneme in pairs(post_fixes) do
        IPA = mw.ustring.gsub(IPA, letter, phoneme)
    end

    if is_narrow then
        for letter, phoneme in pairs(post_fixes_narrow) do
            IPA = mw.ustring.gsub(IPA, letter, phoneme)
        end
    end

    if is_prefix then
        IPA = IPA .. "-"
    end
    if is_suffix then
        IPA = "-" .. IPA
    end

    return IPA
end
-- Module entry point: returns the broad transcription of a term wrapped as
-- "/.../, ".
--   arxframent: the frame object handed over by #invoke; its `args` table is read
-- The term is the current page title unless a non-empty override is passed.
function export.IPA (arxframent)
    local IPA_input = mw.title.getCurrentTitle().text -- {{PAGENAME}}
    local arxourown = arxframent.args -- local: must not leak into the global table
    -- NOTE(review): the index was missing in this copy (the whole args table
    -- was tested for being a string, so the override could never apply).
    -- The first positional parameter is assumed - confirm against the
    -- template that invokes this module.
    local strover = arxourown[1] -- use only if needed
    if type(strover) == "string" and string.len(strover) ~= 0 then
        IPA_input = strover -- override only if parameter is non-empty (due to forwarding)
    end--if
    return "/" .. IPA_wordparts(IPA_input, false) .. "/, "
end
return export