local export = {}
local lang = require("Module:languages").getByCode("uk")
local m_IPA = require("Module:IPA")
local m_str_utils = require("Module:string utilities")
local m_table = require("Module:table")
local com = require("Module:uk-common")
local lower = m_str_utils.lower
local rfind = m_str_utils.find
local rsplit = m_str_utils.split
local rsubn = m_str_utils.gsub
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
local u = m_str_utils.char
-- Combining accent marks, built from their code points:
-- AC = U+0301 (combining acute), GR = U+0300 (combining grave).
local AC, GR = u(0x301), u(0x300)
-- Phoneme inventories used when building patterns. Each plain string lists
-- the member characters; the matching `*_c` constant wraps that list in
-- square brackets so it can be spliced into a pattern as a character class,
-- e.g. "(" .. vowel_c .. ")". An empty `*_c` would turn such a group into the
-- position capture "()", which is never what the callers want.
local vowel_no_i = "aɛɪuɔɐoʊe"
local vowel = vowel_no_i .. "i"
local vowel_c = "[" .. vowel .. "]"
local consonant_no_w = "bdzʒɡɦmnlrpftskxʃj"
local consonant_no_w_c = "[" .. consonant_no_w .. "]"
-- All consonants, i.e. consonant_no_w plus /ʋ/ and the symbols used for its allophones.
local consonant = consonant_no_w .. "ʋβ̞wʍ"
local consonant_c = "[" .. consonant .. "]"
-- Consonants that can take the palatalization mark ʲ.
local palatalizable = "tdsznlrbpʋfɡmkɦxʃʒ"
local palatalizable_c = "[" .. palatalizable .. "]"
-- Wrapper around rsubn() that returns only the substituted string and drops
-- the replacement count. The extra parentheses truncate the call to exactly
-- one return value.
local function rsub(term, foo, bar)
	return (rsubn(term, foo, bar))
end
--- Strip pronunciation-only notation from a term.
-- When `remove_grave` is truthy, the text is decomposed (NFD), every combining
-- grave accent (GR) is deleted, and the result is recomposed (NFC). Otherwise
-- the text is returned untouched, so grave accents can be removed from
-- annotations while being kept in a phonetic respelling.
function export.remove_pron_notations(text, remove_grave)
	if not remove_grave then
		return text
	end
	local decomposed = toNFD(text)
	local without_grave = rsub(decomposed, GR, "")
	return toNFC(without_grave)
end
-- Set of consonant clusters that are kept together as a syllable onset when
-- the stress mark is moved to the start of the stressed syllable. Keys are
-- the bare consonants of the cluster (no palatalization marks).
-- The transcription writes the voiced velar stop as IPA "ɡ" (U+0261), so the
-- clusters with that letter are listed in that spelling; the ASCII "g"
-- spellings are kept as well so that no previously accepted key disappears.
local perm_syl_onset = m_table.listToSet({
	'spr', 'str', 'skr', 'spl', 'skl',
	'sp', 'st', 'sk', 'sf', 'sx',
	'pr', 'br', 'tr', 'dr', 'kr', 'gr', 'ɡr', 'ɦr', 'fr', 'xr',
	'pl', 'bl', 'kl', 'gl', 'ɡl', 'ɦl', 'fl', 'xl',
})
--- Convert a stress-marked Ukrainian (Cyrillic) term into an IPA transcription.
--
-- @param text Either the respelling as a string, or a frame-like table (it must
--   provide `.args` and `:getParent()`), in which case all four values are read
--   from the frame and parent-frame arguments instead. If no text is available,
--   the current page title is used.
-- @param allow_unstressed If truthy, skip the "must have an acute accent" check
--   and do not auto-stress monosyllables.
-- @param output If equal to "template", return formatted wikitext produced by
--   m_IPA.format_IPA_full (optionally preceded by an annotation); otherwise
--   return the raw transcription string.
-- @param ann Annotation control, used only for template output: "1" or "y" uses
--   the original term with grave accents removed; any other truthy value is used
--   verbatim; a falsy value means no annotation.
-- @return The transcription string, or annotation plus formatted IPA.
--
-- Raises an error if the text fails the Cyrillic script check or, unless
-- `allow_unstressed` is set, if com.needs_accents() reports a missing stress.
--
-- NOTE(review): in this copy several table constructors below contain entries
-- with no key (`= value`) and several patterns contain empty groups or
-- quantifiers with nothing to quantify. It looks as if bracketed text was lost;
-- those spots are left exactly as found and need to be restored from the
-- canonical version of the module.
function export.pronunciation(text, allow_unstressed, output, ann)
	if type(text) == "table" then
		local iparams = {
			= {},
		}
		local params = {
			= {},
			= {type = "boolean"},
			= {},
			= {},
		}
		local iargs = require("Module:parameters").process(text.args, iparams)
		local args = require("Module:parameters").process(text:getParent().args, params)
		-- The respelling is the first positional argument. Assigning the whole
		-- `args` table here would make the page-title fallback below unreachable
		-- and pass a table to the string functions that follow.
		text, allow_unstressed, output, ann = args[1], args.allow_unstressed, iargs.output, args.ann
	end
	if not text then
		text = mw.title.getCurrentTitle().text
	end
	-- Returns an error if the text contains alphabetic characters that are not Cyrillic.
	require("Module:script utilities").checkScript(text, "Cyrl")
	-- Keep the untouched input for the "ann=1"/"ann=y" annotation.
	local origterm = text
	-- Lowercase and decompose ѐ and ѝ into letter + accent char
	text = lower(com.decompose_grave(text))
	if not allow_unstressed and com.needs_accents(text) then
		error("Multisyllabic words that are not prefixes or suffixes must have an acute accent marking the stress, unless allow_unstressed=1 is given: " .. text)
	end
	-- convert commas and en/em dashes (with any surrounding whitespace) to IPA foot boundaries
	text = rsub(text, '%s*[,–—]%s*', ' | ')
	-- canonicalize multiple spaces
	text = rsub(text, '%s+', ' ')
	local phonetic_chars_map = {
		-- single characters that map to IPA sounds; these are processed last
		= {
			= "a", = "b", = "ʋ", = "ɦ", = "ɡ",
			= "d", = "ɛ", = "jɛ", = "ʒ", = "z",
			= "ɪ", = "i", = "ji", = "j", = "k",
			= "l", = "m", = "n", = "ɔ", = "p",
			= "r", = "s", = "t", = "u", = "f",
			= "x", = "t͡s", = "t͡ʃ", = "ʃ", = "ʃt͡ʃ",
			= "ʲ", = "ju", = "ja", = "j",
			-- accented vowels
			= "ˈ", = "ˌ",
		},
		-- character sequences of two that map to IPA sounds
		= {
			= "d͡ʒ", = "d͡z",
			-- Dental plosives assimilate to following hissing/hushing consonants, which is not noted in the spelling.
			= "d͡zs", = "d͡ʒʃ", = "d͡ʒt͡ʃ", = "d͡zt͡s",
			= "t͡s", = "t͡ʃʃ", = "t͡ʃː", = "t͡sː",
		},
		-- character sequences of three that map to IPA sounds
		= {
			= "d͡zʲ",
			-- Dental plosives assimilate to following hissing/hushing consonants, which is not noted in the spelling.
			= "t͡sʲː"
		},
	}
	-- Transcriptions of the individual pieces, joined with spaces at the end.
	local pronuns = {}
	-- FIXME, not completely correct, we need to treat hyphens at beginning and end of
	-- a word as indicating unstressed pronunciation.
	for _, phonetic in ipairs(rsplit(text, "+")) do
		-- "#" marks both edges of the piece so that rules can refer to them;
		-- every "#" is removed again after the loop.
		phonetic = "#" .. phonetic .. "#"
		local orthographic_replacements = {
			-- first apply consonant cluster simplifications that always occur orthographically
			= "ньськ",
			= "ськ",
			= "нст",
			= "шч",
			= "зд",
			= "сː",
			= "ськ",
			= "#шː",
			= "жш",
			= "#шч",
			= "жч",
			-- then long consonants that are orthographically geminated.
			)%1" ] = "%1ː",
			+)жж" ] = "%1жː", -- джж sequence encode diphonemic дж
			+)зз" ] = "%1зː", -- дзз sequence encode diphonemic дз
			)%1"] = "%1ː",
			= "джː",
			= "дзː",
		}
		-- NOTE(review): pairs() visits keys in an unspecified order, while the
		-- comments inside the table read as if the rules were ordered
		-- ("first ... then ..."). Confirm the rules are order-independent.
		for regex, replacement in pairs(orthographic_replacements) do
			phonetic = rsub(phonetic, regex, replacement)
		end
		-- remap apostrophe to '!' so that it doesn't conflict with IPA stress mark
		phonetic = rsub(phonetic, "'", "!")
		-- replace multiple letter sequences
		for _, replacements in ipairs(phonetic_chars_map) do
			for key, replacement in pairs(replacements) do
				phonetic = rsub(phonetic, key, replacement)
			end
		end
		-- move stress mark, added by phonetic_chars_map, before vowel
		phonetic = rsub(phonetic, "()()", "%2%1")
		-- add accent if the word is monosyllabic and not allow_unstressed,
		-- so that monosyllabic words without explicit stress marks get stressed
		-- vowel allophones; we use a different character (⁀) from the regular
		-- primary stress mark so we can later remove it without affecting
		-- explicitly user-added accents on monosyllabic words, as in нема́ за́ що.
		local _, numberOfVowels = rsubn(phonetic, "", "")
		if (numberOfVowels == 1) and not allow_unstressed then
			phonetic = rsub(phonetic, "()", "⁀%1")
		end
		-- palatalizable consonants before /i/ or /j/ become palatalized
		phonetic = rsub(phonetic, "(" .. palatalizable_c .. ")(?)(?)i", "%1ʲ%2%3i")
		phonetic = rsub(phonetic, "(" .. palatalizable_c .. ")(?)j", "%1ʲ%2")
		-- eliminate the garbage sequence ʲːj by dropping its final j
		phonetic = rsub(phonetic, "ʲːj", "ʲː")
		-- consonant simplification: ст + ць → sʲt͡sʲ. We do it here because of palatalization.
		-- An earlier rule can leave a length mark on the affricate. According to
		-- Орфоепічний словник p. 13, both forms are proper, without length in normal
		-- (colloquial) speech and with length in slow speech, so we parenthesize
		-- the length as optional.
		phonetic = rsub(phonetic, "st͡sʲ(?)", "sʲt͡sʲ(%1)")
		-- assimilation: voiceless + voiced = voiced + voiced
		-- should /ʋ/ be included as voiced? Орфоепічний словник doesn't voice initial cluster of шв (p. 116)
		local voiced_obstruent = ""
		local voicing = {
			= "b",
			= "v",
			= "d",
			= "dʲ",
			= "z",
			= "zʲ",
			= "ʒ",
			= "ɡ",
			= "ɦ",
			= "d͡z",
			= "d͡zʲ",
			= "d͡ʒ",
			= "ʒd͡ʒ",
		}
		for voiceless, voiced in pairs(voicing) do
			phonetic = rsub(phonetic, voiceless .. "(" .. voiced_obstruent .. "+)", voiced .. "%1")
		end
		-- In the sequence of two consonants, of which the second is soft, the first is pronounced soft too
		-- unless the first consonant is a labial, namely б, п, в, ф, м.
		phonetic = rsub(phonetic, "()(.)ʲ", "%1ʲ%2ʲ")
		phonetic = rsub(phonetic, "()t͡sʲ", "%1ʲt͡sʲ")
		phonetic = rsub(phonetic, "()d͡zʲ", "%1ʲd͡zʲ")
		phonetic = rsub(phonetic, "t͡s(.)ʲ", "t͡sʲ%1ʲ")
		phonetic = rsub(phonetic, "d͡z(.)ʲ", "d͡zʲ%1ʲ")
		phonetic = rsub(phonetic, "d͡zt͡sʲ", "d͡zʲt͡sʲ")
		phonetic = rsub(phonetic, "t͡sd͡zʲ", "t͡sʲd͡zʲ")
		-- Hushing consonants ж, ч, ш assimilate to the following hissing consonants:
		-- ʒ + t͡sʲ → zʲt͡sʲ, t͡ʃ + t͡sʲ → t͡sʲː, ʃ + t͡sʲ → sʲt͡sʲ, ʃ + sʲ → sʲː
		phonetic = rsub(phonetic, "ʒt͡sʲ", "zʲt͡sʲ")
		phonetic = rsub(phonetic, "t͡ʃt͡sʲ", "t͡sʲː")
		phonetic = rsub(phonetic, "ʃt͡sʲ", "sʲt͡sʲ")
		phonetic = rsub(phonetic, "ʃsʲ", "sʲː")
		-- Hissing consonants before hushing consonants within a word assimilate - on зш and зч word-initially and
		-- word-medially see above.
		-- z + ʒ → ʒː, s + ʃ → ʃː, z + t͡ʃ → ʒt͡ʃ, z + d͡ʒ → ʒd͡ʒ
		phonetic = rsub(phonetic, "zʒ", "ʒː")
		phonetic = rsub(phonetic, "sʃ", "ʃː")
		phonetic = rsub(phonetic, "zt͡ʃ", "ʒt͡ʃ")
		phonetic = rsub(phonetic, "zd͡ʒ", "ʒd͡ʒ")
		-- repair affricates whose two halves now disagree in voicing
		phonetic = rsub(phonetic, "t͡ʒ", "d͡ʒ")
		phonetic = rsub(phonetic, "t͡z", "d͡z")
		-- cleanup: excessive palatalization: CʲCʲCʲ → CCʲCʲ
		phonetic = rsub(phonetic, "(+)ʲ(+)ʲ(+)ʲ", "%1%2ʲ%3ʲ")
		-- unstressed /a/ has an allophone ɐ
		phonetic = rsub(phonetic, "()a", "%1ɐ")
		-- unstressed /u/ has an allophone ʊ
		phonetic = rsub(phonetic, "()u", "%1ʊ")
		-- unstressed /ɔ/ has by assimilation an allophone o before a stressed syllable with /u/ or /i/
		phonetic = rsub(phonetic, "ɔ(+)()", "o%1%2")
		-- one allophone, e, covers unstressed /ɛ/ and /ɪ/
		phonetic = rsub(phonetic, "()", "%1e")
		-- Remove the monosyllabic stress we auto-added to ensure that vowels in
		-- monosyllabic words get stressed allophones. Do this before vocalizing
		-- /ʋ/ and /j/. NOTE: Nothing below should depend on stress marks being
		-- present.
		phonetic = rsub(phonetic, "⁀", "")
		-- /ʋ/ has an allophone u̯ in a syllable coda
		phonetic = rsub(phonetic, "(" .. vowel_c .. ")ʋ()", "%1u̯%2")
		-- /ʋ/ has an allophone w before /ɔ, u/ and voiced consonants (not after a vowel; before vowel already converted)
		phonetic = rsub(phonetic, "ʋ(?)", "w%1")
		-- /ʋ/ has an allophone β̞ before remaining vowels besides /i/
		-- Not sure whether this looks good.
		-- phonetic = rsub(phonetic, "ʋ(*)", "β̞%1")
		-- /ʋ/ has an allophone ʍ before voiceless consonants (not after a vowel; before vowel already converted)
		phonetic = rsub(phonetic, "ʋ()", "ʍ%1")
		-- in a syllable-final position (i.e. the first position of a syllable coda) /j/ has an allophone i̯
		phonetic = rsub(phonetic, "(" .. vowel_c .. ")j()", "%1i̯%2")
		-- also at the beginning of a word before a consonant
		phonetic = rsub(phonetic, "#j(" .. consonant_no_w_c .. ")", "#i̯%1")
		-- remove old orthographic apostrophe
		phonetic = rsub(phonetic, "!", "")
		-- stress mark in correct place
		-- (1) Put the stress mark before the final consonant of a cluster (if any).
		phonetic = rsub(phonetic, "(?*)()", "%2%1")
		-- (2) Continue moving it over the rest of an affricate with a tie bar.
		phonetic = rsub(phonetic, "(͡)()", "%2%1")
		-- (3) Continue moving it over any "permanent onset" clusters (e.g. st, skr, pl, also Cj).
		-- a, b, c are three consecutive consonants with the stress mark sitting
		-- before c; aj and bj are the optional palatalization marks of a and b.
		phonetic = rsub(phonetic, "(.)(ʲ?)(" .. consonant_c .. ")(ʲ?)()(" .. consonant_c .. ")",
			function(a, aj, b, bj, stress, c)
				if perm_syl_onset[a .. b .. c] then
					-- three-consonant onset: stress mark goes before all three
					return stress .. a .. aj .. b .. bj .. c
				elseif perm_syl_onset[b .. c] or c == "j" then
					-- two-consonant onset (or consonant + j): stress mark goes before b
					return a .. aj .. stress .. b .. bj .. c
				else
					-- no permanent onset: leave the stress mark where it is
					return a .. aj .. b .. bj .. stress .. c
				end
			end)
		-- (4) If we're in the middle of an affricate with a tie bar, continue moving back
		-- if the following consonant is /j/, else move forward.
		phonetic = rsub(phonetic, "(͡)()(.ʲ?j)", "%2%1%3")
		phonetic = rsub(phonetic, "(͡)()(.ʲ?)", "%1%3%2")
		-- (5) Move back over any remaining consonants at the beginning of a word.
		phonetic = rsub(phonetic, "#(+)()", "#%2%1")
		-- (6) Move back over u̯ or i̯ at the beginning of a word.
		phonetic = rsub(phonetic, "#(̯)()", "#%2%1")
		-- normalize the order of palatalization and length marks to ʲː
		phonetic = rsub(phonetic, "ʲ?ːʲ", "ʲː")
		-- use dark ɫ for non-palatal /l/
		phonetic = rsub(phonetic, "l()", "ɫ%1")
		table.insert(pronuns, phonetic)
	end
	-- Declared local: the loop variable named `phonetic` is out of scope here,
	-- so a bare assignment would create a global. Join the pieces and drop the
	-- "#" edge markers.
	local phonetic = rsub(table.concat(pronuns, " "), "#", "")
	if output == "template" then
		local ipa = m_IPA.format_IPA_full {
			lang = lang,
			-- square brackets mark the transcription as phonetic
			items = {{ pron = "[" .. phonetic .. "]" }},
		}
		local anntext
		if ann == "1" or ann == "y" then
			-- remove secondary stress annotations
			anntext = "'''" .. export.remove_pron_notations(origterm, true) .. "''': "
		elseif ann then
			anntext = "'''" .. ann .. "''': "
		else
			anntext = ""
		end
		return anntext .. ipa
	else
		return phonetic
	end
end
return export