Module:uk-pronunciation

The following documentation is located at Module:uk-pronunciation/documentation. Categories were auto-generated by Module:module categorization.

Useful links: subpage list • links • transclusions • testcases • sandbox

Consonant sequences that normally represent a single sound (an affricate) but are diphonemic at a morpheme boundary should be respelled with the second component geminated:
дж (dž) → джж (džž): віджи́лий (vidžýlyj) → віджжи́лий (vidžžýlyj)

дз (dz) → дзз (dzz): підзе́мний (pidzémnyj) → підззе́мний (pidzzémnyj)

Testcases

Module:uk-pronunciation/testcases

References

Solomija Buk, Ján Mačutek, Andrij Rovenchak (2008), Some properties of the Ukrainian writing system
Ian Press, Stefan Pugh (2005), Ukrainian: A Comprehensive Grammar, Routledge: London, p. 18-47
М. І. Погрібний (editor) (1986), Орфоепічний словник, Радянська школа: Київ, p. 3-14
М. А. Жовтобрюх (editor-in-chief) (1973), Українська літературна вимова і наголос: Словник-довідник, Наукова думка: Київ, p. 5-14
George Y. Shevelov (1993), Ukrainian, in "The Slavonic languages", Bernard Comrie and Greville G. Corbett (editors), Routledge: London, p. 948-952
Tonia Bilous (2005), Українська мова засобами Міжнародного фонетичного алфавіту

local export = {}

local lang = require("Module:languages").getByCode("uk")

local m_IPA = require("Module:IPA")
local m_str_utils = require("Module:string utilities")
local m_table = require("Module:table")
local com = require("Module:uk-common")

local lower = m_str_utils.lower
local rfind = m_str_utils.find
local rsplit = m_str_utils.split
local rsubn = m_str_utils.gsub
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
local u = m_str_utils.char

-- U+0301 (combining acute accent) and U+0300 (combining grave accent).
local AC = u(0x301)
local GR = u(0x300)

-- Inventories of IPA symbols, written as plain strings of characters.
-- The *_c variables are concatenated into substitution patterns inside
-- export.pronunciation.
-- NOTE(review): every *_c variable is an empty string in this copy, although the
-- names suggest they should be character classes built from the inventory next
-- to them; presumably the bracketed expressions were lost -- confirm against the
-- upstream module before relying on them.
local vowel_no_i = "aɛɪuɔɐoʊe"
local vowel = vowel_no_i .. "i"
local vowel_c = ""
local consonant_no_w = "bdzʒɡɦmnlrpftskxʃj"
local consonant_no_w_c = ""
local consonant = consonant_no_w .. "ʋβ̞wʍ"
local consonant_c = ""
local palatalizable = "tdsznlrbpʋfɡmkɦxʃʒ"
local palatalizable_c = ""

-- Thin wrapper over rsubn(): performs the same substitution but hands back
-- only the resulting string, dropping the replacement count.
local function rsub(term, foo, bar)
	-- The extra parentheses truncate the call to exactly one return value.
	return (rsubn(term, foo, bar))
end


-- Strip pronunciation-only notation from `text`.
-- When `remove_grave` is truthy, every combining grave accent is deleted;
-- otherwise `text` is returned unchanged (graves are dropped from annotations
-- but may be kept in a phonetic respelling).
function export.remove_pron_notations(text, remove_grave)
	if not remove_grave then
		return text
	end
	-- Decompose so the grave is a standalone combining character, delete it,
	-- then recompose the remaining text.
	local decomposed = toNFD(text)
	local without_grave = rsub(decomposed, GR, "")
	return toNFC(without_grave)
end

	
-- Set of consonant clusters treated as "permanent" syllable onsets; it is
-- consulted in export.pronunciation when the stress mark is moved back over a
-- consonant cluster.
-- The velar stop is listed both as ASCII "g" and as IPA "ɡ" (U+0261): the
-- phonetic strings built by this module use the IPA letter (see the consonant
-- inventories and the voicing table), so the ASCII-only spellings could never
-- match. The ASCII forms are kept for backward compatibility.
local perm_syl_onset = m_table.listToSet({
	'spr', 'str', 'skr', 'spl', 'skl',
	'sp', 'st', 'sk', 'sf', 'sx',
	'pr', 'br', 'tr', 'dr', 'kr', 'gr', 'ɡr', 'ɦr', 'fr', 'xr',
	'pl', 'bl', 'kl', 'gl', 'ɡl', 'ɦl', 'fl', 'xl',
})


-- Convert Ukrainian Cyrillic text into a phonetic IPA transcription.
--
-- Parameters:
--   text              String to transcribe. If a table is passed instead, it is
--                     treated as a frame-like object: arguments are read from
--                     text.args and text:getParent().args via Module:parameters
--                     and replace the remaining parameters. If text ends up
--                     nil/false, the current page title is used.
--   allow_unstressed  When falsy, an error is raised if com.needs_accents()
--                     reports a missing stress mark, and monosyllabic words get a
--                     temporary stress marker so that they receive stressed vowel
--                     allophones.
--   output            When equal to "template", the result is the optional
--                     annotation followed by the output of
--                     m_IPA.format_IPA_full; otherwise the raw phonetic string is
--                     returned.
--   ann               Only used when output == "template": "1" or "y" annotates
--                     with the original term (grave accents removed); any other
--                     truthy value is used verbatim; nil/false gives no annotation.
--
-- Raises: an error for text that needs but lacks an accent (see above); the
-- script check may also raise for non-Cyrillic alphabetic characters.
--
-- NOTE(review): in this copy the bracketed parts of the code appear to be
-- missing -- table keys (entries of the form ` = value`), pattern character
-- classes (e.g. "()()" patterns) and possibly an index on `args` where `text` is
-- reassigned. The block is not valid Lua as shown; restore these from the
-- upstream module before making functional changes.
function export.pronunciation(text, allow_unstressed, output, ann)
	if type(text) == "table" then
		local iparams = {
			 = {},
		}
		local params = {
			 = {},
			 = {type = "boolean"},
			 = {},
			 = {},
		}
		local iargs = require("Module:parameters").process(text.args, iparams)
		local args = require("Module:parameters").process(text:getParent().args, params)
		text, allow_unstressed, output, ann = args, args.allow_unstressed, iargs.output, args.ann
	end
	
	if not text then
		text = mw.title.getCurrentTitle().text
	end

	--	Returns an error if the text contains alphabetic characters that are not Cyrillic.
	require("Module:script utilities").checkScript(text, "Cyrl")

	local origterm = text
	-- Lowercase and decompose ѐ and ѝ into letter + accent char
	text = lower(com.decompose_grave(text))

	if not allow_unstressed and com.needs_accents(text) then
		error("Multisyllabic words that are not prefixes or suffixes must have an acute accent marking the stress, unless allow_unstressed=1 is given: " .. text)
	end

	-- convert commas and en/em dashes to IPA foot boundaries
	text = rsub(text, '%s*%s*', ' | ')

	-- canonicalize multiple spaces
	text = rsub(text, '%s+', ' ')

	local phonetic_chars_map = {
	
		-- single characters that map to IPA sounds; these are processed last
		 = {
			 = "a",	 = "b",	 = "ʋ",	 = "ɦ",	 = "ɡ", 
			 = "d",	 = "ɛ",	 = "jɛ",	 = "ʒ",	 = "z", 
			 = "ɪ",	 = "i",	 = "ji",	 = "j",	 = "k", 
			 = "l",	 = "m",	 = "n",	 = "ɔ",	 = "p", 
			 = "r",	 = "s",	 = "t",	 = "u",	 = "f", 
			 = "x",	 = "t͡s",	 = "t͡ʃ",	 = "ʃ",	 = "ʃt͡ʃ", 
			 = "ʲ",	 = "ju",	 = "ja",	 = "j",
			-- accented vowels
			 = "ˈ",  = "ˌ",
		},
	
		-- character sequences of two that map to IPA sounds
		 = {
			 = "d͡ʒ",	 = "d͡z",
		-- Dental plosives assimilate to following hissing/hushing consonants, which is not noted in the spelling.
			 = "d͡zs",    = "d͡ʒʃ",    = "d͡ʒt͡ʃ",  = "d͡zt͡s",
			 = "t͡s",	 = "t͡ʃʃ",    = "t͡ʃː",  = "t͡sː", 
		},
	
		-- character sequences of three that map to IPA sounds
		 = {
			 = "d͡zʲ", 
		-- Dental plosives assimilate to following hissing/hushing consonants, which is not noted in the spelling.
			 = "t͡sʲː"
		},
	}
	
	local pronuns = {}
	-- FIXME, not completely correct, we need to treat hyphens at beginning and end of
	-- a word as indicating unstressed pronunciation.
	for _, phonetic in ipairs(rsplit(text, "+")) do
		-- "#" marks the word boundaries while the rules below run; it is stripped
		-- again once all words have been joined.
		phonetic = "#" .. phonetic .. "#"
		local orthographic_replacements = {
			-- first apply consonant cluster simplifications that always occur orthographically
			 = "ньськ",
			 = "ськ",
			 = "нст",
			 = "шч",
			 = "зд",
			 = "сː",
			 = "ськ",
			 = "#шː",
			 = "жш",
			 = "#шч",
			 = "жч",
		
			-- then long consonants that are orthographically geminated.
			)%1"			] = "%1ː",
			+)жж"			] = "%1жː", -- джж sequence encode diphonemic дж
			+)зз"			] = "%1зː", -- дзз sequence encode diphonemic дз
			)%1"] = "%1ː",
			 = "джː",
			 = "дзː",
		}
		
		-- NOTE(review): pairs() does not guarantee any iteration order, so the
		-- "first ... then ..." ordering described in the table above is not
		-- enforced by this loop -- confirm that the rules are order-independent.
		for regex, replacement in pairs(orthographic_replacements) do
			phonetic = rsub(phonetic, regex, replacement)
		end
		
		-- remap apostrophe to '!' so that it doesn't conflict with IPA stress mark
		phonetic = rsub(phonetic, "'", "!")
		
		-- replace multiple letter sequences
		for _, replacements in ipairs(phonetic_chars_map) do
			for key, replacement in pairs(replacements) do
				phonetic = rsub(phonetic, key, replacement)
			end
		end
		
		-- йо never palatalizes and does not need an apostrophe
		phonetic = rsub(phonetic, "jɔ", "!jɔ")

		-- move stress mark, added by phonetic_chars_map, before vowel
		phonetic = rsub(phonetic, "()()", "%2%1")

		-- add accent if the word is monosyllabic and not allow_unstressed,
		-- so that monosyllabic words without explicit stress marks get stressed
		-- vowel allophones; we use a different character from the regular
		-- primary stress mark so we can later remove it without affecting
		-- explicitly user-added accents on monosyllabic words, as in нема́ за́ що.
		local _, numberOfVowels = rsubn(phonetic, "", "")
		if (numberOfVowels == 1) and not allow_unstressed then
			phonetic = rsub(phonetic, "()", "⁀%1")
		end
		
		-- palatalizable consonants before /i/ or /j/ become palatalized
		phonetic = rsub(phonetic, "(" .. palatalizable_c .. ")(?)(?)i", "%1ʲ%2%3i")
		phonetic = rsub(phonetic, "(" .. palatalizable_c .. ")(?)j", "%1ʲ%2")

		-- eliminate garbage ʲːj sequences (e.g. resulting from a -тьс- cluster) by dropping the j
		phonetic = rsub(phonetic, "ʲːj", "ʲː")

		-- consonant simplification: ст + ц' → [sʲt͡sʲ]. We do it here because of palatalization.
		-- Due to the т + ц rule, length is present. According to Орфоепічний словник p. 13,
		-- both forms are proper, without length in normal (colloquial) speech and with length
		-- in slow speech, so we parenthesize the length as optional.
		phonetic = rsub(phonetic, "st͡sʲ", "sʲt͡sʲ")
		phonetic = rsub(phonetic, "st͡sʲː", "sʲt͡sʲ(ː)")
		
		-- assimilation: voiceless + voiced = voiced + voiced
		-- should /ʋ/ be included as voiced? Орфоепічний словник doesn't voice initial cluster of шв (p. 116)
		local voiced_obstruent = ""
		local voicing = {
			 = "b",
			 = "v",
			 = "d",
			 = "dʲ",
			 = "z",
			 = "zʲ",
			 = "ʒ",
			 = "ɡ",
			 = "ɦ",
			 = "d͡z",
			 = "d͡zʲ",
			 = "d͡ʒ",
			 = "ʒd͡ʒ",
		}
		for voiceless, voiced in pairs(voicing) do
			phonetic = rsub(phonetic, voiceless .. "(" .. voiced_obstruent .. "+)", voiced .. "%1")
		end

		-- In the sequence of two consonants, of which the second is soft, the first is pronounced soft too
		-- unless the first consonant is a labial, namely б, п, в, ф, м.
		phonetic = rsub(phonetic, "()(.)ʲ", "%1ʲ%2ʲ")
		phonetic = rsub(phonetic, "()t͡sʲ", "%1ʲt͡sʲ")
		phonetic = rsub(phonetic, "()d͡zʲ", "%1ʲd͡zʲ")
		phonetic = rsub(phonetic, "t͡s(.)ʲ", "t͡sʲ%1ʲ")
		phonetic = rsub(phonetic, "d͡z(.)ʲ", "d͡zʲ%1ʲ")
		phonetic = rsub(phonetic, "d͡zt͡sʲ", "d͡zʲt͡sʲ")
		phonetic = rsub(phonetic, "t͡sd͡zʲ", "t͡sʲd͡zʲ")

		-- Hushing consonants ж, ч, ш assimilate to the following hissing consonants, giving a long hissing consonant:
		-- [ʒ] + [t͡sʲ] → [zʲt͡sʲ], [t͡ʃ] + [t͡sʲ] → [t͡sʲː], [ʃ] + [t͡sʲ] → [sʲt͡sʲ], [ʃ] + [sʲ] → [sʲː]
		phonetic = rsub(phonetic, "ʒt͡sʲ", "zʲt͡sʲ")
		phonetic = rsub(phonetic, "t͡ʃt͡sʲ", "t͡sʲː")
		phonetic = rsub(phonetic, "ʃt͡sʲ", "sʲt͡sʲ")
		phonetic = rsub(phonetic, "ʃsʲ", "sʲː")

		-- Hissing consonants before hushing consonants within a word assimilate - on зш and зч word-initially and 
		-- word-medially see above.
		-- [z] + [ʒ] → [ʒː], [s] + [ʃ] → [ʃː], [z] + [t͡ʃ] → [ʒt͡ʃ]
		-- [z] + [d͡ʒ] → [ʒd͡ʒ]
		phonetic = rsub(phonetic, "zʒ", "ʒː")
		phonetic = rsub(phonetic, "sʃ", "ʃː")
		phonetic = rsub(phonetic, "zt͡ʃ", "ʒt͡ʃ")
		phonetic = rsub(phonetic, "zd͡ʒ", "ʒd͡ʒ")
		phonetic = rsub(phonetic, "t͡ʒ", "d͡ʒ")
		phonetic = rsub(phonetic, "t͡z", "d͡z")
		
		-- cleanup: excessive palatalization: CʲCʲCʲ → CCʲCʲ
		phonetic = rsub(phonetic, "(+)ʲ(+)ʲ(+)ʲ", "%1%2ʲ%3ʲ")

		-- unstressed /a/ has an allophone [ɐ]
		phonetic = rsub(phonetic, "()a", "%1ɐ")
		-- unstressed /u/ has an allophone [ʊ]
		phonetic = rsub(phonetic, "()u", "%1ʊ")
		-- unstressed /ɔ/ has by assimilation an allophone [o] before a stressed syllable with /u/ or /i/
		phonetic = rsub(phonetic, "ɔ(+)()", "o%1%2")
		-- one allophone [e] covers unstressed /ɛ/ and /ɪ/
		phonetic = rsub(phonetic, "()", "%1e")

		-- Remove the monosyllabic stress we auto-added to ensure that vowels in
		-- monosyllabic words get stressed allophones. Do this before vocalizing
		-- /ʋ/ and /j/. NOTE: Nothing below should depend on stress marks being
		-- present.
		phonetic = rsub(phonetic, "⁀", "")

		-- /ʋ/ has an allophone [u̯] in a syllable coda
		phonetic = rsub(phonetic, "(" .. vowel_c .. ")ʋ()", "%1u̯%2")
		-- /ʋ/ has an allophone [w] before /ɔ, u/ and voiced consonants (not after a vowel; before vowel already converted)
		phonetic = rsub(phonetic, "ʋ(?)", "w%1")
		-- /ʋ/ has an allophone [β̞] before remaining vowels besides /i/
		-- Not sure whether this looks good.
		-- phonetic = rsub(phonetic, "ʋ(*)", "β̞%1")
		-- /ʋ/ has an allophone [ʍ] before voiceless consonants (not after a vowel; before vowel already converted)
		phonetic = rsub(phonetic, "ʋ()", "ʍ%1")

		-- in a syllable-final position (i.e. the first position of a syllable coda) /j/ has an allophone [i̯]:
		phonetic = rsub(phonetic, "(" .. vowel_c .. ")j()", "%1i̯%2")
		-- also at the beginning of a word before a consonant
		phonetic = rsub(phonetic, "#j(" .. consonant_no_w_c .. ")", "#i̯%1")
	 
		-- remove old orthographic apostrophe
		phonetic = rsub(phonetic, "!", "")
		-- stress mark in correct place
		-- (1) Put the stress mark before the final consonant of a cluster (if any).
		phonetic = rsub(phonetic, "(?*)()", "%2%1")
		-- (2) Continue moving it over the rest of an affricate with a tie bar.
		phonetic = rsub(phonetic, "(͡)()", "%2%1")
		-- (3) Continue moving it over any "permanent onset" clusters (e.g. st, skr, pl, also Cj).
		phonetic = rsub(phonetic, "(.)(ʲ?)(" .. consonant_c .. ")(ʲ?)()(" .. consonant_c .. ")",
			function(a, aj, b, bj, stress, c)
				-- NOTE(review): as written, `perm_syl_onset` is a table and so always
				-- truthy, which means only the first branch can ever run; presumably a
				-- lookup key built from a, b and c was lost -- confirm against upstream.
				if perm_syl_onset then
					return stress .. a .. aj .. b .. bj .. c
				elseif perm_syl_onset or c == "j" then
					return a .. aj .. stress .. b .. bj .. c
				else
					return a .. aj .. b .. bj .. stress .. c
				end
			end)
		-- (4) If we're in the middle of an affricate with a tie bar, continue moving back
		--     if the following consonant is /j/, else move forward.
		phonetic = rsub(phonetic, "(͡)()(.ʲ?j)", "%2%1%3")
		phonetic = rsub(phonetic, "(͡)()(.ʲ?)", "%1%3%2")
		-- (5) Move back over any remaining consonants at the beginning of a word.
		phonetic = rsub(phonetic, "#(+)()", "#%2%1")
		-- (6) Move back over u̯ or i̯ at the beginning of a word.
		phonetic = rsub(phonetic, "#(̯)()", "#%2%1")

		-- normalize the order of palatalization and length marks to ʲː
		phonetic = rsub(phonetic, "ʲ?ːʲ", "ʲː")

		-- use dark [ɫ] for non-palatal /l/
		phonetic = rsub(phonetic, "l()", "ɫ%1")

		table.insert(pronuns, phonetic)
	end

	-- Join the per-word results with spaces and strip the "#" boundary markers.
	-- NOTE(review): the loop variable `phonetic` went out of scope at the end of
	-- the for loop, so this assignment writes a global named `phonetic`;
	-- consider declaring it `local`.
	phonetic = rsub(table.concat(pronuns, " "), "#", "")
	
	if output == "template" then
		-- NOTE(review): `pron` is an empty string here, so the computed `phonetic`
		-- is not passed to the formatter; presumably a bracketed expression
		-- wrapping `phonetic` was lost -- confirm against upstream.
		local ipa = m_IPA.format_IPA_full {
			lang = lang,
			items = {{ pron = "" }},
		}
		local anntext
		if ann == "1" or ann == "y" then
			-- remove secondary stress annotations
			anntext = "'''" .. export.remove_pron_notations(origterm, true) .. "''':&#32;"
		elseif ann then
			anntext = "'''" .. ann .. "''':&#32;"
		else
			anntext = ""
		end

		return anntext .. ipa
	else
		return phonetic
	end
end

return export

Module:uk-pronunciation

Testcases

References

Wikious

Boobota

Sagapedia