Module:be-pronunciation

The following documentation is located at Module:be-pronunciation/documentation. Categories were auto-generated by Module:module categorization.
Useful links: subpage list • links • transclusions • testcases • sandbox
This module automatically converts Belarusian orthography to a phonetic transcription in the International Phonetic Alphabet.
Testcases

Module:be-pronunciation/testcases
References

2017, В.П. Русак, Арфаэпічны слоўнік беларускай мовы, p. 4 (online version for word lookup)
local export = {}

local m_table = require("Module:table")

local u = require("Module:string/char")
local rfind = mw.ustring.find
local rsubn = mw.ustring.gsub
local rmatch = mw.ustring.match
local rsplit = mw.text.split
local usub = mw.ustring.sub
local ulen = mw.ustring.len

-- Single-result wrapper around rsubn(): performs the same substitution but hands back only
-- the resulting string, dropping any further return values (such as a replacement count).
local function rsub(term, foo, bar)
	-- The extra parentheses truncate the call's results to exactly one value.
	return (rsubn(term, foo, bar))
end

-- Feed `term` through `fun` over and over until a pass leaves it unchanged (a fixed point),
-- then return that stable value.
local function do_sub_repeatedly(term, fun)
	local previous
	repeat
		previous = term
		term = fun(previous)
	until term == previous
	return previous
end

-- Keep applying the rsub(term, foo, bar) substitution until the string stops changing,
-- and return the stable result.
local function rsub_repeatedly(term, foo, bar)
	return do_sub_repeatedly(term, function(current)
		return rsub(current, foo, bar)
	end)
end

-- Special characters used by this module, built from their Unicode code points with u()
-- (from Module:string/char; presumably it returns the character for a code point).
local grave = u(0x300)	-- U+0300 COMBINING GRAVE ACCENT
local acute = u(0x301)	-- U+0301 COMBINING ACUTE ACCENT
local stress = u(0x2C8)	-- U+02C8 MODIFIER LETTER VERTICAL LINE (IPA primary stress mark)
local secondary_stress = u(0x2CC)	-- U+02CC MODIFIER LETTER LOW VERTICAL LINE (IPA secondary stress mark)
local tie = u(0x361)	-- U+0361 COMBINING DOUBLE INVERTED BREVE (IPA tie bar, used in affricates)

-- Maps each lowercase Belarusian letter (or the digraphs дз/дж) and each accent mark to its
-- base IPA value. Letters that palatalize the preceding consonant map to "ʲ" + vowel.
--
-- NOTE(review): in the copy of this file under review every key had been stripped (each entry
-- read ` = "..."`, which is not valid Lua). The keys below are restored from the Belarusian
-- alphabet order, the IPA values, and the `acute`/`grave` locals defined above. The exact
-- apostrophe variants (last three entries) are an assumption -- confirm against the live module.
local correspondences = {
	["а"] = "a",
	["б"] = "b",
	["в"] = "v",
	["г"] = "ɣ",
	["ґ"] = "ɡ",
	["д"] = "d",
	["дз"] = "d" .. tie .. "z",
	["дж"] = "d" .. tie .. "ʐ",
	["е"] = "ʲe",	-- or ɛ
	["ё"] = "ʲo",
	["ж"] = "ʐ",
	["з"] = "z",
	["і"] = "ʲi",
	["й"] = "j",
	["к"] = "k",
	["л"] = "l",
	["м"] = "m",
	["н"] = "n",
	["о"] = "o",	-- or ɔ
	["п"] = "p",
	["р"] = "r",
	["с"] = "s",
	["т"] = "t",
	["у"] = "u",
	["ў"] = "w",
	["ф"] = "f",
	["х"] = "x",
	["ц"] = "t" .. tie .. "s",
	["ч"] = "t" .. tie .. "ʂ",
	["ш"] = "ʂ",
	["ы"] = "ɨ",
	["ь"] = "ʲ",
	["э"] = "ɛ",
	["ю"] = "ʲu",
	["я"] = "ʲa",
	-- Accent marks in the respelling become IPA stress marks
	[acute] = stress,
	[grave] = secondary_stress,
	-- Space
	[" "] = " ",
	-- Apostrophes
	["ʼ"] = "j",
	["'"] = "j",
	["’"] = "j"
}

-- Maps each voiced obstruent to its voiceless counterpart (used by assimilate_voicing()).
-- NOTE(review): the keys were stripped in the copy under review; restored as the voiced
-- partner of each value. "ɡ" is the IPA letter U+0261, as elsewhere in this module.
local devoicing = {
	['b'] = 'p', ['d'] = 't', ['ɡ'] = 'k',
	['z'] = 's', ['ʐ'] = 'ʂ', ['ɣ'] = 'x'
}

-- Maps each voiceless obstruent to its voiced counterpart (used by assimilate_voicing()).
-- NOTE(review): the keys were stripped in the copy under review; restored as the voiceless
-- partner of each value. "ɡ" is the IPA letter U+0261, as elsewhere in this module.
local voicing = {
	['p'] = 'b', ['t'] = 'd', ['k'] = 'ɡ',
	['s'] = 'z', ['ʂ'] = 'ʐ', ['x'] = 'ɣ',
	['f'] = 'v'
}

-- Inventories of IPA vowel and consonant letters, plus the matching pattern character classes
-- (the `_c` suffix marks the bracketed class form, for direct use inside patterns).
-- NOTE(review): the `_c` values were empty strings in the copy under review (bracket
-- expressions stripped); an empty class makes every pattern built from them match wrongly.
local vowel = "aeɛiɨou"
local vowel_c = "[" .. vowel .. "]"
local consonant = "jmnlrvwbdzʐɡɣpftskxʂ"
local consonant_c = "[" .. consonant .. "]"

-- Both IPA stress marks, and a pattern character class that matches either of them.
-- NOTE(review): accent_c was an empty string in the copy under review (bracket expression
-- stripped); restored as a class over `accent`.
local accent = stress .. secondary_stress
local accent_c = "[" .. accent .. "]"

-- Set of consonant clusters that move_stress() keeps together as a syllable onset when it
-- places the stress mark (built from a plain list with m_table.listToSet()).
local perm_syl_onset
do
	local onset_clusters = {
		-- s + stop + liquid
		'spr', 'str', 'skr', 'spl', 'skl',
		-- s + consonant
		'sp', 'st', 'sk', 'sf', 'sx', 'sl', 'sm', 'sn',
		-- WARNING, IPA ɡ used in the next two lines (and throughout this module)
		'pr', 'br', 'tr', 'dr', 'kr', 'ɡr', 'ɣr', 'fr', 'xr',
		'pl', 'bl', 'kl', 'ɡl', 'ɣl', 'fl', 'xl',
	}
	perm_syl_onset = m_table.listToSet(onset_clusters)
end


-- Moves each stress mark in `transcription` from after the stressed vowel back to the start of
-- its syllable, in six ordered rsub() passes. Word boundaries are expected to be marked with "#".
--
-- NOTE(review): in the copy of this file under review, every `[...]` span has been stripped.
-- The two `perm_syl_onset[...]` lookups in step (3) are restored here, because the bare
-- `if perm_syl_onset then` was always true and made the other two branches unreachable.
-- The pattern strings in steps (1), (2), (4), (5) and (6) have visibly lost bracket character
-- classes (e.g. "(?*", "(͡)", "#(+)"); they are left exactly as found and MUST be restored
-- from the live module before this function can work.
local function move_stress(transcription)
	-- The following logic for placing the stress mark on a syllable boundary is copied from
	-- another pronunciation module (the wiki link naming it was lost in this copy).
	-- (1) Put the stress mark before the final consonant of a cluster (if any).
	transcription = rsub(transcription, "(?*" .. vowel_c .. ")(" .. accent_c .. ")", "%2%1")
	-- (2) Continue moving it over the rest of an affricate with a tie bar.
	transcription = rsub(transcription, "(͡)(" .. accent_c .. ")", "%2%1")
	-- (3) Continue moving it over any "permanent onset" clusters (e.g. st, skr, pl, also Cj).
	transcription = rsub(transcription, "(.)(ʲ?)(" .. consonant_c .. ")(ʲ?)(" .. accent_c .. ")(" .. consonant_c .. ")",
		-- a, b, c are three consecutive segments; aj/bj are their optional palatalization marks;
		-- mark is the stress mark currently sitting between b and c.
		function(a, aj, b, bj, mark, c)
			if perm_syl_onset[a .. b .. c] then
				-- Three-consonant onset: stress goes before all three.
				return mark .. a .. aj .. b .. bj .. c
			elseif perm_syl_onset[b .. c] or c == "j" then
				-- Two-consonant onset (or consonant + j): stress goes before b.
				return a .. aj .. mark .. b .. bj .. c
			else
				-- No permanent onset: leave the stress mark where it is.
				return a .. aj .. b .. bj .. mark .. c
			end
		end)
	-- (4) If we're in the middle of an affricate with a tie bar, continue moving back
	--     if the following consonant is /j/, else move forward.
	transcription = rsub(transcription, "(͡)(" .. accent_c .. ")(.ʲ?j)", "%2%1%3")
	transcription = rsub(transcription, "(͡)(" .. accent_c .. ")(.ʲ?)", "%1%3%2")
	-- (5) Move back over any remaining consonants at the beginning of a word.
	transcription = rsub(transcription, "#(+)(" .. accent_c .. ")", "#%2%1")
	-- (6) Move back over u̯ or i̯ at the beginning of a word.
	transcription = rsub(transcription, "#(̯)(" .. accent_c .. ")", "#%2%1")
	return transcription
end

-- Applies regressive voicing assimilation to `transcription`: the first capture of each match
-- is replaced by its entry in `devoicing` (first pass) or `voicing` (second pass), and both
-- passes are repeated until the text stops changing.
--
-- NOTE(review): in the copy of this file under review, every `[...]` span has been stripped.
-- The table lookups `devoicing[a]` / `voicing[a]` are restored here (the original
-- `devoicing .. b` concatenated a table and would raise an error). The two pattern strings
-- have also lost their character classes -- as written, "()" is a position capture and yields
-- a number, not a consonant -- and MUST be restored from the live module.
local function assimilate_voicing(transcription)
	return do_sub_repeatedly(transcription, function(text)
		-- Voiced obstruent -> voiceless counterpart in the matched context.
		text = rsub(text, "()(*)", function(a, b)
			return devoicing[a] .. b end)
		-- Voiceless obstruent -> voiced counterpart in the matched context.
		text = rsub(text, "()(*v?*)", function(a, b)
			return voicing[a] .. b end)
		return text
	end)
end

-- Sibilant assimilation: repeatedly rewrites each match so that the first captured group is
-- preceded and followed by a copy of the second ("%2%1%2"), until nothing changes.
-- NOTE(review): the pattern appears to have lost its bracket character classes in this copy
-- of the file ("(?" and "()" were presumably "([...]?" and "([...])"); as written, "()" is a
-- position capture. Restore the classes from the live module before relying on this function.
local function assimilate_sibilants(transcription)
	return rsub_repeatedly(transcription, "(?" .. tie .. "?)()", "%2%1%2")
end

-- Palatal assimilation: inserts the palatalization mark ʲ on consonants that precede an
-- already-palatalized consonant or /j/, repeating the whole rule set until the text is stable.
-- Can probably be simplified
-- NOTE(review): most patterns below contain "()" or "(*" where a bracket character class has
-- evidently been stripped from this copy of the file; as written, "()" is a position capture
-- rather than a consonant match. Restore the classes from the live module before relying on
-- this function.
local function assimilate_palatals(transcription)
	return do_sub_repeatedly(transcription, function(text)
		-- Doubled consonant before ʲ: both halves get ʲ.
		text = rsub(text, "()%1ʲ", "%1ʲ%1ʲ")
		-- Captured segment before j gets ʲ.
		text = rsub(text, "()j", "%1ʲj")
		-- Captured segment before an (optionally stressed) palatalized tie-bar sequence gets ʲ.
		text = rsub(text, "()(" .. accent_c .. "?" .. tie .. "ʲ)", "%1ʲ%2")
		-- Captured segment before another captured palatalized segment gets ʲ.
		text = rsub(text, "()()ʲ", "%1ʲ%2ʲ")

		-- No assimilation in a final, non-initial syllable
		text = rsub_repeatedly(text, "()()ʲ(*" .. vowel_c .. "*" .. vowel_c .. ")", "%1ʲ%2ʲ%3")
		text = rsub(text, "#(*)()()ʲ", "%1%2ʲ%3ʲ")

		-- Segment ending in a tie bar directly before vʲ gets ʲ.
		text = rsub(text, "(" .. tie .. ")vʲ", "%1ʲvʲ")
		-- t/d before a palatalized s/z, and before a palatalized affricate, are rewritten as
		-- two identical palatalized affricates.
		text = rsub(text, "tsʲ", "t" .. tie .. "sʲsʲ")
		text = rsub(text, "dzʲ", "d" .. tie .. "zʲzʲ")
		text = rsub(text, "tt" .. tie .. "sʲ", "t" .. tie .. "sʲt" .. tie .. "sʲ")
		text = rsub(text, "dd" .. tie .. "zʲ", "d" .. tie .. "zʲd" .. tie .. "zʲ")
		return text
	end)
end

-- Converts Belarusian orthography in `text` to a first-pass IPA string: normalizes punctuation
-- and whitespace, lowercases, transliterates via `correspondences` (two-character keys take
-- priority over single characters), wraps every word in "#" boundary markers, and applies a
-- few context rules. Returns the "#"-delimited IPA string.
--
-- NOTE(review): in the copy of this file under review, every `[...]` span has been stripped.
-- Restored here, because the surrounding code or comment determines them:
--   * the comma/dash class in the first substitution (from its comment);
--   * the `correspondences[...]` lookups (the bare table was always truthy, so every step
--     consumed two characters and inserted a table into the output list);
--   * the з/ж class before ɣ (from its comment; confirm no further members are needed).
-- Still damaged and left exactly as found (restore from the live module):
--   * the "Change ʲ to j" pattern, which presumably began with a class for the preceding
--     context that its comment describes;
--   * both "Mark stress" patterns and the syllable-final /в/ pattern.
local function convert(text)
	-- convert commas and em/en dashes to IPA foot boundaries
	text = rsub(text, '%s*[,–—]%s*', ' | ')
	-- convert hyphen to space
	text = rsub(text, "%-", " ")
	-- canonicalize spaces
	text = rsub(text, "%s+", " ")
	text = rsub(text, "^%s", "")
	text = rsub(text, "%s$", "")
	local working_string = mw.ustring.lower(text)
	local pieces = {}
	while ulen(working_string) > 0 do
		local IPA_letter

		local letter = usub(working_string, 1, 1)
		local twoletters = usub(working_string, 1, 2) or ""

		if correspondences[twoletters] then
			-- Two-character match: consume both characters.
			IPA_letter = correspondences[twoletters]
			working_string = usub(working_string, 3)
		else
			-- Single character; unknown characters pass through unchanged.
			IPA_letter = correspondences[letter] or letter
			working_string = usub(working_string, 2)
		end

		table.insert(pieces, IPA_letter)
	end
	local IPA = table.concat(pieces)

	-- Mark word boundaries
	IPA = rsub(IPA, "(%s+)", "#%1#")
	IPA = "#" .. IPA .. "#"

	-- Change ʲ to j between vowels or after another ʲ.
	-- NOTE(review): the leading context class appears to be missing from this pattern.
	IPA = rsub_repeatedly(IPA, "(" .. accent_c .. "?)ʲ(" .. vowel_c .. ")", "%1j%2")
	IPA = rsub(IPA, "jʲ", "j")

	-- /г/ is a stop in /зг/, /жг/
	IPA = rsub(IPA, "([zʐ])ɣ", "%1ɡ")

	-- Mark stress
	-- NOTE(review): character classes stripped from both patterns; left as found.
	IPA = rsub_repeatedly(IPA, "(#*)o(**#)", "%1o" .. stress .. "%2")
	IPA = rsub_repeatedly(IPA, "(#**)o(*#)", "%1o" .. stress .. "%2")

	-- Syllable-final /в/ is rewritten as non-syllabic u̯
	-- NOTE(review): character classes stripped from this pattern; left as found.
	IPA = rsub_repeatedly(IPA, "(+)w()", "%1u̯%2")

	return IPA
end

-- Converts a Belarusian term (Cyrillic, with optional acute/grave stress marks) to an IPA
-- transcription string by running convert() followed by the assimilation, gemination and
-- stress-placement passes, then stripping the internal "#" word-boundary markers.
--
-- Raises (via checkScript) if the word contains alphabetic characters that are not Cyrillic.
--
-- NOTE(review): in the copy of this file under review, every `[...]` span has been stripped.
-- The three patterns under "Soft and hard /л/" and "geminates" have lost their character
-- classes ("()" is a position capture as written) and are left exactly as found; restore them
-- from the live module.
function export.toIPA(term)
	--	Returns an error if the word contains alphabetic characters that are not Cyrillic.
	require("Module:script utilities").checkScript(term, "Cyrl")

	-- Declared local: the original assigned to an undeclared name and so leaked a global `IPA`.
	local IPA = convert(term)

	-- Voicing assimilation
	IPA = assimilate_voicing(IPA)

	-- Sibilant assimilation
	IPA = assimilate_sibilants(IPA)

	-- Palatal assimilation
	IPA = assimilate_palatals(IPA)

	-- Soft and hard /л/
	IPA = rsub(IPA, "l()", "ɫ%1")

	-- Convert identical consonant sequences to geminates
	IPA = rsub(IPA, "(" .. tie .. "ʲ?)%1", "%1ː")
	IPA = rsub_repeatedly(IPA, "()(ʲ?)%2", "%1%2ː")

	IPA = move_stress(IPA)

	-- Remove #s
	IPA = rsub(IPA, "#", "")

	return IPA
end

-- Strips pronunciation-only notation from `text` for use as a display annotation.
-- When `remove_grave` is truthy, every combining grave accent is removed (the text is
-- decomposed to NFD first so precomposed letters are covered, then recomposed to NFC);
-- otherwise `text` is returned untouched.
function export.remove_pron_notations(text, remove_grave)
	if not remove_grave then
		return text
	end
	local decomposed = mw.ustring.toNFD(text)
	local without_grave = rsub(decomposed, grave, "")
	return mw.ustring.toNFC(without_grave)
end

-- Template entry point. Reads the parent frame's arguments, transcribes the term and returns
-- the formatted IPA, optionally preceded by a bold annotation.
--
-- Parameters (from the invoking template):
--   1    the term to transcribe; defaults to "пры́клад" in the Template namespace, otherwise
--        to the current page title.
--   ann  "1" or "y" to annotate with the term itself (grave accents removed), or any other
--        text to use that text as the annotation.
--
-- NOTE(review): in the copy of this file under review, every `[...]` span has been stripped.
-- Restored here: the two `params` keys (positional 1 and "ann", the only arguments this
-- function reads), the `args[1]` lookup, and the bracket-wrapping of the transcription
-- (the stripped line read `IPA = ""`, which threw away the result of toIPA()).
function export.show(frame)
	local params = {
		[1] = {},
		["ann"] = {},
	}

	local title = mw.title.getCurrentTitle()

	local args = require("Module:parameters").process(frame:getParent().args, params)
	local term = args[1] or title.nsText == "Template" and "пры́клад" or title.text

	local IPA = export.toIPA(term)

	-- Square brackets mark the result as a phonetic (not phonemic) transcription.
	IPA = "[" .. IPA .. "]"
	IPA = require("Module:IPA").format_IPA_full {
		lang = require("Module:languages").getByCode("be"),
		items = {{ pron = IPA }},
	}

	local anntext
	if args.ann == "1" or args.ann == "y" then
		-- remove secondary stress annotations
		anntext = "'''" .. export.remove_pron_notations(term, true) .. "''':&#32;"
	elseif args.ann then
		anntext = "'''" .. args.ann .. "''':&#32;"
	else
		anntext = ""
	end

	return anntext .. IPA
end

return export
Module:be-pronunciation

Testcases

References

Wikious

Boobota

Sagapedia