-- This module automatically converts Belarusian orthography to a phonetic transcription in the International Phonetic Alphabet.
local export = {}
local m_table = require("Module:table")
local u = require("Module:string/char")
local rfind = mw.ustring.find
local rsubn = mw.ustring.gsub
local rmatch = mw.ustring.match
local rsplit = mw.text.split
local usub = mw.ustring.sub
local ulen = mw.ustring.len
-- Single-result wrapper around rsubn(): returns only the substituted string
-- and drops the replacement count that rsubn() returns as its second value.
local function rsub(term, foo, bar)
	-- The extra parentheses truncate the call to exactly one return value.
	return (rsubn(term, foo, bar))
end
-- Call fun(term) over and over, feeding each result back in, until a call
-- returns a value equal to its input; that stable value is returned.
-- fun must take one value and return one value.
local function do_sub_repeatedly(term, fun)
	local previous
	repeat
		previous = term
		term = fun(previous)
	until term == previous
	return previous
end
-- Run the substitution rsub(term, foo, bar) again and again until a pass
-- leaves the string unchanged, then return that string.
local function rsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		term = rsub(previous, foo, bar)
	until term == previous
	return previous
end
-- U+0300 COMBINING GRAVE ACCENT and U+0301 COMBINING ACUTE ACCENT.
local grave, acute = u(0x300), u(0x301)
-- U+02C8 and U+02CC: the IPA primary and secondary stress marks.
local stress, secondary_stress = u(0x2C8), u(0x2CC)
-- U+0361 COMBINING DOUBLE INVERTED BREVE: the tie bar joining the two halves
-- of an affricate.
local tie = u(0x361)
-- Lookup table referenced by convert() while it walks the lowercased input;
-- the values are the IPA fragments emitted into the transcription.
-- NOTE(review): as seen here every entry has lost its `[...]` key, so the
-- table does not parse. Presumably the keys are the Cyrillic letters and
-- digraphs (convert() tries a two-character lookup first) plus the stress
-- accents, space and apostrophes -- restore them from the canonical module.
local correspondences = {
= "a",
= "b",
= "v",
= "ɣ",
= "ɡ",
= "d",
= "d" .. tie .. "z",
= "d" .. tie .. "ʐ",
= "ʲe", -- or ɛ
= "ʲo",
= "ʐ",
= "z",
= "ʲi",
= "j",
= "k",
= "l",
= "m",
= "n",
= "o", -- or ɔ
= "p",
= "r",
= "s",
= "t",
= "u",
= "w",
= "f",
= "x",
= "t" .. tie .. "s",
= "t" .. tie .. "ʂ",
= "ʂ",
= "ɨ",
= "ʲ",
= "ɛ",
= "ʲu",
= "ʲa",
= stress,
= secondary_stress,
-- Space
= " ",
-- Apostrophes
= "j",
= "j",
= "j"
}
-- Replacement table used by assimilate_voicing(); the values are voiceless
-- consonants.
-- NOTE(review): the `[...]` keys are missing from this text; presumably each
-- key is the voiced counterpart of its value -- confirm against the
-- canonical module.
local devoicing = {
= 'p', = 't', = 'k',
= 's', = 'ʂ', = 'x'
}
-- Replacement table used by assimilate_voicing(); the values are voiced
-- consonants (written with IPA ɡ, as elsewhere in this module).
-- NOTE(review): the `[...]` keys are missing from this text; presumably each
-- key is the voiceless counterpart of its value -- confirm against the
-- canonical module.
local voicing = {
= 'b', = 'd', = 'ɡ',
= 'z', = 'ʐ', = 'ɣ',
= 'v'
}
-- Character inventories spliced into the patterns used further down.
-- `vowel`, `consonant` and `accent` list the raw characters; the `_c`
-- variants are concatenated directly into patterns.
-- NOTE(review): all three `_c` values are empty strings here; presumably each
-- was the matching inventory wrapped in a `[...]` character class and the
-- brackets were lost -- confirm before relying on any pattern built from them.
local vowel = "aeɛiɨou"
local vowel_c = ""
local consonant = "jmnlrvwbdzʐɡɣpftskxʂ"
local consonant_c = ""
local accent = stress .. secondary_stress
local accent_c = ""
-- Set of consonant clusters referenced by move_stress() when it decides how
-- far back to move a stress mark ("permanent onset" clusters).
-- Built as a list first, then converted to a set by listToSet().
local perm_syl_onset
do
	local onset_list = {
		-- s + stop + liquid
		'spr', 'str', 'skr', 'spl', 'skl',
		-- s + consonant
		'sp', 'st', 'sk', 'sf', 'sx', 'sl', 'sm', 'sn',
		-- WARNING, IPA ɡ used in the next two lines (and throughout this module)
		-- obstruent + r
		'pr', 'br', 'tr', 'dr', 'kr', 'ɡr', 'ɣr', 'fr', 'xr',
		-- obstruent + l
		'pl', 'bl', 'kl', 'ɡl', 'ɣl', 'fl', 'xl',
	}
	perm_syl_onset = m_table.listToSet(onset_list)
end
-- Reposition the stress marks inside `transcription` through a fixed series
-- of substitutions and return the resulting string.
-- NOTE(review): several patterns below have visibly lost their `[...]`
-- character classes (e.g. "(?*", "#(+)"), and the `perm_syl_onset` lookups
-- have lost their index expression. Restore them from the canonical module;
-- the code is left untouched here.
local function move_stress(transcription)
-- The following logic for placing the stress mark on a syllable boundary is copied from
-- another module (NOTE(review): the link naming that module is missing from this text).
-- (1) Put the stress mark before the final consonant of a cluster (if any).
transcription = rsub(transcription, "(?*" .. vowel_c .. ")(" .. accent_c .. ")", "%2%1")
-- (2) Continue moving it over the rest of an affricate with a tie bar.
transcription = rsub(transcription, "(͡)(" .. accent_c .. ")", "%2%1")
-- (3) Continue moving it over any "permanent onset" clusters (e.g. st, skr, pl, also Cj).
transcription = rsub(transcription, "(.)(ʲ?)(" .. consonant_c .. ")(ʲ?)(" .. accent_c .. ")(" .. consonant_c .. ")",
-- The callback parameter `stress` shadows the module-level `stress` constant;
-- inside the callback it holds the captured accent mark.
function(a, aj, b, bj, stress, c)
-- NOTE(review): `perm_syl_onset` is tested without an index here, so the
-- first branch is always taken as written; presumably the two tests looked
-- up different cluster strings built from a, b and c -- confirm.
if perm_syl_onset then
return stress .. a .. aj .. b .. bj .. c
elseif perm_syl_onset or c == "j" then
return a .. aj .. stress .. b .. bj .. c
else
return a .. aj .. b .. bj .. stress .. c
end
end)
-- (4) If we're in the middle of an affricate with a tie bar, continue moving back
-- if the following consonant is /j/, else move forward.
transcription = rsub(transcription, "(͡)(" .. accent_c .. ")(.ʲ?j)", "%2%1%3")
transcription = rsub(transcription, "(͡)(" .. accent_c .. ")(.ʲ?)", "%1%3%2")
-- (5) Move back over any remaining consonants at the beginning of a word.
transcription = rsub(transcription, "#(+)(" .. accent_c .. ")", "#%2%1")
-- (6) Move back over u̯ or i̯ at the beginning of a word.
transcription = rsub(transcription, "#(̯)(" .. accent_c .. ")", "#%2%1")
return transcription
end
-- Voicing assimilation. Runs two substitution passes over the text -- one
-- that rewrites through the `devoicing` table, then one that rewrites through
-- the `voicing` table -- and repeats both until the text stops changing.
-- NOTE(review): the character classes in both patterns and the `[a]` style
-- index on `devoicing` / `voicing` are missing from this text (as written the
-- callbacks would concatenate a table with a string). Restore them from the
-- canonical module.
local function assimilate_voicing(transcription)
return do_sub_repeatedly(transcription, function(text)
-- Pass 1: replacement built from the `devoicing` table plus the unchanged
-- second capture.
text = rsub(text, "()(*)", function(a, b)
return devoicing .. b end)
-- Pass 2: replacement built from the `voicing` table plus the unchanged
-- second capture; the pattern mentions an optional `v`.
text = rsub(text, "()(*v?*)", function(a, b)
return voicing .. b end)
return text
end)
end
-- Sibilant assimilation. Repeatedly applies one substitution whose
-- replacement ("%2%1%2") puts a copy of the second capture in front of the
-- first capture, until the text stops changing.
-- NOTE(review): the character classes inside both capture groups are missing
-- from this text, so the exact sounds involved cannot be read off here --
-- restore them from the canonical module.
local function assimilate_sibilants(transcription)
return rsub_repeatedly(transcription, "(?" .. tie .. "?)()", "%2%1%2")
end
-- Palatal assimilation. Applies the list of substitutions below, each of
-- which inserts the palatalization mark ʲ (and, for affricates, the tie bar)
-- into the text, and repeats the whole list until nothing changes.
-- NOTE(review): most capture groups have lost their `[...]` character classes
-- in this text, so which consonants each rule targets cannot be read off
-- here -- restore them from the canonical module.
-- Can probably be simplified
local function assimilate_palatals(transcription)
return do_sub_repeatedly(transcription, function(text)
-- A captured segment followed by the same segment plus ʲ: mark both with ʲ.
text = rsub(text, "()%1ʲ", "%1ʲ%1ʲ")
-- A captured segment before j gets ʲ.
text = rsub(text, "()j", "%1ʲj")
-- A captured segment before a palatalized tie-bar sequence gets ʲ.
text = rsub(text, "()(" .. accent_c .. "?" .. tie .. "ʲ)", "%1ʲ%2")
text = rsub(text, "()()ʲ", "%1ʲ%2ʲ")
-- No assimilation in a final, non-initial syllable
text = rsub_repeatedly(text, "()()ʲ(*" .. vowel_c .. "*" .. vowel_c .. ")", "%1ʲ%2ʲ%3")
text = rsub(text, "#(*)()()ʲ", "%1%2ʲ%3ʲ")
-- Palatalize a tie-bar sequence standing before vʲ.
text = rsub(text, "(" .. tie .. ")vʲ", "%1ʲvʲ")
-- t/d plus a palatalized sibilant becomes a palatalized affricate followed
-- by that sibilant.
text = rsub(text, "tsʲ", "t" .. tie .. "sʲsʲ")
text = rsub(text, "dzʲ", "d" .. tie .. "zʲzʲ")
-- t/d before the matching palatalized affricate becomes a second copy of
-- that affricate.
text = rsub(text, "tt" .. tie .. "sʲ", "t" .. tie .. "sʲt" .. tie .. "sʲ")
text = rsub(text, "dd" .. tie .. "zʲ", "d" .. tie .. "zʲd" .. tie .. "zʲ")
return text
end)
end
-- First transcription stage: normalize punctuation and spacing in `text`,
-- lowercase it, map it piece by piece through `correspondences`, wrap words
-- in `#` boundary markers, and apply a few context rules. Returns the
-- intermediate IPA string (still containing the `#` markers).
-- NOTE(review): the `[...]` character classes in several patterns and the
-- index expressions on `correspondences` are missing from this text; the code
-- is left untouched and should be restored from the canonical module.
local function convert(text)
-- convert commas and em/en dashes to IPA foot boundaries
text = rsub(text, '%s*%s*', ' | ')
-- convert hyphen to space
text = rsub(text, "%-", " ")
-- canonicalize spaces
text = rsub(text, "%s+", " ")
text = rsub(text, "^%s", "")
text = rsub(text, "%s$", "")
local working_string = mw.ustring.lower(text)
local IPA = {}
-- Consume the input from the left, appending one IPA piece per step.
while ulen(working_string) > 0 do
local IPA_letter
local letter = usub(working_string, 1, 1)
local twoletters = usub(working_string, 1, 2) or ""
-- NOTE(review): presumably this tested and read the `twoletters` entry,
-- falling back to the single-letter entry (or the raw letter) below --
-- confirm; the index expressions are missing here.
if correspondences then
IPA_letter = correspondences
working_string = usub(working_string, 3)
else
IPA_letter = correspondences or letter
working_string = usub(working_string, 2)
end
table.insert(IPA, IPA_letter)
end
IPA = table.concat(IPA)
-- Mark word boundaries
IPA = rsub(IPA, "(%s+)", "#%1#")
IPA = "#" .. IPA .. "#"
-- Change ʲ to j between vowels or after another ʲ.
IPA = rsub_repeatedly(IPA, "(" .. accent_c .. "?)ʲ(" .. vowel_c .. ")", "%1j%2")
IPA = rsub(IPA, "jʲ", "j")
-- /г/ is a stop in /зг/, /жг/
IPA = rsub(IPA, "()ɣ", "%1ɡ")
-- Mark stress: both substitutions insert the stress mark right after an `o`.
IPA = rsub_repeatedly(IPA, "(#*)o(**#)", "%1o" .. stress .. "%2")
IPA = rsub_repeatedly(IPA, "(#**)o(*#)", "%1o" .. stress .. "%2")
-- Syllable-final /в/ is rendered as u̯
IPA = rsub_repeatedly(IPA, "(+)w()", "%1u̯%2")
return IPA
end
-- Convert a Belarusian term to its IPA transcription.
--
-- term: the term to transcribe; it is passed to checkScript(term, "Cyrl")
--       before any conversion happens.
-- Returns the transcription as a string with the internal `#` word-boundary
-- markers removed.
--
-- Pipeline: convert() -> voicing, sibilant and palatal assimilation ->
-- l/ɫ rule -> gemination -> move_stress() -> strip `#`.
function export.toIPA(term)
	-- Returns an error if the word contains alphabetic characters that are not Cyrillic.
	require("Module:script utilities").checkScript(term, "Cyrl")

	-- Declared local: a bare `IPA = ...` here would write a global variable,
	-- leaking state out of the function (and failing under strict mode).
	local IPA = convert(term)

	-- Voicing assimilation
	IPA = assimilate_voicing(IPA)

	-- Sibilant assimilation
	IPA = assimilate_sibilants(IPA)

	-- Palatal assimilation
	IPA = assimilate_palatals(IPA)

	-- Soft and hard /л/
	-- NOTE(review): the capture below appears to have lost its `[...]`
	-- character class in this text; left byte-identical -- confirm against the
	-- canonical module.
	IPA = rsub(IPA, "l()", "ɫ%1")

	-- Convert identical consonant sequences to geminates
	-- NOTE(review): both patterns below also appear to have lost character
	-- classes; left byte-identical.
	IPA = rsub(IPA, "(" .. tie .. "ʲ?)%1", "%1ː")
	IPA = rsub_repeatedly(IPA, "()(ʲ?)%2", "%1%2ː")

	IPA = move_stress(IPA)

	-- Remove #s
	IPA = rsub(IPA, "#", "")

	return IPA
end
-- Strip pronunciation-only notation from `text`.
--
-- text:         the string to clean up.
-- remove_grave: when truthy, every grave accent is deleted (the text is
--               decomposed to NFD first so the accent is a separate code
--               point, then recomposed to NFC); when falsy, `text` is
--               returned untouched.
-- Grave accents are removed from annotations but maybe not from phonetic
-- respelling, hence the flag.
function export.remove_pron_notations(text, remove_grave)
	if not remove_grave then
		return text
	end
	local decomposed = mw.ustring.toNFD(text)
	local without_grave = rsub(decomposed, grave, "")
	return mw.ustring.toNFC(without_grave)
end
-- Template entry point. Reads the parent frame's arguments, transcribes the
-- chosen term with export.toIPA(), formats the result through
-- Module:IPA's format_IPA_full for language code "be", and returns it,
-- optionally prefixed with a bold annotation.
function export.show(frame)
-- NOTE(review): both parameter keys are missing from this text. Presumably
-- they are the first positional parameter and `ann` (args.ann is read
-- below) -- confirm against the canonical module.
local params = {
= {},
= {},
}
local title = mw.title.getCurrentTitle()
local args = require("Module:parameters").process(frame:getParent().args, params)
-- Falls back to a fixed example word in the Template namespace, otherwise to
-- the page title.
-- NOTE(review): `args` is used without an index here, so as written the
-- fallbacks are never reached; presumably an index on `args` was lost --
-- confirm.
local term = args or title.nsText == "Template" and "пры́клад" or title.text
local IPA = export.toIPA(term)
-- NOTE(review): as written this discards the transcription just computed;
-- presumably the original wrapped IPA in literal brackets and that
-- expression was lost -- confirm.
IPA = ""
IPA = require("Module:IPA").format_IPA_full {
lang = require("Module:languages").getByCode("be"),
items = {{ pron = IPA }},
}
local anntext
-- ann=1 / ann=y: annotate with the term itself (grave accents stripped);
-- any other ann value is used verbatim; no ann means no annotation.
if args.ann == "1" or args.ann == "y" then
-- remove secondary stress annotations
anntext = "'''" .. export.remove_pron_notations(term, true) .. "''': "
elseif args.ann then
anntext = "'''" .. args.ann .. "''': "
else
anntext = ""
end
return anntext .. IPA
end
return export