-- Shorthand for building a string from Unicode code points.
local u = mw.ustring.char
-- Combining diacritics, built from their code points.
local MACRON = u(0x0304)   -- U+0304 COMBINING MACRON
local DOTABOVE = u(0x0307) -- U+0307 COMBINING DOT ABOVE
local DOTBELOW = u(0x0323) -- U+0323 COMBINING DOT BELOW
-- Byte-oriented gsub (fast, no Unicode awareness) and its Unicode-aware
-- counterpart from the Scribunto ustring library.
local str_gsub, ugsub = string.gsub, mw.ustring.gsub
-- Byte pattern matching exactly one UTF-8 encoded character: a lead byte
-- (ASCII, or 0xC2-0xF4) followed by any continuation bytes (0x80-0xBF).
-- It lets the byte-oriented string.gsub walk a string one character at a
-- time. The previous value '*' matched only a literal asterisk, so the
-- per-character table lookup never saw any letter.
local UTF8char = '[%z\1-\127\194-\244][\128-\191]*'
-- Module table; public functions are attached to it below.
local export = {}
-- Per-character Cyrillic -> Latin map, used as the replacement table of the
-- final gsub in export.tr.
-- NOTE(review): in the text under review every entry had lost its key
-- (`='A'`), which is not valid Lua. The keys below are reconstructed from the
-- order of the values and from the Cyrillic letters used elsewhere in this
-- file. Ӈ/ӈ (for Ŋ/ŋ) and Ӯ/ӯ (for Ū/ū) are the least certain -- confirm
-- against the upstream copy of the module.
local tab = {
    ['А']='A', ['а']='a', ['В']='W', ['в']='w', ['Е']='E', ['е']='e',
    ['Ё']='Jo', ['ё']='jo', ['Г']='G', ['г']='g', ['Д']='D', ['д']='d',
    ['И']='I', ['и']='i', ['Ӣ']='Ī', ['ӣ']='ī', ['Й']='J', ['й']='j',
    ['К']='K', ['к']='k', ['Л']='L', ['л']='l', ['М']='M', ['м']='m',
    ['Н']='N', ['н']='n', ['Ӈ']='Ŋ', ['ӈ']='ŋ', ['О']='O', ['о']='o',
    ['П']='P', ['п']='p', ['Р']='R', ['р']='r', ['С']='S', ['с']='s',
    ['Т']='T', ['т']='t', ['У']='U', ['у']='u', ['Ӯ']='Ū', ['ӯ']='ū',
    ['Ф']='F', ['ф']='f', ['Х']='H', ['х']='h', ['Ч']='Ç', ['ч']='ç',
    ['Ы']='I', ['ы']='i', ['Э']='Ə', ['э']='ə', ['Ю']='Ju', ['ю']='ju',
    ['Я']='Ja', ['я']='ja',
    -- non-native letters
    ['Б']='B', ['б']='b', ['Ж']='Z', ['ж']='z', ['З']='Z', ['з']='z',
    -- In the literary language ш is only found in Russian words and was
    -- originally represented with s; however, some dialects have ш in
    -- native words.
    ['Ц']='C', ['ц']='c', ['Ш']='Ş', ['ш']='ş', ['Щ']='Ş', ['щ']='ş',
    ['Ъ']='ʺ', ['ъ']='ʺ', ['Ь']="’", ['ь']="’"
}
-- Ordered multi-character rewrites, applied one after another (via ipairs)
-- before any per-character mapping. Order matters: the iotated vowels are
-- turned into "j" + vowel first, so that the trailing Д/Н + "j" rows can
-- fold that j into the preceding consonant.
local other = {
    -- Iotated vowels.
    { 'Я', 'Ja' }, { 'я', 'ja' },
    { 'Ё', 'Jo' }, { 'ё', 'jo' },
    { 'Ю', 'Ju' }, { 'ю', 'ju' },

    -- Д / Н before е.
    -- Unfortunately the Cyrillic alphabet doesn't distinguish between
    -- ʒe and ʒə.
    { 'Де', 'Ʒe' }, { 'де', 'ʒe' },
    { 'Не', 'Ņe' }, { 'не', 'ņe' },

    -- Д / Н before и.
    { 'Ди', 'Ʒi' }, { 'ди', 'ʒi' },
    { 'Ни', 'Ņi' }, { 'ни', 'ņi' },

    -- Д / Н before ӣ.
    { 'Дӣ', 'Ʒī' }, { 'дӣ', 'ʒī' },
    { 'Нӣ', 'Ņī' }, { 'нӣ', 'ņī' },

    -- Д / Н before a Latin "j" produced by the iotated-vowel rows above.
    { 'Дj', 'Ʒ' }, { 'дj', 'ʒ' },
    { 'Нj', 'Ņ' }, { 'нj', 'ņ' },
}
--- Transliterate Cyrillic text into Latin using the `other` and `tab` tables.
--
-- Steps, in order:
--   1. apply the ordered multi-character rewrites in `other`;
--   2. write е / и / ӣ as je / ji / jī after a vowel;
--   3. write Е / е as Je / je at the start of the string or of a word;
--   4. map every remaining character through `tab` (characters without an
--      entry are left unchanged).
--
-- NOTE(review): the context classes of the original patterns were missing
-- from the text under review ("(?)е" matched a literal question mark, and
-- "()Е" is a position capture, so "%1" inserted a number). The classes below
-- are rebuilt from the comments that accompanied those patterns -- confirm
-- the exact vowel list against the upstream copy of the module.
--
-- @param text  string to transliterate
-- @param lang  not used by this function
-- @param sc    not used by this function
-- @return the transliterated string
function export.tr(text, lang, sc)
    -- Combining marks that may sit between a base letter and what follows.
    local marks = MACRON .. DOTABOVE .. DOTBELOW
    -- A vowel, optionally carrying one combining mark. Lists the Cyrillic
    -- vowels still present after step 1 plus the Latin vowels step 1 emits.
    local after_vowel = "([АаЕеИиӢӣОоУуӮӯЫыЭэaeiīou][" .. marks .. "]?)"
    -- Any character that is neither a letter nor one of the combining marks,
    -- i.e. something that separates words.
    local after_break = "([^%a" .. marks .. "])"

    -- Step 1: ordered multi-character rewrites.
    for _, pair in ipairs(other) do
        text = str_gsub(text, pair[1], pair[2])
    end

    -- Step 2: е, и, ӣ after a vowel take an initial j.
    -- Again, the Cyrillic alphabet doesn't distinguish between je and jə.
    text = ugsub(text, after_vowel .. "е", "%1je")
    text = ugsub(text, after_vowel .. "и", "%1ji")
    text = ugsub(text, after_vowel .. "ӣ", "%1jī")

    -- Step 3: Е/е at the very beginning of the string, then at the
    -- beginning of any later word.
    text = str_gsub(text, "^Е", "Je")
    text = str_gsub(text, "^е", "je")
    text = ugsub(text, after_break .. "Е", "%1Je")
    text = ugsub(text, after_break .. "е", "%1je")

    -- Step 4: per-character lookup. The Unicode-aware "." visits one
    -- character at a time; gsub keeps the original character when `tab` has
    -- no entry for it. Parentheses drop gsub's second result (the count).
    return (ugsub(text, ".", tab))
end
-- Hand the module table (with export.tr) back to whatever loads this file.
return export