This module will transliterate text in the Old Cyrillic script. It is used to transliterate Old Church Slavonic, Old East Slavic, Bulgar, Old Novgorodian, and Church Slavonic.
The module should preferably not be called directly from templates or other modules.
To use it from a template, use {{xlit}}.
Within a module, use Module:languages#Language:transliterate.
For testcases, see Module:Cyrs-translit/testcases.
tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang. When the transliteration fails, returns nil.
-- Module table returned at the end of the file; `export.tr` is its entry point.
local export = {}

-- Number substitutions: export.tr iterates this table with pairs() and applies
-- each key as a pattern with its value as the replacement.
local numbers = mw.loadData("Module:Cyrs-translit/numbers")

-- Local aliases for the Unicode-aware Scribunto string functions.
local ugsub = mw.ustring.gsub
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
local U = mw.ustring.char
local umatch = mw.ustring.match
local usub = mw.ustring.sub
local ulower = mw.ustring.lower

-- Combining diacritics, built from their code points.
local acute = U(0x301)
local grave = U(0x300)
local circumflex = U(0x302)
local palatalization = U(0x0484)
local titlo = U(0x0483)
local dasia = U(0x0485)
local psili = U(0x0486)
local vzmet = U(0xA66F)
-- Both breathing marks concatenated (smooth, then rough).
local breathing = psili .. dasia

-- Pattern fragment spliced into the patterns used by export.tr.
-- NOTE(review): a bare "*" has nothing to repeat; this looks like a character
-- class (presumably of accent marks) whose bracketed part was lost in
-- extraction -- confirm against the upstream module.
local accent = "*"
local vowels = "aAeEiIoOuUyY"
-- Pattern fragment used by export.tr to test the neighbouring transliterated
-- character.
-- NOTE(review): an empty pattern matches everywhere; presumably this was a
-- character class built from `vowels` plus the palatalization prime -- confirm.
local vowel_or_soft = ""
-- Letter-to-Latin transliteration table shared by every language. export.tr
-- uses it as the replacement table for the main substitution, either directly
-- or as the __index fallback of a language-specific table.
-- NOTE(review): every entry below has lost its key (each reads `= 'X'`), so
-- the table does not parse as shown; presumably the `["…"]` Cyrillic keys were
-- stripped in extraction and must be restored from the upstream module.
local common_letters = {
= 'A', = 'a',
= 'B', = 'b',
= 'V', = 'v',
= 'G', = 'g',
= 'D', = 'd',
= 'E', = 'e',
= 'Ž', = 'ž',
= 'Dz', = 'dz',
= 'Z', = 'z',
= 'I', = 'i',
= 'I', = 'i', -- Contrastive with "И".
= 'J', = 'j',
= 'Đ', = 'đ',
= 'K', = 'k',
= 'L', = 'l',
= 'M', = 'm',
= 'N', = 'n',
= 'O', = 'o',
= 'P', = 'p',
= 'R', = 'r',
= 'S', = 's',
= 'T', = 't',
= 'U', = 'u',
= 'U', = 'u',
= 'F', = 'f',
= 'X', = 'x',
= 'O', = 'o', -- Contrastive with "О".
= 'Ot', = 'ot', -- Becomes "otŭ" as appropriate.
= 'Ô', = 'ô',
= 'C', = 'c',
= 'Ć', = 'ć', -- From a merger of "Ц" and "Ч" in Old Novgorodian.
= 'Č', = 'č',
= 'Š', = 'š',
= 'Št', = 'št',
= 'Ŭ', = 'ŭ',
= 'Y', = 'y',
= 'Ĭ', = 'ĭ',
= 'Ě', = 'ě',
= 'Jě', = 'jě',
= 'Ja', = 'ja',
= 'Je', = 'je',
= 'Ju', = 'ju',
= 'Ǫ', = 'ǫ',
= 'Jǫ', = 'jǫ',
= 'Ę', = 'ę',
= 'Ję', = 'ję',
= 'Ks', = 'ks',
= 'Ps', = 'ps',
= 'Θ', = 'θ',
= 'Ü', = 'ü',
= 'Ü', = 'ü', -- Contrastive with "Ѵ".
= 'Q', = 'q',
}
-- Maps variant letter forms to the canonical Cyrillic letter (optionally
-- followed by the palatalization mark). export.tr applies this table to the
-- decomposed text before any other letter handling.
-- NOTE(review): the entries have lost their keys (each reads `= '…'`), so the
-- table does not parse as shown; presumably the `["…"]` keys were stripped in
-- extraction and must be restored from the upstream module.
local variants = {
= 'в',
= 'Г', = 'г',
= 'д',
= 'Д' .. palatalization, = 'д' .. palatalization,
= 'Е', = 'е',
= 'Е', = 'е',
= 'Ѕ', = 'ѕ',
= 'Ѕ', = 'ѕ',
= 'З', = 'з',
= 'И', = 'и',
= 'І', = 'і',
= 'І', = 'і',
= 'Л' .. palatalization, = 'л' .. palatalization,
= 'М' .. palatalization, = 'м' .. palatalization,
= 'Н' .. palatalization, = 'н' .. palatalization,
= 'О', = 'о',
= 'О', = 'о',
= 'О', = 'о',
= 'О', = 'о',
= 'О', = 'о',
= 'О', = 'о',
= 'о',
= 'о',
= 'с',
= 'т',
= 'т',
= 'Ꙋ', = 'ꙋ', = 'ꙋ',
= 'Ѡ', = 'ѡ',
= 'Ꙑ', = 'ꙑ',
= 'ъ',
= 'ѣ',
= 'Ꙗ', = 'ꙗ',
= 'Ю', = 'ю',
= 'Ѫ', = 'ѫ',
= 'Ѧ', = 'ѧ',
= 'Ѩ', = 'ѩ',
}
-- Letters converted to their iotated equivalents when word-initial.
-- Default table for every language; export.tr uses it when no
-- language-specific table is available.
-- NOTE(review): the entries have lost their keys (presumably the plain,
-- non-iotated letters); restore them from the upstream module.
local common_iotated_initial = {
= 'Ꙓ', = 'ꙓ',
}
-- Letters converted to their iotated equivalents after vowels or a
-- palatalization mark.
-- Default table for every language; export.tr uses it when no
-- language-specific table is available.
-- NOTE(review): the entries have lost their keys (presumably the plain,
-- non-iotated letters); restore them from the upstream module.
local common_iotated_after_vowel_or_soft = {
= 'Ѥ', = 'ѥ',
= 'Ꙓ', = 'ꙓ',
= 'Ѩ', = 'ѩ',
}
-- Containers for language-specific overrides; export.tr falls back to the
-- corresponding common_* table (or skips the "уо" rule) when an override is
-- absent.
-- NOTE(review): these look like tables meant to be indexed by language code;
-- the assignments and lookups elsewhere in the file appear to have lost their
-- `[code]` subscripts in extraction -- confirm against the upstream module.
local lang_letters = {}
local lang_iotated_initial = {}
local lang_iotated_after_vowel_or_soft = {}
local uo_is_u = {}
-- Old East Slavic
-- Extra letter and word-initial iotation mappings layered over the common
-- tables through an __index fallback.
-- NOTE(review): as shown, each assignment replaces the whole container and the
-- entries have no keys; presumably a language-code subscript on the left-hand
-- side and the `["…"]` keys were stripped in extraction -- confirm.
lang_letters = setmetatable({
= 'Šč', = 'šč',
}, {__index = common_letters})
lang_iotated_initial = setmetatable({
= 'Ѥ', = 'ѥ',
= 'Ѩ', = 'ѩ',
}, {__index = common_iotated_initial})
-- Old Novgorodian
-- Letter overrides layered over common_letters through an __index fallback;
-- also enables the rule in export.tr that treats "уо" as "у".
-- NOTE(review): as shown, the entries have no keys and the assignments have no
-- language subscript (the iotated-initial line reads as a self-assignment);
-- presumably the bracketed parts were stripped in extraction -- confirm.
lang_letters = setmetatable({
= 'Ć', = 'ć',
= 'Ć', = 'ć',
= 'Ść', = 'ść',
}, {__index = common_letters})
lang_iotated_initial = lang_iotated_initial
uo_is_u = true
-- Old Pskovian
-- Letter and iotation overrides. The letter and word-initial tables fall back
-- (via __index) to the previously assigned language tables, the
-- after-vowel/soft table falls back to the common one, and the "уо" -> "у"
-- rule in export.tr is enabled.
-- NOTE(review): as shown, the entries have no keys and the assignments have no
-- language subscript; presumably the bracketed parts were stripped in
-- extraction -- confirm against the upstream module.
lang_letters = setmetatable({ -- In addition to zle-ono above.
= 'Ź', = 'ź',
= 'Dź', = 'dź',
= 'Ź', = 'ź',
= 'Ś', = 'ś',
= 'Ś', = 'ś',
= 'Śk', = 'śk',
= 'Ӕ', = 'æ',
= 'Jæ', = 'jæ',
}, {__index = lang_letters})
lang_iotated_initial = setmetatable({ -- In addition to zle-ono above.
= 'Ѭ', = 'ѭ',
}, {__index = lang_iotated_initial})
lang_iotated_after_vowel_or_soft = setmetatable({
= 'Ѭ', = 'ѭ',
}, {__index = common_iotated_after_vowel_or_soft})
uo_is_u = true
--- Substitution callback used by export.tr for izhitsa after another capture.
-- Keeps `prev` unchanged and replaces `v` with the consonant "В" when `v` is
-- the capital "Ѵ", or with "в" for any other value.
-- @param prev  text captured before the izhitsa
-- @param v     the captured izhitsa
-- @return `prev` followed by the replacement consonant
local function handle_v(prev, v)
	local consonant = "в"
	if v == "Ѵ" then
		consonant = "В"
	end
	return prev .. consonant
end
--- Substitution callback used by export.tr to collapse "оу" into "у".
-- The case of the result follows the captured `o`: lowercase "у" when `o`
-- equals its own lowercase form, capital "У" otherwise.
-- @param o   the captured first letter of the digraph
-- @param ac  the captured accent text, appended unchanged
-- @return the replacement letter followed by `ac`
local function handle_ou(o, ac)
	local u
	if ulower(o) == o then
		u = "у"
	else
		u = "У"
	end
	return u .. ac
end
--- Substitution callback used by export.tr for a vowel followed by a
-- breathing mark.
-- A smooth breathing (psili) is dropped and the vowel returned unchanged. Any
-- other mark is rendered as a leading "h" that takes over the vowel's case,
-- and the vowel itself is lowercased.
-- @param vowel  the captured text preceding the breathing mark
-- @param br     the captured breathing mark
-- @return the replacement text
local function handle_breathing(vowel, br)
	if br ~= psili then
		local lowered = ulower(vowel)
		local h = "H"
		if lowered == vowel then
			h = "h"
		end
		return h .. lowered
	end
	-- Smooth breathing is not marked.
	return vowel
end
--- Transliterates `text` from the Old Cyrillic script into Latin.
-- @param text  the string to transliterate
-- @param lang  language code; used to detect the script when `sc` is not given
-- @param sc    script code; when omitted it is looked up through
--              Module:languages (getByCode / findBestScript / getCode)
-- @return the NFC-normalised transliteration, or nil when the script code is
--         not "Cyrs"
-- Raises an error, quoting the original input, when the final breathing-mark
-- check matches.
-- NOTE(review): several patterns below contain empty captures or bare
-- quantifiers (e.g. "+", "()(" .. accent .. ")()", "%f.", "") where a
-- bracketed character class would be expected, and the lang_* tables are read
-- without a language subscript. Presumably every bracketed span was stripped
-- in extraction; restore them from the upstream module before relying on
-- this function.
function export.tr(text, lang, sc)
if not sc then
sc = require("Module:languages").getByCode(lang, nil, true):findBestScript(text):getCode()
end
-- Only the Old Cyrillic script is handled here.
if sc ~= "Cyrs" then
return nil
end
-- Keep the untouched input for the error message at the end.
local input = text
-- Decompose any acute and grave accents.
text = ugsub(toNFD(text), "+", toNFC)
-- Canonicalize any variants.
text = text:gsub(".*", variants)
-- Transliterate the palatalization mark as prime.
text = text:gsub(palatalization, "ʹ")
-- Treat "Ѵ" as the consonant "В" (transliterated "V") in diphthongs that
-- correspond to Ancient Greek "αυ", "ευ" and "ηυ" (equivalent to "аѵ", "еѵ"
-- and "иѵ"). Note that "ιυ" ("іѵ") is not a diphthong, and "ου" ("оѵ") is
-- a long vowel. However, this doesn't apply to "Ѷ", as the diacritic means
-- it must be treated as a vowel.
text = ugsub(text, "(" .. accent .. ")()", handle_v)
-- Language-specific letter table, falling back to the common one.
local letters = lang_letters or common_letters
-- Convert "ѿ" to "ѡт" if followed by a non-iotated vowel (including those
-- which iotate only after vowels) or a palatalization mark, and "ѡтъ" in
-- all other cases.
text = ugsub(text, "()(" .. accent .. ")()", function(ot, ac, loc)
ot = (ot == "Ѿ" and "Ѡ" or "ѡ") .. ac .. "т"
-- Transliterate the character at position `loc` to test what follows.
local nxt = toNFD(usub(text, loc, loc):gsub(".*", letters))
if not umatch(nxt, "^" .. vowel_or_soft) then
ot = ot .. "ъ"
end
return ot
end)
-- Handle any vowels which are iotated at the start of words.
local iotated_initial = lang_iotated_initial or common_iotated_initial
-- Not possible to input iotated_initial directly, as mw.ustring.gsub
-- doesn't respect metamethods...
-- NOTE(review): the callback returns the table itself rather than an entry;
-- presumably it indexed the table with `m` originally -- confirm.
text = ugsub(text, "%f.", function(m)
return iotated_initial
end)
-- Handle any vowels which are iotated after another vowel or a
-- palatalization mark.
local iotated_after_vowel_or_soft = lang_iotated_after_vowel_or_soft or common_iotated_after_vowel_or_soft
text = ugsub(text, "()(" .. accent .. ")(.)", function(loc, ac, letter)
-- NOTE(review): `letter` is captured but unused; presumably this was a
-- lookup of `letter` in the table -- confirm.
local iotated = iotated_after_vowel_or_soft
if iotated then
-- Inspect the transliteration of the character just before the match.
loc = loc - 1
local prev = toNFD((loc == 0 and "" or usub(text, loc, loc)):gsub(".*", letters))
if umatch(prev, vowel_or_soft .. "%W*$") then
return ac .. iotated
end
end
end)
-- Treat "ъі" as "ꙑ", and make "ъ" tense ("ŷ") before "и" or an iotated
-- vowel.
text = ugsub(text, "()(" .. accent .. ")()(?)", function(yer, ac, loc, i)
local nxt = toNFD(usub(text, loc, loc):gsub(".*", letters)):match("^")
if nxt ~= nil then
return (yer == "Ъ" and "Ꙑ" or "ꙑ") .. ((i == "і" or i == "І") and ac or circumflex .. ac .. i)
end
end)
-- In some languages, treat "уо" ("uo") as "у" ("u").
if uo_is_u then
text = ugsub(text, "(" .. accent .. ")", "%1")
end
-- Treat "оу" ("ou") as "у" ("u").
text = ugsub(text, "()(" .. accent .. ")", handle_ou)
-- Substitute any numbers.
for key, repl in pairs(numbers) do
text = ugsub(text, key, repl)
end
-- Main substitution.
text = text:gsub(".*", letters)
-- Handle any breathing marks.
text = ugsub(toNFD(text), "(-)()", handle_breathing)
-- Any breathing mark still matched at this point is reported as invalid input.
if umatch(text, "") then
error("Invalid breathing marks in input " .. mw.dumpObject(input))
end
-- Transliterate the titlo and vzmet as colon.
text = ugsub(text, "", ":")
return toNFC(text)
end
return export