-- Language code in page name (User:Theknightwho/Cyrs) not recognized.
-- Module table; `tr` is attached to it below and it is returned at the end of the file.
local export = {}
-- Number replacements, loaded from a separate data module; iterated as pattern -> replacement pairs in export.tr.
local numbers = mw.loadData("Module:Cyrs-translit/numbers")
-- Local aliases for the Unicode-aware mw.ustring functions used throughout this module.
local ugsub = mw.ustring.gsub
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
local U = mw.ustring.char
local umatch = mw.ustring.match
local usub = mw.ustring.sub
local ulower = mw.ustring.lower
-- Combining diacritics, built from their code points.
local acute = U(0x301)
local grave = U(0x300)
local circumflex = U(0x302)
-- Transliterated as a prime ("ʹ") in export.tr.
local kamora = U(0x0484)
-- The titlo and vzmet are transliterated as a colon in export.tr.
local titlo = U(0x0483)
-- Rough breathing: handle_breathing marks it with "h"/"H".
local dasia = U(0x0485)
-- Smooth breathing: handle_breathing leaves it unmarked.
local psili = U(0x0486)
local vzmet = U(0xA66F)
-- Both breathing marks concatenated into one string.
local breathing = psili .. dasia
-- Pattern fragment spliced into the patterns in export.tr wherever an optional accent may occur.
-- NOTE(review): as shown this is just "*", which looks like a character class was lost in front of it — confirm against the canonical module.
local accent = "*"
-- Latin vowel letters (both cases).
local vowels = "aAeEiIoOuUyY"
-- Pattern used in export.tr to test whether a transliterated neighbour is a vowel or soft sign.
-- NOTE(review): empty as shown; presumably a character class built from `vowels` — confirm against the canonical module.
local vowel_or_soft = ""
-- Transliteration values shared by every language. This table is the default
-- used by export.tr for its main substitution, and is the `__index` fallback of
-- the language-specific tables defined further down.
-- Entries come in uppercase/lowercase pairs.
-- NOTE(review): the keys of every entry are missing in this copy (each entry
-- starts with a bare `=`), so the table does not parse as shown; restore the
-- keys from the canonical module.
local common_letters = {
= 'A', = 'a',
= 'B', = 'b',
= 'V', = 'v',
= 'G', = 'g',
= 'D', = 'd',
= 'E', = 'e',
= 'Ž', = 'ž',
= 'Dz', = 'dz',
= 'Z', = 'z',
= 'I', = 'i',
= 'I', = 'i', -- Contrastive with "И".
= 'J', = 'j',
= 'Đ', = 'đ',
= 'K', = 'k',
= 'L', = 'l',
= 'M', = 'm',
= 'N', = 'n',
= 'O', = 'o',
= 'P', = 'p',
= 'R', = 'r',
= 'S', = 's',
= 'T', = 't',
= 'U', = 'u',
= 'U', = 'u',
= 'F', = 'f',
= 'X', = 'x',
= 'O', = 'o', -- Contrastive with "О".
= 'Ot', = 'ot', -- Becomes "otŭ" as appropriate.
= 'Ô', = 'ô',
= 'C', = 'c',
= 'Č', = 'č',
= 'Š', = 'š',
= 'Št', = 'št',
= 'Ŭ', = 'ŭ',
= 'Y', = 'y',
= 'Ĭ', = 'ĭ',
= 'Ě', = 'ě',
= 'Jě', = 'jě',
= 'Ja', = 'ja',
= 'Je', = 'je',
= 'Ju', = 'ju',
= 'Ǫ', = 'ǫ',
= 'Jǫ', = 'jǫ',
= 'Ę', = 'ę',
= 'Ję', = 'ję',
= 'Ks', = 'ks',
= 'Ps', = 'ps',
= 'Θ', = 'θ',
= 'Ü', = 'ü',
= 'Ü', = 'ü', -- Contrastive with "Ѵ".
= 'Q', = 'q',
}
-- Canonical forms for variant letters. export.tr applies this table as a gsub
-- replacement ("Canonicalize any variants") before any other letter handling,
-- so later steps only see the canonical Cyrillic letters on the right-hand side.
-- Some variants canonicalize to a base letter followed by the kamora.
-- NOTE(review): the keys (the variant characters themselves) are missing in
-- this copy; restore them from the canonical module.
local variants = {
= 'в',
= 'Г', = 'г',
= 'д',
= 'Д' .. kamora, = 'д' .. kamora,
= 'Е', = 'е',
= 'Е', = 'е',
= 'Ѕ', = 'ѕ',
= 'Ѕ', = 'ѕ',
= 'З', = 'з',
= 'И', = 'и',
= 'І', = 'і',
= 'І', = 'і',
= 'Л' .. kamora, = 'л' .. kamora,
= 'М' .. kamora, = 'м' .. kamora,
= 'Н' .. kamora, = 'н' .. kamora,
= 'О', = 'о',
= 'О', = 'о',
= 'О', = 'о',
= 'О', = 'о',
= 'О', = 'о',
= 'О', = 'о',
= 'о',
= 'о',
= 'с',
= 'т',
= 'т',
= 'Ꙋ', = 'ꙋ', = 'ꙋ',
= 'Ѡ', = 'ѡ',
= 'Ц', = 'ц', -- From a merger of Ц and Ч in Old Novgorodian
= 'Ꙑ', = 'ꙑ',
= 'ъ',
= 'ѣ',
= 'Ꙗ', = 'ꙗ',
= 'Ю', = 'ю',
= 'Ѫ', = 'ѫ',
= 'Ѧ', = 'ѧ',
= 'Ѩ', = 'ѩ',
}
-- Word-initial iotation shared by every language: values are the iotated
-- letters substituted at the start of a word. Used by export.tr as the default,
-- and as the `__index` fallback of the language-specific tables.
-- NOTE(review): the keys (the plain letters being iotated) are missing in this copy.
local common_iotated_initial = {
= 'Ꙓ', = 'ꙓ',
}
-- Iotation shared by every language for letters that follow another vowel or a
-- kamora: values are the iotated letters substituted in that position. Used by
-- export.tr as the default, and as an `__index` fallback further down.
-- NOTE(review): the keys (the plain letters being iotated) are missing in this copy.
local common_iotated_after_vowel = {
= 'Ѥ', = 'ѥ',
= 'Ꙓ', = 'ꙓ',
= 'Ѩ', = 'ѩ',
}
-- Language-specific overrides. Each table falls back to a shared table (or to
-- another language's table) through an `__index` metatable, so only the
-- differences need to be listed.
-- NOTE(review): the section comments and the "zle-ono" remark below suggest
-- these containers are keyed by language code, but the index expressions (and
-- the keys inside each table) are missing in this copy. As shown, every
-- assignment overwrites the container itself and the line after the Old
-- Novgorodian letters is a self-assignment. Restore from the canonical module.
local lang_letters = {}
local lang_iotated_initial = {}
local lang_iotated_after_vowel = {}
-- Flags whether "уо" is collapsed to "у" (checked in export.tr).
local uo_is_u = {}
-- Old East Slavic
lang_letters = setmetatable({
= 'Šč', = 'šč',
}, {__index = common_letters})
lang_iotated_initial = setmetatable({
= 'Ѥ', = 'ѥ',
= 'Ѩ', = 'ѩ',
}, {__index = common_iotated_initial})
-- Old Novgorodian
lang_letters = setmetatable({
= 'Ć', = 'ć',
= 'Ć', = 'ć',
= 'Ść', = 'ść',
}, {__index = common_letters})
lang_iotated_initial = lang_iotated_initial
uo_is_u = true
-- Old Pskovian
lang_letters = setmetatable({ -- In addition to zle-ono above.
= 'Ź', = 'ź',
= 'Dź', = 'dź',
= 'Ź', = 'ź',
= 'Ś', = 'ś',
= 'Ś', = 'ś',
= 'Šk', = 'šk',
}, {__index = lang_letters})
lang_iotated_initial = setmetatable({ -- In addition to zle-ono above.
= 'Ѭ', = 'ѭ',
}, {__index = lang_iotated_initial})
lang_iotated_after_vowel = setmetatable({
= 'Ѭ', = 'ѭ',
}, {__index = common_iotated_after_vowel})
uo_is_u = true
-- gsub callback for the diphthong step in export.tr: keeps the preceding
-- capture and swaps the izhitsa for the consonant "В"/"в". The result is
-- capitalised only when the second capture is exactly the capital "Ѵ".
local function handle_v(prev, v)
	local consonant = "в"
	if v == "Ѵ" then
		consonant = "В"
	end
	return prev .. consonant
end
-- gsub callback for the "оу" step in export.tr: collapses the digraph to a
-- single "у", following the case of the first capture, and re-attaches the
-- accent capture after it.
local function handle_ou(o, ac)
	local is_lowercase = ulower(o) == o
	if is_lowercase then
		return "у" .. ac
	end
	return "У" .. ac
end
-- gsub callback for the breathing-mark step in export.tr.
-- A smooth breathing (psili) is dropped and the vowel is returned untouched.
-- Any other mark is treated as rough breathing: the vowel is lowercased and
-- prefixed with "h", or with "H" when the vowel was not already lowercase.
local function handle_breathing(vowel, br)
	if br ~= psili then
		local lowered = ulower(vowel)
		local prefix = "H"
		if lowered == vowel then
			prefix = "h"
		end
		return prefix .. lowered
	end
	return vowel
end
-- Transliterates text written in the "Cyrs" script into Latin.
--
-- Parameters:
--   text  the string to transliterate.
--   lang  language code; in the code as shown it is only used to look up the
--         language object when `sc` is not supplied.
--   sc    script code; when omitted it is detected from `text` via
--         Module:languages.
-- Returns the NFC-normalised transliteration, or nil when the script is not
-- "Cyrs".
-- Raises an error (quoting the original input) when the breathing-mark check
-- after the main substitution matches.
--
-- The steps are order-dependent: variants are canonicalised first, context
-- rules then rewrite the Cyrillic text, and only then is the letter table
-- applied.
-- NOTE(review): several pattern strings below ("+", "%f.", "", "()(?)", "(-)()")
-- and the table selections (`lang_letters or common_letters`, etc.) look as if
-- bracketed character classes and `[lang]` indexes were lost from this copy —
-- confirm against the canonical module before relying on them.
function export.tr(text, lang, sc)
-- Detect the script from the text when the caller did not pass one.
if not sc then
sc = require("Module:languages").getByCode(lang, nil, true):findBestScript(text):getCode()
end
-- Only the "Cyrs" script is handled here.
if sc ~= "Cyrs" then
return nil
end
-- Keep the untouched input for the error message near the end.
local input = text
-- Decompose any acute and grave accents.
text = ugsub(toNFD(text), "+", toNFC)
-- Canonicalize any variants.
text = text:gsub(".*", variants)
-- Transliterate the kamora as prime.
text = text:gsub(kamora, "ʹ")
-- Treat "Ѵ" as the consonant "В" (transliterated "V") in diphthongs that
-- correspond to Ancient Greek "αυ", "ευ" and "ηυ" (equivalent to "аѵ", "еѵ"
-- and "иѵ"). Note that "ιυ" ("іѵ") is not a diphthong, and "ου" ("оѵ") is
-- a long vowel. However, this doesn't apply to "Ѷ", as the diacritic means
-- it must be treated as a vowel.
text = ugsub(text, "(" .. accent .. ")()", handle_v)
-- Letter table used for look-ahead/look-behind checks and the main substitution.
local letters = lang_letters or common_letters
-- Convert "ѿ" to "ѡт" if followed by a non-iotated vowel (including those
-- which iotate only after vowels) or a kamora, and "ѡтъ" in all other
-- cases.
text = ugsub(text, "()(" .. accent .. ")()", function(ot, ac, loc)
ot = (ot == "Ѿ" and "Ѡ" or "ѡ") .. ac .. "т"
-- Transliterate the character at position `loc` to test it against vowel_or_soft.
local nxt = toNFD(usub(text, loc, loc):gsub(".*", letters))
if not umatch(nxt, "^" .. vowel_or_soft) then
ot = ot .. "ъ"
end
return ot
end)
-- Handle any vowels which are iotated at the start of words.
local iotated_initial = lang_iotated_initial or common_iotated_initial
text = ugsub(text, "%f.", function(m)
return iotated_initial -- Can't input iotated_initial directly, as mw.ustring.gsub doesn't respect metamethods...
end)
-- Handle any vowels which are iotated after another vowel or a kamora.
local iotated_after_vowel = lang_iotated_after_vowel or common_iotated_after_vowel
text = ugsub(text, "()(" .. accent .. ")(.)", function(loc, ac, letter)
local iotated = iotated_after_vowel
if iotated then
-- Step back to the character before the match; position 0 means start of text.
loc = loc - 1
local prev = toNFD((loc == 0 and "" or usub(text, loc, loc)):gsub(".*", letters))
if umatch(prev, vowel_or_soft .. "%W*$") then
return ac .. iotated
end
end
-- Falling through returns nil, which leaves the match unchanged.
end)
-- Treat "ъі" as "ꙑ", and make "ъ" tense ("ŷ") before "и" or an iotated vowel.
text = ugsub(text, "()(" .. accent .. ")()(?)", function(yer, ac, loc, i)
local nxt = toNFD(usub(text, loc, loc):gsub(".*", letters)):match("^")
if nxt ~= nil then
return (yer == "Ъ" and "Ꙑ" or "ꙑ") .. (
(i == "і" or i == "І") and ac or
circumflex .. ac .. i
)
end
-- Falling through returns nil, which leaves the match unchanged.
end)
-- In some languages, treat "уо" ("uo") as "у" ("u").
if uo_is_u then
text = ugsub(text, "(" .. accent .. ")", "%1")
end
-- Treat "оу" ("ou") as "у" ("u").
text = ugsub(text, "()(" .. accent .. ")", handle_ou)
-- Substitute any numbers.
for key, repl in pairs(numbers) do
text = ugsub(text, key, repl)
end
-- Main substitution.
text = text:gsub(".*", letters)
-- Handle any breathing marks.
text = ugsub(toNFD(text), "(-)()", handle_breathing)
-- Anything still matching this check after the step above is reported as invalid input.
if umatch(text, "") then
error("Invalid breathing marks in input " .. mw.dumpObject(input))
end
-- Transliterate the titlo and vzmet as colon.
text = ugsub(text, "", ":")
return toNFC(text)
end
-- Hand the module table back to the caller that loaded this module.
return export