Module:User:Theknightwho/Cyrs-translit

The following documentation is generated by Module:documentation/functions/translit.
Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox
Language code in page name (User:Theknightwho/Cyrs) not recognized.
local export = {}

local numbers = mw.loadData("Module:Cyrs-translit/numbers")

local ugsub = mw.ustring.gsub
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
local U = mw.ustring.char
local umatch = mw.ustring.match
local usub = mw.ustring.sub
local ulower = mw.ustring.lower

-- Combining marks used by the transliteration, built from their code points.
local acute = U(0x301) -- U+0301 COMBINING ACUTE ACCENT
local grave = U(0x300) -- U+0300 COMBINING GRAVE ACCENT
local circumflex = U(0x302) -- U+0302 COMBINING CIRCUMFLEX ACCENT
local kamora = U(0x0484) -- U+0484; export.tr transliterates it as the prime "ʹ"
local titlo = U(0x0483) -- U+0483 COMBINING CYRILLIC TITLO
local dasia = U(0x0485) -- U+0485; compare handle_breathing, which marks non-psili breathing with "h"
local psili = U(0x0486) -- U+0486; handle_breathing leaves a vowel with this mark unchanged
local vzmet = U(0xA66F) -- U+A66F COMBINING CYRILLIC VZMET

-- Both breathing marks concatenated into one string.
local breathing = psili .. dasia
-- Pattern fragment spliced into export.tr's patterns as "(" .. accent .. ")".
-- NOTE(review): as written this is just "*"; presumably a character class of
-- accent marks preceded the "*" and was lost -- confirm against the canonical module.
local accent = "*"
-- Upper- and lower-case Latin vowel letters.
local vowels = "aAeEiIoOuUyY"
-- Pattern fragment used in export.tr to test the neighbouring transliterated letter.
-- NOTE(review): empty here; the name suggests a class of vowels plus soft
-- markers -- confirm against the canonical module.
local vowel_or_soft = ""

-- Base letter-to-Latin transliteration table. It is the __index fallback for
-- the language-specific letter tables below and the default `letters` table in
-- export.tr, where it is applied by the "Main substitution" gsub.
-- NOTE(review): every entry in this copy has lost its key (each reads
-- ` = 'X'`), which is not valid Lua. The inline remarks ("Contrastive with ...")
-- suggest the keys were Cyrillic letters in upper/lower-case pairs -- restore
-- them from the canonical module before use.
local common_letters = {
	 = 'A',  = 'a',
	 = 'B',  = 'b',
	 = 'V',  = 'v',
	 = 'G',  = 'g',
	 = 'D',  = 'd',
	 = 'E',  = 'e',
	 = 'Ž',  = 'ž',
	 = 'Dz',  = 'dz',
	 = 'Z',  = 'z',
	 = 'I',  = 'i',
	 = 'I',  = 'i', -- Contrastive with "И".
	 = 'J',  = 'j',
	 = 'Đ',  = 'đ',
	 = 'K',  = 'k',
	 = 'L',  = 'l',
	 = 'M',  = 'm',
	 = 'N',  = 'n',
	 = 'O',  = 'o',
	 = 'P',  = 'p',
	 = 'R',  = 'r',
	 = 'S',  = 's',
	 = 'T',  = 't',
	 = 'U',  = 'u',
	 = 'U',  = 'u',
	 = 'F',  = 'f',
	 = 'X',  = 'x',
	 = 'O',  = 'o', -- Contrastive with "О".
	 = 'Ot',  = 'ot', -- Becomes "otŭ" as appropriate.
	 = 'Ô',  = 'ô',
	 = 'C',  = 'c',
	 = 'Č',  = 'č',
	 = 'Š',  = 'š',
	 = 'Št',  = 'št',
	 = 'Ŭ',  = 'ŭ',
	 = 'Y',  = 'y',
	 = 'Ĭ',  = 'ĭ',
	 = 'Ě',  = 'ě',
	 = 'Jě',  = 'jě',
	 = 'Ja',  = 'ja',
	 = 'Je',  = 'je',
	 = 'Ju',  = 'ju',
	 = 'Ǫ',  = 'ǫ',
	 = 'Jǫ',  = 'jǫ',
	 = 'Ę',  = 'ę',
	 = 'Ję',  = 'ję',
	 = 'Ks',  = 'ks',
	 = 'Ps',  = 'ps',
	 = 'Θ',  = 'θ',
	 = 'Ü',  = 'ü',
	 = 'Ü',  = 'ü', -- Contrastive with "Ѵ".
	 = 'Q',  = 'q',
}

-- Replacement table for the "Canonicalize any variants" step in export.tr:
-- each value is the canonical Cyrillic letter (some followed by the kamora)
-- that a variant form is rewritten to before transliteration.
-- NOTE(review): the keys (the variant forms themselves) are missing from every
-- entry in this copy, so the table is not valid Lua as shown -- restore them
-- from the canonical module.
local variants = {
	 = 'в',
	 = 'Г',  = 'г',
	 = 'д',
	 = 'Д' .. kamora,  = 'д' .. kamora,
	 = 'Е',  = 'е',
	 = 'Е',  = 'е',
	 = 'Ѕ',  = 'ѕ',
	 = 'Ѕ',  = 'ѕ',
	 = 'З',  = 'з',
	 = 'И',  = 'и',
	 = 'І',  = 'і',
	 = 'І',  = 'і',
	 = 'Л' .. kamora,  = 'л' .. kamora,
	 = 'М' .. kamora,  = 'м' .. kamora,
	 = 'Н' .. kamora,  = 'н' .. kamora,
	 = 'О',  = 'о',
	 = 'О',  = 'о',
	 = 'О',  = 'о',
	 = 'О',  = 'о',
	 = 'О',  = 'о',
	 = 'О',  = 'о',
	 = 'о',
	 = 'о',
	 = 'с',
	 = 'т',
	 = 'т',
	 = 'Ꙋ',  = 'ꙋ',  = 'ꙋ',
	 = 'Ѡ',  = 'ѡ',
	 = 'Ц',  = 'ц', -- From a merger of Ц and Ч in Old Novgorodian
	 = 'Ꙑ',  = 'ꙑ',
	 = 'ъ',
	 = 'ѣ',
	 = 'Ꙗ',  = 'ꙗ',
	 = 'Ю',  = 'ю',
	 = 'Ѫ',  = 'ѫ',
	 = 'Ѧ',  = 'ѧ',
	 = 'Ѩ',  = 'ѩ',
}

-- Default replacements for the "iotated at the start of words" step in
-- export.tr; also the __index fallback for the language-specific table.
-- NOTE(review): entry keys are missing in this copy -- restore from the
-- canonical module.
local common_iotated_initial = {
	 = 'Ꙓ',  = 'ꙓ',
}

-- Default replacements for the "iotated after another vowel or a kamora" step
-- in export.tr; also the __index fallback for the language-specific table.
-- NOTE(review): entry keys are missing in this copy -- restore from the
-- canonical module.
local common_iotated_after_vowel = {
	 = 'Ѥ',  = 'ѥ',
	 = 'Ꙓ',  = 'ꙓ',
	 = 'Ѩ',  = 'ѩ',
}

-- Language-specific overrides. Each table falls back to a common (or earlier)
-- table through its __index metamethod.
-- NOTE(review): as written, every assignment below replaces the whole variable
-- rather than filling a per-language slot, and `uo_is_u` ends up as the boolean
-- `true`. The "zle-ono" remark suggests the original indexed these containers
-- by language code and that the index expressions were lost -- confirm against
-- the canonical module.
local lang_letters = {}
local lang_iotated_initial = {}
local lang_iotated_after_vowel = {}
local uo_is_u = {}

-- Old East Slavic
lang_letters = setmetatable({
	 = 'Šč',  = 'šč',
}, {__index = common_letters})

lang_iotated_initial = setmetatable({
	 = 'Ѥ',  = 'ѥ',
	 = 'Ѩ',  = 'ѩ',
}, {__index = common_iotated_initial})

-- Old Novgorodian
lang_letters = setmetatable({
	 = 'Ć',  = 'ć',
	 = 'Ć',  = 'ć',
	 = 'Ść',  = 'ść',
}, {__index = common_letters})

-- NOTE(review): a self-assignment as written; presumably one language's entry
-- reusing another's -- confirm.
lang_iotated_initial = lang_iotated_initial
uo_is_u = true

-- Old Pskovian
lang_letters = setmetatable({ -- In addition to zle-ono above.
	 = 'Ź',  = 'ź',
	 = 'Dź',  = 'dź',
	 = 'Ź',  = 'ź',
	 = 'Ś',  = 'ś',
	 = 'Ś',  = 'ś',
	 = 'Šk',  = 'šk',
}, {__index = lang_letters})

lang_iotated_initial = setmetatable({ -- In addition to zle-ono above.
	 = 'Ѭ',  = 'ѭ',
}, {__index = lang_iotated_initial})

lang_iotated_after_vowel = setmetatable({
	 = 'Ѭ',  = 'ѭ',
}, {__index = common_iotated_after_vowel})

uo_is_u = true

-- gsub callback: swaps a matched "Ѵ"/"ѵ" for the consonant "В"/"в".
-- @param prev  the text captured before the letter; emitted unchanged
-- @param v     the captured letter; only an exact capital "Ѵ" yields capital "В"
-- @return `prev` followed by the replacement consonant
local function handle_v(prev, v)
	local consonant
	if v == "Ѵ" then
		consonant = "В"
	else
		consonant = "в"
	end
	return prev .. consonant
end

-- gsub callback for the "оу" digraph: collapses it to a single "у"/"У".
-- @param o   the captured first letter; its case decides the case of the result
-- @param ac  the captured accent text, appended after the replacement letter
-- @return "у" when `o` equals its own lowercase form, otherwise "У", plus `ac`
local function handle_ou(o, ac)
	local is_lowercase = ulower(o) == o
	local u
	if is_lowercase then
		u = "у"
	else
		u = "У"
	end
	return u .. ac
end

-- gsub callback for a vowel carrying a breathing mark.
-- @param vowel  the captured vowel text
-- @param br     the captured breathing mark
-- @return `vowel` untouched for smooth breathing (psili); otherwise the
--         lowercased vowel prefixed with "h", or "H" if the vowel was not
--         already lowercase
local function handle_breathing(vowel, br)
	if br ~= psili then
		-- Rough breathing: the "h" takes over the capitalisation of the vowel.
		local lowered = ulower(vowel)
		local h = "H"
		if lowered == vowel then
			h = "h"
		end
		return h .. lowered
	end
	-- Smooth breathing is not marked.
	return vowel
end

-- Transliterates `text` when its script code is "Cyrs".
-- @param text  the string to transliterate
-- @param lang  language code; used to detect the script when `sc` is not given
-- @param sc    script code; when falsy it is looked up through Module:languages
-- @return the transliterated string in NFC, or nil when the script is not "Cyrs"
-- Raises an error (quoting the original input) when the breathing-mark check
-- near the end matches.
-- NOTE(review): several pattern strings in this copy ("+", "%f.", "",
-- "(-)()", "()(" ...) and table lookups (`lang_letters or ...`,
-- `return iotated_initial`) look truncated: bracketed character classes and
-- index expressions seem to be missing. Confirm every pattern against the
-- canonical module before relying on this function.
function export.tr(text, lang, sc)
	-- Detect the script from the text when the caller did not supply one.
	if not sc then
		sc = require("Module:languages").getByCode(lang, nil, true):findBestScript(text):getCode()
	end
	if sc ~= "Cyrs" then
		return nil
	end
	
	-- Keep the untouched input for the error message below.
	local input = text
	
	-- Decompose any acute and grave accents.
	text = ugsub(toNFD(text), "+", toNFC)
	
	-- Canonicalize any variants.
	-- (The single assignment keeps only gsub's first result, the string.)
	text = text:gsub(".*", variants)

	-- Transliterate the kamora as prime.
	text = text:gsub(kamora, "ʹ")
	
	-- Treat "Ѵ" as the consonant "В" (transliterated "V") in diphthongs that
	-- correspond to Ancient Greek "αυ", "ευ" and "ηυ" (equivalent to "аѵ", "еѵ"
	-- and "иѵ").  Note that "ιυ" ("іѵ") is not a diphthong, and "ου" ("оѵ") is
	-- a long vowel. However, this doesn't apply to "Ѷ", as the diacritic means
	-- it must be treated as a vowel.
	text = ugsub(text, "(" .. accent .. ")()", handle_v)
	
	-- NOTE(review): presumably meant to select the table for `lang` -- confirm.
	local letters = lang_letters or common_letters
	
	-- Convert "ѿ" to "ѡт" if followed by a non-iotated vowel (including those
	-- which iotate only after vowels) or a kamora, and "ѡтъ" in all other
	-- cases.
	text = ugsub(text, "()(" .. accent .. ")()", function(ot, ac, loc)
		ot = (ot == "Ѿ" and "Ѡ" or "ѡ") .. ac .. "т"
		-- Look at the character at position `loc` in the text as it was before
		-- this gsub, transliterated and decomposed.
		local nxt = toNFD(usub(text, loc, loc):gsub(".*", letters))
		if not umatch(nxt, "^" .. vowel_or_soft) then
			ot = ot .. "ъ"
		end
		return ot
	end)
	
	-- Handle any vowels which are iotated at the start of words.
	local iotated_initial = lang_iotated_initial or common_iotated_initial
	text = ugsub(text, "%f.", function(m)
		-- NOTE(review): returns the table itself and ignores `m`; presumably a
		-- lookup keyed by `m` -- confirm.
		return iotated_initial -- Can't input iotated_initial directly, as mw.ustring.gsub doesn't respect metamethods...
	end)
	
	-- Handle any vowels which are iotated after another vowel or a kamora.
	local iotated_after_vowel = lang_iotated_after_vowel or common_iotated_after_vowel
	text = ugsub(text, "()(" .. accent .. ")(.)", function(loc, ac, letter)
		-- NOTE(review): `letter` is unused as written; presumably the lookup key -- confirm.
		local iotated = iotated_after_vowel
		if iotated then
			-- Step back to the character before the match; position 0 means
			-- the match is at the start of the text, so there is none.
			loc = loc - 1
			local prev = toNFD((loc == 0 and "" or usub(text, loc, loc)):gsub(".*", letters))
			if umatch(prev, vowel_or_soft .. "%W*$") then
				return ac .. iotated
			end
		end
		-- Falling through returns nil, which leaves the match unchanged.
	end)
	
	-- Treat "ъі" as "ꙑ", and make "ъ" tense ("ŷ") before "и" or an iotated vowel.
	text = ugsub(text, "()(" .. accent .. ")()(?)", function(yer, ac, loc, i)
		local nxt = toNFD(usub(text, loc, loc):gsub(".*", letters)):match("^")
		if nxt ~= nil then
			return (yer == "Ъ" and "Ꙑ" or "ꙑ") .. (
				(i == "і" or i == "І") and ac or
				circumflex .. ac .. i
			)
		end
	end)
	
	-- In some languages, treat "уо" ("uo") as "у" ("u").
	if uo_is_u then
		text = ugsub(text, "(" .. accent .. ")", "%1")
	end
	
	-- Treat "оу" ("ou") as "у" ("u").
	text = ugsub(text, "()(" .. accent .. ")", handle_ou)
	
	-- Substitute any numbers.
	-- Each key of the loaded numbers table is used as a pattern and its value
	-- as the replacement.
	for key, repl in pairs(numbers) do
		text = ugsub(text, key, repl)
	end

	-- Main substitution.
	text = text:gsub(".*", letters)
	
	-- Handle any breathing marks.
	text = ugsub(toNFD(text), "(-)()", handle_breathing)
	
	-- Fail loudly if the check below still matches after the step above.
	if umatch(text, "") then
		error("Invalid breathing marks in input " .. mw.dumpObject(input))
	end
	
	-- Transliterate the titlo and vzmet as colon.
	text = ugsub(text, "", ":")

	return toNFC(text)
end

return export
Module:User:Theknightwho/Cyrs-translit

Wikious

Boobota

Sagapedia