Module:cau-nec-translit

Hello, you have come here looking for the meaning of the word Module:cau-nec-translit. In DICTIOUS you will not only get to know all the dictionary meanings for the word Module:cau-nec-translit, but we will also tell you about its etymology, its characteristics and you will know how to say Module:cau-nec-translit in singular and plural. Everything you need to know about the word Module:cau-nec-translit you have here. The definition of the word Module:cau-nec-translit will help you to be more precise and correct when speaking or writing your texts. Knowing the definition of Module:cau-nec-translit, as well as those of other words, enriches your vocabulary and provides you with more and better linguistic resources.

This module will transliterate text in one of the Northeast Caucasian languages. It is also used to transliterate Aghul, Akhvakh, Andi, Archi, Avar, Budukh, Botlikh, Chechen, Chamalal, Ingush, Bezhta, and Bagvalal. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:cau-nec-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the transliteration fails, returns nil.

local m_str_utils = require("Module:string utilities")

local gsub = m_str_utils.gsub
local lower = m_str_utils.lower
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
local u = m_str_utils.char
local upper = m_str_utils.upper

-- Cyrillic letter inventories: lowercase letters followed by their uppercase forms.
local CyrlConsonant = "бвгджзклмнпрстфхцчшщБВГДЖЗКЛМНПРСТФХЦЧШЩ"
local CyrlVowel = "аеёиоуыэюяАЕЁИОУЫЭЮЯ"

-- Combining diacritics, each built from its code point.
local ACUTE = u(0x301)
local CIRC = u(0x302)
local TILDE = u(0x303)
local MACRON = u(0x304)
local BREVE = u(0x306)
local DOTABOVE = u(0x307)
local DIAER = u(0x308)
local CARON = u(0x30C)
local DOTBELOW = u(0x323)

-- Pattern fragment used when matching accents on doubled letters.
-- NOTE(review): this is an empty string here, which looks unintended for a
-- value used as a pattern piece; confirm against the upstream module.
local accent = ""

-- Temporary separator character (U+F000) inserted during processing and
-- removed again before the result is returned.
local br = u(0xF000)

-- Table of functions exported by this module.
local export = {}

-- Structured like this to reduce size of loaded table.
-- Returns the substitution tables for the language code `lang`: a list of
-- subtables that export.tr walks in order with ipairs. When `lang` matches none
-- of the branches below, the function falls through and returns nothing.
-- NOTE(review): every entry below reads ` = "..."` with nothing in front of the
-- `=`; the bracketed table keys appear to be missing from this copy, so the
-- function cannot be parsed as written. Restore the keys from the upstream
-- module before using this code.
local function getSubs(lang)
	--Aghul
	if lang == "agx" then
		return {
			{
				 = "ğ",  = "h",  = "ʻʳ",  = "qq",  = "qʼ",  = "kʼ",  = "pʼ",  = "tʼ",  = "q",  = "x̂",  = "ḥʳ",  = "cʼ",  = "čʼ"
			},
			{
				 = "a",  = "b",  = "v",  = "g",  = "d",  = "e",  = "jo",  = "ž",  = "z",  = "ı̇",  = "j",  = "k",  = "l",  = "m",  = "n",  = "o",  = "p",  = "r",  = "s",  = "t",  = "u",  = "f",  = "x",  = "c",  = "č",  = "š",  = "šš",  = "ʔ",  = "ə",  = "ʲ",  = "e",  = "ju",  = "ja"
			}
		}
	-- Akhvakh
	elseif lang == "akv" then
		return {
			{
				 = "ğʰ",  = "qˣʼ",  = "kˡʼ",  = "ᵏl",  = "qˣ"
			},
			{
				 = "ɣ",  = "h",  = "ʻʳ",  = "qxʼ",  = "kkˡʼ",  = "kʼ",  = "lˢ",  = "ĺ",  = "ᵏll",  = "pʼ",  = "tʼ",  = "qx",  = "x̂",  = "ḥʳ",  = "cʼ",  = "čʼ"
			},
			{
				 = "a",  = "b",  = "v",  = "g",  = "d",  = "e",  = "jo",  = "ž",  = "z",  = "ı̇",  = "j",  = "k",  = "l",  = "m",  = "n",  = "o",  = "p",  = "r",  = "s",  = "t",  = "u",  = "f",  = "x",  = "c",  = "č",  = "š",  = "šš",  = "ʔ",  = "ə",  = "ʲ",  = "e",  = "ju",  = "ja"
			}
		}
	-- Andi
	elseif lang == "ani" then
		return {
			{
				 = "qxʼ",  = "qx"
			},
			{
				 = "ğʼ",  = "žʼ",  = "qxʼ",  = "llˢʼ",  = "qx",  = "ccʼ",  = "cčʼ"
			},
			{
				 = "ğ",  = "h",  = "gʼ",  = "qˣʼ",  = "kkˡʼ",  = "kʼ",  = "lˢ",  = "lˢʼ",  = "ᵏll",  = "pʼ",  = "tʼ",  = "qˣ",  = "x̂",  = "xʼ",  = "cʼ",  = "čʼ"
			},
			{
				 = "a",  = "b",  = "v",  = "g",  = "d",  = "e",  = "jo",  = "ž",  = "z",  = "ı̇",  = "j",  = "k",  = "l",  = "m",  = "n",  = "o",  = "p",  = "r",  = "s",  = "t",  = "u",  = "f",  = "x",  = "c",  = "č",  = "š",  = "šš",  = "ˀ",  = "ə",  = "ʲ",  = "e",  = "ju",  = "ja"
			}
		}
	-- Archi
	elseif lang == "aqc" then
		return {
			{
				 = "qq̣ʼ"
			},
			{
				 = "ğ̣",  = "qqʼ",  = "q̣ʼ",  = "q̣",  = "x̣"
			},
			{
				 = "ğ",  = "h",  = "ˀ",  = "qʼ",  = "kˡʼ",  = "kʼ",  = "lʰ",  = "lˠ",  = "ᵏl",  = "pʼ",  = "tʼ",  = "q",  = "ḥʳ",  = "cʼ",  = "čʼ"
			},
			{
				 = "a",  = "b",  = "w",  = "g",  = "d",  = "e",  = "jo",  = "ž",  = "z",  = "ı̇",  = "j",  = "k",  = "l",  = "m",  = "n",  = "o",  = "p",  = "r",  = "s",  = "t",  = "u",  = "f",  = "x",  = "c",  = "č",  = "š",  = "šš",  = "ʔ",  = "ə",  = "",  = "e",  = "ju",  = "ja"
			}
		}
	-- Avar
	elseif lang == "av" then
		return {
			{
				 = "ğ",  = "h",  = "ʻ",  = "qxʼ",  = "kkˡʼ",  = "kʼ",  = "lˢ",  = "ᵏll",  = "tʼ",  = "qx",  = "x̂",  = "ḥʳ",  = "cʼ",  = "čʼ"
			},
			{
				 = "a",  = "b",  = "w",  = "g",  = "d",  = "e",  = "jo",  = "ž",  = "z",  = "ı̇",  = "j",  = "k",  = "l",  = "m",  = "n",  = "o",  = "p",  = "r",  = "s",  = "t",  = "u",  = "f",  = "x",  = "c",  = "č",  = "š",  = "šš",  = "ʔ",  = "ə",  = "ʲ",  = "e",  = "ju",  = "ja"
			}
		}
	-- Bagvalal
	elseif lang == "kva" then
		return {
			{
				 = "ğ",  = "h",  = "ˀ",  = "qʼ",  = "kkˡʼ",  = "kʼ",  = "lˢ",  = "ᵏll",  = "sʼ",  = "tʼ",  = "qx",  = "x̂",  = "ḥ",  = "cʼ",  = "čʼ",  = "šʼ"
			},
			{
				 = "a",  = "b",  = "v",  = "g",  = "d",  = "e",  = "jo",  = "ž",  = "z",  = "ı̇",  = "j",  = "k",  = "l",  = "m",  = "n",  = "o",  = "p",  = "r",  = "s",  = "t",  = "u",  = "f",  = "x",  = "c",  = "č",  = "š",  = "šš",  = "ʔ",  = "ə",  = "ʲ",  = "e",  = "ju",  = "ja"
			}
		}
	-- Bezhta
	elseif lang == "kap" then
		return {
			{
				 = "ğ",  = "h",  = "ʻ",  = "qxʼ",  = "kˡʼ",  = "kʼ",  = "lˢ",  = "ᵏll",  = "pʼ",  = "tʼ",  = "qx",  = "x̂",  = "ḥ",  = "cʼ",  = "čʼ"
			},
			{
				 = "a",  = "b",  = "v",  = "g",  = "d",  = "e",  = "jo",  = "ž",  = "z",  = "ı̇",  = "j",  = "k",  = "l",  = "m",  = "n",  = "o",  = "p",  = "r",  = "s",  = "t",  = "u",  = "f",  = "x",  = "c",  = "č",  = "š",  = "šš",  = "ʔ",  = "ə",  = "ʲ",  = "e",  = "ju",  = "ja"
			}
		}
	-- Botlikh
	elseif lang == "bph" then
		return {
			{
				 = "ğ",  = "h",  = "qˣʼ",  = "kkˡʼ",  = "kʼ",  = "lˢ",  = "ᵏll",  = "pʼ",  = "tʼ",  = "qˣ",  = "x̂",  = "cʼ",  = "čʼ"
			},
			{
				 = "a",  = "b",  = "w",  = "g",  = "d",  = "e",  = "jo",  = "ž",  = "z",  = "ı̇",  = "j",  = "k",  = "l",  = "m",  = "n",  = "o",  = "p",  = "r",  = "s",  = "t",  = "u",  = "f",  = "x",  = "c",  = "č",  = "š",  = "šš",  = "ˀ",  = "ə",  = "ʲ",  = "e",  = "ju",  = "ja"
			}
		}
	-- Budukh
	elseif lang == "bdk" then
		return {
			{
				 = "gʰ"
			},
			{
				 = "ğ",  = "h",  = "ʻ",  = "qq",  = "qʼ",  = "kʼ",  = "pʼ",  = "tʼ",  = "q",  = "x̂",  = "ḥ",  = "cʼ",  = "čʼ"
			},
			{
				 = "a",  = "b",  = "v",  = "g",  = "d",  = "e",  = "jo",  = "ž",  = "z",  = "ı̇",  = "j",  = "k",  = "l",  = "m",  = "n",  = "o",  = "p",  = "r",  = "s",  = "t",  = "u",  = "f",  = "x",  = "c",  = "č",  = "š",  = "šš",  = "ʔ",  = "ı",  = "ʲ",  = "e",  = "ju",  = "ja",  = "ˀ"
			}
		}
	-- Chamalal
	elseif lang == "cji" then
		return {
			{
				 = "kxʰʼ"
			},
			{
				 = "ğ",  = "h",  = "ʻ",  = "qxʼ",  = "kkˡʼ",  = "kʼ",  = "lˢ",  = "ᵏll",  = "pʼ",  = "sʼ",  = "tʼ",  = "qx",  = "x̂",  = "ḥ",  = "cʼ",  = "čʼ"
			},
			{
				 = "a",  = "b",  = "v",  = "g",  = "d",  = "e",  = "jo",  = "ž",  = "z",  = "ı̇",  = "j",  = "k",  = "l",  = "m",  = "n",  = "o",  = "p",  = "r",  = "s",  = "t",  = "u",  = "f",  = "x",  = "c",  = "č",  = "š",  = "šš",  = "ʔ",  = "ə",  = "ʲ",  = "e",  = "ju",  = "ja"
			}
		}
	-- Chechen and Ingush
	elseif lang == "ce" or lang == "inh" then
		return {
			{
				 = "qq",  = "rh"
			},
			{
				 = "ğ",  = "q",  = "qʼ",  = "kʼ",  = "pʼ",  = "tʼ",  = "ḥʳ",  = "h",  = "cʼ",  = "čʼ"
			},
			{
				 = "a",  = "b",  = "v",  = "g",  = "d",  = "e",  = "jo",  = "ž",  = "z",  = "ı̇",  = "j",  = "k",  = "l",  = "m",  = "n",  = "o",  = "p",  = "r",  = "s",  = "t",  = "u",  = "f",  = "x",  = "c",  = "č",  = "š",  = "šč",  = "ʔ",  = "y",  = "ʲ",  = "e",  = "ju",  = "ja",  = "ˀ"
			}
		}
	end
end

-- Replacement callback for gsub: builds a doubled vowel from three captures.
-- `vowel` is the matched vowel text, `acute` the optional acute mark and
-- `nasal` the optional nasal marker (an empty string when absent).
-- Returns the vowel with a combining tilde in place of a non-empty nasal
-- marker, then the acute, then a lowercased copy of the vowel-plus-tilde.
local function double_with_j(vowel, acute, nasal)
	local nasal_mark = nasal
	if nasal ~= "" then
		-- Any non-empty nasal marker is rendered as a combining tilde.
		nasal_mark = TILDE
	end
	local base = vowel .. nasal_mark
	return base .. acute .. lower(base)
end

-- Transliterates `text` using the substitution tables that getSubs returns for
-- the language code `lang`.
-- Returns nil when getSubs has no tables for `lang`; otherwise returns the
-- transliterated string, recomposed with toNFC.
-- `sc` is accepted as a parameter but is not read anywhere in this function.
-- NOTE(review): several pattern strings below are empty ("") or contain a bare
-- "()" or "??" where a character class would be expected. Bracketed text
-- appears to be missing from this copy; confirm each pattern against the
-- upstream module before relying on this function.
function export.tr(text, lang, sc)
	local subs = getSubs(lang)
	
	-- Unsupported language: signal failure to the caller.
	if not subs then
		return nil
	end
	
	-- Convert uppercase palochka to lowercase, along with any "false" palochkas (entered as Latin "I" or "l", Greek "Ι" or Cyrillic "І"). Lowercase palochka is found in tables above.
	-- NOTE(review): the pattern here is the empty string; presumably it was a character class listing those letters. Confirm.
	text = gsub(text, "", "ӏ")
	-- Convert dialectal nasal ᵸ written as Latin ᴴ.
	text = gsub(text, "ᴴ", "ᵸ")
	
	-- Decompose precomposed characters, except for ё and й.
	-- NOTE(review): the pattern is empty, so the stated exception for ё and й is not visible in the code. Confirm.
	text = gsub(text, "", toNFD)
	
	-- Substitute double consonants for macrons over consonants. Add a temporary breaking character after, to prevent the creation of false multigraphs with following characters.
	-- macronToDouble receives up to two captures; the three calls below go from the longest pattern to the shortest.
	local function macronToDouble(a, b) return a .. b .. lower(a) .. b .. br end
	text = gsub(text, "()" .. MACRON .. "()" .. MACRON, macronToDouble)
	text = gsub(text, "()" .. MACRON .. "(ӏ)" .. MACRON, macronToDouble)
	text = gsub(text, "()" .. MACRON, macronToDouble)
	
	-- Remove any double hard/soft signs or palochkas this creates.
	-- The callback drops the second capture when it equals the lowercased first capture.
	text = gsub(text, "()" .. "()", function(a, b) if b == lower(a) then return a else return a .. b end end)
	
	-- Contextual substitution of "j" before "е", "w" for "у" and ʷ for "в".
	-- Archi ("aqc") only gets the ʷ substitution; every other language gets the full set.
	if lang == "aqc" then
		text = gsub(text, "(" .. br .. "??" .. br .. "?)в", "%1ʷ")
	else
		text = gsub(gsub(text, "^е", "jе"), "^Е", "Jе")
		text = gsub(text, "(" .. MACRON .. "?ь?ӏ?ᵸ?)е", "%1jе")
		text = gsub(text, "()Е", "%1Jе")
		text = gsub(text, "у()", "w%1")
		text = gsub(text, "У()", "W%1")
		text = gsub(text, "(" .. MACRON .. "?ь?ӏ?ᵸ?)у", "%1w")
		text = gsub(text, "(" .. br .. "?)в", "%1ʷ")
	end
	
	-- Add "j" before iotated vowels, and substitute non-iotated equivalents.
	text = gsub(gsub(text, "ё", "jо"), "Ё", "Jо")
	text = gsub(gsub(text, "ю", "jу"), "Ю", "Jу")
	text = gsub(gsub(text, "я", "jа"), "Я", "Jа")
	
	-- Process vowel modifiers.
	-- Captures, in order: vowel, macron, acute, soft sign, second macron, palochka, nasal marker.
	-- A soft sign becomes a diaeresis and a nasal marker a tilde; a palochka becomes a dot below for Archi and is kept as-is (appended last) for other languages.
	-- A macron directly after the vowel doubles it; the second macron capture is not used.
	text = gsub(text, "()(" .. MACRON .. "?)(" .. ACUTE .. "?)(ь?)(" .. MACRON .. "?)(ӏ?)(ᵸ?)", function(vowel, macron1, acute, soft, macron2, palochka, nasal)
		local ret = vowel ..
			(soft ~= "" and DIAER or soft) ..
			(palochka ~= "" and lang == "aqc" and DOTBELOW or "") ..
			(nasal ~= "" and TILDE or nasal)
		if macron1 ~= "" then
			ret = ret .. acute .. lower(ret)
		else
			ret = ret .. acute
		end
		return ret .. (lang ~= "aqc" and palochka or "")
	end)
	
	-- Chechen and Ingush only: rewrite a vowel (optionally with acute) followed by й and an optional nasal marker through double_with_j.
	if lang == "ce" or lang == "inh" then
		text = gsub(text, "()(" .. ACUTE .. "?" .. ")й(ᵸ?)", double_with_j)
		text = gsub(text, "(" .. DIAER .. ")(" .. ACUTE .. "?" .. ")й(ᵸ?)", double_with_j)
	end
	
	-- Apply language-specific substitutions by iterating over each subtable. For each one, create a temporary table that stores each substitution in lowercase and uppercase variants. Then, iterate over all substitutions.
	for _,i in ipairs(subs) do
		local t = {}
		-- Create a temporary table, then iterate over all of them.
		-- NOTE(review): the assignments below overwrite `t` itself with a string, so the pairs(t) loop afterwards would be handed a string rather than a table. Presumably these were indexed assignments (t[...] = ...); confirm.
		for k, v in pairs(i) do
			t = v
			-- "ʔ" is given the explicit capital "Ɂ" rather than going through upper.
			if v == "ʔ" then
				t = gsub(v, "^.", "Ɂ")
			else
				t = gsub(v, "^.", upper)
			end
		end
		-- Note that this call is the string-method gsub, not the module-level gsub alias used elsewhere in this function.
		for letter, replacement in pairs(t) do
			text = text:gsub(letter, replacement)
		end
	end
	
	-- Reposition apostrophes, remove temporary breaking characters, then decompose.
	text = toNFD(gsub(gsub(gsub(text, "ʼʲ", "ʲʼ"), "ʼʷ", "ʷʼ"), br, ""))
	
	-- When double letters both have a modifier letter and/or an apostrophe, only show on the first or second for readability purposes.
	-- NOTE(review): string.gmatch with ".*" matches the whole string at once rather than one letter at a time; the per-letter pattern was presumably lost. Confirm.
	for letter in string.gmatch("abcdefghijklmnopqrstuvwxyzəɣıʔABCDEFGHIJKLMNOPQRSTUVWXYZƏƔɁʻˀ", ".*") do
		text = gsub(text, "(ᵏ?)" .. letter .. "(" .. accent .. "?" .. accent .. "?" .. accent .. "?)(??ʼ?)" .. "%1" .. lower(letter) .. "%2%3", "%1" .. letter .. "%2" .. lower(letter) .. "%2%3")
	end
	
	-- Remove consecutive j/ʲ and w/ʷ.
	text = gsub(gsub(text, "ʲ?()ʲ?", "%1"), "ʷ?()ʷ?", "%1")
	
	-- Substitute i for dotted dotless i if not followed by an acute or tilde, then recompose.
	-- The first gsub handles the sequence in mid-string, the second handles it at the very end of the text.
	return toNFC(gsub(gsub(text, "ı" .. "(" .. DOTBELOW .. "?)" .. DOTABOVE .. "()", "i%1%2"), "ı" .. "(" .. DOTBELOW .. "?)" .. DOTABOVE .. "$", "i%1"))
end

return export