Module:uby-translit

Hello, you have come here looking for the meaning of the word Module:uby-translit. In DICTIOUS you will not only get to know all the dictionary meanings for the word Module:uby-translit, but we will also tell you about its etymology, its characteristics and you will know how to say Module:uby-translit in singular and plural. Everything you need to know about the word Module:uby-translit you have here. The definition of the word Module:uby-translit will help you to be more precise and correct when speaking or writing your texts. Knowing the definition of Module:uby-translit, as well as those of other words, enriches your vocabulary and provides you with more and better linguistic resources.

This module will transliterate Ubykh language text per WT:UBY TR. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:uby-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the transliteration fails, returns nil.

local u = require("Module:string/char")

-- Table of functions returned by this module.
local export = {}

-- Combining diacritics, built from their code points:
-- U+0300 grave, U+0301 acute, U+0302 circumflex, U+0306 breve,
-- U+030C caron, U+0323 dot below.
local GRAVE, ACUTE, CIRC, BREVE, CARON, DOTBELOW = u(0x300), u(0x301), u(0x302), u(0x306), u(0x30C), u(0x323)

-- Pattern character class matching any single one of the combining marks
-- above. export.tr splices it into a pattern as `accent .. "?"`, so it has to
-- be exactly one pattern item; an empty string there would make the "?"
-- quantifiers be read as literal question marks instead.
-- NOTE(review): the class contents are reconstructed from the six otherwise
-- unused constants on the previous line; confirm against the canonical module.
local accent = "[" .. GRAVE .. ACUTE .. CIRC .. BREVE .. CARON .. DOTBELOW .. "]"

-- Per-character transliteration table. export.tr passes it to string.gsub as
-- the replacement table, so each matched character is looked up here as a key
-- and replaced by the Latin value. The first row holds the lowercase outputs,
-- the second row the capitalised ones.
-- NOTE(review): every entry below has lost its `["…"]` key (the bracketed part
-- appears to have been stripped when this text was extracted), so the
-- constructor is not valid Lua as it stands. The keys cannot be recovered from
-- the values alone (several values occur twice); restore them from the
-- canonical module source before using this table.
local tt = {
	 = "a",  = "b",  = "v",  = "g",  = "ğ",  = "ğ",  = "d",  = "e",  = "jo",  = "ẑ",  = "z",  = "ź",  = "dz",  = "i",  = "j",  = "kʼ",  = "k",  = "qʼ",  = "q",  = "l",  = "lˢ",  = "m",  = "n",  = "o",  = "pʼ",  = "p",  = "p",  = "r",  = "s",  = "ś",  = "tʼ",  = "t",  = "u",  = "f",  = "x",  = "h",  = "c",  = "cʼ",  = "č",  = "čʼ",  = "ĉ",  = "ĉʼ",  = "ŝ",  = "ə",  = "jʷ",  = "dẑ",  = "ʲ",  = "ʷ",  = "e",  = "ju",  = "ja",  = "ʔ",
	 = "A",  = "B",  = "V",  = "G",  = "Ğ",  = "Ğ",  = "D",  = "E",  = "Jo",  = "Ẑ",  = "Z",  = "Ź",  = "Dz",  = "I",  = "J",  = "Kʼ",  = "K",  = "Qʼ",  = "Q",  = "L",  = "Lˢ",  = "M",  = "N",  = "O",  = "Pʼ",  = "P",  = "P",  = "R",  = "S",  = "Ś",  = "Tʼ",  = "T",  = "U",  = "F",  = "X",  = "H",  = "C",  = "Cʼ",  = "Č",  = "Čʼ",  = "Ĉ",  = "Ĉʼ",  = "Ŝ",  = "Ə",  = "Jʷ",  = "Dẑ",  = "ʲ",  = "ʷ",  = "E",  = "Ju",  = "Ja",  = "Ɂ"
}

-- Multi-character sequences that export.tr replaces (via string.gsub, one
-- pass per entry, iterated with pairs) before the per-character lookup runs.
-- Each key is used directly as a Lua pattern and each value as the replacement.
-- The first row holds the lowercase outputs, the second row the capitalised ones.
-- NOTE(review): every entry below has lost its `["…"]` key (the bracketed part
-- appears to have been stripped when this text was extracted), so the
-- constructor is not valid Lua as it stands; restore the keys from the
-- canonical module source.
-- NOTE(review): pairs() visits entries in unspecified order; if any key is a
-- prefix of another key the result would depend on that order — verify once
-- the keys are restored.
local digraphs = {
	 = "ḅ",  = "ṿ",  = "ğ̣",  = "ğ̣",  = "ž",  = "dź",  = "dź",  = "q̣ʼ",  = "q̣",  = "lˢʼ",  = "ṃ",  = "p̣ʼ",  = "p̣",  = "fʼ",  = "x̣",  = "ć",  = "ć",  = "ćʼ",  = "ćʼ",  = "š",  = "dž",
	 = "Ḅ",  = "Ṿ",  = "Ğ̣",  = "Ğ̣",  = "Ž",  = "Dź",  = "Dź",  = "Q̣ʼ",  = "Q̣",  = "Lˢʼ",  = "Ṃ",  = "P̣ʼ",  = "P̣",  = "Fʼ",  = "X̣",  = "Ć",  = "Ć",  = "Ćʼ",  = "Ćʼ",  = "Š",  = "Dž"
}

--- Transliterates a piece of text into Latin script.
-- Pipeline: normalise palochkas, apply contextual w/j substitutions, replace
-- multi-character sequences from `digraphs`, replace the remaining characters
-- one at a time from `tt`, then tidy up modifier letters and apostrophes.
-- @param text  the string to transliterate
-- @param lang  language code; part of the interface but not read by this function
-- @param sc    script code; part of the interface but not read by this function
-- @return the transliterated string in NFC form
function export.tr(text, lang, sc)
	-- One UTF-8 encoded character: a lead byte (ASCII or 0xC2-0xF4) followed by
	-- any continuation bytes (0x80-0xBF). Needed because string.gsub and
	-- string.gmatch work on bytes, while `tt` must be looked up per character.
	-- (The bracketed parts had been lost, leaving only "*".)
	local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*"

	-- Convert uppercase palochka to lowercase, along with any "false" palochkas
	-- (entered as Latin "I" or "l", Greek "Ι" or Cyrillic "І"). Lowercase
	-- palochka is what the lookup tables expect.
	-- The look-alikes are spelled by code point so they cannot be confused:
	-- U+04C0 uppercase palochka, Latin I, Latin l, U+0399 Greek capital iota,
	-- U+0406 Cyrillic capital І. (The class had been lost, leaving an empty
	-- pattern, which matches at every position.)
	local palochka_like = "[" .. u(0x4C0) .. "Il" .. u(0x399) .. u(0x406) .. "]"
	text = mw.ustring.gsub(text, palochka_like, "ӏ")

	-- Contextual substitution of "w" for "у", "j" for "и" and "j" before "е".
	-- NOTE: These break with string.gsub, so must use mw.ustring.gsub.
	-- NOTE(review): each "()" below originally held a character class describing
	-- the required neighbouring letters; that class has been lost and cannot be
	-- recovered from this file. As written, "()" is a position capture, so %1
	-- expands to a number. These lines are deliberately left untouched rather
	-- than guessed at; restore the classes from the canonical module source.
	text = mw.ustring.gsub(text, "у()", "w%1")
	text = mw.ustring.gsub(text, "У()", "W%1")
	text = mw.ustring.gsub(text, "()у", "%1w")
	text = mw.ustring.gsub(text, "и()", "j%1")
	text = mw.ustring.gsub(text, "И()", "J%1")
	text = mw.ustring.gsub(text, "()и", "%1j")
	text = mw.ustring.gsub(text, "()е", "%1jе")

	-- Multi-character sequences first, so their parts are not transliterated
	-- individually by the per-character pass below.
	for digraph, replacement in pairs(digraphs) do
		text = string.gsub(text, digraph, replacement)
	end

	-- Per-character lookup; characters with no entry in `tt` are kept as-is
	-- (string.gsub keeps the match when the table lookup yields nil).
	text = string.gsub(text, UTF8_char, tt)

	-- Reposition apostrophes so they follow ʲ/ʷ, then decompose so that
	-- combining marks become separate code points for the matching below.
	text = mw.ustring.gsub(text, "ʼʲ", "ʲʼ")
	text = mw.ustring.gsub(text, "ʼʷ", "ʷʼ")
	text = mw.ustring.toNFD(text)

	-- When double letters both have a modifier letter and/or an apostrophe,
	-- only show it on the second for readability purposes.
	-- NOTE(review): the two modifier-letter classes were lost; "[ʲʷ]" is
	-- reconstructed from the modifiers this function handles elsewhere —
	-- confirm whether further modifier letters (e.g. "ˢ") belong in it.
	local modifiers = "([ʲʷ]?[ʲʷ]?ʼ?)"
	for letter in string.gmatch("abcdefghijklmnopqrstuvxzəʔABCDEFGHIJKLMNOPQRSTUVXZƏɁ", UTF8_char) do
		local lower = mw.ustring.lower(letter)
		text = mw.ustring.gsub(
			text,
			letter .. "(" .. accent .. "?" .. accent .. "?)" .. modifiers .. lower .. "%1%2",
			letter .. "%1" .. lower .. "%1%2"
		)
	end

	-- Remove consecutive j/ʲ and w/ʷ, then recompose.
	-- NOTE(review): the captured classes were lost; "[jʲ]" and "[wʷ]" are
	-- reconstructed from the comment above — confirm whether uppercase J/W
	-- were also included.
	text = mw.ustring.gsub(text, "ʲ?([jʲ])ʲ?", "%1")
	text = mw.ustring.gsub(text, "ʷ?([wʷ])ʷ?", "%1")
	return mw.ustring.toNFC(text)
end

return export