Module:sdh-translit

Hello, you have come here looking for the meaning of the word Module:sdh-translit. In DICTIOUS you will not only get to know all the dictionary meanings for the word Module:sdh-translit, but we will also tell you about its etymology and its characteristics, and you will learn how to say Module:sdh-translit in singular and plural. Everything you need to know about the word Module:sdh-translit is here. The definition of the word Module:sdh-translit will help you to be more precise and correct when speaking or writing your texts. Knowing the definition of Module:sdh-translit, as well as those of other words, enriches your vocabulary and provides you with more and better linguistic resources.

This module will transliterate Southern Kurdish language text. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:sdh-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the transliteration fails, returns nil.

-- Authors: JavaScript ئاسۆ; Lua Ghybu, Calak

local export = {}

local gsub = mw.ustring.gsub
local U = require("Module:string/char")

-- Single-character romanisation table, applied by tr_word through
-- gsub(word, '.', mapping): every code point that appears as a key is replaced
-- by its value, everything else is left untouched.
--
-- Context-sensitive letters are rewritten by tr_word *before* this table is
-- consulted (e.g. non-initial 'ڕ' => "rr", non-initial 'ئ' => "'", and all
-- 'و' / 'ی'), so the entries here only cover what is left over.
--
-- NOTE(review): the bracketed keys were missing from this table (leaving an
-- invalid constructor); they have been restored from the romanised values and
-- the accompanying comments. Please confirm against the upstream module,
-- in particular which code points are used for "h" (U+0647 / U+06BE) and
-- "k" (U+06A9).
local mapping = {
	["ا"] = "a", ["ب"] = "b", ["چ"] = "ç", ["ج"] = "c", ["د"] = "d", ["ە"] = "e", ["ێ"] = "ê", ["ف"] = "f", ["گ"] = "g",
	["ه"] = "h", ["ھ"] = "h", ["ح"] = "ḧ", ["ژ"] = "j", ["ک"] = "k", ["ڵ"] = "ll", ["ل"] = "l", ["م"] = "m", ["ن"] = "n",
	["ۆ"] = "o", ["پ"] = "p", ["ق"] = "q", ["ر"] = "r", ["ڕ"] = "r", ["س"] = "s", ["ش"] = "ş", ["ت"] = "t", ["ۊ"] = "ü",
	["ڤ"] = "v", ["خ"] = "x", ["غ"] = "ẍ", ["ز"] = "z", ["ئ"] = "", ["ع"] = "'",

	["\226\128\140"] = "", -- U+200C ZWNJ (zero-width non-joiner), written as UTF-8 bytes because it is invisible
	["ـ"] = "", -- U+0640 kashida, no sound

	-- numerals (Arabic-Indic digits, U+0660..U+0669)
	["١"] = "1", ["٢"] = "2", ["٣"] = "3", ["٤"] = "4", ["٥"] = "5",
	["٦"] = "6", ["٧"] = "7", ["٨"] = "8", ["٩"] = "9", ["٠"] = "0",
	-- persian variants to numerals (Extended Arabic-Indic digits, U+06F0..U+06F9)
	["۱"] = "1", ["۲"] = "2", ["۳"] = "3", ["۴"] = "4", ["۵"] = "5",
	["۶"] = "6", ["۷"] = "7", ["۸"] = "8", ["۹"] = "9", ["۰"] = "0",
}

	-- punctuation (leave on separate lines)
-- Arabic-script punctuation => Latin-script punctuation. Applied by tr_word
-- through gsub(word, '.', punctuation) as its very first step, so characters
-- that are not keys of this table pass through unchanged.
--
-- NOTE(review): the bracketed keys were missing from this table (leaving an
-- invalid constructor); they have been restored from the per-line comments
-- and the replacement values. Please confirm against the upstream module.
local punctuation = {
	["؟"] = "?", -- question mark (U+061F)
	["،"] = ",", -- comma (U+060C)
	["؛"] = ";", -- semicolon (U+061B)
	["«"] = '“', -- quotation mark
	["»"] = '”', -- quotation mark
	["٪"] = "%", -- percent (U+066A)
	["؉"] = "‰", -- per mille (U+0609)
	["٫"] = ".", -- decimals (U+066B)
	["٬"] = ",", -- thousand (U+066C)
}

-- translit
-- Transliterates one word (a single whitespace-delimited token).
--
-- The steps run in a fixed order and each one depends on the output of the
-- previous ones:
--   1. map punctuation characters through the `punctuation` table;
--   2. set aside trailing punctuation in `ponct`;
--   3. normalise heh + ZWNJ to the letter ae, and turn sukun/kasra into "i";
--   4. resolve 'و' and 'ی' (and 'ۊ' / 'ێ') to a vowel or a semivowel,
--      depending on their neighbours;
--   5. map every remaining character through the `mapping` table;
--   6. insert "i" into the resulting Latin string where the rules below apply;
--   7. re-append the punctuation saved in step 2.
--
-- @param word string  the word to transliterate
-- @return string      the transliterated word
--
-- NOTE(review): many patterns in this function consist only of anchors and
-- empty `()` captures (e.g. '$', '()()()()'). In Lua patterns an empty capture
-- is a position capture, so these do not test any characters. It looks as if
-- bracketed character classes (punctuation, vowel and consonant sets) were
-- lost from the patterns; they need to be restored from the upstream module
-- before this function can behave as its comments describe. The code below is
-- deliberately left untouched.
local function tr_word(word)
	
    word = gsub(word, '.', punctuation)
		
	-- Detach punctuation at the end of the word so it does not take part in
	-- the letter rules; it is appended again just before returning.
	-- NOTE(review): as written the pattern is only the end anchor '$', which
	-- matches at the end of any word; presumably a punctuation character
	-- class preceded it -- confirm.
	local ponct
	if mw.ustring.find(word, '$') then
	   ponct =  mw.ustring.sub(word, -1)
	   word = gsub(word, '$', '')
	else
	   word = word
	   ponct = ''
	end
	
	word = gsub(word, 'ه‌', "ە") --correct unicode for letter ە
	-- U+0647 (Arabic letter heh) + U+200C (zero-width non-joiner) → U+06D5 (Arabic letter ae)
	
	-- diacritics: both are rendered as "i"
	word = gsub(word, 'ْ', "i") -- U+0652, Arabic sukun
	word = gsub(word, 'ِ', "i") -- U+0650, Arabic kasra
	 
	-- Managing 'و' and 'ی': semivowel (w / y) next to a vowel, vowel otherwise.
	-- NOTE(review): the "vowel" / "non-letter" captures below are empty `()`;
	-- see the note at the top of the function.
	word = gsub(word, 'و()', "w%1") --و + vowel => w (e.g. wan)
	word = gsub(word, 'ی()', "y%1") --ی + vowel => y (e.g. yas)
	word = gsub(word, '()و', "%1w") --vowel + و => w (e.g. kew)
	word = gsub(word, '()ی', "%1y") --vowel + ی => y (e.g. bey)
	word = gsub(word, '()ۊ', "%1ẅ") --vowel + ۊ => ẅ (e.g. taẅ)
	word = gsub(word, '()ێ', "%1ÿ") --vowel + ێ => ÿ (e.g.  şeÿtan)
	word = gsub(word, '^و$', "û") --a word consisting only of 'و' => û (=and)
	
	word = gsub(word, '()و', "%1w") --non-letter + 'و' => w (e.g. wetar)
	word = gsub(word, '^و', "w") --first 'و' => w (e.g. wetar)
	word = gsub(word, 'یو', "îw") --'ی' + 'و' => îw (e.g. mîwe)
	word = gsub(word, '()یی', "%1îy") --'ی' + 'ی' => îy (e.g. kanîy)
	word = gsub(word, 'وی', "uy") --'و' + 'ی' => uy (e.g. buyn)
	word = gsub(word, 'وو', "û") --'و' + 'و' => û (e.g. nû)
	-- Any 'ی' / 'و' still left at this point is a plain vowel.
	word = gsub(word, 'ی', "î")
	word = gsub(word, 'و', "u")
	word = gsub(word, 'uu', "û") --'و' + 'و' => û (e.g. nû)
	word = gsub(word, '()ڕ', "%1rr") --when 'ڕ' not at the beginning of a word => rr
	word = gsub(word, '()ئ', "%1'") --when 'ئ' not at the beginning of a word => '
	
	-- Every remaining character that is a key of `mapping` is replaced by its
	-- romanisation; characters already converted above are left as they are.
	word = gsub(word, '.', mapping)
	
	--insert i where applicable
	-- The digraphs are collapsed to single placeholder letters first so that
	-- the cluster rules below treat each of them as one letter.
	word = gsub(word, 'll', "Ľ") -- temporary conversion to avoid seeing ll as 2 letters
	word = gsub(word, 'rr', "Ŕ") -- temporary conversion to avoid seeing rr as 2 letters
	
	-- NOTE(review): every capture in the rules below is an empty `()`; the
	-- letter classes that decide where "i" goes are not visible here.
	word = gsub(word, '()()()()', "%1%2i%3%4") --e.g. grft -> grift
	word = gsub(word, '()()()()$', "%1%2%3i%4") --e.g. cejnt -> cejnit
	word = gsub(word, '()()()', "%1i%2%3") --e.g. wrd -> wird
	
	word = gsub(word, '()()()', "%1i%2%3") --e.g. prd -> pird
	word = gsub(word, '()()$', "%1i%2") --like above
	
	word = gsub(word, '()()()()', "%1%2i%3%4") --repeat the latter expression, in case skipped
	word = gsub(word, '()()()$', "%1%2i%3") --repeat the latter expression, in case skipped

	word = gsub(word, '^()()()', "%1i%2%3") --e.g. ktk -> kitk
	word = gsub(word, '^()()$', "%1i%2") --e.g. ktk -> kitk
	word = gsub(word, '()()()()', "%1%2i%3%4") --e.g. ktk -> kitk
	word = gsub(word, '()()()$', "%1%2i%3") --e.g. ktk -> kitk

	word = gsub(word, '()()$', "%1%2i") --e.g. j -> ji
	word = gsub(word, '^()$', "%1i") --e.g. j -> ji	


	-- Disabled rules, kept for reference.
	--word = gsub(word, '()()()', "%1%2i%3") --e.g. bra -> bira
	--word = gsub(word, '^()()', "%1i%2") --e.g. bra -> bira

	--word = gsub(word, '()()', "%1i%2") --e.g. aşkra -> aşkira
	
	--word = gsub(word, 'si()', "s%1") -- sp, st cluster
	
	word = gsub(word, 'Ľ', "ll") --revert the temporary conversion
	word = gsub(word, 'Ŕ', "rr") --revert the temporary conversion

    -- Re-append the punctuation that was detached at the start.
    word = word .. ponct
    
	return word
end

--- Transliterates a piece of text word by word.
-- The text is split on runs of whitespace, every resulting word is passed
-- through tr_word, and the results are joined with single spaces.
-- @param text string  the text to transliterate
-- @param lang         language code; accepted for interface compatibility, not used here
-- @param sc           script code; accepted for interface compatibility, not used here
-- @return string      the transliterated text
function export.tr(text, lang, sc)
	-- mw.text.split already returns a fresh table, so it can be updated in place.
	local words = mw.text.split(text, '%s+')

	-- Replace each word by its transliteration. The result must be stored
	-- back under the word's index: assigning to the table variable itself
	-- would replace the whole list with a string and make table.concat fail.
	for index, word in ipairs(words) do
		words[index] = tr_word(word)
	end

	return table.concat(words, ' ')
end

return export