Module:ks-Arab-translit

Hello, you have come here looking for the meaning of the word Module:ks-Arab-translit. In DICTIOUS you will not only get to know all the dictionary meanings for the word Module:ks-Arab-translit, but we will also tell you about its etymology, its characteristics and you will know how to say Module:ks-Arab-translit in singular and plural. Everything you need to know about the word Module:ks-Arab-translit you have here. The definition of the word Module:ks-Arab-translit will help you to be more precise and correct when speaking or writing your texts. Knowing the definition of Module:ks-Arab-translit, as well as those of other words, enriches your vocabulary and provides you with more and better linguistic resources.

This module will transliterate text in the Arabic script. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:ks-Arab-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the transliteration fails, returns nil.

-- TODO: long í support
-- TODO: sort out short e vs palatalisation
-- TODO: add rule for CẹC = CyaC

local u = require("Module:string/char")
local gsub = mw.ustring.gsub

-- Table returned at the end of the module; public functions are attached to it.
local export = {}

-- Forward declaration: the function body is assigned near the end of the file,
-- after export.tr, which calls it.
local invalid_vowel_combination

-- Base letters that can act as vowel carriers.
local vav = 'و'
local ye = 'ی'
local alif = 'ا'
local he = 'ہ'
local docheshm = u(0x06BE) -- U+06BE ARABIC LETTER HEH DOACHASHMEE

-- Consonants that receive special handling in export.tr.
local re = 'ر'
local nun = 'ن'

-- Short vowel diacritics.
local vw_s_cfu = u(0x0650) -- U+0650 ARABIC KASRA: zer (i)
local vw_s_ccu = u(0x0655) -- U+0655 ARABIC HAMZA BELOW
local vw_s_cbr = u(0x064F) -- U+064F ARABIC DAMMA: pesh (u)
local vw_s_mcu = u(0x0654) -- U+0654 ARABIC HAMZA ABOVE
local vw_s_ocu = u(0x064E) -- U+064E ARABIC FATHA: zabar (a)

-- Long vowel diacritics (combined with a carrier letter further down).
local vw_l_cbr = u(0x0657) -- U+0657 ARABIC INVERTED DAMMA: inverted pesh
local vw_l_cfu = u(0x0656) -- U+0656 ARABIC SUBSCRIPT ALEF

local hat = u(0x065A) -- U+065A ARABIC VOWEL SIGN SMALL V ABOVE
local inverted_hat = u(0x065B) -- U+065B ARABIC VOWEL SIGN INVERTED SMALL V ABOVE
-- Both hat marks concatenated into one string.
-- NOTE(review): not referenced in the visible text of this file; presumably
-- meant to be used inside a pattern character class -- confirm.
local hats = hat .. inverted_hat

-- All five short vowel diacritics concatenated into one string.
-- NOTE(review): like the other *_list / *_carrier strings below, this is not
-- referenced in the visible text of this file; presumably it is meant to be
-- spliced into pattern character classes -- confirm.
local short_vowels_list = vw_s_cfu .. vw_s_ccu .. vw_s_cbr .. vw_s_mcu .. vw_s_ocu 

-- carrier + diacritic combos
local long_u = vav .. vw_l_cbr
local short_o = vav .. inverted_hat
local long_i = ye .. vw_l_cfu
local short_ye = ye .. inverted_hat

-- Concatenations of the carrier combos above plus further literal characters.
-- NOTE(review): standalone_carrier lists 'ۆ' twice; that is redundant if the
-- string is only ever used as a character set -- confirm the intended use.
local vocalised_carrier = long_u .. short_o .. long_i .. short_ye .. 'ێ' .. 'ۆ' .. 'اٟ' .. 'ۄ'
local standalone_carrier = long_u .. short_o .. long_i .. short_ye .. 'ۆ' .. 'ێ' .. 'ۆ' .. 'اٟ' .. 'ۄ'
local palatalisers = "ۍؠ"

-- Arabic-script letters treated as consonants, and a string of Latin letters.
-- NOTE(review): consonants_latn is not referenced in the visible text of this
-- file -- confirm whether it is still needed.
local consonants = "بپتٹجچدڈرڑزژسشفکگلمنهھےثحخذصضطظعغقۍۄیٲآ"
local consonants_latn = "bptṭsjchxdḍzrċsśzʿġfqkglmnhv"

-- Lookup table used by export.tr as a gsub replacement table, mapping source
-- characters to their Latin transliteration.
-- NOTE(review): every entry below is missing its key (the text to the left of
-- `=`), so this table is not valid Lua as shown. The keys were presumably of
-- the form ['<Arabic character>'] and appear to have been lost when this copy
-- of the module was extracted -- restore them from the upstream module before
-- using this file.
local conv = {
	-- consonants
	 = 'b',  = 'p',  = 't',  = 'ṭ',  = 's',
	 = 'j',  = 'c',  = 'h',  = 'kh',
	 = 'd',  = 'ḍ',  = 'z',
	 = 'r',  = 'ḍ',  = 'z',  = 'ċ',
	 = 's',  = 'ś',  = 's',  = 'z',
	 = 't',  = 'z',
	 = 'ʿ',  = 'ġ',
	 = 'f',  = 'q',
	 = 'k',  = 'g',
	 = 'l',  = 'm',  = 'n',
	 = 'h',  = 'h',

    -- Why is this separate?
	 = 'kh',
	
	-- always word-final
	 = 'y', 

    -- short e to be treated separately
	
	-- incorrect palatalisation marker
	 = '\'',

	-- broken/open vowels 
    -- confirm if there are other use cases for these two
	 = 'ọ',  = 'ẹ', -- optionally ẹ = ya or used at the end to indicate palatalisation
	
	-- a carries long vowels
	 = 'ạ̄',  = 'ā',

     = 'ī',  = 'ụ̄',

    -- vowels
     = 'o',  = 'e',  = 'ạ',
	
	-- numerals
	 = '0',  = '1',  = '2',  = '3',  = '4',  = '5',  = '6',  = '7',  = '8',  = '9',

}

-- Lookup table used by export.tr as a gsub replacement table for short vowel
-- diacritics.
-- NOTE(review): the keys of all entries are missing in this copy (presumably
-- ['<diacritic>'] keys lost in extraction), so the table is not valid Lua as
-- shown -- restore them from the upstream module.
local short_vowels = {
	-- high vowels
	 = 'i',  = 'ụ',  = 'u',  = 'ụ̄',
	
	-- central vowels
	 = 'ạ',
	
	-- low vowels
	 = 'a',
}

-- Replacement table applied by export.tr after the main conversion pass.
-- NOTE(review): the key of the single entry is missing in this copy
-- (presumably lost in extraction) -- restore it from the upstream module.
local hardcoded_conv = {
     = 'vạ',
}

-- NOTE(review): `alif` and `ye` are declared a second time here; they appear
-- to hold the same characters as the declarations near the top of the file,
-- so these lines only shadow the earlier locals. `waw` is not referenced
-- anywhere in the visible text of this file (the rest of the code uses `vav`).
-- Confirm and consider removing all three.
local alif = 'ا'
local waw = 'و'
local ye = 'ی'

-- Transliterates `text` by applying an ordered sequence of gsub rewrites.
--
-- Parameters:
--   text  the string to transliterate
--   lang  language code; not used in the body
--   sc    script code; not used in the body
-- Returns the rewritten string, or nil when invalid_vowel_combination(text)
-- returns a truthy value.
--
-- The rewrites are order-dependent: later steps operate on the output of
-- earlier ones, and the generic table lookups run only after the
-- context-sensitive rules.
--
-- NOTE(review): many patterns below contain empty `()` groups or empty string
-- literals ("" / '') where a character class was presumably intended (for
-- example a consonant or short-vowel set). The bracketed parts appear to have
-- been lost when this copy was extracted -- restore them from the upstream
-- module; as written these patterns do not do what the comments describe.
function export.tr(text, lang, sc)
    if invalid_vowel_combination(text) then
--		require("Module:debug").track("ks-Arab-translit/invalid vowel combination")
		return nil
	end

    -- NOTE(review): `%f` must be followed by a character set in a Lua pattern;
    -- the set is missing here -- confirm the intended frontier class.
    text = gsub(text, '(%f)', '\'')

    -- short e at the end of words is ē with V sign
    text = gsub(text, 'ے' .. hat, "e")

    -- ye with inverted hat is /j/
    -- ? always occurs after a consonant
    text = gsub(text, "()" .. ye .. inverted_hat .. "()", "%1y%2")
    text = gsub(text, "()" .. short_ye .. alif, "%1yā")
    text = gsub(text, "()" .. short_ye, "%1y")

    -- nun with hat
    -- NOTE(review): the hat part of the pattern is an empty string here, so
    -- this replaces nun with nun -- presumably a hat class was intended.
    text = gsub(text, nun .. "", nun)

    -- ye with hat = short e 
    -- only when not final?
    text = gsub(text, ye .. "", "e")

    -- re with inverted hat followed by short vowel is re + short vowel
--    text = gsub(text, "()" .. re .. inverted_hat .. "()", "%1r%2")

    -- re with inverted hat is re
    text = gsub(text, re .. inverted_hat, re)

    -- interconsonantal vav is a long ō sound
    text = gsub(text,
        '(' .. docheshm .. '?)' .. vav .. '()',
        "%1ō%2")

    -- intervocalic alif is a long a sound
	text = gsub(text, '()' .. alif .. '()', "%1ā%2")

    -- consonant + short vowel + he disregards the he and transliterates the vowel
    text = gsub(text, '()()' .. he .. '(%s)', "%1%2%3")
    -- NOTE(review): `^` is only an anchor at the start of a Lua pattern; at the
    -- end it matches a literal caret. Presumably `$` (end of text) was
    -- intended -- confirm.
    text = gsub(text, '()()' .. he .. '^', "%1%2")

    -- final he + short vowel disregards the he and transliterates the vowel
    -- should return NIL 
    text = gsub(text, he .. '()$', short_vowels)

    -- word-initial alif + vowelled carrier drops the alif
    text = gsub(text, '^' .. alif .. '()', "%1")

    -- word-initial alif + short vowel diacritic drops the alif
    text = gsub(text, '^' .. alif .. '()', "%1")

    -- the same two rules for words that follow whitespace
    -- word-initial alif + vowelled carrier drops the alif
    text = gsub(text, '(%s)' .. alif .. '()', "%1%2")

    -- word-initial alif + short vowel diacritic drops the alif
    text = gsub(text, '(%s)' .. alif .. '()', "%1%2")

    -- re with inverted hat is just re
    -- (repeats the substitution already made earlier in this function)
    text = gsub(text, re .. inverted_hat, re)
	
    -- long /u:/ and /i:/
    text = gsub(text, vav .. vw_s_cbr .. vav .. "()", vav .. "ū%1")
    text = gsub(text, "()" .. vw_s_cfu .. ye .. "()", "%1ī%2")

    -- vav with hat = short o
    text = gsub(text, vav .. "", "o")

    -- vav with short vowel
    -- NOTE(review): `short_vowels` is a table, so concatenating it to a string
    -- raises an error in Lua; presumably `short_vowels[c]` was intended --
    -- confirm.
    text = gsub(text,
        vav .. "()",
        function(c)
            return "v" .. short_vowels
        end)

    -- vav with inverted pish = long u
    text = gsub(text, long_u, "ū")

    -- long i
    text = gsub(text, ye .. vw_l_cfu, 'ī') 

    -- intervocalic ye is a long e sound
    text = gsub(text, '()' .. ye .. '()', "%1ē%2")

    -- word-final alif and ye
    text = gsub(text, '()' .. ye .. '$', "%1ī")
    text = gsub(text, '()' .. alif .. '$', "%1ā")

    -- regard the consonant + short vowel combinations throughout
    -- each single character is looked up in short_vowels; characters without
    -- an entry are left unchanged
	text = gsub(text, '.', short_vowels)

	-- main conversion pass through the conv table
	text = gsub(text, '', conv)
	
	-- normal consonants left over
	text = gsub(text, vav, 'v')
	text = gsub(text, 'ہ', 'h')
    text = gsub(text, "ی", "y")

    -- hardcoded consonants left over
    -- Seems to be auto-subbed?
    text = gsub(text, '', hardcoded_conv)

	-- CẹC = CyaC
    text = gsub(text, "()ẹ()", "%1ya%2")
	
	return text
end

-- Pre-check run by export.tr before transliterating.
--
-- Removes every match of the pattern below from `text` and then:
--   * returns nil when at least one match was removed;
--   * otherwise returns true if the remaining text has zero length, and
--     false if it does not.
-- export.tr returns nil (transliteration failure) when this result is truthy.
--
-- NOTE(review): the pattern is an empty string in this copy; presumably it was
-- a pair of character classes (vowel carrier followed by a vowel diacritic)
-- that was lost in extraction -- restore it from the upstream module.
-- NOTE(review): returning nil (falsy) when matches ARE found means export.tr
-- does not reject such input -- confirm whether that is intended.
function invalid_vowel_combination(text)
    -- if a vowel carrier or a standalone vowel has another vowel on top it should not
    local stripped, count = gsub(text, "", "")
    if count > 0 then
        return nil
    end
    return #stripped == 0
end

return export