Module:User:Sinonquoi/ks-pa-translit-2

Hello, you have come here looking for the meaning of the word Module:User:Sinonquoi/ks-pa-translit-2. In DICTIOUS you will not only get to know all the dictionary meanings for the word Module:User:Sinonquoi/ks-pa-translit-2, but we will also tell you about its etymology, its characteristics and you will know how to say Module:User:Sinonquoi/ks-pa-translit-2 in singular and plural. Everything you need to know about the word Module:User:Sinonquoi/ks-pa-translit-2 you have here. The definition of the word Module:User:Sinonquoi/ks-pa-translit-2 will help you to be more precise and correct when speaking or writing your texts. Knowing the definition of Module:User:Sinonquoi/ks-pa-translit-2, as well as those of other words, enriches your vocabulary and provides you with more and better linguistic resources.


-- TODO: long í support
-- TODO: sort out short e vs palatalisation
-- TODO: add rule for CẹC = CyaC

-- Short aliases for the Unicode-aware string helpers provided by `mw.ustring`.
local u = mw.ustring.char -- builds a string from Unicode code points
local gsub = mw.ustring.gsub -- pattern-based substitution; also returns the match count

-- Table returned at the end of the file; public functions are attached to it.
local export = {}

-- Forward declaration: the function is assigned further down the file but is
-- called from export.transliterate, so the local must exist before that.
local invalid_vowel_combination

-- Base letters that are referenced by name in the substitution rules.
local vav = 'و'
local ye = 'ی'
local alif = 'ا'
local he = 'ہ'
local docheshm = u(0x06BE) -- do-chashmi he (U+06BE)

local re = 'ر'
local nun = 'ن'

-- Short-vowel diacritics. The suffixes look like abbreviations of the vowel
-- quality (e.g. cfu = close front unrounded) - presumably; confirm with author.
local vw_s_cfu = u(0x0650) -- zer / kasra (i)
local vw_s_ccu = u(0x0655) -- hamza below
local vw_s_cbr = u(0x064F) -- pesh / damma (u)
local vw_s_mcu = u(0x0654) -- hamza above
local vw_s_ocu = u(0x064E) -- zabar / fatha (a)

-- Long-vowel diacritics, placed on a carrier letter (see long_u / long_i).
local vw_l_cbr = u(0x0657) -- inverted pesh (inverted damma)
local vw_l_cfu = u(0x0656) -- subscript alif

local hat = u(0x065A) -- small V above
local inverted_hat = u(0x065B) -- inverted small V above
local hats = hat .. inverted_hat -- both hat signs, concatenated

-- All five short-vowel diacritics concatenated into one string.
local short_vowels_list = vw_s_cfu .. vw_s_ccu .. vw_s_cbr .. vw_s_mcu .. vw_s_ocu 

-- carrier + diacritic combos (a carrier letter followed by the sign it bears)
local long_u = vav .. vw_l_cbr
local short_o = vav .. inverted_hat
local long_i = ye .. vw_l_cfu
local short_ye = ye .. inverted_hat

-- Concatenated inventories of vowel-bearing sequences. They are not referenced
-- by any pattern that survives in this copy of the file; presumably they were
-- meant to be spliced into character classes - TODO confirm against upstream.
-- NOTE(review): 'ۆ' is listed twice in standalone_carrier.
local vocalised_carrier = long_u .. short_o .. long_i .. short_ye .. 'ێ' .. 'ۆ' .. 'اٟ' .. 'ۄ'
local standalone_carrier = long_u .. short_o .. long_i .. short_ye .. 'ۆ' .. 'ێ' .. 'ۆ' .. 'اٟ' .. 'ۄ'
local palatalisers = "ۍؠ"

-- Source-script and Latin consonant inventories, each as one flat string.
local consonants = "بپتٹجچدڈرڑزژسشفکگلمنهھےثحخذصضطظعغقۍۄیٲآ"
local consonants_latn = "bptṭsjchxdḍzrċsśzʿġfqkglmnhv"

-- Replacement table handed to gsub in export.transliterate; the values are the
-- Latin output for consonants, carrier vowels and numerals.
-- NOTE(review): every entry in this copy has lost its key (only ` = 'x'`
-- remains), so the table does not parse as written. The keys were presumably
-- bracketed string keys such as ["..."]; restore them from the upstream module.
local conv = {
	-- consonants
	 = 'b',  = 'p',  = 't',  = 'ṭ',  = 's',
	 = 'j',  = 'c',  = 'h',  = 'kh',
	 = 'd',  = 'ḍ',  = 'z',
	 = 'r',  = 'ḍ',  = 'z',  = 'ċ',
	 = 's',  = 'ś',  = 's',  = 'z',
	 = 't',  = 'z',
	 = 'ʿ',  = 'ġ',
	 = 'f',  = 'q',
	 = 'k',  = 'g',
	 = 'l',  = 'm',  = 'n',
	 = 'h',  = 'h',

    -- Why is this separate?
	 = 'kh',
	
	-- always word-final
	 = 'y', 

    -- short e to be treated separately
	
	-- incorrect palatalisation marker
	 = '\'',

	-- broken/open vowels 
    -- confirm if there are other use cases for these two
	 = 'ọ',  = 'ẹ', -- optionally ẹ = ya or used at the end to indicate palatalisation
	
	-- a carries long vowels
	 = 'ạ̄',  = 'ā',

     = 'ī',  = 'ụ̄',

    -- vowels
     = 'o',  = 'e',  = 'ạ',
	
	-- numerals
	 = '0',  = '1',  = '2',  = '3',  = '4',  = '5',  = '6',  = '7',  = '8',  = '9',

}

-- Replacement table for short-vowel signs; export.transliterate passes it to
-- gsub so that each matching character is swapped for its Latin vowel.
-- NOTE(review): the keys are missing in this copy (only ` = 'x'` remains), so
-- the table does not parse as written; restore them from the upstream module.
local short_vowels = {
	-- high vowels
	 = 'i',  = 'ụ',  = 'u',  = 'ụ̄',
	
	-- central vowels
	 = 'ạ',
	
	-- low vowels
	 = 'a',
}

-- Replacement table used by the last-but-one gsub in export.transliterate for
-- sequences that need a fixed, multi-letter output.
-- NOTE(review): the key of the single entry is missing in this copy; restore it
-- from the upstream module.
local hardcoded_conv = {
     = 'vạ',
}

-- NOTE(review): `alif` and `ye` are already declared as locals near the top of
-- the file; these lines declare new locals that shadow them from here on.
-- `waw` is not referenced anywhere in the visible code. If the literals are the
-- same code points as the earlier ones, all three lines can be dropped - verify
-- before removing.
local alif = 'ا'
local waw = 'و'
local ye = 'ی'

--- Transliterates Perso-Arabic-script text into Latin script by applying an
-- ordered series of substitutions. The order matters: context-dependent rules
-- (carrier + diacritic, word-initial alif, word-final letters) run first and
-- the plain table lookups run last on whatever is left.
--
-- NOTE(review): in this copy several patterns contain an empty `()` capture or
-- an empty `""` fragment where a character class appears to have been lost
-- (and `%f` has no set after it). Those fragments are kept exactly as found;
-- restore the classes from the upstream module before relying on the output.
--
-- @param text string to transliterate
-- @return the transliterated string, or nil when invalid_vowel_combination
--         rejects the input
function export.transliterate(text)
    if invalid_vowel_combination(text) then
--		require("Module:debug").track("ks-Arab-translit/invalid vowel combination")
        return nil
    end

    -- frontier-based rule that inserts an apostrophe
    -- NOTE(review): the set that must follow %f is missing here
    text = gsub(text, '(%f)', '\'')

    -- bari ye carrying the hat (small V above) is written as short e
    text = gsub(text, 'ے' .. hat, "e")

    -- ye with inverted hat is /j/
    -- ? always occurs after a consonant
    text = gsub(text, "()" .. ye .. inverted_hat .. "()", "%1y%2")
    text = gsub(text, "()" .. short_ye .. alif, "%1yā")
    text = gsub(text, "()" .. short_ye, "%1y")

    -- nun with hat is reduced to plain nun
    text = gsub(text, nun .. "", nun)

    -- ye with hat = short e
    -- only when not final?
    text = gsub(text, ye .. "", "e")

    -- re with inverted hat followed by short vowel is re + short vowel
--    text = gsub(text, "()" .. re .. inverted_hat .. "()", "%1r%2")

    -- re with inverted hat is re
    text = gsub(text, re .. inverted_hat, re)

    -- interconsonantal vav is a long ō sound; an optional do-chashmi he before
    -- the vav is kept in place
    text = gsub(text,
        '(' .. docheshm .. '?)' .. vav .. '()',
        "%1ō%2")

    -- alif between its two context captures is a long ā sound
    text = gsub(text, '()' .. alif .. '()', "%1ā%2")

    -- consonant + short vowel + he disregards the he and transliterates the vowel:
    -- first before whitespace, then at the very end of the text.
    text = gsub(text, '()()' .. he .. '(%s)', "%1%2%3")
    -- '$' anchors at the end of the subject; the previous '^' is only an anchor
    -- at the start of a pattern and matched a literal caret here, so this rule
    -- could never fire.
    text = gsub(text, '()()' .. he .. '$', "%1%2")

    -- final he + short vowel disregards the he and transliterates the vowel
    -- should return NIL
    text = gsub(text, he .. '()$', short_vowels)

    -- text-initial alif + vowelled carrier drops the alif
    text = gsub(text, '^' .. alif .. '()', "%1")

    -- text-initial alif + short vowel diacritic drops the alif
    text = gsub(text, '^' .. alif .. '()', "%1")

    -- same two rules for an alif that follows whitespace (start of a later word)
    text = gsub(text, '(%s)' .. alif .. '()', "%1%2")
    text = gsub(text, '(%s)' .. alif .. '()', "%1%2")

    -- re with inverted hat is just re (repeats the earlier rule)
    text = gsub(text, re .. inverted_hat, re)

    -- long /u:/ and /i:/
    text = gsub(text, vav .. vw_s_cbr .. vav .. "()", vav .. "ū%1")
    text = gsub(text, "()" .. vw_s_cfu .. ye .. "()", "%1ī%2")

    -- vav with hat = short o
    text = gsub(text, vav .. "", "o")

    -- vav with short vowel: consonantal v followed by that vowel's Latin form.
    -- The captured sign is looked up in short_vowels; concatenating the table
    -- itself (as before) raises a runtime error.
    text = gsub(text,
        vav .. "()",
        function(c)
            return "v" .. short_vowels[c]
        end)

    -- vav with inverted pesh = long u
    text = gsub(text, long_u, "ū")

    -- long i
    text = gsub(text, ye .. vw_l_cfu, 'ī')

    -- ye between its two context captures is a long ē sound
    text = gsub(text, '()' .. ye .. '()', "%1ē%2")

    -- word-final alif and ye
    text = gsub(text, '()' .. ye .. '$', "%1ī")
    text = gsub(text, '()' .. alif .. '$', "%1ā")

    -- regard the consonant + short vowel combinations throughout:
    -- every remaining character that is a key of short_vowels is replaced
    text = gsub(text, '.', short_vowels)

    -- table lookup for consonants, carrier vowels and numerals
    text = gsub(text, '', conv)

    -- normal consonants left over
    text = gsub(text, vav, 'v')
    text = gsub(text, 'ہ', 'h')
    text = gsub(text, "ی", "y")

    -- hardcoded consonants left over
    -- Seems to be auto-subbed?
    text = gsub(text, '', hardcoded_conv)

    -- CẹC = CyaC
    text = gsub(text, "()ẹ()", "%1ya%2")

    return text
end

--- Pre-check run by export.transliterate before any substitution.
-- Intended rule (from the original comment): a vowel carrier or a standalone
-- vowel must not carry another vowel sign on top.
--
-- As written: matches of the pattern are removed from `text`; if anything was
-- removed the function returns nil, otherwise it returns true only when the
-- text is empty (zero bytes).
--
-- NOTE(review): returning a falsy value when the pattern DID match looks
-- inverted relative to the function name - confirm the intended logic. The
-- pattern itself is empty in this copy; restore it from the upstream module.
--
-- @param text string to check
-- @return true when the input is to be rejected, nil/false otherwise
function invalid_vowel_combination(text)
    local count
    text, count = gsub(text, "", "")
    if count > 0 then
        return nil
    end
    return #text == 0
end

return export