-- Short aliases for the Scribunto Unicode-aware string helpers used below.
local u = mw.ustring.char
local gsub = mw.ustring.gsub
-- Module table; export.tr is attached to it and it is returned at the end of the file.
local export = {}
-- Base letters, built from their code points (he is given as a literal).
local vav = u(0x0648)
local ye = u(0x06CC)
local alif = u(0x0627)
-- NOTE(review): `he` is not referenced anywhere in the code as shown.
local he = 'ه'
-- Combining vowel signs. The variable names are opaque codes; the trailing
-- comment gives the traditional name of each mark.
local vw_s_cfu = u(0x0650) -- zer (i)
local vw_s_ccu = u(0x0655) -- hamza below
local vw_s_cbr = u(0x064F) -- pesh (u)
local vw_s_mcu = u(0x0654) -- hamza above
local vw_s_ocu = u(0x064E) -- zabar (a)
local vw_l_cbr = u(0x0657) -- inverted pesh
local vw_l_cfu = u(0x0656) -- subscript alif
local hat = u(0x065A)
local inverted_hat = u(0x065B)
-- Concatenations of the marks above.
-- NOTE(review): `hats` and `short_vowels_list` are not referenced in the code
-- as shown; presumably they were used inside pattern character classes that
-- are missing from this copy -- confirm against the upstream module.
local hats = hat .. inverted_hat
local short_vowels_list = vw_s_cfu .. vw_s_ccu .. vw_s_cbr .. vw_s_mcu .. vw_s_ocu
-- carrier + diacritic combos
local long_u = vav .. vw_l_cbr
local short_o = vav .. inverted_hat
local long_i = ye .. vw_l_cfu
local short_e = ye .. inverted_hat
-- All carrier combos above concatenated with four further literal letters.
-- NOTE(review): 'و' appears twice in a row here; the original literals may
-- have differed (e.g. carried diacritics) -- confirm against upstream.
local vocalised_carrier = long_u .. short_o .. long_i .. short_e .. 'و' .. 'و' .. 'ی' .. 'ے'
-- Letter inventories, kept as plain strings of characters.
local consonants_1 = "ببپتثجچحخدذرزژسشصضطظعغفقکگلمنڻوهىھٹڈڑ"
local consonants_2 = "ببھتھڈھجھدھٹھکھڑھ"
local vowels = "اِیاُؤآے"
-- Removes matches of consonants_2 from consonants_1 (only gsub's first return
-- value, the resulting string, is kept).
-- NOTE(review): consonants_2 is passed as the pattern verbatim, so it only
-- matches that exact character sequence; a bracketed character class was
-- presumably intended -- confirm against upstream.
local consonants = gsub(consonants_1, consonants_2, "")
-- Lookup table of Latin transliterations, used as the replacement table in
-- the gsub calls near the end of export.tr.
-- NOTE(review): every entry below has lost its key (each should read
-- `['<source character>'] = '<latin>'`), so this constructor is not valid Lua
-- as shown. Restore the keys from the upstream module; do not guess them.
local conv = {
-- plain consonants
= 'b', = 'p', = 't', = 's', = 'j',
= 'ch', = 'h', = 'kḥ', = 'd', = 'z',
= 'r', = 'z', = 'z', = 's', = 'sh',
= 's', = 'z', = 't', = 'z', = 'a',
= 'ġ', = 'f', = 'q', = 'k', = 'g',
= 'l', = 'm', = 'n', = 'ṇ', = 'v',
= 'h', = 'y', = 'h', = 'ṭ', = 'ḍ',
= 'ṛ',
-- aspirated consonants
= 'bh', = 'th', = 'ḍh', = 'jh',
= 'dh', = 'ṭh', = 'kh', = 'ṛh',
= 'gh',
-- digits
= '0', = '1', = '2', = '3', = '4',
= '5', = '6', = '7', = '8', = '9',
}
-- Nasal assimilation rules. export.tr iterates this table with pairs() and,
-- for each entry, rewrites the placeholder "N" followed by a match of the key
-- pattern into the entry's value followed by that match.
-- NOTE(review): the keys are truncated or missing in this copy (the first
-- five retain only a trailing `h?']` fragment), so this constructor is not
-- valid Lua as shown. Restore the key patterns from the upstream module.
local nasal_assim = {
h?'] = 'ṅ',
h?'] = 'ñ',
h?'] = 'ṇ',
h?'] = 'n',
h?'] = 'm',
= 'n',
= 'm',
= 'n',
}
-- Lookup table of Latin output for vowel letters, vowel marks and a few other
-- signs; used as a gsub replacement table in export.tr. The value 'N' is a
-- placeholder that export.tr later resolves (nasal assimilation or a
-- combining tilde), and 'ː' is a placeholder that export.tr later turns into
-- a doubled following character.
-- NOTE(review): every entry below has lost its key, so this constructor is
-- not valid Lua as shown; the trailing comments are the only remaining hint
-- of which character each value belongs to. Restore the keys from upstream.
local short_vowels = {
-- independent vowels
= 'ā', -- alif ا
= 'ā', -- alif with madda آ
= 'e', -- ye ے
= 'ī' ,
= 'u', -- alif with damma
= 'ē',
= 'au', -- waw with hamza ؤ
-- vowel marks
= 'a', -- zabar َ◌
= 'ā', -- khari zabar ◌ٰ
= 'i', -- zer ◌ِ
= 'u', -- pesh ُ◌
-- other diacritics
= 'N', -- noon ghunna ں
= 'i', -- hamza ء
= 'ūn', -- sukun ◌ْ  NOTE(review): 'ūn' looks unexpected for sukun -- confirm
= 'ː', -- shad ◌ّ
= 'n', -- do zabar ◌ً
= 'ni', -- do zer ◌ٍ
}
-- Letter and mark constants declared a second time, just before export.tr.
-- `alif` and `ye` redeclare (shadow) the locals of the same name defined near
-- the top of the file; export.tr sees these later declarations.
-- NOTE(review): `waw`, `noon` and `shadda` are not referenced in the code as
-- shown; they may have been used in pattern text missing from this copy.
local alif = 'ا'
local waw = 'و'
local ye = 'ی'
local noon = u(0x06BA)
local shadda = u(0x0651)
--- Transliterates `text` into Latin script by applying an ordered series of
-- substitutions, then returns the result.
--
-- The substitutions are order-dependent: the context-sensitive vowel rules
-- run first, then the table-driven replacements, then clean-up of the 'N'
-- and 'ː' placeholders and of punctuation/whitespace.
--
-- NOTE(review): in this copy many patterns have lost their bracketed
-- character classes, leaving bare '()' position captures or empty strings.
-- The per-step comments describe the apparent intent; the patterns
-- themselves must be restored from the upstream module before this runs as
-- intended.
--
-- @param text string to transliterate
-- @param lang not used in the body as shown
-- @param sc not used in the body as shown
-- @return the transliterated string
function export.tr(text, lang, sc)
-- vav followed by alif is written "vā"
text = gsub(text,
'وا' .. '()',
"vā%1")
-- vav after an optional ھ becomes "ū" (an earlier comment here said "ō",
-- but the replacement below emits ū); the surrounding context class is
-- missing from this copy
text = gsub(text,
'(ھ?)' .. vav .. '()',
"%1ū%2")
-- alif between two captured contexts becomes a long ā
text = gsub(text, '()' .. alif .. '()', "%1ā%2")
-- final he + short vowel disregards the he and transliterates the vowel
text = gsub(text, 'ہ()', short_vowels)
-- word-initial alif + vowelled carrier drops the alif
text = gsub(text, '^' .. alif .. '()', "%1")
-- word-initial alif + short vowel diacritic drops the alif
-- NOTE(review): as shown this line is identical to the previous one; the
-- two presumably differed in the character class that has been lost
text = gsub(text, '^' .. alif .. '()', "%1")
-- long /u:/ and /i:/
text = gsub(text, vav .. vw_s_cbr .. vav .. "()", vav .. "ū%1")
text = gsub(text, "()" .. vw_s_cfu .. ye .. "()", "%1ī%2")
-- vav with hat = short o
text = gsub(text, vav .. "", "o")
-- vav with short vowel
text = gsub(text,
vav .. "()",
function(c)
-- NOTE(review): `short_vowels` is a table, so this concatenation raises
-- an error as written and `c` is unused; presumably `short_vowels[c]`
-- was intended -- confirm against upstream
return "v" .. short_vowels
end)
-- nun or re with hat
-- TODO: add support for re
text = gsub(text, "ن" .. "", "n")
-- ye with hat = short e
text = gsub(text, ye .. "", "e")
-- vav with inverted pesh = long u
text = gsub(text, long_u, "ū")
-- long i
text = gsub(text, ye .. vw_l_cfu, 'ī')
-- ye between two captured contexts becomes "ae"
text = gsub(text, '()' .. ye .. '()', "%1ae%2")
-- word-final alif and ye
text = gsub(text, '()' .. ye .. '$', "%1ī")
text = gsub(text, '()' .. alif .. '$', "%1ā")
-- table-driven replacement: every character is looked up in short_vowels
-- (characters with no entry are left unchanged by gsub)
text = gsub(text, '.', short_vowels)
-- table-driven replacement of consonants via conv
-- NOTE(review): the first pattern is only 'ھ' and the second is empty in
-- this copy; presumably "<consonant>ھ" and a consonant class respectively
text = gsub(text, 'ھ', conv)
text = gsub(text, '', conv)
-- normal consonants left over
text = gsub(text, vav, 'v')
text = gsub(text, 'ہ', 'h')
text = gsub(text, "ی", "y")
-- resolve the 'N' placeholder before each nasal_assim key pattern into the
-- corresponding nasal; pairs() visits the entries in unspecified order
for key,val in pairs(nasal_assim) do
text = mw.ustring.gsub(text,"N("..key..")",val.."%1")
end
-- any remaining 'N' becomes a combining tilde (nasalisation mark)
text = gsub(text,"()N ", "%1̃ ")
text = gsub(text,"(.?)N", "%1̃")
-- the 'ː' placeholder doubles the character that follows it
text = gsub(text,"ː(.)","%1%1")
-- danda (with an optional preceding space) becomes a full stop
text = gsub(text," ?।",".")
-- strip a single trailing space
text = gsub(text," $","")
return text
end
-- Module value: the table carrying export.tr.
return export