-- TODO: long í support
-- TODO: sort out short e vs palatalisation
-- TODO: add rule for CẹC = CyaC
-- `u` is called below with numeric code points; its results are concatenated
-- as strings to build the character constants.
local u = require("Module:string/char")
local gsub = mw.ustring.gsub -- every substitution in export.tr goes through this
local export = {}
-- Carrier letters.
local vav = u(0x0648) -- U+0648 Arabic letter waw
local ye = u(0x06CC) -- U+06CC Arabic letter Farsi yeh
local alif = u(0x0627) -- U+0627 Arabic letter alef
local he = 'ه'
-- Vowel diacritics.
-- NOTE(review): the suffixes look like phonetic abbreviations (e.g. cfu =
-- close front unrounded, cbr = close back rounded) -- confirm with the author.
local vw_s_cfu = u(0x0650) -- zer / kasra (i)
local vw_s_ccu = u(0x0655) -- hamza below
local vw_s_cbr = u(0x064F) -- pesh / damma (u)
local vw_s_mcu = u(0x0654) -- hamza above
local vw_s_ocu = u(0x064E) -- zabar / fatha (a)
local vw_l_cbr = u(0x0657) -- inverted pesh (inverted damma)
local vw_l_cfu = u(0x0656) -- subscript alif
local hat = u(0x065A) -- U+065A vowel sign small v above
local inverted_hat = u(0x065B) -- U+065B vowel sign inverted small v above
-- Both hat marks concatenated into one string.
local hats = hat .. inverted_hat
-- All five short-vowel diacritics concatenated into one string.
-- NOTE(review): not referenced anywhere in the visible code; it may have been
-- used inside a pattern character class that is missing from this copy.
local short_vowels_list = vw_s_cfu .. vw_s_ccu .. vw_s_cbr .. vw_s_mcu .. vw_s_ocu
-- carrier + diacritic combos (two-code-point strings: carrier letter followed
-- by a combining mark)
local long_u = vav .. vw_l_cbr
local short_o = vav .. inverted_hat
local long_i = ye .. vw_l_cfu
local short_e = ye .. inverted_hat
-- Concatenation of the combos above plus three precomposed letters.
-- NOTE(review): the first and last literal look identical, so one of them is
-- probably redundant or was meant to be a different letter -- confirm.
local vocalised_carrier = long_u .. short_o .. long_i .. short_e .. 'ۆ' .. 'ێ' .. 'ۆ'
local palatalisers = "ۍؠ"
-- Character inventories, presumably intended for use inside pattern character
-- classes. NOTE(review): none of the strings below (nor vocalised_carrier or
-- palatalisers) is referenced in the visible code; the patterns that used them
-- appear to have been lost from this copy.
-- Full set: the `consonants_no_conj` letters plus ھ and the trailing ۍ ۄ ی ٲ آ.
local consonants = "بپتٹجچدڈرڑزژسشفکگلمنهھےثحخذصضطظعغقۍۄیٲآ"
-- Base set without ھ and without the trailing ۍ ۄ ی ٲ آ.
local consonants_no_conj = "بپتٹجچدڈرڑزژسشفکگلمنهےثحخذصضطظعغق"
-- Same as consonants_no_conj but without ر.
local consonants_no_conj_r = "بپتٹجچدڈڑزژسشفکگلمنهےثحخذصضطظعغق"
-- Same as consonants_no_conj but without ن.
local consonants_no_conj_n = "بپتٹجچدڈرڑزژسشفکگلمهےثحخذصضطظعغق"
-- Same as consonants_no_conj but without both ر and ن.
local consonants_no_conj_r_n = "بپتٹجچدڈڑزژسشفکگلمهےثحخذصضطظعغق"
-- Lookup table passed to gsub in export.tr as the replacement table; the
-- values are Latin transliteration strings and ASCII digits.
-- NOTE(review): every entry below starts with a bare `=`, i.e. the table keys
-- are missing from this copy (this is not valid Lua as written). The keys are
-- presumably the source-script letters and numerals -- restore them from the
-- upstream module before using this file.
local conv = {
-- consonants
= 'b', = 'p', = 't', = 'ṭ', = 's',
= 'j', = 'c', = 'h', = 'x',
= 'd', = 'ḍ', = 'z',
= 'r', = 'ṛ', = 'z', = 'ć',
= 's', = 'ś', = 's', = 'z',
= 't', = 'z',
= 'ʿ', = 'ġ',
= 'f', = 'q',
= 'k', = 'g',
= 'l', = 'm', = 'n',
= 'h', = 'h',
-- always word-final
= 'y',
-- incorrect palatalisation marker
= '\'',
-- broken/open vowels
= 'ọ', = 'ẹ', -- optionally ẹ = ya
-- a carries long vowels
= 'ạ̄', = 'ā',
-- short vowels
= 'o', = 'o', = 'e', = 'ạ', = 'ụ',
-- numerals
= '0', = '1', = '2', = '3', = '4', = '5', = '6', = '7', = '8', = '9',
}
-- Lookup table used by export.tr as a gsub replacement table; the values are
-- Latin vowel letters.
-- NOTE(review): the keys are missing from this copy (each entry starts with a
-- bare `=`, which is not valid Lua). They are presumably the short-vowel
-- diacritics defined near the top of the file -- restore from upstream.
local short_vowels = {
-- high vowels
= 'i', = 'ụ', = 'u', = 'ụ̄',
-- central vowels
= 'ạ',
-- low vowels
= 'a',
}
-- Carrier letters written as literals.
-- NOTE(review): `alif` and `ye` are already declared as locals near the top of
-- the file (built from code points); these declarations shadow them for all
-- code that follows. Strings built earlier (long_i, short_e, ...) keep the
-- earlier values. Confirm the literals are the same characters, or drop the
-- duplicates.
local alif = 'ا'
-- NOTE(review): `waw` is not referenced in the visible code; `vav` is used instead.
local waw = 'و'
local ye = 'ی'
-- Devanagari begins here
-- First Devanagari lookup table applied by export.tr (before p2). Every value
-- is an aspirated Devanagari consonant followed by a virama (्).
-- NOTE(review): the table keys are missing from this copy (each entry starts
-- with a bare `=`, which is not valid Lua) -- restore from upstream.
local p1 = {
= 'ख्',
= 'घ्',
= 'छ्',
= 'छ़्',
= 'झ्',
= 'ठ्',
= 'ढ्',
= 'थ्',
= 'ध्',
= 'फ्',
= 'भ्',
}
-- Second Devanagari lookup table applied by export.tr (after p1). Values are
-- Devanagari consonants followed by a virama (्), then the Devanagari digits
-- ० to ९.
-- NOTE(review): the table keys are missing from this copy (each entry starts
-- with a bare `=`, which is not valid Lua) -- restore from upstream.
local p2 = {
= 'क्',
= 'ग्',
= 'च्',
= 'च़्',
= 'ज्',
= 'ज़्',
= 'ट्',
= 'ड्',
= 'त्',
= 'द्',
= 'न्',
= 'प्',
= 'ब्',
= 'म्',
= 'र्',
-- two consecutive entries share the value 'य्'
= 'य्',
= 'य्',
= 'ल्',
-- NOTE(review): unlike every other consonant value in this table, the next
-- value has no virama (्) -- possibly an omission; confirm.
= 'श',
= 'स्',
= 'व्',
-- digits
= '०',
= '१',
= '२',
= '३',
= '४',
= '५',
= '६',
= '७',
= '८',
= '९',
}
-- Replacement table for the last gsub in export.tr. Apart from the first
-- entry, which is the empty string, the values are Devanagari dependent vowel
-- signs (matras).
-- NOTE(review): the table keys are missing from this copy (each entry starts
-- with a bare `=`, which is not valid Lua). They are presumably the Latin
-- vowels produced by the earlier passes -- restore from upstream.
local vowels_non_initial = {
= '',
= 'ा',
= 'ॅ',
= 'ॉ',
= 'ॆ',
= 'े',
= 'ऺ',
= 'ऻ',
= 'ि',
= 'ी',
= 'ॖ',
= 'ॗ',
= 'ु',
= 'ू',
= 'ॊ',
= 'ो',
}
-- Replacement table for the `^`-anchored gsub in export.tr. The values are
-- Devanagari independent vowel letters, some followed by an apostrophe or a
-- vowel sign. The table has the same number of entries as vowels_non_initial.
-- NOTE(review): the table keys are missing from this copy (each entry starts
-- with a bare `=`, which is not valid Lua). They are presumably the Latin
-- vowels produced by the earlier passes -- restore from upstream.
local vowels_initial = {
= 'अ',
= 'आ',
= 'अॅ',
= 'ऑ',
= 'ए\'',
= 'ए',
= 'अ\'',
= 'आ\'',
= 'इ',
= 'ई',
= 'उ\'',
= 'ऊ\'',
= 'उ',
= 'ऊ',
= 'ओ\'',
= 'ओ',
}
--- Convert `text` by running it through a fixed sequence of gsub passes.
-- Per the inline comments, the early passes rewrite vowel/carrier sequences
-- of the source script to Latin letters, then `conv` and a few literal
-- replacements handle the remaining letters, and finally `p1`, `p2`,
-- `vowels_initial` and `vowels_non_initial` map the result to Devanagari.
-- The order of the passes matters: each pass sees the output of the previous one.
--
-- @param text  string to convert
-- @param lang  not used in this function
-- @param sc    not used in this function
-- @return the converted string
--
-- NOTE(review): many patterns below appear truncated in this copy. Wherever a
-- pattern shows a bare `()`, `''`, `""` or `%f` with nothing after it, a
-- `[...]` character class has most likely been lost. As written, `()` is a
-- position capture (it yields a number, not a character) and `%f` without a
-- following `[set]` is a malformed pattern. Restore the patterns from the
-- upstream module before relying on this function.
function export.tr(text, lang, sc)
-- NOTE(review): `%f` must be followed by a `[set]`; the set is missing here.
text = gsub(text, '(%f)', '\'')
-- interconsonantal vav is a long ō sound
text = gsub(text,
'(ھ?)' .. vav .. '()',
"%1ō%2")
-- alif between the two captured contexts is a long ā sound
text = gsub(text, '()' .. alif .. '()', "%1ā%2")
-- long /u:/ and /i:/
text = gsub(text, vav .. vw_s_cbr .. vav .. "()", vav .. "ū%1")
text = gsub(text, "()" .. vw_s_cfu .. ye .. "()", "%1ī%2")
-- vav with hat = short o
-- NOTE(review): `hats` is two marks concatenated, so as written this only
-- matches vav followed by both marks in sequence; a character class around
-- `hats` was probably intended.
text = gsub(text, vav .. hats, "o")
-- vav with short vowel
text = gsub(text,
vav .. "()",
function(c)
-- NOTE(review): `short_vowels` is a table, so this concatenation cannot
-- work as written; presumably an index on `c` was lost.
return "v" .. short_vowels
end)
-- final he + short vowel disregards the he and transliterates the vowel
-- (the capture is looked up in the short_vowels table)
text = gsub(text, 'ہ()', short_vowels)
-- word-initial alif + vowelled carrier drops the alif
text = gsub(text, '^' .. alif .. '()', "%1")
-- word-initial alif + short vowel diacritic drops the alif
-- NOTE(review): as shown, this pattern is identical to the previous one.
text = gsub(text, '^' .. alif .. '()', "%1")
-- nun or re with hat
-- TODO: add support for re
text = gsub(text, "ن" .. "()", "n")
-- ye with hat = short e
text = gsub(text, ye .. "()", "e")
-- vav with inverted pish = long u
text = gsub(text, long_u, "ū")
-- ye between the two captured contexts is a long ē sound
text = gsub(text, '()' .. ye .. '()', "%1ē%2")
-- word-final alif and ye
text = gsub(text, '()' .. ye .. '$', "%1ī")
text = gsub(text, '()' .. alif .. '$', "%1ā")
-- regard the consonant + short vowel combinations throughout:
-- every single character is looked up in short_vowels; characters without
-- an entry are left unchanged
text = gsub(text, '.', short_vowels)
-- NOTE(review): empty pattern; presumably a character class was lost.
text = gsub(text, '', conv)
-- normal consonants left over
text = gsub(text, vav, 'v')
text = gsub(text, 'ہ', 'h')
text = gsub(text, "ی", "y")
-- Turn everything into Devanagari: each single character is looked up in p1,
-- then in p2
text = gsub(text, ".", p1)
text = gsub(text, ".", p2)
-- NOTE(review): empty pattern and empty replacement; as written this changes nothing.
text = gsub(text, "", "")
-- introduce schwa and long /waa/ sound
-- NOTE(review): as written, `^` matches only the empty string at the start,
-- and the next pattern matches only that exact literal run of vowel letters;
-- both were presumably character classes.
text = gsub(text, "^", vowels_initial)
text = gsub(text, "aāạạ̄eēoōiīuūụụ̄ọọ̄", vowels_non_initial)
return text
end
return export