-- Transliteration for Hindi (possibly other languages using Devanagari script, except for Sanskrit)
-- Module table; it is returned at the end of the file, so anything attached
-- to it (here: export.tr) is what callers receive.
local export = {}
-- Local shortcuts for the mw.ustring pattern functions used throughout.
local gsub = mw.ustring.gsub
local match = mw.ustring.match
-- Replacement table handed to gsub near the end of export.tr: every matched
-- character (optionally followed by a nukta) is swapped for the Latin string
-- stored here.
-- NOTE(review): the entries below have values but no visible keys in this
-- copy; presumably each one was keyed by a Devanagari character -- confirm
-- against the upstream module before editing.
local conv = {
-- consonants
= 'k', = 'kh', = 'g', = 'gh', = 'ṅ',
= 'c', = 'ch', = 'j', = 'jh', = 'ñ',
= 'ṭ', = 'ṭh', = 'ḍ', = 'ḍh', = 'ṇ',
= 't', = 'th', = 'd', = 'dh', = 'n',
= 'p', = 'ph', = 'b', = 'bh', = 'm',
= 'y', = 'r', = 'l', = 'v', = 'ḷ',
= 'ś', = 'ṣ', = 's', = 'h',
= 'q', = 'x', = 'ġ', = 'ḻ',
= 'z', = 'ḻ', = 'ž', = 'ṛ', = 'ṛh',
= 'f', = 'θ', = 'ṉ', = 'ṟ',
-- = 'gy',
-- vowel diacritics
= 'i', = 'u', = 'e', = 'o',
= 'ā', = 'ī', = 'ū',
= 'ŕ',
= 'ai', = 'au',
= 'ŏ',
= 'ĕ',
-- vowel signs
-- NOTE(review): unlike the group above, this one also has a plain 'a' and
-- 'om', so it presumably covers the independent vowel letters -- confirm.
= 'a', = 'i', = 'u', = 'e', = 'o',
= 'ā', = 'ī', = 'ū',
= 'ŕ',
= 'ai', = 'au',
= 'ŏ',
= 'ĕ',
= 'om',
-- chandrabindu (becomes a combining tilde)
= '̃',
-- anusvara
= 'ṁ',
-- visarga
= 'ḥ',
-- virama (dropped from the output)
= '',
-- numerals
= '0', = '1', = '2', = '3', = '4',
= '5', = '6', = '7', = '8', = '9',
-- punctuation
= '.', -- danda
= '.', -- double danda
= '', -- compound separator
-- abbreviation sign
= '.',
}
-- Nasal substitutes consulted by export.tr when it rewrites an anusvara.
-- The values are the nasal letters ङ, ञ, ण, म and the chandrabindu ँ.
-- NOTE(review): the keys are not visible in this copy; presumably each entry
-- was keyed by the neighbouring consonant that selects the nasal -- confirm.
local nasal_assim = {
= 'ङ', = 'ङ', = 'ङ', = 'ङ',
= 'ञ', = 'ञ', = 'ञ', = 'ञ',
= 'ण', = 'ण', = 'ण', = 'ण',
= 'म', = 'म', = 'म', = 'म', = 'म',
= 'ँ', = 'ँ',
}
-- Set-style table (every value is `true`) referenced by the word-final 'a'
-- rule in export.tr.
-- NOTE(review): the keys are not visible in this copy; presumably they are
-- consonant clusters that are exempt from that rule -- confirm.
local perm_cl = {
= true, = true, = true,
}
-- Character inventories: all_cons lists the consonant letters and
-- special_cons a smaller set of them (य र ल व ह न म).
-- NOTE(review): none of these strings is referenced in the visible code;
-- presumably they were spliced into pattern character classes -- confirm.
local all_cons, special_cons = 'कखगघङचछजझञटठडढतथदधपफबभशषसयरलवहणनम', 'यरलवहनम'
-- `vowel` holds Latin 'a' followed by the dependent vowel signs, while
-- `vowel_sign` holds the independent vowel letters.
-- NOTE(review): the two names look swapped relative to their contents --
-- check how they are used before renaming anything.
local vowel, vowel_sign = 'aिुृेोाीूैौॉॅ', 'अइउएओआईऊऋऐऔऑऍ'
-- Pattern applied repeatedly in export.tr; its replacement keeps the four
-- captures and drops the literal 'a' between the second and third.
-- NOTE(review): the first capture is empty and the last is a bare `?`, so the
-- character classes appear to be missing from this copy -- confirm.
local syncope_pattern = '()(़?)a(़?)(?)'
-- Returns `text` with its characters in reverse order.
-- Characters are counted and extracted with mw.ustring.len / mw.ustring.sub,
-- one position at a time; an empty string yields an empty string.
local function rev_string(text)
	local total = mw.ustring.len(text)
	local mirrored = {}
	-- Walk forwards and drop each character into its mirrored slot, so the
	-- table is already in reversed order when it is joined.
	for pos = 1, total do
		mirrored[total - pos + 1] = mw.ustring.sub(text, pos, pos)
	end
	return table.concat(mirrored)
end
-- Transliterates `text` into Latin script.
--   text: the string to convert.
--   lang, sc: accepted but never read inside this function.
-- Returns the converted string after mw.ustring.toNFC normalisation.
-- NOTE(review): several patterns below contain empty captures or a bare `?`,
-- and `perm_cl` / `nasal_assim` are used without an index; bracketed text
-- appears to be missing from this copy -- confirm against the upstream module
-- before relying on the exact behaviour.
function export.tr(text, lang, sc)
-- Re-emit each match with 'a' appended when the second capture is empty
-- (presumably adding the inherent vowel after a bare consonant -- confirm).
text = gsub(text, '(़?)(?)', function(c, d)
return c .. (d == "" and 'a' or d) end)
-- Handle the text piece by piece (presumably word by word; the pattern's
-- character class is not visible here).
for word in mw.ustring.gmatch(text, "+") do
local orig_word = word
-- Work on the reversed word, so a `^` anchor below targets the word's end.
word = rev_string(word)
-- Word-final 'a': the leading 'a' of the reversed word is kept only when the
-- condition on the following characters holds; otherwise it is dropped.
word = gsub(word, '^a(़?)()(.)(.?)', function(opt, first, second, third)
return (((match(first, '') and match(second, '्') and not perm_cl)
or match(first .. second, 'य'))
and 'a' or "") .. opt .. first .. second .. third end)
-- Keep removing the 'a' matched by syncope_pattern until none is left.
while match(word, syncope_pattern) do
word = gsub(word, syncope_pattern, '%1%2%3%4')
end
-- Rewrite each anusvara. Because the word is reversed, `succ` is the
-- character that follows it in the original order and `prev` the one before.
-- The anusvara becomes virama + म when `succ .. prev` is exactly "a", a
-- combining tilde when `succ` is empty and `prev` matches the pattern, and
-- otherwise the `nasal_assim or "n"` fallback.
word = gsub(word, '(.?)ं(.)', function(succ, prev)
return succ .. (succ..prev == "a" and "्म" or
(succ == "" and match(prev, '') and "̃" or nasal_assim or "n")) .. prev end)
-- Put the processed word back in its original order; orig_word is used as a
-- pattern, so every match of it in `text` is replaced.
text = gsub(text, orig_word, rev_string(word))
end
-- Map each character (plus an optional nukta) through conv; characters with
-- no entry are left unchanged.
text = gsub(text, '.़?', conv)
-- Swap the combining tilde after the capture for a different combining mark
-- placed directly after the 'a'.
text = gsub(text, 'a()̃', 'a͠%1')
-- Final spelling adjustments on the Latin output.
text = gsub(text, 'jñ', 'gy')
text = gsub(text, 'ñz', 'nz')
return mw.ustring.toNFC(text)
end
-- Hand the module table (carrying export.tr) back to whoever loads this file.
return export