-- Module table returned at the end of the file.
local data = {}

local U = mw.ustring.char

-- Kannada combining signs, built from code points so they stay legible in source.
local candrabindu = U(0xC81)
local anusvAra = U(0xC82)
local visarga = U(0xC83)
local virAma = U(0xCCD)
local avagraha = "ಽ"

-- Kannada consonant letters, and a pattern character class matching any one of them.
-- The class is spliced into the capture groups of the rules further down; an empty
-- string here would turn those groups into "()" position captures.
-- NOTE(review): a class of multi-byte characters only works with a Unicode-aware
-- gsub (e.g. mw.ustring.gsub) -- confirm that the consumer applies the rules that way.
local consonants = "ಕಖಗಘಙಚಛಜಝಞಟಠಡಢಣತಥದಧನಪಫಬಭಮಯರಱಲವಶಷಸಹಳೞ"
local consonant = "[" .. consonants .. "]"

local acute = U(0x301) -- combining acute
-- Ordered list of {pattern, replacement} pairs that turn Latin transliteration into
-- Kannada script. Order matters: several rules rely on earlier ones having run.
-- NOTE(review): the table key was lost in the source; "kn" is reconstructed from the
-- language of the data -- confirm against the module that consumes this table.
data["kn"] = {
	-- Vowels and modifiers. Do the diphthongs and diaereses first.
	{"ai", "ಐ"},
	{"au", "ಔ"},
	{"ï", "ಇ"},
	{"ü", "ಉ"},
	{"a", "ಅ"},
	{"ā", "ಆ"},
	{"i", "ಇ"},
	{"ī", "ಈ"},
	{"u", "ಉ"},
	{"ū", "ಊ"},
	{"e", "ಎ"},
	{"ē", "ಏ"},
	{"o", "ಒ"},
	{"ō", "ಓ"},
	{"ṝ", "ೠ"},
	{"ṛ", "ಋ"},
	{"r̥", "ಋ"},
	{"ḹ", "ೡ"},
	{"l̥", "ಌ"},
	-- NOTE(review): the separator class "[%-/]" in the next two rules was lost in the
	-- source and is reconstructed from the "a-i" / "l-R" notation in the comments; the
	-- second capture's characters come from the comments themselves. Confirm both.
	{"(ಅ)[%-/]([ಇಉ])", "%1%2"}, -- a-i, a-u for ಅಇ, ಅಉ; must follow rules for "ai", "au"
	{"(ಲ)[%-/]([ೃೄ])", "%1%2"}, -- l-R, l-RR for ಲೃ, ಲೄ; must follow rules for "lR", "lRR"
	-- Two-letter consonants must go before h.
	{"kh", "ಖ"},
	{"gh", "ಘ"},
	{"ch", "ಛ"},
	{"jh", "ಝ"},
	{"ṭh", "ಠ"},
	{"ḍh", "ಢ"},
	{"th", "ಥ"},
	{"dh", "ಧ"},
	{"ph", "ಫ"},
	{"bh", "ಭ"},
	{"h", "ಹ"},
	-- Other stops.
	{"k", "ಕ"},
	{"g", "ಗ"},
	{"c", "ಚ"},
	{"j", "ಜ"},
	{"ṭ", "ಟ"},
	{"ḍ", "ಡ"},
	{"t", "ತ"},
	{"d", "ದ"},
	{"p", "ಪ"},
	{"b", "ಬ"},
	-- Nasals.
	{"ṅ", "ಙ"},
	{"ñ", "ಞ"},
	{"ṇ", "ಣ"},
	{"n", "ನ"},
	{"m", "ಮ"},
	-- Remaining consonants.
	{"y", "ಯ"},
	{"r", "ರ"},
	{"l", "ಲ"},
	{"v", "ವ"},
	{"ś", "ಶ"},
	{"ṣ", "ಷ"},
	{"s", "ಸ"},
	{"ḷ", "ಳ"},
	{"m̐", candrabindu},
	{"ṃ", anusvAra},
	{"ḥ", visarga},
	{"'", avagraha},
	-- This rule must be applied twice because a consonant may only be in one capture per operation,
	-- so "CCC" will only recognize the first two consonants. Must follow all consonant conversions.
	{"(" .. consonant .. ")(" .. consonant .. ")", "%1" .. virAma .. "%2"},
	{"(" .. consonant .. ")(" .. consonant .. ")", "%1" .. virAma .. "%2"},
	-- A consonant at the very end of the text also takes a virama.
	{"(" .. consonant .. ")$", "%1" .. virAma},
	-- Drop any combining acute accent.
	{acute, ""},
}

-- Independent vowel letter -> dependent vowel sign (matra).
-- The keys were lost in the source; each one is restored as the independent letter
-- that Unicode pairs with the sign on its right, and matches the letters produced by
-- the vowel rules at the top of data["kn"].
local vowels = {
	["ಇ"] = U(0xCBF),
	["ಉ"] = U(0xCC1),
	["ಋ"] = U(0xCC3),
	["ಌ"] = U(0xCE2),
	["ಎ"] = U(0xCC6),
	["ಏ"] = U(0xCC7),
	["ಒ"] = U(0xCCA),
	["ಓ"] = U(0xCCB),
	["ಆ"] = U(0xCBE),
	["ಈ"] = U(0xCC0),
	["ಊ"] = U(0xCC2),
	["ೠ"] = U(0xCC4),
	["ೡ"] = U(0xCE3),
	["ಐ"] = U(0xCC8),
	["ಔ"] = U(0xCCC),
}

-- Convert independent vowels to diacritics after consonants. Must go after all consonant conversions.
-- pairs() order is unspecified, which is fine here: each rule targets a different vowel letter.
for independentForm, diacriticalForm in pairs(vowels) do
	table.insert(data["kn"], {"(" .. consonant .. ")" .. independentForm, "%1" .. diacriticalForm})
end

-- This must go last, after independent vowels are converted to diacritics, or "aï", "aü" won't work.
-- Deleting ಅ after a consonant leaves the bare consonant letter.
table.insert(data["kn"], {"(" .. consonant .. ")ಅ", "%1"})

-- Staged ASCII -> diacritic transliteration shortcuts. It is stored under its own key
-- instead of reassigning `data`, which would discard the rule list built above.
-- NOTE(review): every key in this table was lost in the source. "R", "RR", "lR" and
-- "lRR" are named in the comments of data["kn"]; the "kn-tr" name, the stage numbers
-- and the remaining ASCII keys follow Harvard-Kyoto-style conventions and are
-- presumed, not confirmed -- verify against the original module before relying on them.
data["kn-tr"] = {
	[1] = {
		["A"] = "ā",
		["I"] = "ī",
		["U"] = "ū",
		["E"] = "ē",
		["O"] = "ō",
		["J"] = "ñ",
		["T"] = "ṭ",
		["D"] = "ḍ",
		["L"] = "ḷ",
		["N"] = "ṇ",
		["G"] = "ṅ",
		["z"] = "ś",
		["S"] = "ṣ",
		["M"] = "ṃ",
		["H"] = "ḥ",
	},
	[2] = {
		["lR"] = "l̥",
		["lRR"] = "ḹ",
	},
	[3] = {
		["R"] = "ṛ",
		["RR"] = "ṝ",
	},
}

return data