local data = {}
local U = require("Module:string/char")
-- Kannada combining signs, produced from their Unicode code points via U().
local candrabindu = U(0xC81) -- U+0C81 KANNADA SIGN CANDRABINDU
local anusvAra = U(0xC82) -- U+0C82 KANNADA SIGN ANUSVARA
local visarga = U(0xC83) -- U+0C83 KANNADA SIGN VISARGA
local virAma = U(0xCCD) -- U+0CCD KANNADA SIGN VIRAMA
local nuktA = U(0xCBC) -- U+0CBC KANNADA SIGN NUKTA
local avagraha = "ಽ" -- U+0CBD KANNADA SIGN AVAGRAHA
-- Every Kannada consonant letter the rules below treat as a consonant.
local consonants = "ಕಖಗಘಙಚಛಜಝಞಟಠಡಢಣತಥದಧನಪಫಬಭಮಯರಱಲವಶಷಸಹಳೞ"
-- Pattern fragment matching exactly one consonant letter, optionally followed
-- by a nukta. The character class is required: without it the fragment is just
-- "optional nukta", which can match the empty string, so every rule that
-- captures "(" .. consonant .. ")" would fire between arbitrary characters.
-- NOTE(review): the class contains multi-byte characters, so this assumes the
-- rules are applied with a Unicode-aware gsub (e.g. mw.ustring.gsub) - confirm
-- against the consumer of this data module.
local consonant = "[" .. consonants .. "]" .. nuktA .. "?"
local acute = U(0x301) -- combining acute
-- Ordered list of substitution rules. Each entry is {pattern, replacement},
-- where the replacement is either a string (may contain %1/%2 back-references)
-- or a lookup table.
-- NOTE(review): in this copy the keys of every lookup table below (the text
-- before each " = ") are missing, and several patterns look truncated ("a",
-- ".h", ".", "" and the empty second capture in "(ಅ)()"). Bracketed text such
-- as ["key"] and [...] character classes appears to have been stripped, which
-- leaves this constructor syntactically invalid. The assignment target may
-- likewise have lost an index (data[...] = {). Restore from the upstream
-- module before use.
data = {
-- Two-element vowels or vowel sequences.
{"a", { = "ಐ", = "ಔ", = "ಅಇ", = "ಅಉ",
= "ಅಓ"}},
{"l̥̄", "ೡ"},
{"l̥", "ಌ"},
-- Digraphs with 'h':
{".h", { = "ಖ", = "ಘ", = "ಛ", = "ಝ",
= "ಠ", = "ಢ", = "ಥ", = "ಧ",
= "ಫ", = "ಭ",}},
-- Non-ASCII single characters
{".", { = "ೡ", = anusvAra, = visarga,
= "ಙ", = "ಞ", = "ಣ", = "ಶ",
= "ಷ", = "ಆ", = "ಈ", = "ಊ",
= "ಎ", = "ಒ", = "ೠ", = "ಳ",
= "ಟ", = "ಡ", = "", = "ಋ",}},
-- ASCII letters
{"", {n = "ನ", m = "ಮ", y = "ಯ", r = "ರ",
l = "ಲ", v = "ವ", s = "ಸ", a = "ಅ",
i = "ಇ", u = "ಉ", e = "ಏ", o = "ಓ",
k = "ಕ", g = "ಗ", c = "ಚ", j = "ಜ",
t = "ತ", d = "ದ", p = "ಪ", b = "ಬ",
h = "ಹ"}},
{"(ಅ)()", "%1%2"}, -- a-i, a-u for ಅಇ, ಅಉ (this comment previously cited the Brahmi letters 𑀅𑀇, 𑀅𑀉, apparently carried over from another script's module); must follow rules for "ai", "au"
-- A consonant at the very end of the input gets an explicit virama.
{"(" .. consonant .. ")$", "%1" .. virAma},
-- Drop the combining acute accent.
{acute, ""},
-- this rule must be applied twice because a consonant may only be in one capture per operation, so "CCC" will only recognize the first two consonants
{"(" .. consonant .. ")" .. "(" .. consonant .. ")", "%1" .. virAma .. "%2"},
{"(" .. consonant .. ")" .. "(" .. consonant .. ")", "%1" .. virAma .. "%2"},
-- Convert any Latin "i"/"u" still present at this point to the independent
-- vowel letters (presumably ones left over from the a-i / a-u rule above).
{"i", "ಇ"},
{"u", "ಉ"},
}
-- Maps each independent Kannada vowel letter to the dependent vowel sign
-- (matra) that replaces it when it follows a consonant. Inherent "ಅ" has no
-- sign and is therefore not listed here.
-- The keys were missing from this table, which made the constructor invalid
-- Lua; each key is the independent letter that corresponds, per the Unicode
-- Kannada block, to the vowel-sign code point on the same line.
local vowels = {
["ಇ"] = U(0xCBF), -- i
["ಉ"] = U(0xCC1), -- u
["ಋ"] = U(0xCC3), -- vocalic r
["ಌ"] = U(0xCE2), -- vocalic l
["ಎ"] = U(0xCC6), -- e
["ಏ"] = U(0xCC7), -- ee
["ಒ"] = U(0xCCA), -- o
["ಓ"] = U(0xCCB), -- oo
["ಆ"] = U(0xCBE), -- aa
["ಈ"] = U(0xCC0), -- ii
["ಊ"] = U(0xCC2), -- uu
["ೠ"] = U(0xCC4), -- vocalic rr
["ೡ"] = U(0xCE3), -- vocalic ll
["ಐ"] = U(0xCC8), -- ai
["ಔ"] = U(0xCCC), -- au
}
-- Append one rule per entry of `vowels`: a captured consonant followed by the
-- independent vowel becomes the consonant followed by the vowel sign.
-- pairs() visits the entries in an unspecified order, so these rules end up in
-- `data` in no particular order relative to each other.
for letter, sign in pairs(vowels) do
	local pattern = "(" .. consonant .. ")" .. letter
	data[#data + 1] = {pattern, "%1" .. sign}
end
-- Finally strip the inherent vowel "ಅ" after a consonant. Keep this rule at the
-- end, after the independent vowels have become diacritics, or "aï", "aü"
-- won't work.
data[#data + 1] = {"(" .. consonant .. ")ಅ", "%1"}
-- NOTE(review): the comment here previously read "] to ]"; it presumably named
-- a source and a target romanization scheme as wiki links whose text has been
-- stripped. The values below are Latin letters with diacritics plus the
-- combining acute, so this looks like a table converting an ASCII-style
-- romanization to them - confirm against the upstream module.
-- NOTE(review): the keys of the outer table and of all three inner tables are
-- missing in this copy, so the constructor is not valid Lua. As written, the
-- statement also assigns a brand-new table to `data`, discarding the rule list
-- built above; the original target was presumably an indexed field
-- (data[...] = {). Restore before use.
data = {
-- First group of replacements.
 = {
 = "ā",
 = "ī",
 = "ū",
 = "ñ",
 = "ṭ",
 = "ḍ",
 = "ṇ",
 = "ṅ",
 = "ś",
 = "ṣ",
 = "ṃ",
 = "ḥ",
 = "ḹ",
 = acute,
},
-- Second group of replacements.
 = {
 = "ḷ",
 = "ṝ",
},
-- Third group of replacements.
 = {
 = "ṛ",
},
}
-- Export the table to whatever module require()s this file.
return data