-- Warning: This module is incompatible with Tamil Brahmi. It deliberately obliterates the vowel-length distinction on 'e' and 'o'.
-- Module-level table; it is assigned and appended to below and is the value
-- this file returns.
local data = {}
-- Helper loaded from Module:string/char. It is only ever called here with a
-- single numeric code point, and its result is used as a string
-- (presumably it returns the character for that code point -- confirm
-- against that module).
local U = require("Module:string/char")
-- Individual Brahmi signs, built from their Unicode code points.
local anusvAra = U(0x11001) -- U+11001 BRAHMI SIGN ANUSVARA
local visarga = U(0x11002) -- U+11002 BRAHMI SIGN VISARGA
local virAma = U(0x11046) -- U+11046 BRAHMI VIRAMA
-- The Brahmi consonant letters, concatenated into one string.
local consonants = "𑀓𑀔𑀕𑀖𑀗𑀘𑀙𑀚𑀛𑀜𑀝𑀞𑀟𑀠𑀡𑀢𑀣𑀤𑀥𑀦𑀧𑀨𑀩𑀪𑀫𑀬𑀭𑀮𑀯𑀰𑀱𑀲𑀳𑀴𑀵𑀶𑀷"
-- Pattern fragment that the rules below wrap in "(" .. ")" captures.
-- NOTE(review): as written this is the empty string, so those captures match
-- nothing and `consonants` is never used. Presumably this was a character
-- class built from `consonants` whose bracketed text was lost -- confirm
-- against the upstream module before relying on it.
local consonant = ""
local acute = U(0x301) -- combining acute
-- Ordered list of conversion rules. Each rule is a two-element array:
--   [1] a pattern string, and
--   [2] either a replacement string (may contain %1/%2 capture references)
--       or a lookup table of replacements.
-- Several comments below state ordering requirements between rules, so the
-- sequence must be preserved. The code that applies these rules is not
-- visible in this file (presumably a gsub-style converter -- confirm).
-- NOTE(review): every lookup-table entry of the form ` = "..."` has lost its
-- key, and some patterns look truncated; bracketed text appears to have been
-- stripped from this copy. As written this is not valid Lua. Restore the keys
-- and patterns from the upstream module rather than guessing them.
data = {
-- Priority digraphs
-- (vowel sequences; these must be handled before the single-letter rules)
{".", { = "𑀐", = "𑀒", = "𑀅𑀇", = "𑀅𑀉",
= "𑀅𑀑",}},
-- Digraphs with 'h'
-- (the pattern matches any one character followed by "h"; the values are
-- the aspirated Brahmi consonant letters)
{".h", { = "𑀔", = "𑀖", = "𑀙", = "𑀛",
= "𑀞", = "𑀠", = "𑀣", = "𑀥",
= "𑀨", = "𑀪", }},
-- Two spellings of the long vocalic l map to the same Brahmi letter; the
-- short form "l̥" is handled after the longer "l̥̄" that begins with it.
{"ḹ", "𑀎"},
{"l̥̄", "𑀎"},
{"l̥", "𑀍"},
-- Single letters
{".", { = anusvAra, = visarga,
= "𑀗", = "𑀜", = "𑀡", n = "𑀦",
m = "𑀫", y = "𑀬", r = "𑀭", l = "𑀮",
v = "𑀯", = "𑀰", = "𑀱", s = "𑀲",
a = "𑀅", = "𑀆", i = "𑀇", = "𑀈",
u = "𑀉", = "𑀊", e = "𑀏", o = "𑀑",
= "𑀌", = "𑀴",
-- {"ḷ", "𑀍"}, -- Only Sanskrit uses this as a vowel.
k = "𑀓", g = "𑀕", c = "𑀘", j = "𑀚",
= "𑀝", = "𑀟", t = "𑀢", d = "𑀤",
p = "𑀧", b = "𑀩", h = "𑀳", = "",
= "𑀋",}},
-- NOTE(review): the second capture in the next pattern is empty as written;
-- part of the pattern appears to be missing -- confirm upstream.
{"(𑀅)()", "%1%2"}, -- a-i, a-u for 𑀅𑀇, 𑀅𑀉; must follow rules for "ai", "au"
-- Append a virama after a match of `consonant` at the end of the string.
{"(" .. consonant .. ")$", "%1" .. virAma},
-- Remove every combining acute accent.
{acute, ""},
-- Insert a virama between two adjacent matches of `consonant`.
-- this rule must be applied twice because a consonant may only be in one capture per operation, so "CCC" will only recognize the first two consonants
{"(" .. consonant .. ")" .. "(" .. consonant .. ")", "%1" .. virAma .. "%2"},
{"(" .. consonant .. ")" .. "(" .. consonant .. ")", "%1" .. virAma .. "%2"},
-- Convert any Latin "i"/"u" still present at this point to the independent
-- Brahmi vowel letters.
{"i", "𑀇"},
{"u", "𑀉"},
}
-- Maps a vowel in its independent form (the key) to its diacritical form (the
-- value), as the loop variable names below indicate. The values are code
-- points in the Brahmi dependent vowel sign range (U+11038 to U+11045).
-- NOTE(review): the keys of every entry are missing in this copy (bracketed
-- text appears to have been stripped), so this is not valid Lua as written.
-- Restore the keys from the upstream module.
local vowels = {
= U(0x1103A),
= U(0x1103C),
= U(0x1103E),
= U(0x11040),
= U(0x11042),
= U(0x11044),
= U(0x11038),
= U(0x1103B),
= U(0x1103D),
= U(0x1103F),
= U(0x11041),
= U(0x11043),
= U(0x11045),
}
-- For each vowel, append a rule to `data` that rewrites a match of
-- `consonant` followed by the independent form into the captured text
-- followed by the diacritical form. `pairs` visits entries in an unspecified
-- order, so the relative order of these generated rules is not fixed.
for independentForm, diacriticalForm in pairs(vowels) do
table.insert(data, {"(" .. consonant .. ")" .. independentForm, "%1" .. diacriticalForm})
end
-- Final rule: drop an "𑀅" that directly follows a match of `consonant`,
-- keeping only the captured text.
-- This must go last, after independent vowels are converted to diacritics, or "aï", "aü" won't work.
table.insert(data, {"(" .. consonant .. ")𑀅", "%1"})
-- NOTE(review): the original heading comment here reads "] to ]"; two
-- bracketed names appear to have been lost (presumably "<source> to
-- <target>" -- confirm against the upstream module).
-- A table of three sub-tables whose values are Latin transliteration strings.
-- NOTE(review): as written, this statement assigns a brand-new table to
-- `data`, so the returned value contains only what is defined here. The
-- missing keys throughout suggest the assignment target also lost a bracketed
-- index -- confirm upstream. Every key in this table is missing in this copy,
-- so this is not valid Lua as written.
data = {
-- First group: long vowels, e/o, consonants with diacritics, and the
-- combining acute accent.
= {
= "ā",
= "ī",
= "ū",
= "e", -- These two short vowels are transcriptional additions,
= "o", -- used in Pischel's transcription of Prakrit.
= "ñ",
= "ṭ",
= "ḍ",
= "ṇ",
= "ṅ",
= "ś",
= "ṣ",
= "ṃ",
= "ḥ",
= "y",
= "l̥̄",
= acute,
},
-- Second group: values "l̥" and "ṝ".
= {
= "l̥",
= "ṝ",
},
-- Third group: value "ṛ".
= {
= "ṛ",
},
}
-- Hand the table to whatever loads this file.
return data