This module will transliterate Middle Bengali language text.
The module should preferably not be called directly from templates or other modules.
To use it from a template, use {{xlit}}.
Within a module, use Module:languages#Language:transliterate.
For testcases, see Module:inc-mbn-translit/testcases.
tr(text, lang, sc)
Transliterates the given text, written in the script specified by the code sc and the language specified by the code lang. When the transliteration fails, returns nil.
-- Transliteration for Middle Bengali
-- Table returned at the end of the file; export.tr is attached to it further down.
local export = {}

-- Cache the two mw.ustring functions used throughout the module as locals.
local gsub, match = mw.ustring.gsub, mw.ustring.match
-- Lookup table of Latin replacement strings. export.tr passes it to gsub as the
-- replacement argument, so each match is replaced by the entry stored under it.
-- NOTE(review): every entry below has lost its key (only `= value` remains), so this
-- table constructor does not parse as Lua in this copy. The keys were presumably
-- bracketed string keys that were stripped -- restore them from the upstream module.
local char = {
-- consonants
= "k", = "kh", = "g", = "gh", = "ṅ",
= "c", = "ch", = "j", = "jh", = "ñ",
= "ṭ", = "ṭh", = "ḍ", = "ḍh", = "ṇ",
= "t", = "th", = "d", = "dh", = "n",
= "p", = "ph", = "b", = "bh", = "m",
= "j", = "r", = "l",
= "ś", = "ṣ", = "s", = "h",
= "ẏ", = "ṛ", = "ṛh",
-- vowel diacritics
= "i", = "u",
= "ri", = "e", = "ō",
= "a", = "i", = "u", = "ōi", = "ōu",
-- archaic vowel diacritics
= "ri", = "li", = "li",
-- visarga
= "ḥ",
-- vowel signs
= "o", = "i", = "u",
= "ri", = "e", = "ō",
= "a", = "i", = "u", = "ōi", = "ōu",
-- archaic vowel signs
= "ri", = "li", = "li",
-- virama (maps to the empty string, i.e. it is dropped from the output)
= "",
-- chandrabindu (maps to a combining tilde)
= "̃",
-- avagraha
='’',
-- anusvara
= "ṅ",
-- khanda ta
= "t",
-- numerals
= "0", = "1", = "2", = "3", = "4",
= "5", = "6", = "7", = "8", = "9",
-- punctuation
= ".", -- dãri
}
-- Character ranges written in character-class notation: consonant letters, dependent
-- vowel marks (plus Latin "o" and ’), and independent vowel letters.
-- NOTE(review): none of these three strings is referenced anywhere in the visible code.
local consonant, vowel, vowel_sign = "ক-হড়-য়", "oা-ৌ’", "অ-ঔ"
-- Pattern fragment used wherever export.tr needs to match a consonant.
-- NOTE(review): this is an empty string in this copy; presumably it was a bracketed
-- class built from `consonant` -- confirm against the upstream module.
local c = ""
-- An optional nukta followed by `c`. The mark comes first because this fragment is
-- only used in syncope_pattern, which export.tr applies to reversed words.
local cc = "়?" .. c
-- Pattern fragment used wherever export.tr needs to match a vowel.
-- NOTE(review): also an empty string in this copy; presumably a bracketed class built
-- from `vowel` and `vowel_sign` -- confirm against the upstream module.
local v = ""
-- Applied by export.tr to each reversed word: matches an "o" sitting between two
-- captured groups, and the replacement "%1%2" keeps the groups while dropping the "o".
local syncope_pattern = "(" .. v .. cc .. v .. cc .. ")o(" .. cc .. "ঁ?" .. v .. ")"
-- Returns `text` with its characters in reverse order. Positions are counted with
-- mw.ustring.len / mw.ustring.sub, so the string is reversed by mw.ustring
-- character position rather than byte by byte.
local function rev_string(text)
	local total = mw.ustring.len(text)
	local pieces = {}
	-- Walk from the last position down to the first, collecting one character each time.
	for pos = total, 1, -1 do
		pieces[#pieces + 1] = mw.ustring.sub(text, pos, pos)
	end
	return table.concat(pieces)
end
--- Transliterates `text` by running it through a fixed sequence of substitutions.
-- Every step reassigns `text`, so later substitutions operate on the output of the
-- earlier ones and the statement order is significant.
-- @param text string to transliterate
-- @param lang language code; not read anywhere in this function body
-- @param sc script code; not read anywhere in this function body
-- @param override not read anywhere in this function body
-- @return nil when the final check matches and the global `mode` is not "debug";
--   otherwise the result normalized with mw.ustring.toNFC
-- NOTE(review): many patterns below contain empty captures "()" or empty strings where
-- a character class would be expected. The bracketed sets were presumably lost from
-- this copy -- confirm against the upstream module before relying on any single rule.
function export.tr(text, lang, sc, override)
-- Insert a virama between a `c` match and a following ও, then undo that at the start of the text.
text = gsub(text, "(" .. c .. ")ও", "%1্ও")
text = gsub(text, "^(" .. c .. ")্ও", "%1ও")
-- Virama insertion/removal between two captures. Both captures are empty in this copy,
-- so the intended character sets cannot be read from here.
text = gsub(text, "()()", "%1্%2")
text = gsub(text, "^()্()", "%1%2")
text = gsub(text, "()্()", "%1%2")
-- Rewrite কব as ক + virama + Latin "b", except at the start of the text.
text = gsub(text, "কব", "ক্b")
text = gsub(text, "^ক্b", "কb")
-- Append a virama after a `c`-virama-`c` sequence at the end of the text or before a space.
text = gsub(text, "(" .. c .. ")্(" .. c .. ")$", "%1্%2্")
text = gsub(text, "(" .. c .. ")্(" .. c .. ") ", "%1্%2্ ")
-- Insert a virama between the first two captures when a `v` match follows; undone at
-- the start of the text.
text = gsub(text, "()()(" .. v .. ")", "%1্%2%3")
text = gsub(text, "^()্()(" .. v .. ")", "%1%2%3")
-- NOTE(review): the ^ below follows a space, so it is not at the start of the pattern
-- and does not anchor; it would match a literal "^" -- confirm the intent.
text = gsub(text, " ^()্()(" .. v .. ")", " %1%2%3")
-- Insert a virama after ন, প and ফ before the captured character, except at the start
-- of the text (and, for ন only, after a space).
text = gsub(text, "ন()", "ন্%1")
text = gsub(text, "^ন্()", "ন%1")
text = gsub(text, " ন্()", " ন%1")
text = gsub(text, "প()", "প্%1")
text = gsub(text, "^প্()", "প%1")
text = gsub(text, "ফ()", "ফ্%1")
text = gsub(text, "^ফ্()", "ফ%1")
-- Replace ঞ between two `v` matches with a combining tilde.
text = gsub(text, "(" .. v .. ")ঞ(" .. v .. ")", "%1̃%2")
-- After each `c` match (with optional nukta): keep the second capture when it is
-- non-empty, otherwise insert "o".
text = gsub(text, "(" .. c .. "়?)(?)", function(a, b)
return a .. (b == "" and "o" or b) end)
-- Handle each gmatch match on its own. The word is reversed so that its end can be
-- matched with ^, edited, then reversed back and substituted into `text`.
for word in mw.ustring.gmatch(text, "+") do
local orig_word = word
word = rev_string(word)
-- Drop an "o" at the start of the reversed word (the end of the original word) when
-- it is followed by an optional nukta + `c` and an optional chandrabindu + `v`.
word = gsub(word, "^o(়?" .. c .. ")(ঁ?" .. v .. ")", "%1%2")
-- Keep removing the "o" matched by syncope_pattern until the pattern no longer matches.
while match(word, syncope_pattern) do
word = gsub(word, syncope_pattern, "%1%2")
end
-- NOTE(review): orig_word is passed as a pattern and the new word as a replacement
-- string, so pattern-magic characters or "%" in either would be interpreted rather
-- than taken literally; gsub also replaces every occurrence in `text` -- confirm.
text = gsub(text, orig_word, rev_string(word))
end
-- Virama followed by ম, য or ব becomes the placeholder ṃ, y or v; the Latin-stage
-- rules further down rewrite these placeholders.
text = gsub(text, "্ম", "ṃ")
text = gsub(text, "্য", "y")
text = gsub(text, "্ব", "v")
-- Fixed romanizations ending in "o" for িত, ৃত and ছিল at the end of the text or before a space.
text = gsub(text, "িত$", "ito")
text = gsub(text, "িত ", "ito ")
text = gsub(text, "ৃত$", "rito")
text = gsub(text, "ৃত ", "rito ")
text = gsub(text, "ছিল$", "chilo")
text = gsub(text, "ছিল ", "chilo ")
-- Replace matches with their entries in `char` (table replacement), in two passes
-- with different patterns.
text = gsub(text, ".?", char)
text = gsub(text, ".", char)
-- Pattern fragments for the Latin-stage rules below.
-- NOTE(review): all three are empty strings in this copy; presumably bracketed
-- character classes -- confirm against the upstream module.
local v_Latn = ""
local c_Latn = ""
local consonants_no_h = ""
-- inherent vowel deletion
text = gsub(text, "(".. v_Latn .. ")bo()(".. v_Latn .. ")", "%1b%2%3")
text = gsub(text, "(".. v_Latn .. ")do()(".. v_Latn .. ")", "%1d%2%3")
text = gsub(text, "(".. v_Latn .. ")dho()(".. v_Latn .. ")", "%1dh%2%3")
text = gsub(text, "(".. v_Latn .. ")lo()(".. v_Latn .. ")", "%1l%2%3")
text = gsub(text, "(".. v_Latn .. ")mo()(".. v_Latn .. ")", "%1m%2%3")
text = gsub(text, "(".. v_Latn .. ")ro(h?)(".. v_Latn .. ")", "%1r%2%3")
text = gsub(text, "goñjo$", "gonj") -- exceptional
text = gsub(text, "goñjo ", "gonj ") -- exceptional
-- Cv
text = gsub(text, "()v", "%1b")
text = gsub(text, "hv", "hb")
text = gsub(text, "udv", "udb")
text = gsub(text, "ttv", "tt")
text = gsub(text, "^sv", "ś") -- initial
text = gsub(text, "()v", "śś") -- medial
text = gsub(text, "^(" .. consonants_no_h .. "h?)v", "%1") -- initial
text = gsub(text, "(" .. consonants_no_h .. ")v", "%1%1") -- medial
text = gsub(text, "(" .. consonants_no_h .. ")hv", "%1%1h") -- medial_h
--ahb, ihb
text = gsub(text, "ahb", "aōbh")
text = gsub(text, "ihb", "iubh")
-- kṣ
text = gsub(text, "^kṣ", "kh") -- initial
text = gsub(text, "kṣ", "kkh") -- medial
text = gsub(text, "okkhṃ", "okkh") -- medial_m
-- sm
text = gsub(text, "^()ṃ(" .. v_Latn .. ")", "ś%2̃") -- initial
text = gsub(text, "()ṃ(" .. v_Latn .. ")", "śś%2̃") -- medial
-- tm
text = gsub(text, "^tṃ", "t") -- initial
text = gsub(text, "tṃ", "tt") -- medial
-- Any ṃ placeholder and any ṣ still left are reduced to m and ś.
text = gsub(text, "ṃ", "m")
text = gsub(text, "ṣ", "ś")
-- rules for changing s to ś (applicable for native words only)
text = gsub(text, "s(".. v_Latn .. ")$", "ś%1")
text = gsub(text, "s(".. v_Latn .. ") ", "ś%1 ")
text = gsub(text, "s()", "ś%1")
text = gsub(text, "os$", "oś")
text = gsub(text, "os ", "oś ")
text = gsub(text, "śl", "sl")
text = gsub(text, "śr", "sr")
text = gsub(text, "sp", "śp")
text = gsub(text, "^śp", "sp")
text = gsub(text, " śp", " sp")
-- visarga deletion
text = gsub(text, "ḥkh", "kkh")
-- jñ
text = gsub(text, "jñ", "gy")
text = gsub(text, "ñ", "n")
-- Drop the final "o" after nḍ and rk at the end of the text or before a space.
text = gsub(text, "nḍo$", "nḍ")
text = gsub(text, "nḍo ", "nḍ ")
text = gsub(text, "rko$", "rk")
text = gsub(text, "rko ", "rk ")
-- Add "o" after a `v_Latn` match + h at the end of the text or before a space, then
-- turn "aho" back into "ah" in the same positions.
text = gsub(text, "(" .. v_Latn .. ")h$", "%1ho")
text = gsub(text, "(" .. v_Latn .. ")h ", "%1ho ")
text = gsub(text, "()aho$", "%1ah")
text = gsub(text, "()aho ", "%1ah ")
-- ṇn gets an "o" inserted; afterwards every ṇ becomes n.
text = gsub(text, "ṇn", "ṇon")
text = gsub(text, "ṇ", "n")
-- Rules producing ê.
text = gsub(text, "^ek", "êk")
text = gsub(text, "^oya", "ê")
text = gsub(text, "^eya", "ê")
text = gsub(text, "^(" .. consonants_no_h .. "h?)ya", "%1ê") -- initial
text = gsub(text, " (" .. consonants_no_h .. "h?)ya", " %1ê") -- initial
text = gsub(text, "^hya", "hê") -- h_initial
text = gsub(text, "gya", "ggê") -- g_medial
text = gsub(text, "yal$", "êl") -- final_l
text = gsub(text, "yanḍ", "ênḍ")
-- Cy
text = gsub(text, "^(" .. consonants_no_h .. "h?)y", "%1") -- initial
text = gsub(text, "(" .. consonants_no_h .. ")y", "%1%1") -- medial
text = gsub(text, "(" .. consonants_no_h .. ")hy", "%1%1h") -- medial_h
text = gsub(text, "^hy", "jh") -- h_initial
text = gsub(text, "hy", "jjh") -- h_medial
text = gsub(text, "ry", "rj")
-- Adjustments of "o" around ẏ and ō.
text = gsub(text, "ẏo()()", "ẏ%1%2")
text = gsub(text, "ẏoō", "ẏō")
text = gsub(text, "oō$", "ō")
text = gsub(text, "()ẏ()", "%1ẏo%2")
text = gsub(text, "()ẏ$", "%1ẏo")
text = gsub(text, "śṭh$", "śṭho")
text = gsub(text, "^(h?)()", "%1o%2")
-- Add "o" after b / bh at the end of the text or before a space; the l...bho rules
-- then remove it again for that shape.
text = gsub(text, "()b$", "%1bo")
text = gsub(text, "()b ", "%1bo ")
text = gsub(text, "(h?)ob$", "%1obo") -- exceptional
text = gsub(text, "(h?)ob ", "%1obo ") -- exceptional
text = gsub(text, "()bh$", "%1bho")
text = gsub(text, "()bh ", "%1bho ")
text = gsub(text, "^l()bho$", "l%1bh")
text = gsub(text, "^l()bho ", "l%1bh ")
text = gsub(text, "lona$", "lna")
text = gsub(text, "nola$", "nla")
-- ẏ after ō (with or without a combining tilde) becomes ẇ.
text = gsub(text, "ōẏ", "ōẇ")
text = gsub(text, "ō̃ẏ", "ō̃ẇ")
-- Collapse a doubled "o".
text = gsub(text, "oo", "o")
-- NOTE(review): `mode` is neither a parameter nor a local of this function (the fourth
-- parameter is named `override`), so this reads a global; if that global is unset the
-- comparison is always true. The pattern passed to match is also an empty string in
-- this copy -- confirm both against the upstream module.
if match(text, "") and mode ~= "debug" then
return nil
else
return mw.ustring.toNFC(text)
end
end
return export