Bengali IPA pronunciation module. See {{bn-IPA}}.
Module:bn-IPA/sandbox/testcases:
22 of 35 tests failed. (refresh)
Text | Expected | Actual | Comments | |
---|---|---|---|---|
অল্পসংখ্যক (olpośoṅkhok) | ɔlpoʃɔŋkʰɔk | ɔlpɔʃɔŋkʰɔk | ||
উত্তর (uttor) | ut̪ːoɹ | ut̪ːɔɾ | ||
অ্যান্টার্কটিকা (ênṭarkṭika) | ɛntaɹktika | ænʈaɾkʈika | ||
দেশ (deś) | d̪eʃ | d̪eʃ | ||
আমার (amar) | amaɹ | amaɾ | ||
খেলনা (khelna) | kʰɛlna | kʰelna | ||
বিবাহ (bibaho) | bibaɦo | bibaɦo | ||
গম (gom) | ɡɔm | ɡɔm | ||
খরগোশ (khorgōś) | kʰɔɹɡoʃ | kʰɔɾɡoʃ | ||
সংস্থান (śoṅsthan) | ʃɔŋst̪ʰan | ʃɔŋst̪ʰan | ||
মধু (modhu) | mod̪ʱu | mod̪ʱu | ||
মিঞা (mĩa) | mĩa | mĩa | ||
শাস্ত্র (śastro) | ʃast̪ɹo | ʃast̪ɾo | ||
সমাচার (śomacar) | ʃɔmatɕaɾ | ʃɔmatʃaɾ | ||
শ্রাবণ (srabon) | sɹabon | sɾabɔn | ||
ভাই (bhai) | bʱai̯ | bʱai̯ | ||
দৃষ্টি (driśṭi) | d̪ɹiʃti | d̪ɾiʃʈi | ||
শক্তি (śokti) | ʃokt̪i | ʃokt̪i | ||
ওস্তাদ (ōstad) | ost̪ad̪ | ost̪ad̪ | ||
পঙ্কজ (poṅkoj) | pɔŋkodʑ | pɔŋkɔdʒ | ||
মালা (mala) | mala | mala | ||
দেওয়াল (deōẇal) | d̪eo̯al | d̪eo̯al | ||
নিরস্ত্র (nirostro) | niɹɔst̪ɹo | niɾɔst̪ɾo | ||
নিবৃত্ত (nibritto) | niɹbɹit̪ːo | nibɾit̪ːo | ||
মৃত্যুঞ্জয় (mrittunjoẏ) | mɹit̪ːundʑɔe̯ | mɾit̪ːundʒɔe̯ | ||
গর্ভপাত (gorbhopat) | ɡɔɹbʱopat | ɡɔɾbʱɔpat̪ | ||
গর্ভ (gorbho) | ɡɔɹbʱo | ɡɔɾbʱo | ||
বস্ত্র (bostro) | bɔst̪ɹo | bɔst̪ɾo | ||
যক্ষ্মা (jokkha) | dʑɔkːʰa | dʒokʰːa | ||
সংখ্যা (śoṅkha) | ʃɔŋkʰa | ʃɔŋkʰa | ||
সম্ভ্রান্ত (śombhranto) | ʃɔmbʱɹant̪o | ʃɔmbʱɾant̪o | ||
ইনফ্লুয়েঞ্জা (inphluẏenja) | influendʑa | inpʰlu̯endʒa | ||
পশ্চিমবঙ্গ (pościmboṅgo) | poʃtɕimbɔŋɡo | pɔʃtʃimbɔŋɡo | ||
নয়ন (noẏon) | nɔe̯on | nɔe̯on | ||
জিহ্বা (jiubha) | dʑiubʱa | dʒiu̯bʱa | ||
local export = {}
local lang = require("Module:languages").getByCode("bn")
local sc = require("Module:scripts").getByCode("Beng")
local m_IPA = require("Module:IPA")
local m_a = require("Module:accent qualifier")
local gsub = mw.ustring.gsub
local gmatch = mw.ustring.gmatch
local find = mw.ustring.find
-- Lookup table handed to gsub in export.toIPA (as the replacement for the
-- pattern ".") to turn transliteration characters into IPA symbols.
-- NOTE(review): every entry below has lost the key that should precede "=",
-- so this constructor is not valid Lua as written. The keys were presumably
-- bracketed string keys dropped during extraction; restore them from the
-- module's page history before using this file.
local correspondences = {
= "ŋ", = "ɡ",
= "tʃ", = "dʒ",
= "ʈ", = "ɖ",
= "t̪", = "d̪",
= "e̯", = "ɾ", = "l",
= "ʃ", = "ɦ",
= "ɽ",
= "pʰ", = "bʱ",
= "e̯", = "o̯",
= "ɔ", = "ɔ",
= "i", = "o",
= "u", = "æ",
= "ɔ̃", = "õ", = "ĩ", = "ũ", = "æ̃",
}
-- Vowel letters as a flat string; not referenced in the visible part of this file.
local vowels = "aāiīuūoêɔɔ̃ɛeææ̃ãā̃ễẽĩī̃õũū̃"
-- Pattern fragments spliced into larger patterns as "(" .. vowel .. ")" and
-- "(" .. consonants .. ")" by export.toIPA and export.narrow_IPA.
-- NOTE(review): both are empty strings here, which turns those groups into
-- empty position captures; presumably each held a character class -- confirm.
local vowel = ""
local consonants = ""
-- Prefix patterns concatenated with "h" in export.toIPA; the match is then
-- rewritten as "%1ʱ" (weak) or "%1ʰ" (aspirate).
-- NOTE(review): "()" is an empty capture; a character class of the relevant
-- consonants appears to be missing -- confirm.
local weak = "()"
local aspirate = "()"
--- Split `text` into pieces, one code point at a time.
-- A code point is appended to the pending piece only while the combined
-- string matches "^$" or "^h$"; otherwise the pending piece is stored and a
-- new one is started. A trailing space is appended to the input so that the
-- last real piece is flushed into the result.
-- NOTE(review): "^$" only matches the empty string, so in practice nothing
-- but a lone "h" is ever accumulated; the pattern presumably contained a
-- consonant character class that has been lost -- confirm.
-- @param text string to split
-- @return array of pieces (the first one may be the empty string)
local function find_consonants(text)
    local pieces = {}
    local pending = ""
    for codepoint in mw.ustring.gcodepoint(text .. " ") do
        local char = mw.ustring.char(codepoint)
        local candidate = pending .. char
        if not (find(candidate, "^$") or find(candidate, "^h$")) then
            -- The candidate is not a recognised cluster: emit what we have.
            pieces[#pieces + 1] = pending
            pending = char
        else
            pending = candidate
        end
    end
    return pieces
end
-- Letters whose IPA symbol is the same as their transliteration: register
-- each one in the lookup table as mapping to itself.
local identical = "knlsfz"
for character in gmatch(identical, ".") do
    -- Index by the letter. Assigning to `correspondences` itself would replace
    -- the whole lookup table with a single-character string.
    correspondences[character] = character
end
--- Transliterate `text` with the Bengali language object.
-- Only the first value returned by lang:transliterate is passed on.
-- @param text string to transliterate
-- @return the first result of lang:transliterate(text)
local function transliterate(text)
    local romanised = lang:transliterate(text)
    return romanised
end
--- Build a link to `term` via Module:links, using this module's language
-- and script objects.
-- @param term the term to link
-- @return whatever Module:links' full_link returns for that term
function export.link(term)
    local link_data = { term = term, lang = lang, sc = sc }
    return require("Module:links").full_link(link_data)
end
--- Convert a Bengali term into a phonemic IPA string.
--
-- The term is transliterated when its best script reports
-- isTransliterated(), adjusted by style-specific and general rewrite rules,
-- mapped character by character through `correspondences`, and finally
-- post-processed (syllable marks, gemination, glides).
--
-- NOTE(review): many patterns below contain an empty capture "()". In Lua
-- patterns that is a position capture, so "%1" in the replacement inserts a
-- numeric string index rather than a matched character. These look like
-- character classes lost from the source and are left untouched here --
-- restore them from the module history.
--
-- @param text  the term to convert
-- @param style "desanskritized", "colloquial" or "vanga" enable extra
--              rewrite rules; any other value (callers in this file pass
--              "formal") applies only the general rules
-- @return the IPA string, without enclosing slashes
-- Raises an error if transliteration yields nil/false.
function export.toIPA(text, style)
    text = gsub(text, '॰', '-')
    local translit = text
    if lang:findBestScript(text):isTransliterated() then
        translit = transliterate(text)
    end
    if not translit then
        error('The term "' .. text .. '" could not be transliterated.')
    end
    if style == "desanskritized" then
        translit = gsub(translit, "ṛh", "ṛ")
    end
    if style == "colloquial" then
        translit = gsub(translit, "lh", "ll") -- Chatterji
        translit = gsub(translit, "mh", "mm") -- Chatterji
        translit = gsub(translit, "nh", "nn") -- Chatterji
        -- The digraph must be handled before the bare letter; in the reverse
        -- order every "ṛ" is already gone and "ṛh" would come out as "rh".
        translit = gsub(translit, "ṛh", "r")
        translit = gsub(translit, "ṛ", "r")
        translit = gsub(translit, "s", "ś")
        translit = gsub(translit, "ś()", "s%1")
        translit = gsub(translit, "z", "j")
    end
    if style == "vanga" then
        -- NOTE(review): "bʱ" here and "pʰ" below contain ʱ/ʰ, which this
        -- function only introduces further down (the aspirate/weak rules), so
        -- they can match only if the input already contains them -- confirm
        -- whether "bh"/"ph" was intended.
        translit = gsub(translit, "bʱ", "v")
        -- Digraph first: once "c" has become "ts", "ch" can no longer match.
        translit = gsub(translit, "ch", "s")
        translit = gsub(translit, "c", "ts")
        translit = gsub(translit, "j(h?)", "z")
        translit = gsub(translit, "pʰ", "f")
        -- Digraph first, as in the colloquial branch.
        translit = gsub(translit, "ṛh", "r")
        translit = gsub(translit, "ṛ", "r")
        translit = gsub(translit, "s", "ś")
        translit = gsub(translit, "ś()", "s%1")
    end
    translit = gsub(translit, "ś()", "s%1")
    translit = gsub(translit, "^śp", "sp")
    -- "$" is not special in a replacement string, so it must not appear
    -- there: the old replacement "sk$" appended a literal dollar sign.
    translit = gsub(translit, "śk$", "sk")
    translit = gsub(translit, "śk(" .. consonants .. ")", "sk%1")
    -- vowels
    translit = gsub(translit, "%-$", "")
    translit = gsub(translit, "^%-", "")
    translit = gsub(translit, ",", "")
    -- A space becomes "..", which is turned into the tie "‿" after the
    -- character mapping below.
    translit = gsub(translit, " ", "..")
    translit = gsub(translit, "%.ː", "ː.")
    translit = gsub(translit, "%.̃", "̃")
    translit = gsub(translit, "aẏ()", "a%1")
    translit = gsub(translit, "eẏ()", "e%1")
    translit = gsub(translit, "êẏ()", "ê%1")
    translit = gsub(translit, "iẏ()", "i%1")
    translit = gsub(translit, "ĩẏ()", "ĩ%1")
    translit = gsub(translit, "ito$", "itō")
    translit = gsub(translit, "oẏ()", "o%1")
    translit = gsub(translit, "õẏ()", "õ%1")
    translit = gsub(translit, "ōẇ()", "ō%1")
    translit = gsub(translit, "ō̃ẇ()", "ō̃%1")
    translit = gsub(translit, "uẏ()", "u%1")
    translit = gsub(translit, "ũẏ()", "ũ%1")
    translit = gsub(translit, "o(" .. consonants .. "h?)()", "ō%1%2") -- vowel harmony, per Chatterji
    translit = gsub(translit, "ho$", "hō")
    translit = gsub(translit, "(" .. vowel .. ")h$", "%1")
    translit = gsub(translit, "o$", "ō")
    translit = gsub(translit, "o ", "ō ")
    translit = gsub(translit, "ok(" .. consonants .. ")", "ōk%1")
    translit = gsub(translit, "()ho", "%1hō")
    -- Mark aspiration: "h" after the `aspirate` prefix becomes ʰ, after the
    -- `weak` prefix ʱ.
    translit = gsub(translit, aspirate .. "h", '%1ʰ')
    translit = gsub(translit, weak .. "h", '%1ʱ')
    -- Map every single character through the lookup table; characters with no
    -- entry are kept as they are.
    local result = gsub(translit, ".", correspondences)
    result = gsub(result, "%.?%-", ".")
    result = gsub(result, "%.%.", "‿")
    -- formatting
    result = gsub(result, "ː̃", "̃ː")
    result = gsub(result, "ː%.̃", "̃ː.")
    result = gsub(result, "%.$", "")
    result = gsub(result, "^ɾ", "r")
    -- force final ɔe̯
    result = gsub(result, "()oe̯$", "%1ɔe̯")
    -- gemination: a doubled consonant becomes consonant + length mark, with
    -- any aspiration mark placed before the length mark
    result = gsub(result, "kk(ʰ?)", "k%1ː")
    result = gsub(result, "ɡɡ(ʱ?)", "ɡ%1ː")
    result = gsub(result, "tʃtʃ(ʰ?)", "tʃ%1ː")
    result = gsub(result, "dʒdʒ(ʱ?)", "dʒ%1ː")
    result = gsub(result, "ʈʈ(ʰ?)", "ʈ%1ː")
    result = gsub(result, "ɖɖ(ʱ?)", "ɖ%1ː")
    result = gsub(result, "t̪t̪(ʰ?)", "t̪%1ː")
    result = gsub(result, "d̪d̪(ʱ?)", "d̪%1ː")
    result = gsub(result, "pp", "pː")
    result = gsub(result, "ff", "fː")
    result = gsub(result, "bb(ʱ?)", "b%1ː")
    result = gsub(result, "ɾɾ", "ɾ") -- মহররম
    result = gsub(result, "ll", "lː")
    result = gsub(result, "mm", "mː")
    result = gsub(result, "nn", "nː")
    result = gsub(result, "ʃʃ", "ʃː")
    result = gsub(result, "ss", "sː")
    -- drop the length mark again after a nasal + stop sequence
    result = gsub(result, "ŋk(ʰ?)ː", "ŋk%1")
    result = gsub(result, "ŋɡ(ʱ?)ː", "ŋɡ%1")
    result = gsub(result, "ntʃ(ʰ?)ː", "ntʃ%1")
    result = gsub(result, "ndʒ(ʱ?)ː", "ndʒ%1")
    result = gsub(result, "nʈ(ʰ?)ː", "nʈ%1")
    result = gsub(result, "nɖ(ʱ?)ː", "nɖ%1")
    result = gsub(result, "nt̪(ʰ?)ː", "nt̪%1")
    result = gsub(result, "nd̪(ʱ?)ː", "nd̪%1")
    -- vowel sequences: mark the non-syllabic member
    result = gsub(result, "ae", "ae̯")
    result = gsub(result, "iu", "iu̯")
    result = gsub(result, "i(" .. vowel .. ")", "i̯%1")
    result = gsub(result, "i̯u̯", "iu̯")
    result = gsub(result, "oa", "o̯a")
    result = gsub(result, "ɔe̯ɔ", "ɔe̯o")
    result = gsub(result, "ɔo", "ɔo̯")
    result = gsub(result, "u(" .. vowel .. ")", "u̯%1")
    result = gsub(result, "()i", "%1i̯")
    result = gsub(result, "^ui̯", "u̯i")
    -- collapse a doubled non-syllabic mark into one
    result = gsub(result, "()̯̯", "%1̯")
    return result
end
--- Derive a narrow transcription from a phonemic IPA string.
-- The rewrite rules are applied one after another, in the order listed;
-- later rules see the output of earlier ones.
-- NOTE(review): several patterns contain an empty capture "()", which Lua
-- treats as a position capture (so "%1" yields a numeric index); they look
-- like character classes lost from the source -- confirm before relying on
-- this function.
-- @param ipa phonemic IPA string (as returned by export.toIPA)
-- @return the rewritten string
function export.narrow_IPA(ipa)
    local rules = {
        -- lenition before dental
        { "dʒ()", "z%1" },
        { "dʒʱ()", "z%1" },
        -- word-final deaspiration
        { "ɖʱ$", "ɖ" },
        { "dʒʱ$", "dʒ" },
        { "d̪ʱ$", "d̪" },
        { "ɡʱ$", "ɡ" },
        { "pʰ$", "p" },
        { "ʈʰ$", "ʈ" },
        { "tʃʰ$", "tʃ" },
        { "t̪ʰ$", "t̪" },
        -- exceptions
        { "bʱ$", "v" },
        { "kʰ$", "x" },
        -- dental and post-alveolar lateral
        { "l()", "l̪%1" },
        { "l()", "ɭ%1" },
        -- dental and post-alveolar nasal
        { "n()", "n̪%1" },
        { "n()", "ɳ%1" },
        -- regressive assimilation
        { "b()‿p", "pː" },
        { "d()‿t", "tː" },
        { "dʒ()‿tʃ", "tʃː" },
        { "d̪()‿t̪", "t̪ː" },
        { "f‿b", "bː" },
        { "ɡ()‿k", "kː" },
        { "k()‿ɡ", "ɡː" },
        { "p()‿b", "bː" },
        { "t()‿d", "dː" },
        { "tʃ()‿dʒ", "dʒː" },
        { "t̪()‿d̪", "d̪ː" },
        { "z‿s", "sː" },
        { "zs", "sː" },
        { "sː()", "s%1" },
        -- R syncope
        { "^(" .. consonants .. ")ɾ(" .. vowel .. ")(" .. consonants .. ")ɾ", "%1ɾ%2%3ː" },
        { "()()", "%1ː%2" },
        -- intervocalic e̯
        { "(" .. vowel .. ")‿(" .. vowel .. ")", "%1e̯%2" },
        -- long vowels
        { "^(" .. vowel .. ")(" .. consonants .. ")$", "%1ː%2" },
        { "^(" .. consonants .. ")(" .. vowel .. ")(" .. consonants .. ")$", "%1%2ː%3" },
    }
    for _, rule in ipairs(rules) do
        -- gsub also returns a match count; only the string is kept.
        ipa = gsub(ipa, rule[1], rule[2])
    end
    return ipa
end
--- Template entry point: format the pronunciations labelled 'Rarh'.
-- Reads the positional arguments of the parent frame; empty arguments are
-- skipped, and when no non-empty argument is given the current page title
-- is used instead. For each term the formal transcription is listed, then
-- the desanskritized and colloquial ones when they differ from the formal
-- one. Each phonemic form (in slashes) is followed by its narrow form (in
-- square brackets) when narrow_IPA changes it.
-- @param frame the Scribunto frame of the #invoke
-- @return the qualifier followed by the formatted IPA line
function export.make(frame)
    local args = frame:getParent().args
    local pagetitle = mw.title.getCurrentTitle().text
    local p, results = {}, {}
    for _, item in ipairs(args) do
        if item ~= "" then
            table.insert(p, item)
        end
    end
    -- The args table itself is always truthy, so test the collected terms
    -- rather than `args` to decide whether to fall back to the page title.
    if #p == 0 then
        p = { pagetitle }
    end

    -- Append a phonemic transcription and, if different, its narrow form.
    local function add_pronunciation(phonemic)
        table.insert(results, { pron = "/" .. phonemic .. "/" })
        local narrow = export.narrow_IPA(phonemic)
        if narrow ~= phonemic then
            -- Emit the narrow form itself, not an empty string.
            table.insert(results, { pron = "[" .. narrow .. "]" })
        end
    end

    for _, Bengali in ipairs(p) do
        local formal = export.toIPA(Bengali, "formal")
        local desanskritized = export.toIPA(Bengali, "desanskritized")
        local colloquial = export.toIPA(Bengali, "colloquial")
        add_pronunciation(formal)
        if formal ~= desanskritized then
            add_pronunciation(desanskritized)
        end
        if formal ~= colloquial then
            add_pronunciation(colloquial)
        end
    end
    return m_a.format_qualifiers(lang, {'Rarh'}) .. ' ' .. m_IPA.format_IPA_full { lang = lang, items = results }
end
--- Template entry point: format the pronunciations labelled 'Vanga'.
-- Reads the positional arguments of the parent frame; empty arguments are
-- skipped, and when no non-empty argument is given the current page title
-- is used instead. Each term is transcribed with the "vanga" style; the
-- phonemic form (in slashes) is followed by its narrow form (in square
-- brackets) when narrow_IPA changes it.
-- @param frame the Scribunto frame of the #invoke
-- @return the qualifier followed by the formatted IPA line
function export.make_vanga(frame)
    local args = frame:getParent().args
    local pagetitle = mw.title.getCurrentTitle().text
    local p, results = {}, {}
    for _, item in ipairs(args) do
        if item ~= "" then
            table.insert(p, item)
        end
    end
    -- The args table itself is always truthy, so test the collected terms
    -- rather than `args` to decide whether to fall back to the page title.
    if #p == 0 then
        p = { pagetitle }
    end
    for _, Vanga in ipairs(p) do
        local vanga = export.toIPA(Vanga, "vanga")
        table.insert(results, { pron = "/" .. vanga .. "/" })
        local narrow = export.narrow_IPA(vanga)
        -- Compare with this function's own transcription: `formal` is not
        -- defined here, so comparing against it was always "different".
        if narrow ~= vanga then
            table.insert(results, { pron = "[" .. narrow .. "]" })
        end
    end
    return m_a.format_qualifiers(lang, {'Vanga'}) .. ' ' .. m_IPA.format_IPA_full { lang = lang, items = results }
end
return export