Hindi IPA pronunciation module. See {{hi-IPA}}.
12 of 59 tests failed. (refresh)
Text | Expected | Actual | Comments | |
---|---|---|---|---|
अशिष्ट॰ता (aśiṣṭ.tā) | ə.ʃɪʂʈ.t̪ɑː | ə.ʃɪʂʈ.t̪ɑː | syllabification | |
अशिष्ट-ता (aśiṣṭ-tā) | ə.ʃɪʂʈ.t̪ɑː | ə.ʃɪʂʈ.t̪ɑː | syllabification | |
अल्प्संख्यक (alpsaṅkhyak) | əlp.səŋ.kʰjək | əlp.səŋ.kʰjək | syllabification | |
अंडकोष (aṇḍkoṣ) | əɳɖ.koːʂ | əɳɖ.koːʂ | syllabification | |
अंग्रेज़ (aṅgrez) | əŋ.ɡɾeːz | əŋ.ɡɾeːz | syllabification | |
अंटर्क्टिका (aṇṭarkṭikā) | əɳ.ʈəɾk.ʈɪ.kɑː | əɳ.ʈəɾk.ʈɪ.kɑː | syllabification | |
मैं (ma͠i) | mɛ̃ː | mɛ̃ː | ||
देश (deś) | d̪eːʃ | d̪eːʃ | ||
मेरा (merā) | meː.ɾɑː | meː.ɾɑː | ||
खिलौना (khilaunā) | kʰɪ.lɔː.nɑː | kʰɪ.lɔː.nɑː | ||
नौटंकी (nauṭaṅkī) | nɔː.ʈəŋ.kiː | nɔː.ʈəŋ.kiː | ||
हौं (ha͠u) | ɦɔ̃ː | ɦɔ̃ː | ||
मुँह (mũh) | mũːʱ | mũːɦ | ||
माह (māh) | mɑːʱ | mɑːɦ | ||
बहना (bahnā) | bəʱ.nɑː | bəɦ.nɑː | ||
विवाह (vivāh) | ʋɪ.ʋɑːʱ | ʋɪ.ʋɑːɦ | ||
ग़म (ġam) | ɣəm | ɣəm | ||
ख़रगोश (xargoś) | xəɾ.ɡoːʃ | xəɾ.ɡoːʃ | ||
इकट्ठा (ikaṭṭhā) | ɪ.kəʈ.ʈʰɑː | ɪ.kəʈ.ʈʰɑː | ||
संस्थान (sansthān) | sən.st̪ʰɑːn | sən.st̪ʰɑːn | ||
मधु (madhu) | mə.d̪ʱuː | mə.d̪ʱuː | final u is lengthened, aspiration should not be split in syllabification | |
मियाँ (miyā̃) | miː.jɑ̃ː | mɪ.jɑ̃ː | i + y lengthens i | |
मुहाफ़ज़ाह (muhāfzāh) | mʊ.ɦɑːf.zɑːʱ | mʊ.ɦɑːf.zɑːɦ | ||
स्त्रीत्व (strītva) | st̪ɾiːt̪.ʋᵊ | st̪ɾiːt̪.ʋᵊ | ||
शास्त्र (śāstra) | ʃɑːs.t̪ɾᵊ | ʃɑːs.t̪ɾᵊ | ||
समाचार (samācār) | sə.mɑː.t͡ʃɑːɾ | sə.mɑː.t͡ʃɑːɾ | ||
श्रावण (śrāvaṇ) | ʃɾɑː.ʋəɳ | ʃɾɑː.ʋəɳ | ||
हमें (hamẽ) | ɦə.mẽː | ɦə.mẽː | ||
में (mẽ) | mẽː | mẽː | ||
भैया (bhaiyā) | bʱə.iː.jɑː | bʱə̯i.jɑː | ||
सुलह (sulah) | sʊ.ləʱ | sʊ.ləɦ | ||
दृष्टि (dŕṣṭi) | d̪ɾɪʂ.ʈiː | d̪ɾɪʂ.ʈiː | ||
सोई (soī) | soː.iː | soː.iː | ||
खाइए (khāie) | kʰɑː.ɪ.eː | kʰɑː.ɪ.eː | ||
शक्ति (śakti) | ʃək.t̪iː | ʃək.t̪iː | ||
उस्ताद (ustād) | ʊs.t̪ɑːd̪ | ʊs.t̪ɑːd̪ | ||
पंकज (paṅkaj) | pəŋ.kəd͡ʒ | pəŋ.kəd͡ʒ | ||
माला (mālā) | mɑː.lɑː | mɑː.lɑː | ||
दीवार (dīvār) | d̪iː.ʋɑːɾ | d̪iː.ʋɑːɾ | ||
सुरुची (surucī) | sʊ.ɾʊ.t͡ʃiː | sʊ.ɾʊ.t͡ʃiː | ||
निरस्त्र (nirastra) | nɪ.ɾəs.t̪ɾᵊ | nɪ.ɾəs.t̪ɾᵊ | ||
निर्वृत्त (nirvŕtt) | nɪɾ.ʋɾɪt̪t̪ | nɪɾ.ʋɾɪt̪t̪ | ||
मृत्युंजय (mŕtyuñjay) | mɾɪt̪.jʊn.d͡ʒəj | mɾɪt̪.jʊn.d͡ʒəj | ||
पितृओं (pitŕõ) | pɪt̪.ɾõː | pɪt̪.ɾõː | ||
गर्भ॰पात (garbh.pāt) | ɡəɾbʱ.pɑːt̪ | ɡəɾbʱ.pɑːt̪ | ||
गर्भ (garbh) | ɡəɾbʱ | ɡəɾbʱ | ||
वस्त्र (vastra) | ʋəs.t̪ɾᵊ | ʋəs.t̪ɾᵊ | ||
यक्ष्मा (yakṣmā) | jək.ʂmɑː | jək.ʂmɑː | ||
उत्प्रेक्षा (utprekṣā) | ʊt̪.pɾeːk.ʂɑː | ʊt̪.pɾeːk.ʂɑː | ||
झुंझलाहट (jhuñjhlāhaṭ) | d͡ʒʱʊn.d͡ʒʱlɑː.ɦəʈ | d͡ʒʱʊn.d͡ʒʱlɑː.ɦəʈ | ||
संख्या (saṅkhyā) | səŋ.kʰjɑː | səŋ.kʰjɑː | ||
घुँघरू (ghuṅghrū) | ɡʱʊŋ.ɡʱɾuː | ɡʱʊŋ.ɡʱɾuː | ||
संभ्रांत (sambhrānt) | səm.bʱɾɑːnt̪ | səm.bʱɾɑːnt̪ | ||
इन्फ़्लुएंज़ा (influenzā) | ɪn.flʊ.eːn.zɑː | ɪn.flʊ.eːn.zɑː | ||
इंफ़्लुएंज़ा (imfluenzā) | ɪn.flʊ.eːn.zɑː | ɪm.flʊ.eːn.zɑː | ||
हिमाचल प्रदेश (himācal pradeś) | /ɦɪ.mɑː.t͡ʃəl pɾə.d̪eːʃ/ | ɦɪ.mɑː.t͡ʃəl pɾə.d̪eːʃ | ||
तंक़ीद (taṅqīd) | t̪əŋ.qiːd̪ | anusvara before uvulars | ||
चेरापूंजी (cerāpūñjī) | t͡ʃeː.ɾɑː.puːn.d͡ʒiː | t͡ʃeː.ɾɑː.puːn.d͡ʒiː | ||
चेरापूंजी (cerāpūñjī) | t͡ʃeː.ɾäː.pũːn.d͡ʒiː | t͡ʃeː.ɾäː.pũːn.d͡ʒiː | nasal allophone before postalveolar |
-- Module table; the public entry points are attached to it and it is returned at the end of the file.
local export = {}
-- Language object for code "hi" and script object for code "Deva", shared by the functions below.
local lang = require("Module:languages").getByCode("hi")
local sc = require("Module:scripts").getByCode("Deva")
-- Formatting helpers used by the make* entry points (format_IPA_full, format_qualifiers).
local m_IPA = require("Module:IPA")
local m_a = require("Module:accent qualifier")
local m_str_utils = require("Module:string utilities")
-- Short local aliases for the string helpers used throughout this module.
-- NOTE(review): presumably Unicode-aware counterparts of the standard string functions -- confirm against Module:string utilities.
local find = m_str_utils.find
local gcodepoint = m_str_utils.gcodepoint
local gmatch = m_str_utils.gmatch
local gsub = m_str_utils.gsub
-- `u` turns a codepoint yielded by gcodepoint back into a character (see find_consonants).
local u = m_str_utils.char
-- Lookup table from transliteration symbols to IPA; export.toIPA applies it with
-- gsub(translit, ".", correspondences), i.e. one lookup per character.
-- NOTE(review): every entry below has lost its key (each reads `= "value"`), so the table
-- does not parse as written. The `["key"]` parts must be restored from the canonical module;
-- they cannot be inferred from the values alone.
local correspondences = {
= "ŋ", = "ɡ",
= "t͡ʃ", = "d͡ʒ",
= "ʈ", = "ɖ", = "ɳ",
= "t̪", = "d̪",
= "j", = "ɾ", = "ʋ",
= "ʃ", = "ʂ", = "ʒ", = "ʒ", = "ɦ",
= "ɽ", = "ʒ", = "l", = "l", = "ɣ", = "q", = "x", = "n", = "ɾ",
= "ə", = "ɑː", = "ɪ",
= "iː", = "oː", = "eː",
= "ʊ", = "uː", = "ɔ", = "æ",
= "ẽː", = "ʊ̃", = "õː", = "ə̃", = "ɑ̃ː", = "ɪ̃", = "ĩː",
= "oːm", = "(ɦ)", = "(ʔ)",
}
-- Style-specific substitution tables, passed as the replacement argument of gsub in export.toIPA.
-- NOTE(review): as in `correspondences`, the keys of all entries are missing in this copy
-- (each entry reads `= "value"`); restore them from the canonical module before use.

-- Applied when style == "nonpersianized".
local perso_arabic = {
= "kh", = "g", = "k", = "z", = "j", = "ph", = "",
}
-- Applied when style == "desanskritize".
local urdu = {
= "ʃ", = "n",
}
-- Applied when style == "dakhini".
local deccani = {
= "x",
}
-- Not referenced anywhere else in the visible code.
local lengthen = {
= "ā", = "ī", = "ū",
}
-- Pattern fragments shared by syllabify and export.toIPA.
-- NOTE(review): several of these look truncated in this copy -- `vowel` does not reference
-- `vowels`, and `weak_h`, `aspirate` and `syllabify_pattern` contain empty `()` / bare `+`
-- where a character class would be expected. Confirm against the canonical module.

-- Inventory of vowel symbols; not referenced by the visible code as written.
local vowels = "aāiīuūoǒŏěĕʊɪɔɔ̃ɛeæãā̃ẽĩī̃õũū̃ː"
-- Spliced into the vowel-hiatus patterns in syllabify and the ŕ rule in export.toIPA.
local vowel = "ː?"
-- export.toIPA replaces a match of this pattern with capture 1 followed by "ʱ".
local weak_h = "()h"
-- export.toIPA replaces a match of `aspirate .. "h"` with capture 1 followed by "ʰ".
local aspirate = "()"
-- Three captures (a, b, c) handed to the callback in syllabify; b is split by find_consonants.
local syllabify_pattern = "(̃?)(+)(̃?)"
-- Splits `text` into a list of chunks and returns that list.
-- Characters are accumulated in `current` for as long as `current .. ch` matches one of the
-- two patterns below; on the first character that does not fit, `current` is flushed to the
-- list and a new chunk starts with that character.
-- NOTE(review): both patterns look truncated in this copy ("^$" and "^h$" have no character
-- class), so the grouping they were meant to perform cannot be read off from here.
local function find_consonants(text)
local current = ""
local cons = {}
-- The appended space guarantees that the last real chunk is flushed; the space itself
-- ends up in `current` and is never inserted.
for cc in gcodepoint(text .. " ") do
local ch = u(cc)
if find(current .. ch, "^$") or find(current .. ch, "^h$") then
current = current .. ch
else
-- Note: if the very first character does not match, an empty string is inserted here.
table.insert(cons, current)
current = ch
end
end
return cons
end
-- Inserts "." syllable breaks into a transliterated string and returns the result.
--
-- Step 1 (run twice): every match of `syllabify_pattern` has its middle capture split by
-- find_consonants; a "." is inserted before the second chunk when there is more than one
-- chunk, otherwise before the only chunk.
-- Step 2 (run twice): a "." is placed between two adjacent matches of `vowel`.
-- Step 3: a fixed list of corrections moves the break for particular clusters.
--
-- NOTE(review): the correction patterns below contain empty `()` groups where a character
-- class would be expected; they are reproduced unchanged and should be checked against the
-- canonical module.
local function syllabify(text)
	for _ = 1, 2 do
		text = gsub(text, syllabify_pattern, function(a, b, c)
			-- Keep the chunk list local to the callback so no global is leaked.
			local b_set = find_consonants(b)
			table.insert(b_set, #b_set > 1 and 2 or 1, ".")
			return a .. table.concat(b_set) .. c
		end)
		-- NOTE(review): Lua patterns have no lookahead, so "(?=" is presumably matched
		-- literally here -- confirm how Module:string utilities' gsub treats it.
		text = gsub(text, "(" .. vowel .. ")(?=" .. vowel .. ")", "%1.")
	end
	for _ = 1, 2 do
		text = gsub(text, "(" .. vowel .. ")(" .. vowel .. ")", "%1.%2")
	end
	-- syllabification corrections
	-- () is added in front, just in case one of the (unlikely) clusters
	-- would occur after a blank space (temporarily reformatted as '..')
	text = gsub(text, '()%.()(h?)()', '%1%2%3.%4')
	text = gsub(text, '()%.()(h?)ṣ', '%1%2%3.ṣ')
	text = gsub(text, '()%.khṣ', '%1kh.ṣ') -- not kṣ/क्ष
	text = gsub(text, '()%.()()', '%1%2.%3')
	text = gsub(text, '()%.()()', '%1%2.%3')
	return text
end
-- Transliteration letters whose IPA symbol is the same character: register each one in
-- `correspondences` as mapping to itself.
local identical = "knlsfzθ"
for character in gmatch(identical, ".") do
	-- Add an entry instead of rebinding `correspondences`: it must stay a table for the
	-- per-character lookup done in export.toIPA.
	correspondences[character] = character
end
-- Runs `text` through the language object's transliterate method and hands back
-- only its first result; any further return values are discarded.
local function transliterate(text)
	local first_result = lang:transliterate(text)
	return first_result
end
-- Passes `term`, together with this module's language and script objects, to
-- Module:links' full_link and returns whatever that call returns.
function export.link(term)
	local link_data = {
		term = term,
		lang = lang,
		sc = sc,
	}
	return require("Module:links").full_link(link_data)
end
-- Converts `text` to a broad IPA string.
--   text:  the term. It is run through transliterate() when
--          lang:findBestScript(text):isTransliterated() is true; otherwise it is used as given.
--   style: compared against "nonpersianized", "dakhini" and "desanskritize"; any other
--          value (the callers also pass "persianized") applies none of those substitutions.
-- Returns the IPA string without delimiters (the make* callers add the slashes).
-- Raises an error when no transliteration is obtained.
-- NOTE(review): several gsub patterns below are empty strings or contain empty `()` groups
-- in this copy; a character class was probably lost in each. Confirm against the canonical module.
function export.toIPA(text, style)
-- '॰' is handled exactly like a hyphen from here on
text = gsub(text, '॰', '-')
local translit = text
if lang:findBestScript(text):isTransliterated() then
translit = transliterate(text)
end
if not translit then
error('The term "' .. text .. '" could not be transliterated.')
end
-- style-specific substitutions, looked up in the module-level tables
-- NOTE(review): the search pattern is "" here -- presumably a lost character class
if style == "nonpersianized" then
translit = gsub(translit, "", perso_arabic)
end
if style == "dakhini" then
translit = gsub(translit, "", deccani)
end
-- force final schwa for Hindi
translit = gsub(translit, "a~$", "ə")
if style == "desanskritize" then
-- a final ə preceded by at least three characters becomes ɑ with optional length
translit = gsub(translit, "(...)ə$", "%1ɑ(ː)")
translit = gsub(translit, "", urdu)
end
-- vowels
-- normalize the double tilde to a plain combining tilde, then rewrite the sequences
-- a+i and a+u (tilde, if any, kept) as long ɛ and ɔ
translit = gsub(translit, "͠", "̃")
translit = gsub(translit, 'a(̃?)i', 'ɛ%1ː')
translit = gsub(translit, 'a(̃?)u', 'ɔ%1ː')
-- drop a hyphen at either end of the string
translit = gsub(translit, "%-$", "")
translit = gsub(translit, "^%-", "")
-- ŕ: plain r at the end of the string or before a match of `vowel`, otherwise ri
translit = gsub(translit, "ŕ$", "r")
translit = gsub(translit, "ŕ(" .. vowel .. ")", "r%1")
translit = gsub(translit, "ŕ", "ri")
translit = gsub(translit, 'jñ', 'gy')
translit = gsub(translit, ",", "")
-- spaces are temporarily written as ".." and turned back into spaces after syllabification
translit = gsub(translit, " ", "..")
translit = syllabify(translit)
-- keep a length mark or tilde attached to the preceding syllable
translit = gsub(translit, "%.ː", "ː.")
translit = gsub(translit, "%.̃", "̃")
-- aspiration marks: ʰ after a match of `aspirate`, ʱ for a match of `weak_h`
translit = gsub(translit, aspirate .. "h", '%1ʰ')
translit = gsub(translit, weak_h, '%1ʱ')
-- per-character lookup in `correspondences`
local result = gsub(translit, ".", correspondences)
-- remove final schwa (Pandey, 2014)
-- actually weaken
result = gsub(result, "(...)ə$", "%1ᵊ")
result = gsub(result, "(...)ə ", "%1ᵊ ")
result = gsub(result, "(...)ə%.?%-", "%1ᵊ-")
-- formatting
-- hyphens become syllable breaks, ".." becomes a space again, and a tilde is ordered
-- before the length mark; a trailing break is removed
result = gsub(result, "%.?%-", ".")
result = gsub(result, "%.%.", " ")
result = gsub(result, "ː̃", "̃ː")
result = gsub(result, "ː%.̃", "̃ː.")
result = gsub(result, "%.$", "")
-- ñ
result = gsub(result, "ñ", "n")
-- i and u lengthening
-- (string-final ʊ / ɪ, optionally nasalized and optionally followed by ɦ)
result = gsub(result, "ʊ(̃?)(ɦ?)$", "u%1ː%2")
result = gsub(result, "ɪ(̃?)(ɦ?)$", "i%1ː%2")
-- deaffricate first affricate in geminates
result = gsub(result, "t͡ʃ(%.?)t͡ʃ", "t̪%1t͡ʃ")
result = gsub(result, "d͡ʒ(%.?)d͡ʒ", "d̪%1d͡ʒ")
-- silent h in 'lh-', 'vh-' (Ohala 1983, p.45)
-- NOTE(review): the captures are empty `()` here -- presumably lost character classes
result = gsub(result, "^()ɦ", "%1")
result = gsub(result, "()()ɦ", "%1%2")
-- ɛː before j (an optional syllable break in between is kept) becomes ə̯i
result = gsub(result, "ɛː(%.?)j", function(a)
local res = "ə̯i"
res = res .. a .. "j"
return res
end)
-- ɔː before ʋ (an optional syllable break in between is kept) becomes ə̯u
result = gsub(result, "ɔː(%.?)ʋ", function(a)
local res = "ə̯u"
res = res .. a .. "ʋ"
return res
end)
return result
end
-- Derives a narrower transcription from a broad IPA string by applying a fixed sequence
-- of gsub rewrites, and returns the rewritten string.
--   ipa: broad IPA string without delimiters (the make* callers pass export.toIPA output).
-- The rewrites run in the order written, so later rules see the output of earlier ones.
-- NOTE(review): many patterns below contain empty `()` groups, or start directly with a
-- capture where a leading character class would be expected. These look like character
-- classes lost in this copy; confirm every such line against the canonical module.
function export.narrow_IPA(ipa)
-- what /ɑ/ and /ə/ really are
ipa = gsub(ipa, 'ɑ', 'ä')
ipa = gsub(ipa, 'ə', 'ɐ')
-- uvular /x/, /ɣ/ ??
-- ipa = gsub(ipa, 'x', 'χ')
-- ipa = gsub(ipa, 'ɣ', 'ʁ')
-- retroflex s rules
ipa = gsub(ipa, 'ʂ(%.?)()', 'ʃ%1%2')
ipa = gsub(ipa, 'ʂ$', 'ʃ')
-- nasal allophones
ipa = gsub(ipa, 'ŋ(%.?)()', 'ɴ%1%2')
ipa = gsub(ipa, 'n%.j', 'ɲ.j')
ipa = gsub(ipa, '(%.?)ʃ', 'ɲ%1ʃ') -- this nasal is likely more front than before /j/, but not doing a too narrow transcription seems preferable
ipa = gsub(ipa, 'n(%.?)()̪', 'n̪%1%2̪')
ipa = gsub(ipa, 'm(%.?)f', 'ɱ%1f')
-- nasals induce nasalization
ipa = gsub(ipa, '()(ː?)()', '%1̃%2%3')
-- cc, jj
ipa = gsub(ipa, 't̪(%.?)t͡ʃ', 't̚%1t͡ʃ')
ipa = gsub(ipa, 'd̪(%.?)d͡ʒ', 'd̚%1d͡ʒ')
-- syllable boundary consonants
-- (each rule adds the "no audible release" mark before a syllable break)
ipa = gsub(ipa, '()%.()', '%1̚.%2')
ipa = gsub(ipa, '()%.()', '%1̚.%2')
ipa = gsub(ipa, '(̪?)%.()', '%1̚.%2')
ipa = gsub(ipa, '()%.()', '%1̚.%2')
-- aspiration rules
-- (vowel quality changes next to ɦ, then ɦ itself is rewritten as ʱ by the last rule)
ipa = gsub(ipa, 'ɐɦ()', 'ɛɦ%1')
ipa = gsub(ipa, 'ɐɦ$', 'ɛɦ')
ipa = gsub(ipa, 'ɐ%.ɦɐ', 'ɛ.ɦɛ')
ipa = gsub(ipa, 'ɐ%(ɦ%)', 'ɛ(ɦ)')
ipa = gsub(ipa, 'ʊɦ%.', 'ɔɦ.')
ipa = gsub(ipa, 'ʊ%.ɦɐ', 'ɔ.ɦɔ')
ipa = gsub(ipa, 'ɐ%.ɦʊ', 'ɔ.ɦɔ')
ipa = gsub(ipa, '()(̃?)(ː?)ɦ', '%1%2%3ʱ')
-- v/w
ipa = gsub(ipa, '(̪?%.?)ʋ', '%1w')
-- geminate /ɾ/ is trill
ipa = gsub(ipa, "ɾ%.ɾ", "r.r")
-- for onomatopeic words ending on -र्र
ipa = gsub(ipa, "ɾɾ", "rː")
-- final geminates often pronounced as singletons
ipa = gsub(ipa, "(̪?)%1", "%1(ː)")
-- final cc, jj
ipa = gsub(ipa, "t̚t͡ʃ", "(t̚)t͡ʃ")
ipa = gsub(ipa, "d̚d͡ʒ", "(d̚)d͡ʒ")
-- ɪ before a syllable-initial j is written i
ipa = gsub(ipa, "ɪ%.j", "i.j")
-- word boundaries are shown with the linking mark instead of a space
ipa = gsub(ipa, " ", "‿")
return ipa
end
-- Template entry point for Hindi: builds the formatted pronunciation line.
--   frame: Scribunto frame; the positional arguments of the parent (template) frame are the
--          terms to transcribe. Empty arguments are skipped. When no first positional
--          argument is given, the current page title is transcribed instead.
-- For every term the phonemic transcription is emitted between slashes, followed by the
-- narrow transcription between square brackets when it differs. If the "nonpersianized"
-- style yields a different result, the same pair is emitted for it as well.
-- Returns the "Delhi" qualifier followed by the output of m_IPA.format_IPA_full.
function export.make(frame)
	local args = frame:getParent().args
	local terms, results = {}, {}

	-- Test the first positional argument: `args` itself is always a table, so testing it
	-- would never reach the page-title fallback.
	if args[1] then
		for _, item in ipairs(args) do
			if item ~= "" then
				table.insert(terms, item)
			end
		end
	else
		terms = { mw.title.getCurrentTitle().text }
	end

	-- Appends /phonemic/ and, when it differs, [narrow] for one broad transcription.
	local function add_pair(broad)
		table.insert(results, { pron = "/" .. broad .. "/" })
		local narrow = export.narrow_IPA(broad)
		if narrow ~= broad then
			table.insert(results, { pron = "[" .. narrow .. "]" })
		end
	end

	for _, Hindi in ipairs(terms) do
		local persianized = export.toIPA(Hindi, "persianized")
		local nonpersianized = export.toIPA(Hindi, "nonpersianized")
		add_pair(persianized)
		if persianized ~= nonpersianized then
			add_pair(nonpersianized)
		end
	end

	return m_a.format_qualifiers(lang, {"Delhi"}) .. " " .. m_IPA.format_IPA_full { lang = lang, items = results }
end
-- Template entry point for Urdu: builds the formatted pronunciation line.
--   frame: Scribunto frame; the positional arguments of the parent (template) frame are the
--          terms to transcribe. Empty arguments are skipped.
-- Each term is transliterated with the "ur" language object (falling back to the term
-- itself when that returns nothing) and converted with the "desanskritize" style.
-- Returns the "ur" qualifier followed by the output of m_IPA.format_IPA_full.
-- Raises "No transliterations given." when no first positional argument is supplied.
function export.make_ur(frame)
	local args = frame:getParent().args
	-- Shadows the module-level Hindi language object for the rest of this function.
	local lang = require("Module:languages").getByCode("ur")
	local terms, results = {}, {}

	-- Test the first positional argument: `args` itself is always a table, so testing it
	-- would never reach the error below.
	if not args[1] then
		error("No transliterations given.")
	end
	for _, item in ipairs(args) do
		if item ~= "" then
			table.insert(terms, item)
		end
	end

	for _, Urdu in ipairs(terms) do
		local translit = lang:transliterate(Urdu) or Urdu
		local desanskritize = export.toIPA(translit, "desanskritize")
		table.insert(results, { pron = "/" .. desanskritize .. "/" })
	end

	return m_a.format_qualifiers(lang, {"ur"}) .. ' ' .. m_IPA.format_IPA_full { lang = lang, items = results }
end
-- Template entry point for Deccani: builds the formatted pronunciation line.
--   frame: Scribunto frame; the positional arguments of the parent (template) frame are the
--          terms to transcribe. Empty arguments are skipped.
-- Each term is converted with the "dakhini" style and emitted between slashes.
-- Returns the "Deccani" qualifier followed by the output of m_IPA.format_IPA_full.
-- Raises "No transliterations given." when no first positional argument is supplied.
function export.make_deccani(frame)
	local args = frame:getParent().args
	-- Shadows the module-level Hindi language object for the rest of this function.
	local lang = require("Module:languages").getByCode("ur")
	local terms, results = {}, {}

	-- Test the first positional argument: `args` itself is always a table, so testing it
	-- would never reach the error below.
	if not args[1] then
		error("No transliterations given.")
	end
	for _, item in ipairs(args) do
		if item ~= "" then
			table.insert(terms, item)
		end
	end

	for _, Urdu in ipairs(terms) do
		local dakhini = export.toIPA(Urdu, "dakhini")
		table.insert(results, { pron = "/" .. dakhini .. "/" })
	end

	return m_a.format_qualifiers(lang, {"Deccani"}) .. ' ' .. m_IPA.format_IPA_full { lang = lang, items = results }
end
return export