-- This module is the backend for Template:fa-IPA and Template:tg-IPA.
--[=[
FIXME:
1. (ir) q and ğ should both be ɣ intervocalically (āqā should give ɒːɣɒː)
2. (prs) disable auto lowering of long vowels before /h/ & /ʔ/, (causes too many issues).
3. (tg, prs) change /q/ to /ɢ/ before a voiced consonant
4. (cls) prevent the appearance of β after a final consonant (e.g -atb should not give β)
-- see ]
5. (cls) fix geminated β and ð should be a normal b and d
6. FIXED // (ir) ] gives instead of
7. FIXED (all except cls) final geminates such as ] should transcribe as
8. (ir) add support for Shirazi dialect
9. (kbl, haz) ] should be /mutāssif/
10. (ir) ] should be /mo.ʁæj.jǽd̪/ (without a second parameter)
11. support disabling a dialect with '-'. e.g. ] should not generate classical
]=]
-- Table collecting everything this module exports; returned at the end of the file.
local export = {}
-- Helper modules shared by the functions below.
local m_str_utils = require("Module:string utilities")
local U = m_str_utils.char
-- Language object for "fa"; handed to the IPA formatter in export.show.
local lang = require("Module:languages").getByCode("fa")
local m_IPA = require("Module:IPA")
local m_table = require("Module:table")
local m_qual = require("Module:qualifier")
-- Flat inventories of consonant letters used in the romanized input.
-- NOTE(review): presumably meant to be spliced into pattern character classes;
-- no such use is visible in this copy of the file. "h" is listed twice in
-- all_consonants and non_stopc.
local all_consonants = "bptTjčhxdDðrzžsšʔʾğGfqkglmnŋhɦwvy'" --needed for syllables
local stop_cons = "bptTjčdDðqkg"
local non_stopc = "hxrzžsšʔğGflmhɦwvy'"
-- Short aliases for the string helpers.
local rsplit = m_str_utils.split
local rsubn = m_str_utils.gsub
local toNFC = mw.ustring.toNFC
local ulen = m_str_utils.len
local usub = m_str_utils.sub
-- Diacritics and marks appended to transcriptions, built from their code points
-- (assumes U maps a code point to its character — TODO confirm).
local pitchaccent = U(0x301) -- U+0301 combining acute accent
local devoice = U(0x325) -- U+0325 combining ring below
local dtack = U(0x31E) -- U+031E combining down tack below
local gstop = U(0x027) -- U+0027 apostrophe
local dental = U(0x32A) -- U+032A combining bridge below
-- Like rsubn(), but returns only the substituted string and drops the
-- replacement count.
local function rsub(term, foo, bar)
	-- The parentheses truncate rsubn()'s results to the first value.
	return (rsubn(term, foo, bar))
end
-- Codes of every supported pronunciation style, in display order.
export.all_styles = {"cls", "prs", "kbl", "haz", "fa", "teh", "tg"}
-- Style groups: a parameter named after a group applies to every style in it.
export.all_style_groups = {
	all = export.all_styles,
	cls = {"cls"},
	dari = {"prs", "kbl", "haz"},
	ir = {"fa", "teh"},
	tg = {"tg"}
}
-- Human-readable description of each style, keyed by style code.
export.all_style_descs = {
	cls = "Classical Persian",
	prs = "Dari Persian",
	-- The style code used everywhere else in this module is "kbl"; the original
	-- table only had "kabul", so lookups by style code found nothing.
	kbl = "Kabuli",
	kabul = "Kabuli", -- legacy key, kept for backward compatibility
	haz = "Hazaragi",
	fa = "Iranian Persian",
	teh = "Tehrani",
	tg = "Tajik"
}
-- Apply `fun` to every element of the list `items` and concatenate the lists
-- it returns into one flat list, preserving order.
local function flatmap(items, fun)
	local flattened, count = {}, 0
	for _, item in ipairs(items) do
		for _, produced in ipairs(fun(item)) do
			count = count + 1
			flattened[count] = produced
		end
	end
	return flattened
end
-- Lookup tables of IPA values. The IPA functions below pass them as the replacement
-- argument of rsubn(text, ".", <table>).
-- NOTE(review): in this copy every entry has lost its key (`= value`), which is not
-- valid Lua. The keys must be restored from the upstream module before this can run.

-- Consonant values applied in every style's IPA function.
local common_consonants = {
= "d͡ʒ",
= "ʔ",
= "z",
= "d",
--these are here for Hazaragi
= "t",
--they are retroflexes in haz
= "ɣ",
= "ɣ",
= "h",
= "ɾ",
= "s",
= "ʃ",
= "s",
= "t",
= "j",
= "ʒ",
= "z",
= "t͡ʃ",
= "ɡ",
= "ˈ"
}
-- Iranian Persian values (used by export.fa_IPA).
local iranian_persian_short_vowels = { = "æ", = "e", = "o"}
local iranian_persian_long_vowels = {
= "ɒː",
= "iː",
= "uː",
= "uː",
= "iː"
}
local iranian_persian_consonants = { = "z", = "ɢ", = "ɢ", = "c", = "ɟ"}
-- Dari values (used by the prs, kbl and haz functions).
local dari_persian_short_vowels = { = "ä", = "ɪ", = "ʊ"}
local dari_persian_long_vowels = {
= "ɑː",
= "iː",
= "uː",
= "oː",
= "eː"
}
local dari_persian_consonants = { = "z", = "w"}
-- Tajik values (used by export.tg_IPA); note the long-vowel values carry no length mark.
local tajik_short_vowels = { = "ä", = "i", = "u"}
local tajik_long_vowels = {
= "ɔ",
= "i",
= "u",
= "ɵ",
= "e"
}
local tajik_vowels = "aieuɵɔ"
local tajik_consonants = { = "z", = "ʁ", = "χ"}
-- Classical Persian values (used by export.fa_cls_IPA, and the short vowels by Hazaragi).
local classical_persian_short_vowels = { = "a", = "i", = "u"}
local classical_persian_long_vowels = {
= "ɑː",
= "iː",
= "uː",
= "oː",
= "eː"
}
local classical_persian_consonants = { = "ð", = "w"}
-- Vowel letters of the romanized input, with and without short "a".
local vowels_minus_a = "iuāīūüēōːʷ"
local vowels = "aiuāīūüēōːʷ"
--ʷ and ː are counted as vowels to prevent them from being put in the next syllable
-- Pattern fragments matching one consonant / one vowel.
-- NOTE(review): both are empty strings in this copy; presumably they were character
-- classes built from all_consonants and vowels — restore from upstream.
local consonant = ""
local vowel = ""
-- Pattern capturing vowel, consonant, optional consonant, vowel; used by syllabify()
-- to find where a syllable break belongs.
local syllabify_pattern = "(" .. vowel .. ")(" .. consonant .. ")(" .. consonant .. "?)(" .. vowel .. ")"
-- Insert "." syllable breaks into the romanized `text` and return the result.
-- NOTE(review): several patterns below contain empty captures `()` where a character
-- class appears to have been lost in this copy; restore from upstream before relying
-- on the behaviour.
local function syllabify(text)
-- Work with "`" as the stress mark instead of "ˈ".
text = rsubn(text, "ˈ", "`")
text = rsubn(text, "%-(" .. consonant .. ")%-(" .. consonant .. ")", "%1.%2")
text = rsubn(text, "()`", "%1.`")
-- Add syllable breaks.
-- The substitution runs twice (presumably so that overlapping matches skipped on the
-- first pass are caught on the second — TODO confirm).
for _ = 1, 2 do
text =
rsubn(
text,
syllabify_pattern,
function(a, b, c, d)
-- With a single consonant between the vowels, move it after the break.
if c == "" and b ~= "" then
c, b = b, ""
end
return a .. b .. "." .. c .. d
end
)
end
-- syllable boundary consonants
text =
rsubn(
text,
"()()(+ʷ?)()",
"%1%2.%3%4"
)
text = rsubn(text, "()()()", "%1%2.%3")
-- ALL syllables are CV- so vowels NEED an initial consonant (ʔ)
text = rsubn(text, "()()i#", "%1%2i#") --exclude izafa/ezafe
text = rsubn(text, "()()", "%1ʔ%2")
text = rsubn(text, "#()", "#ʔ%1")
return text
end
-- Rewrite glottal consonants (GC) in `text` as glides or long vowels and return the
-- result. Called by the Kabuli and Hazaragi IPA functions.
-- NOTE(review): the character classes inside the patterns are missing in this copy
-- (only empty captures remain), so which sounds each rule targets cannot be read
-- from here; restore from upstream.
local function remove_glottal_c(text) --only for regional dialects
-- remove glottal consonants with appropriate glide
text = rsubn(text, "((%.?))()()", "%1w%4")
text = rsubn(text, "()(%.?)()()", "i%2y%4")
text = rsubn(text, "((%.?))()()", "%1y%4")
text = rsubn(text, "((%.?))()()", "%1y%4")
text = rsubn(text, "()(%.?)()()", "u%2w%4")
text = rsubn(text, "((%.?))()()", "%1w%4")
--completely delete GC if both vowels are either the same or similar
text = rsubn(text, "()()`()()", "`%1ū")
text = rsubn(text, "()()`()()", "`%1ī")
text = rsubn(text, "()()`()()", "`%1ā")
--Else, turn GC into majhul long vowels
text = rsubn(text, "()((%.?))", "ā")
text = rsubn(text, "()((%.?))", "ē")
text = rsubn(text, "()((%.?))", "ō")
-- lastly, remove all remaining GC
text = rsubn(text, "()", "")
return text
end
-- Apply consonant assimilation and allophone rules to `text` and return the result.
-- Called by every IPA function except the Classical one.
-- NOTE(review): the conditioning environments (character classes) are missing from
-- the patterns in this copy; only the replacement sides show the intended outputs
-- (ɭ, ɳ, ŋ, ɲ, m, ɴ, ɱ, ɦ, ɹ, v, ...). Restore from upstream.
local function con_assimilation(text) --DONT USE THIS ON CLASSICAL
-- assimilation/placement of certain consonants
text = rsubn(text, "l((%.?))", "ɭ%1") --retroflexes are only in hazaragi
text = rsubn(text, "()((%.?))", "%1" .. dental .. "%2")
text = rsubn(text, "n((%.?))", "ɳ%1")
text = rsubn(text, "()", "%1" .. dental .. "")
text = rsubn(text, "n((%.?))", "ŋ%1")
text = rsubn(text, "n((%.?))", "ɲ%1")
text = rsubn(text, "n((%.?))", "m%1")
text = rsubn(text, "n((%.?))", "ɴ%1")
text = rsubn(text, "()((%.?))", "ɱ%2")
text = rsubn(text, "((%" .. dtack .. "?)(%" .. pitchaccent .. "?)(%ː?)(%.?))()", "%1ɦ")
text = rsubn(text, "r()", "ɹ%1")
-- formally, f only assimilates in the same syllable
text = rsubn(text, "f()", "v%1")
-- Geminate ɾ (optionally across a syllable break) becomes rr.
-- NOTE(review): this also inserts a "#" marker before the result; callers strip
-- every "#" at the end.
text = rsubn(text, "ɾ(%.?)ɾ", "#r%1r")
-- Accented ä becomes æ.
text = rsubn(text, "ä(" .. pitchaccent .. ")", "æ%1")
text = rsubn(text, "()#", "ʱ#")
text = rsubn(text, "()#", "ʰ#")
text = rsubn(text, "(" .. dental .. "?" .. devoice .. "?)%1(" .. devoice .. "?)#", "%1%2(ː)#")
return text
end
-- Convert romanized `text` to an IPA string using the Iranian Persian tables.
-- one_term_ipa calls it for the "fa" and "teh" styles.
-- The steps run in a fixed order: mark word boundaries with "#", normalise w/v,
-- syllabify, place the accent, map vowels, map consonants, apply allophone rules,
-- then strip the "#" markers and normalise to NFC.
-- NOTE(review): many patterns contain empty captures `()` or empty strings where a
-- character class appears to have been lost in this copy; restore from upstream.
function export.fa_IPA(text)
text = rsubn(text, "a()", "a%1")
text = rsubn(text, "a%-", "e-")
text = rsubn(text, "ˈ", "`")
-- Surround every word with "#" boundary markers.
text = rsubn(text, " | ", "# | #")
text = "##" .. rsubn(text, " ", "# #") .. "##"
text = rsubn(text, "%-i#", "i#")
text = rsubn(text, "()", "%1")
text = rsubn(text, "", ".")
text = rsubn(text, "v", "w")
-- Replace xwa with xu
text = rsubn(text, "xwa", "xu")
-- Replace xwā with xā
text = rsubn(text, "xwā", "xā")
-- Replace xwē with xē
text = rsubn(text, "xwē", "xē")
-- Decide between w and v from the neighbouring sounds.
text = rsubn(text, "w(" .. vowel .. ")", "v%1")
text = rsubn(text, "w(" .. consonant .. ")", "w%1")
text = rsubn(text, "()w", "%1v")
text = rsubn(text, "(" .. consonant .. ")w#", "%1v#")
text = rsubn(text, "v%(w", "v(v")
-- Replace diphthong
text =
rsubn(
text,
"a()()",
function(semivowel, position)
-- NOTE(review): this local shadows the module-level `consonant` pattern, so the
-- find() below searches the character for itself rather than testing it against
-- the consonant pattern — confirm the intended check.
local consonant = usub(text, position, position)
if consonant == "" or consonant:find(consonant) then
if semivowel == "w" then
return "uw"
else
return "ey"
end
end
-- Falls through returning nothing when the condition fails (presumably leaving
-- the match unchanged, as with string.gsub — TODO confirm for rsubn).
end
)
--automatically denote syllables
text = syllabify(text)
-- then do pitch accent mark
text = rsubn(text, "`()()", "%1%2" .. pitchaccent .. "")
-- "E" is a temporary placeholder; it is turned into "i" after the vowel tables run.
text = rsubn(text, "()(" .. pitchaccent .. "?)(%.?)y", "E%2%3y")
text = rsubn(text, "((%" .. dental .. "?))()", "%1ʰ%3")
text = rsubn(text, "((%.?))q", "%1ʁ")
-- Replace final a with e (can be overwritten by entering æ)
text = rsubn(text, "()(%" .. pitchaccent .. "?)#", "e%2#")
-- Replace short vowels
text = rsubn(text, ".", iranian_persian_short_vowels)
-- Replace long vowels
text = rsubn(text, ".", iranian_persian_long_vowels)
text = rsubn(text, "E(" .. pitchaccent .. "?)(%.?)y", "i%1%2y")
-- Replace jj with dj
text = rsubn(text, "jj", "dj")
-- Replace čč with tč
text = rsubn(text, "čč", "tč")
-- Replace owv- with avv-
text = rsubn(text, "owv", "ævv")
text = rsubn(text, "ow.v", "æv.v")
-- Allophones
text = rsubn(text, "((%" .. dental .. "?))#", "%1" .. devoice .. "#")
-- Replace consonants
text = rsubn(text, ".", iranian_persian_consonants)
text = rsubn(text, "cʰ()", "kʰ%1")
text = rsubn(text, "ɟ(%" .. devoice .. "?)()", "g%1%2")
text = con_assimilation(text)
text = rsubn(text, "#(g)", "%1" .. devoice .. "")
text = rsubn(text, ".", common_consonants)
text = rsubn(text, "ɾ", "ɹ")
--fix the pitch accent on long vowels
text = rsubn(text, "()ː" .. pitchaccent .. "", "%1" .. pitchaccent .. "ː")
text = rsubn(text, "()(%.?)t", "x%2t")
text = rsubn(text, "()#", "ɢ" .. devoice .. "#")
text = rsubn(text, "#()", "#q")
text = rsubn(text, "ʁɢ", "ɢɢ")
text = rsubn(text, "#g", "#k")
-- Palatal stops are written as palatalised velars.
text = rsubn(text, "c", "kʲ")
text = rsubn(text, "ɟ", "ɡʲ")
text = rsubn(text, "ʲʰ", "ʰʲ")
-- Drop the word-boundary markers and normalise.
text = rsubn(text, "#", "")
text = toNFC(text)
return text
end
-- Convert romanized `text` to an IPA string using the Dari tables; one_term_ipa calls
-- it for the "prs" style.
-- NOTE(review): several patterns contain empty captures `()` or empty strings where a
-- character class appears to have been lost in this copy; restore from upstream.
function export.prs_IPA(text) --based on formal speech URBAN kabul
text = rsubn(text, "ˈ", "`")
-- Surround every word with "#" boundary markers.
text = rsubn(text, " | ", "# | #")
text = "##" .. rsubn(text, " ", "# #") .. "##"
text = rsubn(text, "%-i#", "i#")
text = rsubn(text, "()", "%1")
text = rsubn(text, "", ".")
text = rsubn(text, "v", "w")
-- Replace xwa with xu
text = rsubn(text, "xwa", "xu")
-- Replace xwā with xā
text = rsubn(text, "xwā", "xā")
-- Replace xwē with xē
text = rsubn(text, "xwē", "xē")
-- NOTE(review): exact duplicate of the previous statement — possibly a different
-- vowel was intended; confirm.
text = rsubn(text, "xwē", "xē")
text = rsubn(text, "((%`?))(%.?)y", "i%2y")
-- Iranian and Classical dictionaries list -iyy,
-- But Tajik and Dari ones dont
text = rsubn(text, "(y(%`?))(%.?)y", "i%2y")
--automatically denote syllables
text = syllabify(text)
--pitch accent mark
text = rsubn(text, "`()(?)()", "%1%2%3" .. pitchaccent .. "")
text = rsubn(text, "()()", "%1ʰ%2")
-- Replace ih, īh, i\', ī\' by ēh, ē\'
text = rsubn(text, "i((%.?))", "e" .. dtack .. "%1")
text = rsubn(text, "ī((%.?))", "ē%1")
-- Replace uh, ūh, u\', ū\' by ɵh, ɵ\'
text = rsubn(text, "u((%.?))", "o" .. dtack .. "%1")
text = rsubn(text, "ū((%.?))", "ō%1")
-- Replace short vowels
text = rsubn(text, ".", dari_persian_short_vowels)
-- Replace long vowels
text = rsubn(text, ".", dari_persian_long_vowels)
-- Replace jj with dj
text = rsubn(text, "jj", "dj")
-- Replace čč with tč
text = rsubn(text, "čč", "tč")
--fix the pitch accent on long vowels
text = rsubn(text, "()ː" .. pitchaccent .. "", "%1" .. pitchaccent .. "ː")
-- Allophones
text = con_assimilation(text)
-- Replace consonants
text = rsubn(text, ".", common_consonants)
text = rsubn(text, "f()", "v%1")
text = rsubn(text, ".", dari_persian_consonants)
-- Geminate and word-initial ɾ are written as r.
text = rsubn(text, "ɾ(%.?)ɾ", "r%1r")
text = rsubn(text, "ɪ(" .. pitchaccent .. "?)(%.?)j", "i%1%2j")
text = rsubn(text, "#ɾ", "#r")
-- Drop the word-boundary markers and normalise.
text = rsubn(text, "#", "")
text = toNFC(text)
return text
end
-- Convert romanized `text` to an IPA string for the "kbl" style. Unlike export.prs_IPA
-- it places the accent first and runs remove_glottal_c() before syllabifying.
-- NOTE(review): several patterns contain empty captures `()` or empty strings where a
-- character class appears to have been lost in this copy; restore from upstream.
function export.prs_kbl_IPA(text) --Colloquial dialect of Kabul
text = rsubn(text, "ˈ", "`")
-- Surround every word with "#" boundary markers.
text = rsubn(text, " | ", "# | #")
text = "##" .. rsubn(text, " ", "# #") .. "##"
text = rsubn(text, "%-i#", "i#")
text = rsubn(text, "()", "%1")
text = rsubn(text, "", ".")
text = rsubn(text, "v", "w")
-- Replace xwa with xu
text = rsubn(text, "xwa", "xu")
-- Replace xwā with xā
text = rsubn(text, "xwā", "xā")
-- Replace xwē with xē
text = rsubn(text, "xwē", "xē")
-- NOTE(review): exact duplicate of the previous statement — confirm intent.
text = rsubn(text, "xwē", "xē")
text = rsubn(text, "((%`?))(%.?)y", "i%2y")
-- Iranian and Classical dictionaries list -iyy,
-- But Tajik and Dari ones dont
text = rsubn(text, "(y(%`?))(%.?)y", "i%2y")
--pitch accent mark
text = rsubn(text, "`()()", "%1%2" .. pitchaccent .. "")
-- remove glottal consonants for some dialects
text = remove_glottal_c(text)
--automatically denote syllables
text = syllabify(text)
-- universal aspiration
text = rsubn(text, "()()", "%1ʰ%2")
-- Replace short vowels
text = rsubn(text, ".", dari_persian_short_vowels)
-- Replace long vowels
text = rsubn(text, ".", dari_persian_long_vowels)
-- Replace jj with dj
text = rsubn(text, "jj", "dj")
-- NOTE(review): this runs after the vowel tables have been applied, so whether "ā"
-- can still be present at this point depends on the (lost) table keys — confirm.
text = rsubn(text, "āw", "aw") -- lost colloquially
-- Replace čč with tč
text = rsubn(text, "čč", "tč")
-- Allophones
text = con_assimilation(text)
-- Replace consonants
text = rsubn(text, ".", dari_persian_consonants)
text = rsubn(text, "f((%.?))", "v%1")
text = rsubn(text, ".", common_consonants)
--fix the pitch accent on long vowels
text = rsubn(text, "()ː" .. pitchaccent .. "", "%1" .. pitchaccent .. "ː")
-- Geminate and word-initial ɾ are written as r (the extra "#" is stripped below).
text = rsubn(text, "ɾ(%.?)ɾ", "#r%1r")
text = rsubn(text, "ɪ(" .. pitchaccent .. "?)(%.?)j", "i%1%2j")
text = rsubn(text, "#ɾ", "#r")
-- Drop the word-boundary markers and normalise.
text = rsubn(text, "#", "")
text = toNFC(text)
return text
end
-- Convert romanized `text` to an IPA string for the "haz" style. Adds vowel-harmony
-- rewrites, retroflex D/T, and its own vowel values on top of the Dari pipeline.
-- NOTE(review): several patterns contain empty captures `()` or empty strings where a
-- character class appears to have been lost in this copy; restore from upstream.
function export.prs_haz_IPA(text) --Hazaragi
text = rsubn(text, "ˈ", "`")
-- Surround every word with "#" boundary markers.
text = rsubn(text, " | ", "# | #")
text = "##" .. rsubn(text, " ", "# #") .. "##"
text = rsubn(text, "((%`?))(%.?)y", "i%2y")
-- Iranian and Classical dictionaries list -iyy,
-- But Tajik and Dari ones dont
text = rsubn(text, "(y(%`?))(%.?)y", "i%2y")
text = rsubn(text, "%-i#", "i#")
text = rsubn(text, "()", "%1")
text = rsubn(text, "", ".")
--these conversions need to happen BEFORE EVERYTHING ELSE
--pitch accent mark
text = rsubn(text, "`()()", "%1%2" .. pitchaccent .. "")
--Vowel Harmony
text = rsubn(text, "ē(" .. pitchaccent .. "?)()()", "%3%1%2%3")
text =
rsubn(text, "ē(" .. pitchaccent .. "?)()()()", "%4%1%2%3%4")
text = rsubn(text, "i(" .. pitchaccent .. "?)()()", "%3%1%2%3")
text = rsubn(text, "ī(" .. pitchaccent .. "?)()()", "%3%1%2%3")
text = rsubn(text, "ō(" .. pitchaccent .. "?)()()", "%3%1%2%3")
text = rsubn(text, "ō(" .. pitchaccent .. "?)()()", "u%1%2%3")
text = rsubn(text, "ō(" .. pitchaccent .. "?)()()", "ū%1%2%3")
-- Replace xwa with xu
text = rsubn(text, "xwa", "xu")
-- Replace xwā with xā
text = rsubn(text, "xwā", "xā")
-- Replace xwē with xē
text = rsubn(text, "xwē", "xē")
-- NOTE(review): exact duplicate of the previous statement — confirm intent.
text = rsubn(text, "xwē", "xē")
text = rsubn(text, "v", "w")
-- remove glottal consonants for some dialects
text = remove_glottal_c(text)
--automatically denote syllables
text = syllabify(text)
-- universal aspiration
text = rsubn(text, "()()", "%1ʰ%2")
--delete certain consonant clusters and diphthongs
text = rsubn(text, "āy", "ay")
text = rsubn(text, "āw", "aw")
--retroflex consonants
text = rsubn(text, "D", "ɖ")
text = rsubn(text, "T", "ʈ")
-- Replace short vowels
-- no consistent vowel length for i or u
text = rsubn(text, "ī", "i")
text = rsubn(text, "ū", "u")
text = rsubn(text, ".", classical_persian_short_vowels)
--approximate vowels
text = rsubn(text, "ā", "ɔː")
text = rsubn(text, "ō", "ʊː")
text = rsubn(text, "a", "ä")
-- Replace long vowels
text = rsubn(text, ".", dari_persian_long_vowels)
--fix the pitch accent on long vowels
text = rsubn(text, "()ː" .. pitchaccent .. "", "%1" .. pitchaccent .. "ː")
-- Replace jj with dj
text = rsubn(text, "jj", "dj")
-- Replace čč with tč
text = rsubn(text, "čč", "tč")
--allophones
text = con_assimilation(text)
text = rsubn(text, "((%" .. dental .. "?))#", "%1" .. devoice .. "#")
--Terminal voicing is not phonetic
text = rsubn(text, "()#", "%1" .. devoice .. "#")
-- Replace consonants
text = rsubn(text, ".", dari_persian_consonants)
text = rsubn(text, "f((%.?))", "v%1")
text = rsubn(text, ".", common_consonants)
-- Geminate and word-initial ɾ are written as r (the extra "#" is stripped below).
text = rsubn(text, "ɾ(%.?)ɾ", "#r%1r")
text = rsubn(text, "#ɾ", "#r")
-- Drop the word-boundary markers and normalise.
text = rsubn(text, "#", "")
text = toNFC(text)
return text
end
-- Convert romanized `text` to an IPA string using the Tajik tables; one_term_ipa calls
-- it for the "tg" style and export.tg_phonetic calls it directly.
-- NOTE(review): several patterns contain empty captures `()` or empty strings where a
-- character class appears to have been lost in this copy; restore from upstream.
function export.tg_IPA(text)
text = rsubn(text, "ˈ", "`")
-- Surround every word with "#" boundary markers.
text = rsubn(text, " | ", "# | #")
text = "##" .. rsubn(text, " ", "# #") .. "##"
text = rsubn(text, "%-i#", "i#")
-- Iranian and Classical dictionaries list -iyy,
-- But Tajik and Dari ones dont
text = rsubn(text, "(y(%`?))(%.?)y", "i%2y")
text = rsubn(text, "()", "%1")
text = rsubn(text, "", ".")
text = rsubn(text, "v", "w")
-- Replace xwa with xu
text = rsubn(text, "xwa", "xu")
-- Replace xwā with xā
text = rsubn(text, "xwā", "xā")
-- Replace xwē with xē
text = rsubn(text, "xwē", "xē")
-- NOTE(review): exact duplicate of the previous statement — confirm intent.
text = rsubn(text, "xwē", "xē")
--automatically denote syllables
text = syllabify(text)
--pitch accent mark
text = rsubn(text, "`()()", "%1%2" .. pitchaccent .. "")
-- Replace jj with dj
text = rsubn(text, "jj", "dj")
-- Replace čč with tč
text = rsubn(text, "čč", "tč")
text = rsubn(text, "w()", "v%1")
-- universal aspiration
text = rsubn(text, "()()", "%1ʰ%2")
-- Replace ih, īh, i\', ī\' by ēh, ē\'
text = rsubn(text, "()(%" .. pitchaccent .. "?)()()", "e%2%3%4")
-- Replace uh, ūh, u\', ū\' by ɵh, ɵ\'
text = rsubn(text, "()(%" .. pitchaccent .. "?)()()", "ɵ%2%3%4")
--aspiration
text = rsubn(text, "((%" .. dental .. "?))()", "%1ʰ%3")
-- Replace short vowels
text = rsubn(text, ".", tajik_short_vowels)
-- Replace long vowels
text = rsubn(text, ".", tajik_long_vowels)
--allophones
text = con_assimilation(text)
-- Replace consonants
text = rsubn(text, ".", common_consonants)
text = rsubn(text, ".", tajik_consonants)
text = rsubn(text, "()(" .. pitchaccent .. ")", "æ%2")
-- Drop the word-boundary markers and normalise.
text = rsubn(text, "#", "")
text = toNFC(text)
return text
end
-- Convert romanized `text` to an IPA string using the Classical Persian tables;
-- one_term_ipa calls it for the "cls" style. Unlike the other IPA functions it keeps
-- labialised xʷ, places no accent mark and does not call con_assimilation().
-- NOTE(review): several patterns contain empty captures `()` or empty strings where a
-- character class appears to have been lost in this copy; restore from upstream.
function export.fa_cls_IPA(text)
text = rsubn(text, "ˈ", "`")
-- Surround every word with "#" boundary markers.
text = rsubn(text, " | ", "# | #")
text = "##" .. rsubn(text, " ", "# #") .. "##"
text = rsubn(text, "((%`?))(%.?)y", "i%2y")
text = rsubn(text, "%-i#", "i#")
text = rsubn(text, "()", "%1")
text = rsubn(text, "", ".")
text = rsubn(text, "v", "w")
-- Replace xwa with xʷa
text = rsubn(text, "xwa", "xʷa")
-- Replace xwā with xʷā
text = rsubn(text, "xwā", "xʷā")
-- Replace xwē with xʷē
text = rsubn(text, "xwē", "xʷē")
--automatically denote syllables
text = syllabify(text)
-- Replace short vowels
text = rsubn(text, ".", classical_persian_short_vowels)
-- Replace d with ḏ after vowels
text = rsubn(text, "(+.?)(%`?)()", "%1%2ḏ")
-- Likewise produce β (presumably from b after vowels — TODO confirm, pattern lost).
text = rsubn(text, "(+.?)(%`?)()", "%1%2β")
-- Replace long vowels
text = rsubn(text, ".", classical_persian_long_vowels)
-- Replace jj with dj
text = rsubn(text, "jj", "dj")
-- Replace čč with tč
text = rsubn(text, "čč", "tč")
-- Replace consonants
text = rsubn(text, ".", common_consonants)
text = rsubn(text, ".", classical_persian_consonants)
-- Drop the word-boundary markers and normalise.
text = rsubn(text, "#", "")
text = toNFC(text)
return text
end
-- ROMANIZATIONS
-- Each romanize_* function turns the template's phonemic input into a display
-- romanization. They accept either a string or a table with an `args` field.

-- Romanization keeping w, ḏ and ḇ (presumably Classical Persian, per the name).
-- `script` and `options` are accepted but not used in the visible body.
-- NOTE(review): several patterns contain empty captures `()` or empty strings where a
-- character class appears to have been lost in this copy; restore from upstream.
function export.romanize_fa_cls(text, script, options)
if type(text) == "table" then
options = {}
-- NOTE(review): both values are set to the whole `args` table here; indexes such as
-- args[1] / args[2] were probably lost in this copy — confirm.
text, script = text.args, text.args
end
-- Stress marks are not shown in the romanization.
text = rsubn(text, "`", "")
text = rsubn(text, "ˈ", "")
text = rsubn(text, "", ", ")
-- Surround every word with "#" boundary markers.
text = rsubn(text, " | ", "# | #")
text = "##" .. rsubn(text, " ", "# #") .. "##"
text = rsubn(text, "()(%.?)y", "iy")
--kill incorrect characters
text = rsubn(text, "()", "")
text = rsubn(text, "()", "n")
--remove v
text = rsubn(text, "v", "w")
--ensure vowels are paired to a consonant
text = rsubn(text, "()()", "%1'%2")
text = rsubn(text, "()", "")
text = rsubn(text, "()()", "%1ḏ")
text = rsubn(text, "((%-?))b", "%1ḇ")
text = rsubn(text, "ḏ", "ḏ")
text = rsubn(text, "ḏd", "ḏḏ")
text = rsubn(text, "ḇb", "ḇḇ")
-- Simplify the special input letters to their plain romanization.
text = rsubn(text, "G", "ğ")
text = rsubn(text, "ḍ", "z")
text = rsubn(text, "ṭ", "t")
text = rsubn(text, "ṯ", "s")
text = rsubn(text, "ṣ", "s")
text = rsubn(text, "ḥ", "h")
-- remove Hazaragi retroflexes
text = rsubn(text, "D", "d")
text = rsubn(text, "T", "t")
text = rsubn(text, "ɖ", "d")
text = rsubn(text, "ʈ", "t")
-- Drop a word-initial apostrophe, then the boundary markers.
text = rsubn(text, "#'", "#")
text = rsubn(text, "#", "")
return text
end
-- Romanization that keeps w and marks retroflexes as ḍ/ṭ (presumably Dari, per the
-- name). `script` and `options` are accepted but not used in the visible body.
-- NOTE(review): several patterns contain empty captures `()` or empty strings where a
-- character class appears to have been lost in this copy; restore from upstream.
function export.romanize_prs(text, script, options)
if type(text) == "table" then
options = {}
-- NOTE(review): both values are set to the whole `args` table here; indexes such as
-- args[1] / args[2] were probably lost in this copy — confirm.
text, script = text.args, text.args
end
-- Stress marks are not shown in the romanization.
text = rsubn(text, "`", "")
text = rsubn(text, "ˈ", "")
text = rsubn(text, "", ", ")
-- Surround every word with "#" boundary markers.
text = rsubn(text, " | ", "# | #")
text = "##" .. rsubn(text, " ", "# #") .. "##"
text = rsubn(text, "((%.?))y", "i%2y")
text = rsubn(text, "(y(%.?))y", "i%2y")
text = rsubn(text, "i()", "e%1")
text = rsubn(text, "u()", "o%1")
--kill incorrect characters
text = rsubn(text, "()", "")
text = rsubn(text, "()", "n")
text = rsubn(text, "v", "w")
-- Replace xw clusters
text = rsubn(text, "xw()", "x%1")
text = rsubn(text, "xwa", "xu")
-- for rare exceptions
text = rsubn(text, "ʷ", "w")
--ensure vowels are paired to a consonant
text = rsubn(text, "()()", "%1'%2")
text = rsubn(text, "()", "")
-- Simplify the special input letters to their plain romanization.
text = rsubn(text, "ḍ", "z")
text = rsubn(text, "ḏ", "z")
text = rsubn(text, "ṯ", "s")
text = rsubn(text, "ṭ", "t")
text = rsubn(text, "G", "ğ")
text = rsubn(text, "ṣ", "s")
text = rsubn(text, "ḥ", "h")
-- THIS SHOULD ONLY BE DONE FOR HAZARAGI RETROFLEX ENTRIES
-- THEY SHOULD NEVER APPEAR IN A MAIN ENTRY
-- (Runs after ḍ/ṭ were simplified above, so only these retroflexes end up as ḍ/ṭ.)
text = rsubn(text, "D", "ḍ")
text = rsubn(text, "T", "ṭ")
text = rsubn(text, "ɖ", "ḍ")
text = rsubn(text, "ʈ", "ṭ")
text = rsubn(text, "#'", "#")
-- remove unnecessary marks
text = rsubn(text, "#", "")
return text
end
-- Romanization using â/e/o vowels and ğ for both q and ğ (presumably Iranian Persian,
-- per the name). `script` and `options` are accepted but not used in the visible body.
-- NOTE(review): several patterns contain empty captures `()` or empty strings where a
-- character class appears to have been lost in this copy; restore from upstream.
function export.romanize_ira(text, script, options)
if type(text) == "table" then
options = {}
-- NOTE(review): both values are set to the whole `args` table here; indexes such as
-- args[1] / args[2] were probably lost in this copy — confirm.
text, script = text.args, text.args
end
-- Stress marks are not shown in the romanization.
text = rsubn(text, "`", "")
text = rsubn(text, "ˈ", "")
text = rsubn(text, "", ", ")
-- Surround every word with "#" boundary markers.
text = rsubn(text, " | ", "# | #")
text = "##" .. rsubn(text, " ", "# #") .. "##"
--kill incorrect characters
text = rsubn(text, "()", "")
text = rsubn(text, "()", "n")
text = rsubn(text, "v", "w")
-- Replace xw clusters
text = rsubn(text, "xw()", "x%1")
text = rsubn(text, "xwa", "xu")
text = rsubn(text, "ʷ", "")
-- Decide between w and v from the neighbouring sounds (same rules as export.fa_IPA).
text = rsubn(text, "w(" .. vowel .. ")", "v%1")
text = rsubn(text, "w(" .. consonant .. ")", "w%1")
text = rsubn(text, "()w", "%1v")
text = rsubn(text, "v%(w", "v(v")
text = rsubn(text, "(" .. consonant .. ")w#", "%1v#")
text = rsubn(text, "wv", "vv")
text = rsubn(text, "wæ", "væ")
--ensure vowels are paired to a consonant
text = rsubn(text, "()()", "%1'%2")
text = rsubn(text, "()", "")
-- Diphthongs and iy take their own vowel values before the general u→o / i→e step.
text = rsubn(text, "iy", "īy")
text = rsubn(text, "ay", "ey")
text = rsubn(text, "aw", "ow")
-- Simplify the special input letters to their plain romanization.
text = rsubn(text, "ḍ", "z")
text = rsubn(text, "ḏ", "z")
text = rsubn(text, "ṭ", "t")
text = rsubn(text, "G", "ğ")
text = rsubn(text, "q", "ğ")
text = rsubn(text, "ṯ", "s")
text = rsubn(text, "ṣ", "s")
text = rsubn(text, "ḥ", "h")
text = rsubn(text, "ā", "â")
text = rsubn(text, "u", "o")
text = rsubn(text, "i", "e")
-- remove Hazaragi retroflexes
text = rsubn(text, "D", "d")
text = rsubn(text, "T", "t")
text = rsubn(text, "ɖ", "d")
text = rsubn(text, "ʈ", "t")
-- Tajik does not have vowel length
-- NOTE(review): the comment above mentions Tajik although this is romanize_ira; it
-- looks carried over from romanize_tg — confirm what these two rules target here.
text = rsubn(text, "()", "i")
text = rsubn(text, "()", "u")
-- terminal w is only possible in a diphthong
text = rsubn(text, "(0)w#", "v#")
-- Final (and pre-hyphen) a is written e; æ is the escape for a literal a.
text = rsubn(text, "a#", "e#")
text = rsubn(text, "a%-", "e-")
text = rsubn(text, "æ", "a")
-- Drop a word-initial apostrophe, then the boundary markers.
text = rsubn(text, "#'", "#")
text = rsubn(text, "#", "")
return text
end
-- Romanization using v, ġ, and the vowels e/ü/o for ē/ō/ā (presumably Tajik, per the
-- name). `script` and `options` are accepted but not used in the visible body.
-- NOTE(review): several patterns contain empty captures `()` or empty strings where a
-- character class appears to have been lost in this copy; restore from upstream.
function export.romanize_tg(text, script, options)
if type(text) == "table" then
options = {}
-- NOTE(review): both values are set to the whole `args` table here; indexes such as
-- args[1] / args[2] were probably lost in this copy — confirm.
text, script = text.args, text.args
end
text = rsubn(text, "", ", ")
-- Surround every word with "#" boundary markers.
text = rsubn(text, " | ", "# | #")
text = "##" .. rsubn(text, " ", "# #") .. "##"
text = rsubn(text, "i()", "ē%1")
text = rsubn(text, "u()", "ō%1")
text = rsubn(text, "(y(%.?))y", "i%2y")
text = rsubn(text, "ˈ", "`")
text = rsubn(text, "(y`y)", "i`y")
--kill incorrect characters
text = rsubn(text, "()", "")
text = rsubn(text, "()", "n")
text = rsubn(text, "w", "v")
-- Replace xw clusters
text = rsubn(text, "xv()", "x%1")
text = rsubn(text, "xva", "xu")
text = rsubn(text, "ʷ", "")
--ensure vowels are paired to a consonant
text = rsubn(text, "()()", "%1'%2")
text = rsubn(text, "()", "")
-- Simplify the special input letters to their plain romanization.
text = rsubn(text, "ḍ", "z")
text = rsubn(text, "ḏ", "z")
text = rsubn(text, "ṯ", "s")
text = rsubn(text, "ṭ", "t")
text = rsubn(text, "()", "ʾ")
text = rsubn(text, "ṣ", "s")
text = rsubn(text, "ḥ", "h")
text = rsubn(text, "G", "ġ")
text = rsubn(text, "ğ", "ġ")
text = rsubn(text, "ē", "e")
text = rsubn(text, "ō", "ü")
text = rsubn(text, "ā", "o")
-- remove Hazaragi retroflexes
text = rsubn(text, "D", "d")
text = rsubn(text, "T", "t")
text = rsubn(text, "ɖ", "d")
text = rsubn(text, "ʈ", "t")
-- Tajik does not have vowel length
text = rsubn(text, "()", "i")
text = rsubn(text, "()", "u")
-- Drop the stress marks and the boundary markers.
text = rsubn(text, "`", "")
text = rsubn(text, "#()", "")
text = rsubn(text, "#", "")
return text
end
-- Run the IPA converter that belongs to `style` on `text`.
-- Returns the converted string, or `text` unchanged when `style` is not one of the
-- known style codes.
local function one_term_ipa(text, style)
	-- Name of the exported converter for each style code; "fa" and "teh" share one.
	local converter_names = {
		cls = "fa_cls_IPA",
		prs = "prs_IPA",
		kbl = "prs_kbl_IPA",
		haz = "prs_haz_IPA",
		fa = "fa_IPA",
		teh = "fa_IPA",
		tg = "tg_IPA"
	}
	local name = converter_names[style]
	if not name then
		return text
	end
	-- Looked up on `export` at call time, as the original if/elseif chain did.
	local converted = export[name](text)
	if style == "kbl" then
		-- Fall back to the formal Dari converter when the Kabuli one yields nothing.
		converted = converted or export.prs_IPA(text) --should ignore conversion if specified
	end
	return converted
end
--- Generate the pronunciation of one input term in one style.
-- style == one of the following:
-- "cls": Classical Persian
-- "prs": Dari Persian
-- "kbl": Kabuli
-- "haz": Hazaragi
-- "fa": Iranian Persian
-- "teh": Tehrani
-- "tg": Tajik
-- @param text  romanized input term
-- @param style style code, see above
-- @return a list of pronunciation records; currently exactly one record of the
--         form `{phonemic = <IPA string>}`, or an empty list when `text` is nil.
function export.IPA(text, style)
	-- The previous version defined two `apply_sub` helpers that were never called and
	-- referenced the undefined global `rfind`, and passed the undefined global `err`
	-- to one_term_ipa (an error when strict globals are active). one_term_ipa only
	-- takes (text, style), so the dead code and stray arguments are gone.
	if text == nil then
		-- Matches the old behaviour: no variants, so no pronunciations.
		return {}
	end
	return {
		{
			phonemic = one_term_ipa(text, style)
		}
	}
end
-- Generate the pronunciations for every style in `inputs` and group them for display.
-- `inputs`     table keyed by style code; each value is a list of input strings
--              (as built by export.show).
-- `args_style` optional filter string: comma-separated alternatives, each a
--              "+"-joined list of style codes, any of which may be negated with "-".
-- Returns a list of groups `{tag = <hidden tag>, styles = {<style record>, ...}}`,
-- where each style record has `tag`, `represented_styles`, `pronuns` and `indent`.
-- NOTE(review): index expressions (e.g. a `[style]` subscript) and the wikilinked tag
-- strings appear to have been lost in this copy: assignments such as
-- `pronuns_by_style = {}` and tags such as "]" are almost certainly not what the
-- upstream code says. Restore from upstream before relying on this function.
function export.express_styles(inputs, args_style)
local pronuns_by_style = {}
local expressed_styles = {}
-- Compute the pronunciations of all inputs for one style.
local function dostyle(style)
pronuns_by_style = {}
for _, val in ipairs(inputs) do
local pronuns = export.IPA(val, style)
for _, pronun in ipairs(pronuns) do
table.insert(pronuns_by_style, pronun)
end
end
end
-- Return the subset of `styles` for which pronunciations exist.
local function all_available(styles)
local available_styles = {}
for _, style in ipairs(styles) do
if pronuns_by_style then
table.insert(available_styles, style)
end
end
return available_styles
end
-- Add one display line. `hidden_tag` names the collapsible group the line belongs to
-- (true means "same as `tag`"), `styles` is one style code or a list of them, and
-- `indent` is the bullet depth (default 1).
local function express_style(hidden_tag, tag, styles, indent)
indent = indent or 1
if hidden_tag == true then
hidden_tag = tag
end
if type(styles) == "string" then
styles = {styles}
end
styles = all_available(styles)
if #styles == 0 then
return
end
local style = styles
-- If style specified, make sure it matches the requested style.
local style_matches
if not args_style then
style_matches = true
else
-- Alternatives are separated by commas; one matching alternative is enough.
local or_styles = rsplit(args_style, "%s*,%s*")
for _, or_style in ipairs(or_styles) do
-- Within an alternative, every "+"-joined part must match.
local and_styles = rsplit(or_style, "%s*%+%s*")
local and_matches = true
for _, and_style in ipairs(and_styles) do
local negate
if and_style:find("^%-") then
and_style = and_style:gsub("^%-", "")
negate = true
end
local this_style_matches = false
for _, part in ipairs(styles) do
if part == and_style then
this_style_matches = true
break
end
end
if negate then
this_style_matches = not this_style_matches
end
if not this_style_matches then
and_matches = false
end
end
if and_matches then
style_matches = true
break
end
end
end
if not style_matches then
return
end
local new_style = {
tag = tag,
represented_styles = styles,
pronuns = pronuns_by_style,
indent = indent
}
-- Append to an existing group with the same hidden tag, if there is one.
for _, hidden_tag_style in ipairs(expressed_styles) do
if hidden_tag_style.tag == hidden_tag then
table.insert(hidden_tag_style.styles, new_style)
return
end
end
-- Otherwise start a new group.
table.insert(
expressed_styles,
{
tag = hidden_tag,
styles = {new_style}
}
)
end
for style, _ in pairs(inputs) do
dostyle(style)
end
-- True when the two styles' pronunciations differ, or when either is missing.
local function diff(style1, style2)
if not pronuns_by_style or not pronuns_by_style then
return true
end
return not m_table.deepEquals(pronuns_by_style, pronuns_by_style)
end
local fa_teh_different = diff("fa", "teh")
-- NOTE(review): the next two results are computed but never read below; "kbl" and
-- "haz" are always expressed.
local prs_kbl_different = diff("prs", "kbl")
local prs_haz_different = diff("prs", "haz")
-- Classical Persian
express_style("]", "]", "cls")
-- Dari Persian
express_style(
"]",
"]",
"prs"
)
express_style("]", "]", "kbl", 2)
express_style("]", "]", "haz", 2)
-- Iranian Persian
express_style(
"]",
"]",
"fa"
)
-- Tehrani is only listed separately when it differs from the general Iranian form.
if fa_teh_different then
express_style("]", "]", "teh", 2)
end
-- Tajik
express_style(
"]",
"]",
"tg"
)
return expressed_styles
end
-- Template entry point: reads the parent frame's arguments, generates the
-- pronunciations for each style and returns the formatted wikitext/HTML.
-- Arguments read from the processed `args` in the visible code: `style`, `bullets`,
-- `pre`, `post`, `ref`, plus one parameter per style group and per style.
-- NOTE(review): table keys and index expressions appear to have been lost in this
-- copy (e.g. `= {}` entries in `params`, bare `params = {}` / `inputs = args`
-- assignments, `pron = ""`). Restore from upstream before relying on this function.
function export.show(frame)
-- Create parameter specs
local params = {
= {}, -- this replaces style group 'all'
= {},
= {},
= {},
= {},
= {type = "number", default = 1}
}
for group, _ in pairs(export.all_style_groups) do
if group ~= "all" then
params = {}
end
end
for _, style in ipairs(export.all_styles) do
params = {}
end
-- Parse arguments
local parargs = frame:getParent().args
local args = require("Module:parameters").process(parargs, params)
-- Set inputs
local inputs = {}
-- If 1= specified, do all styles.
if args then
for _, style in ipairs(export.all_styles) do
inputs = args
end
end
-- Then do remaining style groups other than 'all', overriding 1= if given.
for group, styles in pairs(export.all_style_groups) do
if group ~= "all" and args then
for _, style in ipairs(styles) do
inputs = args
end
end
end
-- Then do individual style settings.
for _, style in ipairs(export.all_styles) do
if args then
inputs = args
end
end
-- If no inputs given, set all styles based on current pagename.
if not next(inputs) then
local text = mw.title.getCurrentTitle().text
for _, style in ipairs(export.all_styles) do
inputs = text
end
end
-- Each input may list several comma-separated terms.
for style, input in pairs(inputs) do
inputs = rsplit(input, ",")
end
local expressed_styles = export.express_styles(inputs, args.style)
local lines = {}
-- Format one line. Returns the formatted wikitext and a markup-free approximation
-- of it that is only used for measuring its length.
local function format_style(tag, expressed_style, is_first)
local pronunciations = {}
local formatted_pronuns = {}
for _, pronun in ipairs(expressed_style.pronuns) do
table.insert(
pronunciations,
{
pron = "",
qualifiers = pronun.qualifiers
}
)
local formatted_phonemic = ""
if pronun.qualifiers then
formatted_phonemic = "(" .. table.concat(pronun.qualifiers, ", ") .. ") " .. formatted_phonemic
end
table.insert(formatted_pronuns, formatted_phonemic)
end
-- Number of bullets: When indent = 1, we want the number of bullets given by `args.bullets`,
-- and when indent = 2, we want `args.bullets + 1`, hence we subtract 1.
local bullet = string.rep("*", args.bullets + expressed_style.indent - 1) .. " "
-- Here we construct the formatted line in `formatted`, and also try to construct the equivalent without HTML
-- and wiki markup in `formatted_for_len`, so we can compute the approximate textual length for use in sizing
-- the toggle box with the "more" button on the right.
-- `pre`, `ref` and `post` are only attached to the first line.
local pre = is_first and args.pre and args.pre .. " " or ""
local pre_for_len = pre .. (tag and "(" .. tag .. ") " or "")
pre = pre .. (tag and m_qual.format_qualifier(tag) .. " " or "")
local post = is_first and (args.ref or "") .. (args.post and " " .. args.post or "") or ""
local formatted = bullet .. pre .. m_IPA.format_IPA_full { lang = lang, items = pronunciations } .. post
-- NOTE(review): `pre_for_len` is computed above but `pre` (with markup) is used here.
local formatted_for_len = bullet .. pre .. "IPA(key): " .. table.concat(formatted_pronuns, ", ") .. post
return formatted, formatted_for_len
end
-- Format every group; groups with several styles also get one line per style.
for i, style_group in ipairs(expressed_styles) do
if #style_group.styles == 1 then
style_group.formatted, style_group.formatted_for_len =
format_style(style_group.styles.tag, style_group.styles, i == 1)
else
style_group.formatted, style_group.formatted_for_len = format_style(style_group.tag, style_group.styles, i == 1)
for j, style in ipairs(style_group.styles) do
style.formatted, style.formatted_for_len = format_style(style.tag, style, i == 1 and j == 1)
end
end
end
-- Length of `text` in characters after removing anything that looks like an HTML tag.
local function textual_len(text)
text = rsub(text, "<.->", "")
return ulen(text)
end
-- Find the longest line; it determines the width of the toggle boxes.
local maxlen = 0
for i, style_group in ipairs(expressed_styles) do
local this_len = textual_len(style_group.formatted_for_len)
if #style_group.styles > 1 then
for _, style in ipairs(style_group.styles) do
this_len = math.max(this_len, textual_len(style.formatted_for_len))
end
end
maxlen = math.max(maxlen, this_len)
end
-- Emit a plain <div> for single-style groups and a collapsible vsSwitcher box
-- (summary line plus the full list) for groups with several styles.
for i, style_group in ipairs(expressed_styles) do
if #style_group.styles == 1 then
table.insert(lines, "<div>\n" .. style_group.formatted .. "</div>")
else
local inline = '\n<div class="vsShow" style="display:none">\n' .. style_group.formatted .. "</div>"
local full_prons = {}
for _, style in ipairs(style_group.styles) do
table.insert(full_prons, style.formatted)
end
local full = '\n<div class="vsHide">\n' .. table.concat(full_prons, "\n") .. "</div>"
local em_length = math.floor(maxlen * 0.68) -- from ]
table.insert(
lines,
'<div class="vsSwitcher" data-toggle-category="pronunciations" style="width: ' ..
em_length ..
'em; max-width:100%;"><span class="vsToggleElement" style="float: right;"> </span>' ..
inline .. full .. "</div>"
)
end
end
-- major hack to get bullets working on the next line
return table.concat(lines, "\n") .. "\n<span></span>"
end
--Tajik from Cyrillic
-- Replacement tables used by export.tg_phonemic on the transliterated text.
-- NOTE(review): every entry has lost its key (`= value`) in this copy, which is not
-- valid Lua; restore the keys from upstream.
local t_consonants = {
= 'ɡ',
= 'd͡ʒ',
= 'ɣ',
= 'j',
= 'ʃ',
= 't͡ʃ',
= 'x',
= 'ʒ',
= 'ʔ'
}
local t_vowels = {
= 'a',
= 'ɵ',
= 'o',
}
-- Build a phonemic transcription of a Tajik term: transliterate it with the "tg"
-- language object, lowercase it, then map characters through t_vowels and
-- t_consonants.
function export.tg_phonemic(text)
text = require('Module:languages').getByCode('tg'):transliterate(text)
text = m_str_utils.lower(text)
-- Use the IPA stress mark and drop hyphens.
text = rsubn(text, "`", "ˈ")
text = rsubn(text, "%-", "")
-- NOTE(review): the pattern here is empty in this copy; a character class was
-- probably lost — restore from upstream.
text = rsubn(text, "", " ")
-- Replace vowels
text = rsubn(text, ".", t_vowels)
-- Replace consonants
text = rsubn(text, ".", t_consonants)
return text
end
-- Build a phonetic transcription of a Tajik term: transliterate and lowercase it,
-- rewrite the transliteration into the romanization that export.tg_IPA expects,
-- syllabify it and run export.tg_IPA on the result.
function export.tg_phonetic(text)
	local tajik = require('Module:languages').getByCode('tg')
	-- Parentheses keep only the first value returned by transliterate().
	local working = m_str_utils.lower((tajik:transliterate(text)))
	-- Applied strictly in this order: "ü" must become "ō" before "o" becomes "ā".
	local rewrites = {
		{"(i)", "ī"},
		{"(u)", "ū"},
		{"(e)", "ē"},
		{"(ü)", "ō"},
		{"(o)", "ā"},
		{"ʾ", "'"},
		{"%-", ""},
		{"ġ", "ğ"}
	}
	for _, rule in ipairs(rewrites) do
		working = rsubn(working, rule[1], rule[2])
	end
	working = syllabify(working)
	working = export.tg_IPA(working)
	return (mw.ustring.toNFC(working))
end
-- Template entry point for Tajik: collects the terms to transcribe from the parent
-- frame's arguments (or falls back to a default) and returns one bulleted line of
-- formatted IPA.
function export.show_tg(frame)
local tg = require("Module:languages").getByCode("tg")
local args = frame:getParent().args
local p, results = {}, {}
-- NOTE(review): `args` is a table and therefore always truthy, so the else branch
-- below cannot run as written; an index such as args[1] was probably lost in this
-- copy — confirm against upstream.
if args then
for index, item in ipairs(args) do
-- Empty arguments are passed on as nil.
table.insert(p, (item ~= "") and item or nil)
end
else
-- No arguments: use a sample word on template pages, else the page title.
if mw.title.getCurrentTitle().nsText == "Template" then
p = {"чаҳоршанбе"}
else
p = {mw.title.getCurrentTitle().text}
end
end
for _, word in ipairs(p) do
table.insert(results, {pron = "/" .. export.tg_phonemic(word) .. "/"})
-- NOTE(review): this second entry is an empty string in this copy; presumably it
-- held the bracketed phonetic form — restore from upstream.
table.insert(results, {pron = ""})
end
return "* " .. m_IPA.format_IPA_full { lang = tg, items = results }
end
return export