-- This is a private module sandbox of Catonif, for their own experimentation. Items in this module may be added and removed at Catonif's discretion; do not rely on this module's stability.
local export = {}
local m_str_utils = require("Module:string utilities")
local links_module = "Module:links"
local u = m_str_utils.char
local rfind = m_str_utils.find
local rsubn = m_str_utils.gsub
local ulen = m_str_utils.len
local ulower = m_str_utils.lower
local uupper = mw.ustring.upper
local usub = mw.ustring.sub
-- Dialectal (lect) data. It is nil when the module is loaded and is only assigned in
-- export.IPA, when the requested language is "pl"; anything reading data.lects or
-- data.lect_groups has to run after that assignment.
local data
-- Same as rsubn(), but only the substituted string is returned; the replacement
-- count is dropped by the surrounding parentheses.
local function rsub(term, foo, bar)
	return (rsubn(term, foo, bar))
end
-- Character that phonemic() substitutes for the "_" placeholder (as written in "t_s")
-- once all other replacements are done.
local OVERTIE = u(0x361) -- COMBINING DOUBLE INVERTED BREVE
--[[
Transcribes a single word. Multiword terms are handled by multiword().

Parameters:
  txt      the (re)spelling of one word. A leading <*> marks it as unstressed, leading <^>s move
           the stress back from the penult (one syllable per <^>), and a leading <+> forces the
           regular stress and skips the automatic stress checks.
  do_hyph  when truthy, a hyphenation string is also built and returned.
  lang     language code; the code branches on "pl", "szl", "csb", "slv" and "mas".
  is_prep  when truthy the word is treated as a preposition: it is left unstressed, word-final
           devoicing is skipped and "$" is appended to the result for the caller to process.
  period   only read when dial == "mpl": "early", "late" or nil (both variants); any other
           value raises an error.
  dial     dialect code, or nil.

Returns the transcription, either a single string or a table of strings when the one spelling
has several regular variants (e.g. the alternating stress of <-yka>), followed by the
hyphenation string if do_hyph.

The helper closures and scratch variables are declared local so nothing leaks into the global
environment between calls; in particular the assimilation loop near the end compares against a
local <new_txt> that is nil at the start of every call.

NOTE(review): several string patterns and table keys/indices in this function look truncated
(empty patterns such as "", table entries starting with a bare `=`, `x or x` expressions).
They are kept exactly as found; restore them from the upstream module before relying on this code.
--]]
local function phonemic(txt, do_hyph, lang, is_prep, period, dial)
-- Number of syllables by which the stress is moved back from its regular position.
local ante = 0
local unstressed = is_prep or false
-- When still true at the end (standard Polish, ante > 0), a second variant computed with
-- add_stress(0) is returned next to the first one.
local colloquial = true
-- Substitutes in <txt> in place; returns true if at least one replacement was made.
local function tsub(s, r)
local c
txt, c = rsubn(txt, s, r)
return c > 0
end
-- NOTE(review): as written this returns its argument unchanged; the call sites pass tables
-- keyed by language code, so a lookup by <lang> seems to have been lost here -- confirm.
local function lg(s) return s or s end
-- Searches the current value of <txt>.
local function tfind(s) return rfind(txt, s) end
-- NOTE(review): the pattern is empty although the message is about <å> -- confirm.
if tfind("") then error("Please replace å with á.") end
-- Save indices of uppercase characters before setting everything lowercase.
local uppercase_indices
if do_hyph then
uppercase_indices = {}
local capitals = (""):format(lg {
pl = "ĄĆĘŁŃÓŚŹŻ" --] .. "ÁÉÔÛÝ" --] .. "ḾṔẂ",
szl = "ÃĆŁŃŌŎÔÕŚŹŻ",
csb = "ÔÒÃËÙÉÓĄŚŁŻŹĆŃ",
slv = "ÃÉËÊÓÕÔÚÙŃŻ",
mas = "ÁÄÉŁŃÓÔŚÛŸŻŹ"
})
if tfind(capitals) then
local i = 1
local str = rsub(txt, "", "")
while rfind(str, capitals, i) do
local r, _ = rfind(str, capitals, i)
table.insert(uppercase_indices, r)
i = r + 1
end
end
-- nil (rather than an empty list) signals that nothing has to be restored later.
if #uppercase_indices == 0 then
uppercase_indices = nil
end
end
txt = ulower(txt)
-- Prevent palatisation of the special case kwazi-.
tsub("^kwazi", "kwaz-i")
-- falling diphthongs <au> and <eu>, and diacriticised variants
tsub(lg { "()u", csb = "()ù" }, "%1U")
-- rising diphthongs with <iV>
local V = lg { pl = "aąeęioóuy" .. "áéôûý", szl = "aãeéioōŏôõuy", csb = "ôòãëùéóąeyuioa", slv = "aãeéëêioóõôuúùyăĭŏŭŭùy̆ā", mas = "aáäeéioóôuûÿ" }
tsub(("()" .. (lang == "slv" and "j" or "i") .. "()"):format(V, V), "%1I%2")
if txt:find("^*") then
-- The symbol <*> before a word indicates it is unstressed.
unstressed = true
txt = txt:sub(2)
elseif txt:find("^%^+") then
-- The symbol <^> before a word indicates it is stressed on the ante-penult,
-- <^^> on the ante-ante-penult, etc.
ante = txt:gsub("(%^).*", "%1"):len()
txt = txt:sub(ante + 1)
elseif txt:find("^%+") then
-- The symbol <+> indicates the word is stressed regularly on the penult. This is useful
-- for avoiding the following checks to come into place.
txt = txt:sub(2)
else
if tfind(".+.+") then
-- Some words endings trigger stress on the ante-penult or ante-ante-penult regularly.
if tfind("liśmy$") or tfind("yśmy$") or tfind("liście$") or tfind("yście$") then
ante = 2
elseif tfind("by?$") and not tfind("ła?by?$") then
ante = 1
colloquial = false
end
end
-- Recognise <-yka> and its declined form and automatically assign it an antepenult stress.
if tfind(".+.+") and dial == nil then
local endings = lg {
{ "k", "ce", "kach", "kom" },
szl = { "k", "ce", "kacj", "kōm", "kach" },
csb = { "k", "ce", "kacj", "kóm", "kama", "kach" },
slv = { "k", "cê", "kacj", "kji", "kóm", "kóma", "kamy", "kach" }
}
for _, v in ipairs(endings) do
if tfind(("%s$"):format(v)) then
ante = 1
break
end
end
end
if dial == "mpl" then
if tfind(".+j.+") then
local endings = { "", "j", "m", "ach" }
for _, v in ipairs(endings) do
if tfind(("j%s$"):format(v)) then
ante = 1
end
end
end
end
end
-- TODO: mpl, csb, szl, slv, mas
-- Affixes are only split off automatically when the input has no manual <.>.
if not txt:find("%.") then
-- Don't recognise affixes whenever there's only one vowel (or dipthong).
local _, n_vowels = rsubn(txt, (""):format(V), "")
if n_vowels > 1 then
-- syllabify common prefixes as separate
local prefixes = {
"do", "wy", "za", "aktyno", "akusto", "akwa", "anarcho", "andro", "anemo", "antropo", "arachno", "archeo", "archi", "arcy", "areo", "arytmo", "audio", "awio", "balneo", "biblio", "brachy", "broncho", "ceno", "centro", "centy", "chalko", "chiro", "chloro", "chole", "chondro", "choreo", "chromato", "chrysto", "cyber", "cyklo", "cztero", "ćwierć", "daktylo", "decy", "deka", "dendro", "dermato", "diafano", "dwu", "dynamo", "egzo", "ekstra", "elektro", "encefalo", "endo", "entero", "entomo", "ergo", "erytro", "etno", "farmako", "femto", "ferro", "fizjo", "flebo", "franko", "ftyzjo", "galakto", "galwano", "germano", "geronto", "giganto", "giga", "gineko", "giro", "gliko", "gloso", "glotto", "grafo", "granulo", "grawi", "haplo", "helio", "hemato", "hepta", "hetero", "hiper", "histo", "hydro", "info", "inter", "jedno", "kardio", "kortyko", "kosmo", "krypto", "kseno", "logo", "magneto", "między", "niby", "nie", "nowo", "około", "oksy", "onto", "ornito", "para", "pierwo", "pięcio", "pneumo", "poli", "ponad", "post", "poza", "proto", "pseudo", "psycho", "radio", "samo", "sfigmo", "sklero", "staro", "stereo", "tele", "tetra", "wice", "zoo", "żyro", "ami", "ango", "ant", "a?steno", "lasto", "chroo", "cys?to", "demo", "h?ekto", "eo", "hiro", "kontra?", "mea", "mii", "aro", "rzy", "przed?", "wielk?o", "mi?elo", "eur", "nero", "allo", "astro", "atto", "brio", "heksa", "all?o", "ato", "atro", "br?io", "heksa?", "pato", "ba", "izo", "myzo", "mkro", "mio", "chemo", "gono", "kilo", "lipo", "nano", "kilk", "hem", "home?o", "fio", "mao", "hlo", "hip?o", "oo",
-- <na-, po-, o-, u-> would hit too many false positives
}
for _, v in ipairs(prefixes) do
if tfind("^"..v) then
-- Only split the prefix off if the word keeps at least one vowel after it.
local _, other_vowels = rsubn(v, (""):format(V), "")
if (n_vowels - other_vowels) > 0 then
tsub(("^(%s)"):format(v), "%1.")
break
end
end
end
if do_hyph then
-- syllabify common suffixes as separate
local suffixes = lg {
pl = {
"nąć",
"tw", "twie", "twm", "twami", "twach",
"dztw", "dztwie", "dztwm", "dztwami", "dztwach",
"dł", "dłm", "dłami", "dłach",
"j", "jom", "jami", "jach",
}, szl = {
"nōńć", "dło",
}, csb = {
"nąc", "dło"
}, slv = {
"nõc", "dlô"
}, mas = {
"nóncz", "dło"
}
}
for _, v in ipairs(suffixes) do
if tsub(("(%s)$"):format(v), ".%1") then break end
end
-- syllabify <istka> as /ist.ka/
if txt:find("st") then
local endings = lg {
{ "k", "ce", "kach", "kom", "kami" },
szl = { "k", "ce", "kami", "kacj", "kacach", "kōma" },
csb = { "k", "ce", "kami", "kacj", "kacach", },
}
for _, v in ipairs(endings) do
if tsub(("()st(%s)$"):format(v), "%1st.%2") then break end
end
end
end
end
end
-- syllabification
-- The substitution is applied twice (the loop body runs for 0 and for 1).
for _ = 0, 1 do
tsub(("()(*)()"):format(V, V, V), function (a, b, c)
local function find(x) return rfind(b, x) end
-- <thing> is a format string that the digraph spellings are inserted into.
local function is_diagraph(thing)
local r = find(thing:format("z")) or find(thing:format("ch")) or find(thing:format("d"))
if dial == "mpl" then return r or find(thing:format("b́")) end
if lang == "slv" then return r or find(thing:format("gh")) end
if lang == "mas" then return r or find(thing:format("rż")) end
return r
end
if ((ulen(b) < 2) or is_diagraph("^%s$")) then
b = "."..b
else
local i = 2
if is_diagraph("^%s") then i = 3 end
if usub(b, i, i):find("^$") then
b = "."..b
else
b = ("%s.%s"):format(usub(b, 0, i - 1), usub(b, i))
end
end
return ("%s%s%s"):format(a, b, c)
end)
end
local hyph
if do_hyph then
-- Ignore certain symbols and diacritics for the hyphenation.
hyph = txt:gsub("'", "."):gsub("-", "")
if lang == "slv" then
local BREVE = u(0x306)
hyph = rsubn(hyph, "", {
= "j",
= "a", = "i", = "o",
= "u", = "", = "a",
})
end
hyph = hyph:lower()
-- Restore uppercase characters.
if uppercase_indices then
-- str_i loops through all the characters of the string
-- list_i loops as above but doesn't count dots
-- array_i loops through the indices at which the capital letters are
local str_i, list_i, array_i = 1, 1, 1
local function h_sub(x, y) return usub(hyph, x, y) end
while array_i <= #uppercase_indices do
if h_sub(str_i, str_i) ~= "." then
-- NOTE(review): this compares a number with the whole table, which is never
-- equal; an index (presumably by array_i) seems to be missing -- confirm.
if list_i == uppercase_indices then
hyph = ("%s%s%s"):format(h_sub(1,str_i-1), uupper(h_sub(str_i,str_i)), h_sub(str_i+1))
array_i = array_i + 1
end
list_i = list_i + 1
end
str_i = str_i + 1
end
end
end
tsub("'", "ˈ")
-- handle digraphs
tsub("ch", "x")
tsub("z", { ="t_ʂ", ="ʂ" })
tsub(lg { "rz", mas = "rż" }, "R")
tsub("d()", "d_%1")
if dial == "mpl" then tsub("b́", "bʲ") end
if lang == "slv" then tsub("gh", "ɣ") end
-- basic orthographical rules
-- not using lg() here for speed
if lang == "pl" then
local replacements = {
-- vowels
="ɛ", ="ɔ",
="ɔN", ="ɛN",
="u", ="ɘ",
-- consonants
="t_s", ="t_ɕ",
="ɲ", ="ɕ", ="ʑ",
="w", ="v", ="ʐ",
="ɡ", ="x",
}
-- Dialect-specific overrides of the table above.
if dial then
replacements = "e"
replacements = "ɒ"
if dial == "mpl" then
replacements = "ɛ̃"
replacements = "ɔ̃"
replacements = "ɨ"
replacements = "o"
replacements = "ɫ"
replacements = "pʲ"
replacements = "mʲ"
replacements = "vʲ"
-- <b́> has no unicode character and is hence handled above
else
replacements = "wɔ"
replacements = "wu"
replacements = "Y"
if data.lects.mid_o then
replacements = "o"
elseif dial == "ekr" then
replacements = "O"
end
if data.lects.front_y then
replacements = "Y"
end
if data.lects.dark_l then
replacements = "ɫ"
replacements = "lʲ"
end
if data.lects.glottal_h then
replacements = "h"
end
end
end
tsub(".", replacements)
elseif lang == "szl" then
tsub(".", {
-- vowels
="ɛ", ="ɔ",
="o", ="O",
="wɔ", = "ɔ̃",
= "ɪ",
-- consonants
="t_s", ="t_ɕ",
="ɲ", ="ɕ", ="ʑ",
="w", ="v", ="ʐ",
="ɡ", ="x",
})
elseif lang == "csb" then
tsub(".", {
-- vowels
="ɛ", ="e", ="ɔ",
="o", ="ɞ", ="ɜ",
="wɛ", ="wu", = "Y",
= "ɔ̃",
-- consonants
="t_s", ="t_ɕ",
="ɲ", ="ɕ", ="ʑ",
="w", ="v", ="ʒ",
="ɡ", ="x",
})
elseif lang == "slv" then
tsub(".", {
-- vowels
="ɛ", ="e", ="ɔ",
="o", ="ɵ", ="ə", ="E",
="ɪ", ="ʉ", ="y",
="ɔ̃", ="y̆", ="aː", -- ãăĭŏŭ
-- consonants
="t_s",
="n",
="v", ="ʒ",
="ɡ", ="x",
})
elseif lang == "mas" then
tsub(".", {
-- vowels
="ɒ", ="æ",
="ɛ", ="e",
="ɔ", ="o",
="wɔ", ="Y",
="Y", ="wu",
-- consonants
="t_s",
="ɲ", ="ʃ",
="w", ="v", ="ʒ",
="ɡ", ="x", ="ʑ",
})
end
if lang == "csb" or lang == "slv" or lang == "mas" then
tsub("ʂ", "ʃ")
tsub("ʐ", "ʒ")
end
-- palatalisation
local palatise_into = { = "ɲ", = "ɕ", = "ʑ" }
tsub("()I", function (c) return palatise_into end)
tsub("()i", function (c) return palatise_into .. "i" end)
-- voicing and devoicing
local T = "pftsʂɕkxʃx"
local D = "bdzʐʑɡʒɣ"
tsub(("(?)v"):format(T), "%1f")
tsub(("(?)R"):format(T), "%1S")
if lang == "slv" then
tsub(("(?)ɣ"):format(T), "%1x")
tsub(("(?)x"):format(D), "%1ɣ")
end
-- Concatenates the keys of <x> into one string (key order is unspecified).
local function arr_list(x)
local r = ""
for i in pairs(x) do
r = r .. i
end
return r
end
local devoice = {
= "p",
= "t",
= "k",
= "s",
= "f",
= "ɕ",
= "ʂ",
= "ʃ",
= "S",
}
local trilled_rz = lang == "csb" or lang == "slv" or lang == "mas"
if not trilled_rz and dial then
trilled_rz = data.lects.trilled_rz
end
if trilled_rz then
devoice = nil
end
if lang == "slv" then
devoice = "x"
end
local mpl_J = dial == "mpl" and "ʲ?" or ""
local arr_list_devoice = arr_list(devoice)
-- Word-final devoicing; skipped for prepositions.
if not is_prep then
tsub(("()(%s)$"):format(arr_list_devoice, mpl_J), function (a, b)
return devoice .. (type(b) == "string" and b or "")
end)
end
tsub("Y", "i")
if lang == "csb" then
tsub("()o", "%1wo")
tsub("vw", "w")
end
if lang == "slv" then
local V = "aɛeɔ̃oɵəEɪiʉuyã"
tsub("nj$", "n")
tsub("nj()", "n%1")
tsub("ɲ()", "nj%1")
tsub("ɛ$", "ə")
end
if trilled_rz then
tsub("R", "r̝")
end
-- For "mpl" the placeholders <S> and <R> are kept; they are resolved per period below.
if dial ~= "mpl" then
tsub("S", "ʂ")
tsub("R", "ʐ")
end
local voice = {}
for i, v in pairs(devoice) do
voice = i
end
-- Previous value of <txt> in the loop below; local, so it is nil at the start of every call.
local new_txt
local devoice_string = ("()(%s?)"):format(arr_list_devoice, mpl_J, T)
local voice_string = ("()(%s?)"):format(arr_list(voice), mpl_J, D)
local function devoice_func(a, b) return devoice .. b end
local function voice_func(a, b) return voice .. b end
-- Repeat both substitutions until a full pass leaves <txt> unchanged.
while txt ~= new_txt do
new_txt = txt
tsub(devoice_string, devoice_func)
tsub(voice_string, voice_func)
end
if lang == "pl" then
-- nasal vowels
tsub("N(?)", "m%1")
tsub("N(?)", "ɲ%1")
tsub("N(?_)", "ɲ%1")
tsub("N(?)", "n%1")
tsub("N(?)", "%1")
tsub("ɛN$", "ɛ")
tsub("N", "w̃")
end
-- Hyphen separator, e.g. to prevent palatisation of <kwazi->.
tsub("-", "")
tsub("_", OVERTIE)
tsub("I", "j")
tsub("U", "w")
-- Handles stress.
local function add_stress(stressed_syllable, force_initial_stress)
local stressed_txt
if force_initial_stress or (dial and data.lects.initial_stress) then
-- Deals with initially stressed dialects.
stressed_txt = "ˈ" .. txt
else
-- Accent elsewhere, usually ante-penult although can vary depending on
-- the <stressed_syllable> value, counting backwards.
local regex = ""
for _ = 0, stressed_syllable do
regex = regex .. "+%."
end
stressed_txt = rsub(txt, "%.(" .. regex .. "+)$", "ˈ%1")
-- If no stress mark could have been placed, it can only be initial,
-- e.g. in monosyllables.
if not rfind(stressed_txt, "ˈ") then
stressed_txt = "ˈ" .. stressed_txt
end
end
-- Finally strip away syllable separation marks.
return (stressed_txt:gsub("%.", ""))
end
-- A stress mark already present in <txt> (from a manual <'>) also suppresses add_stress().
local should_stress = not (unstressed or txt:find("ˈ"))
local prons = should_stress and add_stress(ante) or (txt:gsub("%.", ""))
if is_prep then
prons = prons .. "$"
end
-- From here on <prons> may be turned from a string into a table of variants.
if lang == "pl" then
if dial then
if dial == "ekr" then
if tfind("O") then
prons = { (prons:gsub("O", "o")), (prons:gsub("O", "u")) }
end
elseif dial == "ora" or dial == "zag" then
local stressed_initially = add_stress(0, true)
if stressed_initially ~= prons then
prons = dial == "ora" and { stressed_initially, prons } or { prons, stressed_initially }
end
elseif dial == "mpl" then
if tfind("") then
local mp_early = prons:gsub("", "r̝")
local mp_late = prons:gsub("R", "ʐ"):gsub("S", "ʂ")
if period == "early" then
prons = mp_early
elseif period == "late" then
prons = mp_late
elseif not period then
prons = {
mp_early, mp_late,
}
else
error(("'%s' is not a supported Middle Polish period, try with 'early' or 'late'."):format(period))
end
end
end
else
if should_stress and ante > 0 and colloquial then
local stressed_antepenult = add_stress(0)
if stressed_antepenult ~= prons then
prons = { prons, stressed_antepenult }
end
end
end
elseif lang == "szl" then
if tfind("O") then
prons = {
(prons:gsub("O", "ɔ")),
(prons:gsub("O", "ɔw")),
(prons:gsub("O", "ɛw")),
}
end
elseif lang == "slv" then
if tfind("E") then
local V = "aɛeɔ̃oɵəEɪiʉuyã"
prons = prons:gsub("ˈ(*)E", "ˈ%1i̯ɛ")
prons = prons:gsub("E$", "ə")
prons = prons:gsub("E", "ɛ")
end
end
if do_hyph then
return prons, hyph
else
return prons
end
end
-- Appends <s> to the list <t> unless an equal element is already present.
-- TODO: This might slow things down if used too much? (linear scan on every call)
local function table_insert_if_absent(t, s)
	local already_there = false
	for _, existing in ipairs(t) do
		if existing == s then
			already_there = true
			break
		end
	end
	if not already_there then
		table.insert(t, s)
	end
end
-- Returns rhyme from a transcription.
-- <pron> is a transcription string and <lang> a language code. The result is a table with
-- two fields: <rhyme>, derived from <pron> by dropping everything up to and including the
-- last stress mark and then any syllable dots, and <num_syl>, a one-element list holding the
-- replacement count returned by rsubn().
-- NOTE(review): <V> is a table of per-language vowel strings but is handed to string.format
-- as it is, <lang> is never read, and the patterns below contain empty parentheses/strings;
-- an index by <lang> and the bracketed character classes appear to be missing -- confirm
-- against the upstream module.
local function do_rhyme(pron, lang)
local V = ({ pl = "aɛiɔuɘ", szl = "aãɛiɔouɪ", csb = "aãɛeɜiɔoõɞu", slv = "aãɛeĭɪŏɔɵŭʉy", mas = "aɒæɛeiɔou"})
return {
rhyme = rsub(rsub(rsub(pron, "^.*ˈ", ""), ("^-()"):format(V, V), "%1"), "%.", ""),
num_syl = { select(2, rsubn(pron, (""):format(V), "")) }
}
end
--[[
Handles a single input, returning a table of transcriptions. Returns also a string of
hyphenation if it is a single-word term.

Three cases, decided on <term>:
  * it starts with "$": it is returned untouched as { phonetic = term };
  * it contains a space: every word is transcribed by phonemic() without hyphenation, words
    matching the preposition list of the language are flagged as prepositions, the results
    are joined with spaces and the prepositions are then assimilated to the following word;
  * otherwise: the term goes straight to phonemic(), with hyphenation requested unless
    dial == "mpl".
<lang>, <period> and <dial> are passed on to phonemic() unchanged.

The helper <lg> is declared local so that it does not overwrite a global of the same name.

NOTE(review): several patterns and indices in this function look truncated (`s or s`, the
empty gmatch pattern, pairs of identical sep:format(p, v) calls, the empty-parentheses
patterns in assim). They are kept exactly as found; restore them from the upstream module.
--]]
local function multiword(term, lang, period, dial)
if term:find("^%$") then
return { phonetic = term }
elseif term:find(" ") then
-- TODO: repeated
local function lg(s)
return s or s
end
local prepositions = lg {
{
"beze?", "na", "dla", "do", "ku",
"nade?", "o", "ode?", "po", "pode?", "przede?",
"przeze?", "przy", "spode?", "u", "we?",
"z?", "znade?", "zza",
}, szl = {
"bezy?", "na", "dlŏ", "d", "ku",
"nady?", "ô", "ôdy?", "po", "pody?", "przedy?",
"przezy?", "przi", "spody?", "u", "w?",
"z?", "", "znady?"
}, csb = {
"beze?", "na", "dlô", "do", "kù",
"nade?", "ò", "òde?", "pò", "pòde?", "przede?",
"przeze?", "przë", "spòde?", "ù", "we?", "wew",
"z?", "zez", "zeza", "zó", "znade?"
}, slv = {
"dlo", "dô", "na", "nade?", "przêde?", "przêze?",
"przë", "pô", "pôde?", "sê?", "vô", "we?", "wôde?",
"wù", "za"
}, mas = {
"dlá", "do", "ku", "na", "nade?", "po", "pode?",
"ponade?", "poza", "prżede?", "prżeze", "prżi",
"we?", "ze?", "za", "ô", "ôde?", "û", "beze?"
}
}
-- Accumulated transcription so far: nil, a string, or a table of two variants.
local p
local contains_preps = false
for word in term:gmatch("+") do
local is_prep = false
-- Each entry is used as a pattern anchored to the whole word.
for _, prep in ipairs(prepositions) do
if (rfind(word, ("^%s$"):format(prep))) then
is_prep = true
contains_preps = true
break
end
end
local v = phonemic(word, false, lang, is_prep, period, dial)
local sep = "%s %s"
-- Join <v> onto <p>; both may independently be a string or a table.
if p == nil then
p = v
elseif type(p) == "string" then
if type(v) == "string" then
p = sep:format(p, v)
else
p = { sep:format(p, v), sep:format(p, v) }
end
else
if type(v) == "string" then
p = { sep:format(p, v), sep:format(p, v) }
else
p = { sep:format(p, v), sep:format(p, v) }
end
end
end
-- Works on the "$" marker that phonemic() appends to prepositions, then removes it.
local function assimilate_preps(str)
local function assim(from, to, before)
str = rsub(str, ("%s(%%$ ˈ?)"):format(from, before), to.."%1")
end
local T = "ptsʂɕkx"
assim("d", "t", T)
assim("v", "f", T)
assim("z", "s", T)
if lang == "szl" then
local D = "bdzʐʑɡ"
assim("s", "z", D)
assim("ɕ", "ʑ", D)
end
return rsub(str, "%$", "")
end
if contains_preps then
if type(p) == "string" then
p = assimilate_preps(p)
else
p = assimilate_preps(p)
p = assimilate_preps(p)
end
end
return p
else
return phonemic(term, dial ~= "mpl", lang, false, period, dial)
end
end
-- Expands the shorthand inputs built on the magic characters <*>, <^>, <+>, <.>, <#> into a
-- full respelling based on the page title; any other input is returned as it is.
local function normalise_input(term, title)
	-- Inserts a syllable break next to an affix of <str>. <template> turns the affix into
	-- an anchored pattern, <repl> places the dot, <verb> only serves the error message.
	local function apply_affix(str, affix, template, repl, verb)
		local pattern = template:format(affix)
		if rfind(str, pattern) then
			return str:gsub(pattern, repl)
		end
		error(("the word does not %s with %s!"):format(verb, affix))
	end
	local function split_prefix(str, prefix)
		return apply_affix(str, prefix, "^(%s)", "%1.", "start")
	end
	local function split_suffix(str, suffix)
		return apply_affix(str, suffix, "(%s)$", ".%1", "end")
	end

	-- The diesis stands simply for {{PAGENAME}}.
	if term == "#" then
		return title
	end
	-- Inputs that are just '+', '*', '^', '^^', etc. are treated as
	-- if they contained the title with those symbols preceding it.
	if term == "+" or term == "*" or term:find("^%^+$") then
		return term .. title
	end
	-- Syntax like <po.>, <.ka> and <po..ka> only names a prefix and/or a suffix whose
	-- syllabification is to be specified, so the whole word need not be respelled.
	if term:find(".+%.$") then
		return split_prefix(title, term:sub(1, -2))
	end
	if term:find("^%..+") then
		return split_suffix(title, term:sub(2))
	end
	if term:find(".+%.%..+") then
		local prefix = term:gsub("%.%..+", "")
		local suffix = term:gsub(".+%.%.", "")
		return split_suffix((split_prefix(title, prefix)), suffix)
	end
	return term
end
-- This converts the raw information, the arguments and page title, into
-- tables to be handed over to the IPA module.
--
-- Parameters: <lang_code> and <dial_code> are passed on to multiword(); <page_title> is used
-- to expand shorthand inputs and to validate the hyphenation; <args_terms> is the list of
-- respellings; <args_quals>, <args_refs> and <args_period> are the matching qualifier,
-- reference and period arguments.
-- Returns four values: the list of pronunciation items, the list of hyphenations, the list
-- of rhymes, and a boolean telling whether a hyphenation line is wanted at all. When the
-- early exit for "-" is taken the first value is nil.
--
-- NOTE(review): several indices and patterns in this function look truncated (e.g.
-- `args_terms == "-"` compares a table with a string, the gmatch pattern is just "+",
-- `brackets = ""`, `pron_list = {}` inside the loop). They are left exactly as found;
-- restore them from the upstream module.
local function get_lect_line(lang_code, page_title, args_terms, args_quals, args_refs, args_period, dial_code)
if #args_terms == 1 and args_terms == "-" then
return nil, {}, {}, false
end
-- Both start as a list holding one empty sub-list and are flattened by merge_subtables below.
local pron_list = {{}}
local rhyme_list = {{}}
local hyph_list = {}
local do_hyph = false
-- Format string wrapped around every transcription.
local brackets = "/%s/"
-- NOTE(review): <data> is only assigned in export.IPA; confirm it is loaded whenever a
-- dialect code is passed in.
if dial_code then
if data.lects.phonetic then
brackets = ""
end
end
-- Loops over the terms given as arguments.
for arg_index, arg_term in ipairs(args_terms) do
-- Handles magic symbols in the input.
arg_term = normalise_input(arg_term, page_title)
-- Obtains the transcription and hyphenation for the current index.
local prons, hyph = multiword(arg_term, lang_code, args_period, dial_code)
-- Obtains the possible qualifiers of the current index, separated by semicolon.
local qualifiers = {}
if args_quals then
for qual in args_quals:gmatch("+") do
table.insert(qualifiers, qual)
end
end
-- Return a single phonemic transcription with qualifiers and references
-- attached to it. An additional qualifier may be specified, which is for the
-- regular oscillations (e.g. Middle Polish <rz>, etc.). The references can
-- be omitted (if it's the second transcription of a regular oscillation).
local function new_pron(pron, additional_qualifier, dont_refs)
local ret = {
pron = brackets:format(pron),
qualifiers = qualifiers,
refs = not dont_refs and {args_refs},
}
if additional_qualifier then
-- Copy first, so the shared <qualifiers> list is not modified.
local new_qualifiers = {unpack(qualifiers)}
table.insert(new_qualifiers, additional_qualifier)
ret.qualifiers = new_qualifiers
end
return ret
end
-- If the <prons> variable is a string it means only one transcription
-- was given.
if type(prons) == "string" then
table.insert(pron_list, new_pron(prons))
table.insert(rhyme_list, do_rhyme(prons, lang_code))
-- If the <pron> variable is a table and has a <phonetic> value, then simply return that.
elseif prons.phonetic then
table.insert(pron_list, {
pron = prons.phonetic,
qualifiers = qualifiers,
refs = {args_refs},
})
-- If the <prons> variably is a table and does not have a <phonetic> value, it is
-- a list of transcriptions.
else
-- Qualifier labels for the variants, by language and, for Polish lects, by dialect code.
local multiple_transcript = ({
pl = { "prescribed", "casual" },
szl = { nil, "Western", "Głogówek"},
})
if lang_code == "pl" and dial_code then
multiple_transcript = ({
mpl = { "16<sup>th</sup> c.", "17<sup>th</sup>–18<sup>th</sup> c." },
ekr = { "pre-21<sup>st</sup> c.", "21<sup>st</sup> c."},
ora = { "Poland", "Slovakia" },
zag = { "north", "south" },
})
end
for i, v in ipairs(prons) do
if #pron_list < (i + 1) then pron_list = {} end
-- References are only attached to the first variant (i ~= 1 sets <dont_refs>).
table.insert(pron_list, new_pron(v, multiple_transcript, i ~= 1))
if #rhyme_list < (i + 1) then rhyme_list = {} end
table.insert(rhyme_list, do_rhyme(v, lang_code))
end
end
-- If a hyphenation value had been returned by the <multiword> function, it means
-- that in any case a hyphenation is required (i.e. it is not a multiword term nor is
-- the hyphenation manually turned off, etc.). If the hyphenation value acquired however
-- does not match the page title, it is not added to the table.
if hyph then
do_hyph = true
if hyph:gsub("%.", "") == page_title then
table_insert_if_absent(hyph_list, hyph)
end
end
end
-- TODO: looks rather slow.
-- Flattens a list of lists into a single list; a list of length 1 is returned unchanged.
local function merge_subtables(t)
if #t == 1 then
return t
end
local r = {}
for _, subtable in ipairs(t) do
for _, value in ipairs(subtable) do
table.insert(r, value)
end
end
return r
end
pron_list = merge_subtables(pron_list)
rhyme_list = merge_subtables(rhyme_list)
return pron_list, hyph_list, rhyme_list, do_hyph
end
-- This is the function used by the template {{zlw-mpl-IPA}}.
-- TODO: the template should be deprecated in favour of the new pl-pr template.
--
-- Reads the arguments of the parent frame, falls back to the page title ("#") when no term
-- is given, and returns one wikitext list item: a "Middle Polish" qualifier followed by the
-- IPA formatted from the first value returned by get_lect_line() for the dialect code "mpl".
--
-- NOTE(review): the keys of the parameter table below are missing (each entry starts with a
-- bare `=`) and `local terms = args` takes the whole argument table; both look truncated --
-- restore them from the upstream module.
-- NOTE(review): this entry point never assigns the file-level <data>, yet get_lect_line()
-- reads data.lects whenever a dialect code is passed -- confirm that <data> is loaded on
-- this path.
function export.mpl_IPA(frame)
local args = require("Module:parameters").process(frame:getParent().args, {
= { list = true },
= { list = true, allow_holes = true },
= { list = true, allow_holes = true, alias_of = "qual" },
= {},
= { list = true, allow_holes = true },
= {}, -- for debugging or demonstration only
})
local terms = args
if #terms == 0 then
terms = { "#" }
end
local lang = require("Module:languages").getByCode("pl")
return ("* %s %s"):format(
require("Module:accent qualifier").format_qualifiers(lang, {"Middle Polish"}),
require("Module:IPA").format_IPA_full {
lang = lang,
-- The extra parentheses keep only the first return value (the pronunciation list).
items = (get_lect_line(
"pl",
args.title or mw.title.getCurrentTitle().text,
terms,
args.qual,
args.ref,
args.period,
"mpl"
)),
}
)
end
-- Strips everything that does not count towards the displayed length of <text>: HTML tags
-- are removed first, and wikilinks are resolved only if "[[" is still present afterwards
-- (so the links module is not loaded needlessly).
local function convert_to_raw_text(text)
	local stripped = rsub(text, "<.->", "")
	if not stripped:find("%[%[") then
		return stripped
	end
	return (require(links_module).remove_links(stripped))
end
-- Approximate number of characters <text> takes up when displayed, i.e. its length
-- once convert_to_raw_text() has removed the markup.
local function textual_len(text)
	local raw = convert_to_raw_text(text)
	return ulen(raw)
end
-- Main entry point. Reads the language code from frame.args.lang and the user arguments from
-- the parent frame, and returns the wikitext of the pronunciation section: the main IPA
-- line, for Polish an optional collapsible block of dialectal transcriptions, then audio
-- files, rhymes, syllabification and homophones, each only if applicable.
--
-- For "pl" the dialect data module is loaded into the file-level <data> first, and one set
-- of parameters is registered for every lect and lect alias found in it.
--
-- <additional_indent> is declared local so it does not leak into the global environment.
--
-- NOTE(review): many table keys and indices in this function look truncated (parameter
-- table entries starting with a bare `=`, `process_args = ...` assignments that overwrite
-- the whole table, `args` passed where single arguments are expected, `local index = indices`,
-- gmatch patterns reduced to "+", the `("%s]")` category string). They are kept exactly as
-- found; restore them from the upstream module before relying on this code.
function export.IPA(frame)
local arg_lang = frame.args.lang
if arg_lang == "pl" then
data = require("Module:User:Catonif/zlw-lch-IPA/data/pl")
-- TODO: remember to remove User:Catonif
end
local process_args = {
= { list = true },
= { list = true, allow_holes = true },
= { list = true, allow_holes = true, alias_of = "qual" },
= {}, = { alias_of = "hyphs" },
= {}, = { alias_of = "rhymes" },
= {}, = { alias_of = "audios" },
= {}, = { alias_of = "homophones" },
= { list = true, allow_holes = true },
= {}, -- for debugging or demonstration only
}
if arg_lang == "pl" then
process_args = {}
process_args = { alias_of = "mpl_period" }
for lect_code, _ in pairs(data.lects) do
process_args = {list = true}
process_args = { list = true, allow_holes = true }
process_args = { list = true, allow_holes = true, alias_of = lect_code .. "_qual" }
process_args = { list = true, allow_holes = true }
end
for alias_code, lect_code in pairs(data.lect_aliases) do
process_args = { list = true, alias_of = lect_code }
process_args = { list = true, allow_holes = true, alias_of = lect_code .. "_qual" }
process_args = { list = true, allow_holes = true, alias_of = lect_code .. "_qual" }
process_args = { list = true, allow_holes = true, alias_of = lect_code .. "_ref" }
end
end
local args = require("Module:parameters").process(frame:getParent().args, process_args)
-- "slv" and "mas" are looked up under the codes "zlw-slv" and "zlw-mas".
local lang = require("Module:languages").getByCode((arg_lang == "slv" or arg_lang == "mas") and ("zlw-" .. arg_lang) or arg_lang)
local terms = args
local title = args.title or mw.title.getCurrentTitle().text
-- Without any term, transcribe the page title itself.
if #terms == 0 then
terms = { "#" }
end
local pron_list, hyph_list, rhyme_list, do_hyph = get_lect_line(arg_lang, title, terms, args.qual, args.ref)
-- Dialectal transcriptions; stays nil unless at least one lect was given.
local pl_lect_prons
if arg_lang == "pl" then
for lect_code, _ in pairs(data.lects) do
if #args > 0 then
if pl_lect_prons == nil then pl_lect_prons = {} end
pl_lect_prons = get_lect_line(
"pl",
title,
args,
args,
args,
args,
lect_code
)
end
end
end
-- Manual hyphenation: "-" switches the line off, anything else replaces the automatic list.
if args.hyphs then
if args.hyphs == "-" then
do_hyph = false
else
hyph_list = {}
for v in args.hyphs:gmatch("+") do
table.insert(hyph_list, v)
end
do_hyph = true
end
end
-- Manual rhymes: "-" removes them, "+" keeps the automatic ones, anything else is parsed
-- as entries of the form RHYME/NUM_SYL.
if args.rhymes then
if args.rhymes == "-" then
rhyme_list = {}
elseif args.rhymes ~= "+" then
rhyme_list = {}
for v in args.rhymes:gmatch("+") do
if rfind(v, ".+/.+") then
table.insert(rhyme_list, {
rhyme = rsub(v, "/.+", ""),
num_syl = { tonumber(rsub(v, ".+/", "")) },
})
else
error(("The manual rhyme %s did not specify syllable number as RHYME/NUM_SYL."):format(v))
end
end
end
end
-- This deals with duplicate values in rhymes.
if #rhyme_list > 0 then
local temp_rhyme_list = {}
local indices = {}
for _, v in ipairs(rhyme_list) do
local index = indices
if index == nil then
table.insert(temp_rhyme_list, v)
indices = #temp_rhyme_list
else
local different_num_syl = true
for _, ns in ipairs(temp_rhyme_list.num_syl) do
if ns == v.num_syl then
different_num_syl = false
break
end
end
if different_num_syl then
table.insert(temp_rhyme_list.num_syl, v.num_syl)
end
end
end
rhyme_list = temp_rhyme_list
end
local m_IPA_format = require("Module:IPA").format_IPA_full
local ret = ""
local do_collapse = false
if pron_list then
if pl_lect_prons then
do_collapse = true
-- The {width} placeholder is filled in once the longest dialect line is known.
ret = '<div class="vsSwitcher" data-toggle-category="pronunciations" style="width: {width}em; max-width:100%;"><span class="vsToggleElement" style="float: right;"> </span>\n'
end
ret = ret .. "*" .. m_IPA_format { lang = lang, items = pron_list }
end
if pl_lect_prons then
if do_collapse then
ret = ret .. '\n<div class="vsHide">\n'
end
local m_format_qualifiers = require("Module:accent qualifier").format_qualifiers
-- First groups the lects into their dialect groups.
local grouped_lects = {}
for lect_code, lect_prons in pairs(pl_lect_prons) do
local lect_group = data.lects.group
if grouped_lects == nil then grouped_lects = {} end
table.insert(grouped_lects, { code = lect_code, prons = lect_prons })
end
-- And then displays each group in order.
local maxlen = 0
-- Appends one formatted lect line to <ret> and keeps track of the longest line.
local function display_lect(value, indentation)
local formatted = ("%s%s %s"):format(indentation,
m_format_qualifiers(lang, { data.lects.name }),
m_IPA_format { lang = lang, items = value.prons }
)
maxlen = math.max(maxlen, textual_len(formatted))
ret = ret .. "\n" .. formatted
end
for group_index = 1, #data.lect_groups do
local lects = grouped_lects
local group = data.lect_groups
if lects ~= nil then
if group.single_lect then
display_lect(lects, "*")
else
-- Checks to indent Goral under Lesser Polish.
local additional_indent = ""
if group.indent_with_prec then
additional_indent = "*"
if grouped_lects == nil then
ret = ret .. "\n*" .. m_format_qualifiers(lang, { data.lect_groups.name }) .. ":"
end
end
-- Dialect group header.
ret = ret .. "\n*" .. additional_indent ..
m_format_qualifiers(lang, { group.name }) .. ":"
-- The lects are sorted according to their <index> value.
table.sort(lects, function (a, b) return data.lects.index < data.lects.index end)
for _, lect in ipairs(lects) do
display_lect(lect, "**" .. additional_indent)
end
end
end
end
if do_collapse then
ret = ret .. '\n</div></div>\n'
end
-- Box width in em, estimated from the longest line at 0.68 em per character.
local em_length = math.floor(maxlen * 0.68)
ret = m_str_utils.gsub(ret, "{width}", em_length)
end
if args.audios then
local format_audio = require("Module:audio").format_audio
local audio_index = 1
for audio in args.audios:gmatch("+") do
local caption = "Audio " .. audio_index
-- A trailing <...> part is moved into the caption; "#" and "~" in it are
-- replaced by the title and by the title followed by " się".
if audio:find("<+>$") then
caption = caption .. ", ''" ..
(audio:match("<(+)>$"))
:gsub("#", title)
:gsub("~", title .. " się")
.. "''"
audio = (audio:gsub("<+>$", ""))
end
ret = ("%s\n*%s"):format(ret, format_audio {
lang = lang,
file = audio:gsub("#", title),
caption = caption,
})
audio_index = audio_index + 1
end
end
if #rhyme_list > 0 then
ret = ("%s\n*%s"):format(ret, require("Module:rhymes").format_rhymes({ lang = lang, rhymes = rhyme_list }))
end
if do_hyph then
ret = ret .. "\n*"
if #hyph_list > 0 then
local hyphs = {}
for hyph_i, hyph_v in ipairs(hyph_list) do
hyphs = { hyph = {} }
for syl_v in hyph_v:gmatch("+") do
table.insert(hyphs.hyph, syl_v)
end
end
ret = ret..require("Module:hyphenation").format_hyphenations {
lang = lang, hyphs = hyphs, caption = "Syllabification"
}
else
-- A hyphenation was wanted but none matched the page title.
ret = ret.."Syllabification: <small></small>"
-- Only done when the namespace text is empty.
if mw.title.getCurrentTitle().nsText == "" then
ret = ("%s]"):format(ret, arg_lang)
end
end
end
if args.homophones then
local homophone_list = {}
for v in args.homophones:gmatch("+") do
-- A trailing <...> part becomes the qualifier of the homophone.
if v:find("<.->$") then
table.insert(homophone_list, {
term = v:gsub("<.->$", ""),
qualifiers = { (v:gsub(".+<(.-)>$", "%1")) },
})
else
table.insert(homophone_list, { term = v })
end
end
ret = ("%s\n*%s"):format(ret, require("Module:homophones").format_homophones {
lang = lang,
homophones = homophone_list,
})
end
return ret
end
return export