-- This is the module that powers {{sw-IPA}}.
local export = {}
local lang = require("Module:languages").getByCode("sw")
local format_IPA_full = require("Module:IPA").format_IPA_full
-- Grapheme inventories searched by first_graph. Each entry is used as a Lua
-- pattern anchored with "^", which is why "." and "?" are written "%." and "%?".
-- Within a list, entries are tried in the order given.

-- Graphs that are one character long.
local single_letters = {
	"a", "b", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n",
	"o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "X", "y", "z",
	"'", "%.", ":", "%?",
}

-- Graphs that are two characters long (including the "_"-prefixed ones).
local complex_graphs = {
	"ch", "dh", "gh", "kh", "mb", "mv", "nd", "ng", "nj", "ny", "nz", "sh", "th",
	"_d", "_h", "_w", "_%?",
}

-- Graphs that are three characters long.
local very_complex_graphs = {
	"nch", "ng'",
}
-- Maps alternative location codes to the variety names "unguja", "kimvita" and
-- "arabic".
-- NOTE(review): every key in this constructor is missing (`{ = "unguja", ...}`),
-- so the line is not valid Lua as it stands. The keys were presumably bracketed
-- strings (`["..."] = "unguja"`) that were lost; they cannot be recovered from
-- this file — restore them from the canonical module before use.
local aliases = { = "unguja", = "kimvita", = "arabic"}
-- Display label for each pronunciation variety. The keys are the names of the
-- generator functions on `export` (unguja, kimvita, arabic), because swIPA uses
-- one and the same location string to pick both the label and the generator.
local names = {
	unguja = "Unguja standard",
	kimvita = "Kimvita",
	arabic = "unadapted from Arabic",
}
-- Set of the five lowercase vowel letters, used by is_a_vowel.
local vowel_letters = {a = true, e = true, i = true, o = true, u = true}

-- Returns true when `letter` is exactly one of "a", "e", "i", "o", "u",
-- and false for anything else (including nil and multi-character graphs).
local function is_a_vowel(letter)
	return vowel_letters[letter] == true
end
--[==[
Splits a phrase on spaces and returns the words as a sequence, e.g.
"lugha ya Kiswahili" -> {"lugha", "ya", "Kiswahili"}.
A single hyphen at the very beginning and a single hyphen at the very end of
the text are dropped before splitting.
Runs of spaces, and leading or trailing spaces, never produce empty words; an
empty or all-space text yields an empty table.
]==]
local function split_words(text)
	-- "-" is a pattern quantifier in Lua, so it is escaped to make the literal intent explicit
	text = string.gsub(text, "^%-", "")
	text = string.gsub(text, "%-$", "")
	local ret = {}
	for word in string.gmatch(text, "[^ ]+") do
		ret[#ret + 1] = word
	end
	return ret
end
--[==[
Splits a comma-separated list into a sequence of its non-empty fields,
e.g. "unguja,kimvita" -> {"unguja", "kimvita"}.
Fields are returned verbatim (surrounding spaces are kept); empty fields
between consecutive commas are skipped.
NOTE(review): this function is declared without `local` and is therefore a
global; it is left that way here so that any outside user keeps working.
]==]
function splitcomma(inputstr)
	local t = {}
	-- one or more characters that are not a comma
	for str in string.gmatch(inputstr, "([^,]+)") do
		t[#t + 1] = str
	end
	return t
end
--[==[
Returns the position of the first vowel graph at or after `pos` in `graph_table`.
For {"mv", "i", "t", "_d", "a"} and position 3 the result is 5.
If `pos` itself holds a vowel, `pos` is returned; if no vowel is left, the
result is the length of the table + 1.
]==]
local function next_vowel(graph_table, pos)
	-- test the graph at the current position, not the table as a whole
	while pos <= #graph_table and not is_a_vowel(graph_table[pos]) do
		pos = pos + 1
	end
	return pos
end
--[==[
Tells whether a syllable break ("." or "'") occurs strictly *between* the two
given positions of `graph_table`, e.g. for {"mv", "i", ".", "t", "a"} and the
positions 2 and 5 the answer is true.
Adjacent positions have nothing between them, so the answer is false.
]==]
local function has_break(graph_table, first_pos, second_pos)
	-- the range is empty when the positions are adjacent
	for i = first_pos + 1, second_pos - 1 do
		local graph = graph_table[i]
		if graph == "." or graph == "'" then
			return true
		end
	end
	return false
end
--[==[
Returns the position of the last consonant graph strictly between
`first_pos` and `second_pos` in `graph_table`; for {"mv", "i", "t", "_d", "a"}
and the positions 2 and 5 the result is 3 (the "t", not the modifier "_d").
If nothing between the two positions qualifies, `first_pos` is returned.
Raises an error when the two positions are adjacent.
]==]
local function last_cons(graph_table, first_pos, second_pos)
	if second_pos == first_pos + 1 then
		error("error in function last_cons: no consonants found")
	end
	local pos = first_pos
	for i = first_pos + 1, second_pos - 1 do
		-- NOTE(review): the original test had lost both its index and its pattern
		-- (`string.find(graph_table, "")`). Skipping graphs that start with "_" is
		-- reconstructed from the syllabification example {"mv", "i", ".", "t", "_d", "a"};
		-- confirm against the canonical module whether further graphs (e.g. ":")
		-- must be skipped as well.
		if not string.find(graph_table[i], "^_") then
			pos = i
		end
	end
	return pos
end
--[==[
Inserts syllable breaks into a table of graphs, e.g.
{"mv", "i", "t", "_d", "a"} -> {"mv", "i", ".", "t", "_d", "a"}.
The table is modified in place and also returned.
For each pair of consecutive vowels:
* adjacent vowels get a "." between them;
* if a break is already present between them, nothing is inserted;
* otherwise a "." is inserted at the position given by last_cons.
]==]
local function syllabify(graph_table)
	local current = next_vowel(graph_table, 1)
	local following = next_vowel(graph_table, current + 1)
	while following <= #graph_table do
		local adjacent = following == current + 1
		if adjacent or not has_break(graph_table, current, following) then
			local dot_at = following
			if not adjacent then
				dot_at = last_cons(graph_table, current, following)
			end
			table.insert(graph_table, dot_at, ".")
			-- the inserted dot shifted the following vowel one place to the right
			current = following + 1
		else
			current = following
		end
		following = next_vowel(graph_table, current + 1)
	end
	return graph_table
end
--[==[
Takes a syllabified table of graphs such as {"m", "a", ".", "m", "a"} and adds
a stress mark "'" in front of the second-last syllable, giving
{"'", "m", "a", ".", "m", "a"}. When that syllable is preceded by a "." break,
the stress mark takes the place of the break.
If a stress mark is already present, the table is returned unchanged.
If the word is monosyllabic (contains no "."), the table is returned unchanged.
The table is modified in place and also returned.
]==]
local function add_stress(head)
	local has_dot = false
	for _, graph in ipairs(head) do
		if graph == "'" then
			return head -- already stressed
		end
		if graph == "." then
			has_dot = true
		end
	end
	if not has_dot then
		return head -- monosyllabic
	end
	-- Walk backwards past at most two breaks: this leaves i = 0 for disyllabic
	-- words and i = the position just before the second-last break for longer ones.
	local i = #head
	local n = 0
	while i > 0 and n < 2 do
		if head[i] == "." then
			n = n + 1
		end
		i = i - 1
	end
	table.insert(head, i + 1, "'")
	-- the break that used to sit at i+1 is now at i+2; the stress mark replaces it
	if head[i + 2] == "." then
		table.remove(head, i + 2)
	end
	return head
end
--[==[
Returns the graph that `word` starts with, e.g. "mvit_da" -> "mv".
The three-character inventory is tried first, then the two-character one,
then the single letters, so the longest listed graph wins.
Inventory entries are Lua patterns; the "%" escapes are removed from the
returned graph.
Raises an error when `word` starts with none of the known graphs.
]==]
local function first_graph(word)
	local inventories = {very_complex_graphs, complex_graphs, single_letters}
	for _, inventory in ipairs(inventories) do
		for _, candidate in ipairs(inventory) do
			if string.find(word, "^" .. candidate) then
				-- parentheses keep only the string, dropping gsub's replacement count
				return (string.gsub(candidate, "%%", ""))
			end
		end
	end
	error("unknown string encountered: " .. word)
end
--[==[
Cuts `word` into its graphs from left to right, e.g.
"mvit_da" -> {"mv", "i", "t", "_d", "a"}.
Each step asks first_graph for the leading graph and removes that many bytes
from the front of the remaining text. An empty word gives an empty table.
]==]
local function split_graphs(word)
	local pieces = {}
	local rest = word
	while string.len(rest) > 0 do
		local graph = first_graph(rest)
		pieces[#pieces + 1] = graph
		rest = string.sub(rest, string.len(graph) + 1)
	end
	return pieces
end
--[==[
Converts one word to IPA: the word is cut into graphs (split_graphs),
syllabified (syllabify), stressed (add_stress), every graph is replaced by its
value in `ipa_key`, the result is joined, and a series of substitutions on the
IPA string follows.
`ipa_key` is a table mapping each graph to its IPA string.
Raises an error when a graph has no entry in `ipa_key`.

NOTE(review): several patterns below contain an empty capture `()`. In Lua
patterns that is a position capture, so `%1`/`%2` expand to a number rather
than to text, and the empty pattern "" matches every string. These look like
places where a bracketed character class was lost; they are kept exactly as
found because the classes cannot be recovered from this file — restore them
from the canonical module.
]==]
local function pron_per_word(word, ipa_key)
	-- NOTE(review): lost character class inside `()` in both patterns (see header)
	word = string.gsub(word, "()w", "%1_w")
	word = string.gsub(word,"m()", "m.%1")
	local graphs = split_graphs(word)
	graphs = syllabify(graphs)
	graphs = add_stress(graphs)
	-- replace each graph, in place, by its IPA value
	for i, let in ipairs(graphs) do
		local ipa = ipa_key[let]
		if ipa == nil then
			-- a nil here would leave a hole in the sequence and break table.concat
			error("no IPA value for graph: " .. let)
		end
		graphs[i] = ipa
	end
	word = table.concat(graphs)
	-- NOTE(review): lost character classes inside `()` in the next three patterns
	word = mw.ustring.gsub(word, "()m()", "%1m̩%2")
	word = mw.ustring.gsub(word, "^m()", "m̩%1")
	word = mw.ustring.gsub(word, "()m$", "%1m̩")
	word = mw.ustring.gsub(word, "^m$", "m̩")
	-- NOTE(review): with the empty pattern this condition is always false, so the
	-- block never runs as written; presumably the pattern listed the break/stress marks
	if not mw.ustring.find(word, "") then -- if monosyllabic, type mbwa, nchi, nge
		word = mw.ustring.gsub(word, "^ᵐb", "ˈm̩.b")
		word = mw.ustring.gsub(word, "^ᶮc", "ˈɲ̍.c")
		word = mw.ustring.gsub(word, "^ⁿtʃ", "ˈn̩.tʃ")
		word = mw.ustring.gsub(word, "^ᵑɡ", "ˈŋ̍.ɡ")
	end
	-- the same character on both sides of a syllable dot becomes that character + "ː"
	word = mw.ustring.gsub(word, "(.)%.%1", "%1ː") -- type pumbaa
	return word
end
--[==[
Converts a whole phrase to IPA: `text` is split into words (split_words), each
word is converted with pron_per_word using `ipa_key`, and the per-word results
are joined with single spaces.
]==]
local function generate_pron(text, ipa_key)
	local words = split_words(text)
	local prons = {}
	for i, w in ipairs(words) do
		-- store per word; assigning to `prons` itself would discard the table
		prons[i] = pron_per_word(w, ipa_key)
	end
	return table.concat(prons, " ")
end
-- Rewrites a word that starts with "m" so that a "." follows that initial "m".
-- Raises an error when `head` does not start with "m".
-- Returns the results of string.gsub unadjusted, i.e. the new string followed by
-- the number of replacements; callers in this file keep only the first value.
-- NOTE(review): `()` in a Lua pattern is a position capture, so as written `%1`
-- expands to a number (the position after the "m"), not to text. Presumably a
-- bracketed character class inside the parentheses was lost; it cannot be
-- recovered from this file — restore it from the canonical module.
local function make_clI(head)
if string.sub(head,1,1) ~= "m" then error("not a class I or III word, sorry") end
return string.gsub(head,"^m()", "m.%1")
end
-- Entry point that returns the pronunciation built by generate_pron with the
-- `unguja` table of Module:User:MuDavid/sw-IPA/data. When `clI` is truthy the
-- input is first passed through make_clI.
-- NOTE(review): `head` and `clI` are both assigned the whole argument table
-- (`args or frame.args`, `args or false`); presumably each originally indexed
-- one specific argument — confirm the parameter names against the template.
-- NOTE(review): `frame:getParent()` is dereferenced before the `or`, so the
-- `frame.args` fallback is reached only when the parent's `.args` is nil or
-- false — verify that this is the intended fallback.
function export.unguja(frame)
local args = frame:getParent().args or frame.args
local head = args or frame.args
local clI = args or false
if clI then head = make_clI(head) end
return generate_pron(head, require("Module:User:MuDavid/sw-IPA/data").unguja)
end
-- Entry point that returns the pronunciation built by generate_pron with the
-- `kimvita` table of Module:User:MuDavid/sw-IPA/data. When `clI` is truthy the
-- input is first passed through make_clI.
-- NOTE(review): `head` and `clI` are both assigned the whole argument table
-- (`args or frame.args`, `args or false`); presumably each originally indexed
-- one specific argument — confirm the parameter names against the template.
-- NOTE(review): `frame:getParent()` is dereferenced before the `or`, so the
-- `frame.args` fallback is reached only when the parent's `.args` is nil or
-- false — verify that this is the intended fallback.
function export.kimvita(frame)
local args = frame:getParent().args or frame.args
local head = args or frame.args
local clI = args or false
if clI then head = make_clI(head) end
return generate_pron(head, require("Module:User:MuDavid/sw-IPA/data").kimvita)
end
-- Entry point that returns the pronunciation built by generate_pron with the
-- `arabic` table of Module:User:MuDavid/sw-IPA/data, followed by three
-- substitutions on the resulting IPA string. When `clI` is truthy the input is
-- first passed through make_clI.
-- NOTE(review): `head` and `clI` are both assigned the whole argument table
-- (`args or frame.args`, `args or false`); presumably each originally indexed
-- one specific argument — confirm the parameter names against the template.
-- NOTE(review): `frame:getParent()` is dereferenced before the `or`, so the
-- `frame.args` fallback is reached only when the parent's `.args` is nil or
-- false — verify that this is the intended fallback.
function export.arabic(frame)
local args = frame:getParent().args or frame.args
local head = args or frame.args
local clI = args or false
if clI then head = make_clI(head) end
local pron = generate_pron(head, require("Module:User:MuDavid/sw-IPA/data").arabic)
-- NOTE(review): the empty `()` in the next two patterns is a position capture,
-- so `%1` expands to a number; presumably a bracketed character class was lost
-- inside the parentheses — restore from the canonical module.
pron = mw.ustring.gsub(pron, "%.()ː", "%1ː")
pron = mw.ustring.gsub(pron, "ˤ()", "ˤ%1̹")
-- every "tˤ" is rewritten as "t̪ˤ"
pron = mw.ustring.gsub(pron, "tˤ", "t̪ˤ")
return pron
end
--[==[
Main entry point: returns one "*"-prefixed line of formatted IPA per requested
location, joined with newlines. Each location is first replaced by its alias
(if any); the resulting name selects both the accent label in `names` and the
generator function on `export`. Without locations a single line for the
Unguja variety is returned.
A note equal to "y" is expanded to "with stress on preceding syllable"; the
note is passed to format_IPA_full as the `qq` qualifier.
Raises an error for a location that names no generator function.

NOTE(review): `locs` and `note` are both assigned the whole argument table;
presumably each originally indexed one specific named argument. The names
cannot be recovered from this file, so the two lines are kept as found — as
written, `locs` is always a table, which splitcomma cannot handle, and the
`else` branch is never reached. Restore the indices from the canonical module.
]==]
function export.swIPA(frame)
	local locs = frame:getParent().args or frame.args
	local note = frame:getParent().args or frame.args
	if note == "y" then note = "with stress on preceding syllable" end
	if locs then
		locs = splitcomma(locs)
		local out = {}
		for _, l in ipairs(locs) do
			if aliases[l] then l = aliases[l] end
			local generate = export[l]
			-- reject unknown names, and swIPA itself (which would recurse forever)
			if type(generate) ~= "function" or generate == export.swIPA then
				error("unknown location: " .. tostring(l))
			end
			table.insert(out, "*" .. format_IPA_full {
				lang = lang,
				a = {names[l]},
				items = { { pron = "/" .. generate(frame) .. "/"} },
				qq = {note}
			})
		end
		return table.concat(out,"\n")
	else
		return "*" .. format_IPA_full {
			lang = lang,
			a = {names.unguja},
			items = { { pron = "/" .. export.unguja(frame) .. "/"} },
			qq = {note}
		}
	end
end
return export