Module:sandbox/sw-IPA

This module sandbox lacks a documentation subpage. Please create it.
Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox
local export = {}
local lang = require("Module:languages").getByCode("sw")

-- Data tables for the transcription.
-- labial, first_round, second_round and third_round are each walked with
-- pairs() by unguja() and kimvita(); every key/value pair is handed to
-- mw.ustring.gsub as (pattern, replacement), one table after the other in
-- that order.
-- NOTE(review): in this copy the entries below have nothing in front of the
-- "=" (and the single labial key is cut off before `)w"]`), which is not valid
-- Lua. It looks as if the bracketed keys were lost when the page was
-- extracted -- restore them from the canonical module before using this file.
local labial = { )w"] = "%1ʷ",}
local first_round = {
	 = "ŋ",
	 = "ɲ",
	 = "tʃ",
	 = "x",
	 = "ᶬv",
	 = "ⁿz",
	 = "ʃ",
	 = "θ",
	 = "ɑ",
	 = "ɛ",
	 = "ɔ",
}
local second_round = { -- takes into account modifications made in the previous round
	 = "ɓ",
	 = "ɗ",
	 = "ɠ",
	 = "ʄ",
	 = "j",
	 = "ˈ",
}
local third_round = { -- takes into account modifications made in the previous rounds
	 = "ᵐb",
	 = "ⁿd",
	 = "ᵑɡ",
	 = "ⁿdʒ",
	 = "ð",
	 = "ɣ",
}
-- Words that unguja() and kimvita() copy through as-is, without calling
-- syllabify() or add_stress() on them.
local unstressed = { "wa", "ya", "la", "cha", "vya", "za", "ni", "si", "na", }
-- Consulted by split(). NOTE(review): keys are missing here as well; presumably
-- they were the written two-letter sequences -- confirm.
local digraphs = { = "tʃ",  = "ð",  = "ɣ",  = "x",  = "ᵐb",  = "ᶬv",  = "ⁿd",  = "ᵑɡ",
	 = "ⁿdʒ",  = "ɲ",  = "ⁿz",  = "ʃ",  = "θ",  = "ph",  = "th",  = "kh",
	 = "t̪",  = "d̪",  = "n̪", }
-- Display names; export.swIPA passes this table on to Module:IPA.
-- NOTE(review): keys missing; presumably one per accepted location value -- confirm.
local locs = {  = "Unguja standard",  = "Kimvita" }

-- Recursively inserts "." syllable breaks into one dot-free chunk of a word.
--
-- chunk: string to break up.
-- Returns the chunk with "." inserted after each syllable found; a chunk that
-- yields at most one match of the vowel pattern is returned unchanged.
--
-- All positions are code-point indexes (they come from mw.ustring.find), so
-- every slice must go through mw.ustring.sub. The previous fallback branch
-- sliced its tail with the byte-based string.sub, which cuts at the wrong place
-- as soon as a multi-byte character precedes the cut.
--
-- NOTE(review): the three patterns below are the empty string in this copy and
-- the two `digraphs` tests only check that the table exists. Judging by the
-- variable names and the "type ..." comments, character classes and a table
-- lookup were presumably lost in extraction -- confirm and restore.
function split(chunk)
	local _, number_of_vowels = mw.ustring.gsub(chunk, "", ".")

	if number_of_vowels <= 1 then -- type m and ka
		return chunk
	end

	local first_vowel = mw.ustring.find(chunk, "")
	local next_vowel = mw.ustring.find(chunk, "", first_vowel + 1)
	local following = mw.ustring.sub(chunk, first_vowel + 1, first_vowel + 1)

	-- cut = index of the last character that stays in the first syllable
	local cut
	if next_vowel == first_vowel + 1 or next_vowel == first_vowel + 2 then -- types maa, mara, kubwa
		cut = first_vowel
	elseif following == "m" or following == "n" then -- type kunywa
		cut = first_vowel
	elseif digraphs and next_vowel == first_vowel + 3 then -- type kadha
		cut = first_vowel
	elseif digraphs then -- type tathmini
		cut = first_vowel + 2
	else -- type falme
		cut = first_vowel + 1
	end

	return mw.ustring.sub(chunk, 1, cut) .. "." .. split(mw.ustring.sub(chunk, cut + 1))
end

-- Marks the syllable boundaries of a single word with ".".
--
-- head: the word to syllabify.
-- Returns the word with "." between syllables.
-- Raises an error when the vowel count comes out as zero.
--
-- Fixes relative to the previous version:
--   * the zero-vowel branch called the misspelt `eror`, so it failed with
--     "attempt to call a nil value" instead of raising the intended message;
--   * the loop assigned split()'s result to `chunks` itself rather than to
--     chunks[i], so table.concat then received a string.
--
-- NOTE(review): "m()" in the first substitution and "" in the vowel count look
-- truncated in this copy. As written, "()" is a position capture (so %1 inserts
-- a number) and "" matches at every position. Character classes were
-- presumably lost in extraction -- confirm and restore.
function syllabify(head)
	head = mw.ustring.gsub(head,"m()", ".m.%1")
	head = mw.ustring.gsub(head,"'", ".'")
	head = mw.ustring.gsub(head,"ng%.'", "ng'") -- correct overzealous previous replacement

	local _, number_of_vowels = mw.ustring.gsub(head, "", ".")

	if number_of_vowels == 0 then
		error("cannot handle words without vowels")
	end

	if number_of_vowels == 1 then
		local first = mw.ustring.sub(head, 1, 1)
		if first == "m" or first == "n" then -- type nchi and mbwa
			-- detach the initial nasal unless a break or apostrophe already follows it
			local second = mw.ustring.sub(head, 2, 2)
			if second ~= "." and second ~= "'" then
				head = first .. "." .. mw.ustring.sub(head, 2)
			end
			-- n before k/g: rewrite the detached n as ng'
			local onset = mw.ustring.sub(head, 1, 3)
			if onset == "n.k" or onset == "n.g" then
				head = mw.ustring.sub(head, 1, 1) .. "g'" .. mw.ustring.sub(head, 2)
			end
		end
		-- any other single-vowel word is returned as it stands
		return head
	end

	-- several vowels: break every dot-separated chunk separately
	local chunks = mw.text.split(head, "%.")
	for i, chunk in ipairs(chunks) do
		chunks[i] = split(chunk)
	end
	return mw.text.trim(table.concat(chunks, "."), ".")
end

-- Inserts the stress mark "'" into a word whose syllables are separated by ".".
--
-- head: syllabified word.
-- Returns:
--   * if the word already contains an apostrophe other than the one in "ng'",
--     the word with every ".'" collapsed to "'";
--   * otherwise, if it has two or more dots, the word with the second-to-last
--     dot replaced by "'";
--   * otherwise the word prefixed with "'".
--
-- Only ASCII literals are searched for, and those bytes never occur inside a
-- multi-byte UTF-8 sequence, so the plain string library is safe here. The
-- previous version byte-reversed the string and then searched it with
-- mw.ustring.find, mixing byte and code-point indexes (and producing invalid
-- UTF-8 for non-ASCII input); it also leaked the globals `_` and `number`.
function add_stress(head)
	-- look for an apostrophe that is not part of the trigraph ng'
	local without_ng = string.gsub(head, "ng'", "ng")
	if string.find(without_ng, "'", 1, true) then
		return (string.gsub(head, "%.'", "'"))
	end

	-- `after` holds exactly one dot, so the dot consumed between the two
	-- captures is the second-to-last one in the word
	local before, after = string.match(head, "^(.*)%.([^.]*%.[^.]*)$")
	if before then
		return before .. "'" .. after
	end

	-- fewer than two dots
	return "'" .. head
end

-- Builds the transcription string for the default ("unguja") setting.
--
-- head: respelled entry, words separated by single spaces.
-- Returns the string obtained after removing the markers "_h", ":" and "_d"
-- (plus whatever "_?" matches), syllabifying and stressing every word that is
-- not listed in `unstressed`, and applying the four replacement tables.
function unguja(head)
	-- drop the respelling markers, in this order
	for _, marker in ipairs({ "_h", ":", "_d", "_?" }) do
		head = mw.ustring.gsub(head, marker, "")
	end

	-- quick membership test for the words that are passed through untouched
	local passthrough = {}
	for _, particle in ipairs(unstressed) do
		passthrough[particle] = true
	end

	local pieces = {}
	for word in mw.text.gsplit(head, " ") do
		local piece = word
		if not passthrough[word] then
			piece = add_stress(syllabify(word))
		end
		table.insert(pieces, piece)
	end

	-- run the replacement rounds one after the other over the whole phrase
	local pron = table.concat(pieces, " ")
	for _, round in ipairs({ labial, first_round, second_round, third_round }) do
		for written, ipa in pairs(round) do
			pron = mw.ustring.gsub(pron, written, ipa)
		end
	end
	return pron
end

-- Builds the transcription string for the "kimvita" setting.
--
-- head: respelled entry, words separated by single spaces.
-- Same pipeline as unguja(), except that the marker "_h" becomes "ʰ" and "_d"
-- becomes the combining mark "̪" instead of being removed.
-- Returns the resulting string.
function kimvita(head)
	-- rewrite the respelling markers, in this order
	local markers = {
		{ "_h", "ʰ" },
		{ ":", "" },
		{ "_d", "̪" },
		{ "_?", "" },
	}
	for _, rule in ipairs(markers) do
		head = mw.ustring.gsub(head, rule[1], rule[2])
	end

	-- quick membership test for the words that are passed through untouched
	local passthrough = {}
	for _, particle in ipairs(unstressed) do
		passthrough[particle] = true
	end

	local pieces = {}
	for word in mw.text.gsplit(head, " ") do
		local piece = word
		if not passthrough[word] then
			piece = add_stress(syllabify(word))
		end
		table.insert(pieces, piece)
	end

	-- run the replacement rounds one after the other over the whole phrase
	local pron = table.concat(pieces, " ")
	for _, round in ipairs({ labial, first_round, second_round, third_round }) do
		for written, ipa in pairs(round) do
			pron = mw.ustring.gsub(pron, written, ipa)
		end
	end
	return pron
end

-- Template entry point: builds the pronunciation string and hands it to
-- Module:IPA's format_IPA_full for display.
--
-- frame: the invoking frame; the arguments are read from its parent frame.
-- Returns whatever format_IPA_full returns.
-- Raises "not a class I word, sorry" when clI is set and head does not start
-- with "m".
--
-- NOTE(review): this copy appears to have lost every bracketed span: `head`,
-- `clI` and `loc` are all assigned the bare `args` value, the two replacement
-- tables and the `items` entry have no keys (not valid Lua), and several
-- patterns contain an empty "()". Restore from the canonical module before use.
function export.swIPA(frame)
	local args = frame:getParent().args
	-- NOTE(review): presumably a specific argument was indexed on each of the next
	-- three lines; as written the `or` fallbacks are only reached if `args` is
	-- nil/false -- confirm.
	local head = args or string.lower(mw.title.getCurrentTitle().text)
	local clI = args
	local loc = args or "unguja"
	
	if clI and string.sub(head,1,1) ~= "m" then error("not a class I word, sorry") end
	-- NOTE(review): "()" is a position capture in Lua patterns, so %1 would insert
	-- a number here; a character class inside the parentheses was presumably lost.
	if clI then head = mw.ustring.gsub(head,"^m()", "m.%1") end
	
	-- anything other than "kimvita" falls back to unguja()
	local pron = ""
	if loc == "kimvita" then
		pron = kimvita(head)
	else
		pron = unguja(head)
	end
	if not clI then
		-- first line handles the start of the string, second line a word after a space
		pron = mw.ustring.gsub(pron, "^ˈ%.()", { = "ˈm̩.b",  = "ˈn̩.d",  = "ˈn̩.dʒ",  = "ˈŋ̍.ɡ"})
		pron = mw.ustring.gsub(pron, " ˈ%.()", { = " ˈm̩.b",  = " ˈn̩.d",  = " ˈn̩.dʒ",  = " ˈŋ̍.ɡ"})
	end
	-- rewrite m as m̩ at the start of the string and in the (truncated) context below
	pron = mw.ustring.gsub(pron, "^m()", "m̩%1")
	pron = mw.ustring.gsub(pron, "()m()", "%1m̩%2")
	-- collapse X?X into Xː. NOTE(review): the middle "." is unescaped and so
	-- matches any single character, not only a literal dot -- confirm that "%."
	-- was not intended.
	pron = mw.ustring.gsub(pron, "(.).%1", "%1ː")

	return require("Module:IPA").format_IPA_full {
				lang = lang,
				a = {locs},
				items = { {  = "/" .. pron .. "/",} },
			}
end

return export
Module:sandbox/sw-IPA

Wikious

Boobota

Sagapedia