Module:User:Theknightwho/grc-IPA

Hello, you have come here looking for the meaning of the word Module:User:Theknightwho/grc-IPA. In DICTIOUS you will not only get to know all the dictionary meanings for the word Module:User:Theknightwho/grc-IPA, but we will also tell you about its etymology, its characteristics and you will know how to say Module:User:Theknightwho/grc-IPA in singular and plural. Everything you need to know about the word Module:User:Theknightwho/grc-IPA you have here. The definition of the word Module:User:Theknightwho/grc-IPA will help you to be more precise and correct when speaking or writing your texts. Knowing the definition of Module:User:Theknightwho/grc-IPA, as well as those of other words, enriches your vocabulary and provides you with more and better linguistic resources.


local export = {}

local m_a = require("Module:accent qualifier")
local m_data = mw.loadData("Module:User:Theknightwho/grc-IPA/data")
local m_grc_accent = require("Module:grc-accent")
local m_grc_utils = require("Module:grc-utilities")
local m_grc_utils_data = require("Module:grc-utilities/data")
local m_IPA = require("Module:IPA")
local m_str_utils = require("Module:string utilities")

-- reorderDiacritics in [[Module:grc-utilities]] converts sequences of diacritics
-- to the order required by this module, then replaces combining macrons and
-- breves with spacing ones.

local diacritic = m_grc_utils_data.diacritic
local diacritics = m_grc_utils_data.diacritics

local canonicalize = m_grc_utils.canonicalize
local concat = table.concat
local convert_term -- defined below
local dump = mw.dumpObject
local find_ambig = m_grc_accent.find_ambig
local format_categories = require("Module:utilities").format_categories
local get_IPA -- defined below
local full_link = m_grc_utils.link
local gsplit = m_str_utils.gsplit
local insert = table.insert
local mark_implied_length = m_grc_accent.mark_implied_length
local reorderDiacritics = m_grc_utils.reorderDiacritics
local split = m_str_utils.split
local strip_accent = m_grc_accent.strip_accent
local tokenize = m_grc_utils.tokenize
local toNFD = mw.ustring.toNFD
local u = m_str_utils.char
local ulen = m_str_utils.len
local ulower = m_str_utils.lower
local umatch = m_str_utils.match
local usub = m_str_utils.sub

-- Combining marks looked for in the Greek input. `u` is the string-utilities
-- `char` function (presumably code point -> string; confirm).
local MACRON = u(0x304)	-- U+0304 COMBINING MACRON
local BREVE = u(0x306)	-- U+0306 COMBINING BREVE
local GRAVE = u(0x300)	-- U+0300 COMBINING GRAVE ACCENT
local ACUTE = u(0x301)	-- U+0301 COMBINING ACUTE ACCENT
local SMOOTH = u(0x313)	-- U+0313 COMBINING COMMA ABOVE
local ROUGH = u(0x314)	-- U+0314 COMBINING REVERSED COMMA ABOVE
local CIRCUM = u(0x342)	-- U+0342 COMBINING GREEK PERISPOMENI
-- Marks returned by get_pitch_marks() for each accent type.
local acute_on_long_vowel = u(0x30C)	-- U+030C COMBINING CARON
local acute_on_short_vowel = u(0x341)	-- U+0341 COMBINING ACUTE TONE MARK
local grave_pitch_mark = u(0x340)	-- U+0340 COMBINING GRAVE TONE MARK
local circumflex_on_long_vowel = u(0x302)	-- U+0302 COMBINING CIRCUMFLEX ACCENT

-- `pagename` is the default term in export.create; `lang` is handed to the
-- qualifier, IPA and category formatters.
local pagename = mw.loadData("Module:headword/data").pagename
local lang = require("Module:languages").getByCode("grc")

-- Period codes in the order convert_term() walks them; make_table() prefixes
-- each with "grc-" to build its qualifier label.
local periods = {"cla", "koi1", "koi2", "byz1", "byz2"}
-- Subset of periods that make_table() shows in the collapsed, inline view.
local inlinePeriods = {"cla", "koi2", "byz2"}

local function fetch(s, i)
	--[==[
	Return the i-th character of the UTF-8 string `s` (1-based).
	* `i` may be a number or a string that tonumber() accepts; anything else
	  raises an error.
	* An out-of-bounds index (zero, negative, or past the end) gives "".
	Used all over the syllabifier, which looks at one character at a time.
	]==]
	i = tonumber(i)

	if type(i) ~= "number" then
		error("fetch requires a number or a string equivalent to a number as its second argument.")
	end

	-- Nothing lives at or before position 0, so skip the scan entirely.
	if i < 1 then
		return ""
	end

	local n = 0
	-- One byte followed by any UTF-8 continuation bytes (0x80-0xBF) is one
	-- character. A bare ".*" would hand back the whole string at once.
	for ch in string.gmatch(s, ".[\128-\191]*") do
		n = n + 1
		if n == i then
			return ch
		end
	end

	return ""
end

--Combining diacritics are tricky.
-- Marks and modifier letters that the syllabifier compares characters
-- against; they are invisible or easy to confuse, hence the names.
local tie = u(0x35C)				-- tie bar
local nonsyllabic = u(0x32F)		-- combining inverted breve below
local voiceless = u(0x325)			-- combining ring below
local aspirated = "ʰ"	-- modifier letter small h
local macron = "¯"	-- spacing (non-combining) macron
local breve = "˘"	-- spacing (non-combining) breve

local function is(text, X)
	--[==[
	Test whether `text` belongs to the character class named `X`.
	* Returns false when either argument is nil or false.
	* Raises an error (reported at the caller's position) when
	  `m_data.chars` has no entry for `X`.
	* Always returns a real boolean.
	]==]
	if not text or not X then
		return false
	end
	-- Look up the entry for this class; indexing by X was missing, which
	-- left `pattern` holding the whole table.
	local pattern = m_data.chars[X] or error(("No data for %s."):format(dump(X)), 2)
	if X == "frontDiphth" or X == "Greekdiacritic" then
		-- These two entries are used as written, only anchored.
		-- NOTE(review): presumably they are complete patterns in the data
		-- module -- confirm.
		pattern = "^" .. pattern .. "$"
	else
		-- Every other entry is wrapped in a bracket class, so `text` must
		-- be exactly one of the listed characters.
		pattern = "^[" .. pattern .. "]$"
	end
	return umatch(text, pattern) and true or false
end

-- Named predicates that decode() reaches through its "~" operator; decode
-- passes the term and `x + offset` as the two arguments.
-- NOTE(review): in this copy every predicate assigns the whole `term` to its
-- letter variable(s) and never reads `index`, and `m_data.diaer` is read
-- without a per-letter key. The subscripts appear to have been lost; restore
-- them from the upstream module before relying on any result below.
local env_functions = {
	-- True when letter1 (accent stripped) is a "frontVowel", or when
	-- letter1 .. letter2 is a "frontDiphth" and letter2 is not an "iDiaer".
	preFront = function(term, index)
		local letter1, letter2 = term, term
		return is(strip_accent(letter1), "frontVowel") or (is(strip_accent(letter1 .. letter2), "frontDiphth") and not is(letter2, "iDiaer"))
	end,
	-- True when the letter is an iota (accent stripped) whose data does not
	-- flag a diaeresis.
	isIDiphth = function(term, index)
		local letter = term
		return strip_accent(letter) == "ι" and not m_data.diaer
	end,
	-- Same test as isIDiphth, for upsilon.
	isUDiphth = function(term, index)
		local letter = term
		return strip_accent(letter) == "υ" and not m_data.diaer
	end,
	-- True when the letter is a spacing macron or a spacing breve.
	hasMacronBreve = function(term, index)
		local letter = term
		return letter == macron or letter == breve
	end,
}

local function decode(condition, x, term)
	--[==[
		Evaluate a condition string from the data tables against the token
		at position `x` of `term`.
		Operators, loosest first:
		* Slash (/) is a logical "or" between sub-conditions.
		* Plus (+) is a logical "and" between sub-conditions.
		  "/" is split before "+", so "+" binds tighter. Each piece is
		  decoded recursively until only simple tests remain.
		In simple tests:
		* A leading number picks the token under consideration:
			 -1 is the previous token, 0 is the current, and 1 is the next.
		* Equals sign (=) checks whether that token equals the text after
			the sign.
		* Period (.) plus a word looks up that field in the token's entry
			of the data table.
		* Tilde (~) calls the named function from env_functions on that
			position, if the function exists.
		Returns true or false for "/" and "+". Simple tests return the
		looked-up value or false. A condition with no operator returns nil.
	]==]
	if condition:find("/", nil, true) then -- logical or
		for or_condition in gsplit(condition, "/", true, true) do
			if decode(or_condition, x, term) then
				return true
			end
		end
		return false
	elseif condition:find("+", nil, true) then -- logical and
		for and_condition in gsplit(condition, "+", true, true) do
			if not decode(and_condition, x, term) then
				return false
			end
		end
		return true
	elseif condition:find("=", nil, true) then -- check token identity
		local offset, ch = condition:match("^(.-)=(.*)$")
		-- The offset is captured as text; convert it before doing arithmetic.
		return ch == term[x + tonumber(offset)]
	elseif condition:find(".", nil, true) then -- check a data field
		local offset, quality = condition:match("^(.-)%.(.*)$")
		local ch = term[x + tonumber(offset)]
		-- A token with no data entry simply fails the test.
		return m_data[ch] and m_data[ch][quality] or false
	elseif condition:find("~", nil, true) then -- check using a function
		local offset, func = condition:match("^(.-)~(.*)$")
		local predicate = env_functions[func]
		return predicate and predicate(term, x + tonumber(offset)) or false
	end
end

local function check(p, x, term)
	--[==[
	Resolve a data value that may be conditional.
	* A non-table `p` is returned unchanged.
	* A table `p` is a sequence of candidates tried in order. A non-table
	  candidate is an unconditional result. A table candidate is a
	  {condition, result} pair whose result is resolved recursively once
	  decode() accepts the condition.
	Returns nil when no candidate applies.
	]==]
	if type(p) ~= "table" then
		return p
	end
	--This table is sequential, with a variable number of entries.
	for _, possP in ipairs(p) do
		if type(possP) ~= "table" then
			return possP
		end
		--This table is paired, with two values: a condition and a result.
		-- Pass the two halves separately; handing the pair itself to
		-- decode() or check() cannot work.
		local condition, result = possP[1], possP[2]
		if decode(condition, x, term) then
			return check(result, x, term)
		end
	end
end

local function is_long(ch)
	-- Report whether the vowel in `ch` is long. Tests run in priority
	-- order: an explicit macron or breve wins over the letter's own length.
	-- Raises an error when none of the tests matches.
	local tests = {
		{ MACRON, true },
		{ BREVE, false },
		{ "η", true },
		{ "ω", true },
		{ "ε", false },
		{ "ο", false },
	}
	for _, test in ipairs(tests) do
		if ch:find(test[1]) then
			return test[2]
		end
	end
	error("Could not determine length of " .. dump(ch))
end

local function get_pitch_marks(ch, accent)
	-- Map an accent found on `ch` to the mark used in the transcription.
	-- Only the acute depends on vowel length; any other value of `accent`
	-- (including nil) gives the empty string.
	if accent == ACUTE then
		return is_long(ch) and acute_on_long_vowel or acute_on_short_vowel
	end
	if accent == GRAVE then
		return grave_pitch_mark
	end
	if accent == CIRCUM then
		return circumflex_on_long_vowel
	end
	return ""
end

function export.convert_term(term, periodstart)
	--[==[
	Convert a term to IPA for each period from `periodstart` onwards.
	* `term` is first passed through tokenize().
	* `periodstart`: nil or "" selects every entry of `periods`; otherwise
	  output starts at the matching entry and includes every later one.
	Returns two values: the IPA table and the list of selected periods.
	NOTE(review): several subscripts seem to be missing in this copy (see
	the inline notes), and `ch` is never declared in this function or in
	any enclosing scope that is visible here.
	]==]
	term = tokenize(term)
	-- Only two initialisers are given, so `start` begins as nil and is
	-- assigned just below.
	local IPAs, outPeriods, start = {}, {}
	if periodstart and periodstart ~= "" then
		start = false
	else
		start = true
	end
	-- Once the starting period is reached, `start` stays true, so every
	-- later period is selected too.
	for _, period in ipairs(periods) do 
		if period == periodstart then
			start = true
		end
		if start then
			-- NOTE(review): this replaces the whole table on every pass;
			-- presumably a per-period entry was intended -- confirm.
			IPAs = {}
			insert(outPeriods, period)
		end
	end
	local length, x, advance, letter, p = #term, 1, 0, "", nil
	while x <= length do
		-- NOTE(review): assigns the whole token list, not the token at x.
		letter = term
		-- NOTE(review): `ch` is undefined here. Also, `breathing`,
		-- `stress` and `pitch` are computed but never read again in this
		-- function.
		local breathing = ch:match(ROUGH) and "rough" or "smooth"
		-- NOTE(review): the pattern is empty; presumably a set of accent
		-- marks belongs here -- confirm.
		local accent = umatch(ch, "")
		local stress = accent and "ˈ" or ""
		local pitch = get_pitch_marks(ch, accent)
		
		
		
		-- NOTE(review): presumably the data entry for `letter` was meant;
		-- as written this is the whole data module.
		local data = m_data
		if not data then -- no data found
			-- explicit pass
		else
			-- check to see if a multicharacter search is warranted
			-- `advance` is the number of extra tokens consumed (0 if none).
			advance = data.pre and check(data.pre, x, term) or 0
			-- NOTE(review): when advancing, the lookup key for the
			-- multi-token entry appears to be missing from `m_data.p`.
			p = (advance ~= 0) and m_data.p or data.p
			-- NOTE(review): `period` is unused in the body, so the same
			-- value is inserted once per selected period -- confirm
			-- whether p and IPAs should be indexed by period.
			for _, period in ipairs(outPeriods) do
				insert(IPAs, check(p, x, term))
			end
			x = x + advance
		end
		x = x + 1
	end
	
	--Concatenate the IPAs
	-- NOTE(review): IPAs is overwritten on each pass and `period` is unused.
	-- Callers read IPAs.byz2.IPA, so a per-period entry is presumably
	-- intended here.
	for _, period in ipairs(outPeriods) do
		IPAs = {IPA = concat(IPAs, "")}
	end
	
	return IPAs, outPeriods
end
-- Fill in the forward-declared local alias.
convert_term = export.convert_term

local function find_syllable_break(word, nVowel, wordEnd)
	-- Return the index of the last character of the current syllable.
	-- `nVowel` is the position of the next vowel; `wordEnd` signals that
	-- there is none, in which case the syllable runs to the end of `word`.
	if wordEnd then
		return ulen(word)
	end

	-- Character `k` places before the next vowel ("" when out of range).
	local function back(k)
		return fetch(word, nVowel - k)
	end

	if is(back(1), "liquid") then
		-- Obstruent + liquid, optionally with aspiration in between,
		-- stays together in the following syllable.
		if is(back(2), "obst") then
			return nVowel - 3
		end
		if back(2) == aspirated and is(back(3), "obst") then
			return nVowel - 4
		end
		return nVowel - 2
	end

	if is(back(1), "cons") then
		return nVowel - 2
	end

	-- Aspirated obstruent: the aspiration mark travels with its consonant.
	if back(1) == aspirated and is(back(2), "obst") then
		return nVowel - 3
	end

	-- "r" carrying the voiceless mark: likewise kept as one unit.
	if back(1) == voiceless and back(2) == "r" then
		return nVowel - 3
	end

	return nVowel - 1
end

-- Split one word of IPA into syllables joined by ".", and move a stress mark
-- "ˈ" to the front of its syllable (or drop it when the word has only one).
-- Returns nil when no syllable was produced (e.g. for an empty word).
-- NOTE(review): if a non-empty `word` contains nothing that is(…, "vowel")
-- accepts, the first inner loop appears never to terminate -- confirm
-- callers cannot pass such input.
local function syllabify_word(word)
	local syllables = {}
	--[[	cVowel means "current vowel", nVowel "next vowel",
			sBreak "syllable break".							]]--
	local cVowel, nVowel, sBreak, stress, wordEnd, searching
	-- Each pass peels one syllable off the front of `word`.
	while word ~= "" do
		-- `wordEnd` is deliberately not reset: once set, the syllable taken
		-- below is the whole remainder, which empties `word`.
		cVowel, nVowel, sBreak, stress = false, false, false, false
		
		--First thing is to find the first vowel.
		searching = 1
		local cVowelFound = false
		while not cVowel do
			local letter = fetch(word, searching)
			local nextLetter = fetch(word, searching + 1)
			if cVowelFound then
				-- Inside the vowel nucleus: it ends at a new syllabic vowel,
				-- a consonant, a stress mark, or the end of the word.
				if (is(letter, "vowel") and nextLetter ~= nonsyllabic) or is(letter, "cons") or letter == "" or letter == "ˈ" then
					cVowel = searching - 1
				elseif is(letter, "diacritic") then
					searching = searching + 1
				elseif letter == tie then
					-- After a tie bar, go back to looking for a vowel.
					cVowelFound = false
					searching = searching + 1
				else
					searching = searching + 1
				end
			else
				if is(letter, "vowel") then
					cVowelFound = true
				elseif letter == "ˈ" then
					-- Remember a stress mark seen before the vowel.
					stress = true
				end
				searching = searching + 1
			end
		end
	
		--Next we try and find the next vowel or the end.
		searching = cVowel + 1
		while (not nVowel) and (not wordEnd) do
			local letter = fetch(word, searching)
			-- A stress mark counts as the start of the next nucleus here.
			if is(letter, "vowel") or letter == "ˈ" then
				nVowel = searching
			elseif letter == "" then
				wordEnd = true
			else
				searching = searching + 1
			end
		end
		
		--Finally we find the syllable break point.
		sBreak = find_syllable_break(word, nVowel, wordEnd)
		
		--Pull everything up to and including the syllable Break.
		local syllable = usub(word, 1, sBreak)
		
		--If there is a stress accent, then we need to move it to the 
		--beginning of the syllable, unless it is a monosyllabic word,
		--in which case we remove it altogether.
		if stress then
			-- Monosyllabic means: nothing collected yet and this syllable
			-- is the entire remaining word.
			if next(syllables) or syllable ~= word then
				-- In a concatenation only gsub's first result (the string)
				-- is used; its replacement count is dropped.
				syllable = "ˈ" .. syllable:gsub("ˈ", "")
			else 
				syllable = syllable:gsub("ˈ", "")
			end
			stress = false
		end
		insert(syllables, syllable)
		word = usub(word, sBreak + 1)
	end
	
	local out = nil
	
	if #syllables > 0 then
		out = concat(syllables, ".")
		-- A stress mark already marks a syllable boundary, so drop the dot
		-- in front of it.
		out = out:gsub("%.ˈ", "ˈ")
	end
	return out
end

-- Syllabify the transcription of every period in place.
-- `IPAs` maps each period code to a table with an `IPA` string; `periods`
-- lists the codes to process. Words are separated by single spaces, and a
-- word for which syllabify_word() returns nil is dropped.
-- Returns the same `IPAs` table.
local function syllabify(IPAs, periods)
	for _, period in ipairs(periods) do
		local entry = IPAs[period]
		local ipa = {}
		for _, word in ipairs(split(entry.IPA, " ")) do
			local word_ipa = syllabify_word(word)
			if word_ipa then
				insert(ipa, word_ipa)
			end
		end
		-- Write back to this period's entry; a single field shared by all
		-- periods would be overwritten on every pass.
		entry.IPA = concat(ipa, " ")
	end
	return IPAs
end

-- Build the preview-only notice asking editors to mark the length of
-- ambiguous vowels, followed by the tracking category.
-- `ambig` is the list of ambiguous vowels found in the term; nil or an empty
-- list gives the empty string.
local function make_ambig_note(ambig)
	-- The table ambig is filled with all the ambiguous vowels that have been found in the term.
	local ambig_note = ""
	if ambig and #ambig > 0 then
		-- Number agreement: [1] is the noun ending plus a space, [2] the
		-- matching pronoun.
		local agr = (#ambig > 1) and { "s ", "each one" } or { " ", "it" }

		-- NOTE(review): the <small> element is empty; it presumably held a
		-- short preview-only remark -- restore from upstream if wanted.
		-- The sentence used to start with a stray "]" left from a lost
		-- module link; "this module" avoids guessing the page name.
		ambig_note = '\n<p class="previewonly">Mark the vowel length of the ambiguous vowel' .. agr[1]
			.. mw.text.listToText(ambig) .. " by adding a macron after " .. agr[2]
			.. " if it is long, or a breve if it is short. By default, this module assumes it is short if unmarked."
			.. "<br/><small></small>"
			.. format_categories(
				{ "Ancient Greek terms with incomplete pronunciation" }, lang)
			.."</p>\n"
	end
	return ambig_note
end

-- Render the collapsible pronunciation box.
-- * `IPAs` maps period codes to tables with an `IPA` string. It must contain
--   a `byz2` entry, which is used to size the box.
-- * `ambig` is forwarded to make_ambig_note().
-- * `periods` lists the period codes to show in the expanded view.
-- Returns the wikitext/HTML for the box as a string.
local function make_table(IPAs, ambig, periods)
	--Final format
	local inlineProns = {}
	local listOfProns = {}
	local fullProns = {}
	-- Set of the periods actually requested, for filtering the inline view.
	local periods2 = {}

	-- Expanded view: one bulleted, labelled line per period.
	for _, period in ipairs(periods) do
		insert(fullProns, "* " .. m_a.format_qualifiers(lang, {"grc-" .. period}) .. " " ..
			m_IPA.format_IPA_full {
				lang = lang,
				items = {{pron = "/" .. IPAs[period].IPA .. "/"}},
			})
		periods2[period] = true
	end

	-- Collapsed view: only the inline periods that were requested.
	for _, period in ipairs(inlinePeriods) do
		if periods2[period] then
			local pron = "/" .. IPAs[period].IPA .. "/"
			insert(inlineProns, {pron = pron})
			insert(listOfProns, pron)
		end
	end

	-- Width in em: the longer of the inline line and the longest labelled
	-- line, scaled by 0.68 per character.
	local inlineIPAlength = math.floor(math.max(
		ulen("IPA(key): " .. concat(listOfProns, " → ")) * 0.68,
		ulen("(15th AD Constantinopolitan) IPA(key): /" .. IPAs.byz2.IPA .. "/") * 0.68
	))

	local inline = '\n<div class="vsShow" style="display:none">\n* ' .. m_IPA.format_IPA_full {
		lang = lang,
		items = inlineProns,
		separator = " → ",
	} .. "</div>"

	local full = '\n<div class="vsHide">\n' .. concat(fullProns, "\n") .. make_ambig_note(ambig) .. "</div>"

	return '<div class="vsSwitcher" data-toggle-category="pronunciations" style="width: ' .. inlineIPAlength .. 'em; max-width:100%;"><span class="vsToggleElement" style="float: right;">&nbsp;</span>' .. inline .. full .. "\n</div>"
end

--[==[
Produce the full pronunciation box for `term`.
* `period` is the first period to show (see convert_term). Ambiguous vowels
  are only looked for when it is "cla".
* Raises an error when a macron or breve follows ε, ο, η or ω.
Returns the string built by make_table().
]==]
function export.get_IPA(term, period)
	local ambig
	if period == "cla" then
		-- Must run on the raw input, before implied lengths are marked.
		ambig = find_ambig(term)
	end

	term = canonicalize(ulower(term))
	for alias, letter in pairs(m_data.aliases) do
		-- Plain assignment keeps only gsub's first result (the string).
		term = term:gsub(alias, letter)
	end
	term = mark_implied_length(term)

	-- Reject a length mark on a vowel whose length is already fixed: one
	-- of ε ο η ω, any run of diacritics, then a macron or breve (combining
	-- or spacing).
	-- NOTE(review): the two bracket classes were missing, leaving
	-- `diacritic*`, which matches every string and so always raised. They
	-- are rebuilt here from the error message; confirm against upstream.
	local decomposed = toNFD(term)
	local length_marks = MACRON .. BREVE .. macron .. breve
	if umatch(decomposed, "[εοηω]" .. diacritic .. "*[" .. length_marks .. "]") then
		error("Macrons and breves cannot be placed after the letters ε, ο, η, or ω.")
	end

	term = reorderDiacritics(term)

	local IPAs, periods = convert_term(term, period)

	IPAs = syllabify(IPAs, periods)

	return make_table(IPAs, ambig, periods)
end
-- Fill in the forward-declared local alias.
get_IPA = export.get_IPA

--[==[
Template entry point. Reads the parent frame's arguments:
* 1: the term to transcribe (defaults to the page name);
* period: the first period to show (defaults to "cla").
Returns the pronunciation box from get_IPA().
]==]
function export.create(frame)
	-- The parameter names were missing from the spec, which is a syntax
	-- error. `period` is established by the read below; the term is the
	-- first positional parameter.
	local args = require("Module:parameters").process(frame:getParent().args, {
		[1] = {default = pagename},
		["period"] = {default = "cla"},
	})
	-- Pass the term itself, not the whole argument table.
	return get_IPA(args[1], args.period)
end

--[==[
Documentation helper: render a wikitable of example pronunciations.
Parameter 1 is a comma-separated list of terms; a term may be followed by
"(period=xxx)" to choose its first period (default "cla").
Returns the wikitable as a string.
]==]
function export.example(frame)
	local args = require("Module:parameters").process(frame:getParent().args, {
		[1] = true
	})

	local output = { '{| class="wikitable"' }
	-- Split the list itself, not the whole argument table.
	local terms = mw.text.split(args[1], ",%s+")

	-- ipairs keeps the rows in the order the terms were written.
	for _, term in ipairs(terms) do
		-- NOTE(review): the capture classes were missing from both patterns.
		-- "anything up to the closing/opening parenthesis" is the evident
		-- intent -- confirm against upstream.
		local period = umatch(term, "%(period ?= ?([^%)]+)%)") or "cla"
		-- Without a parenthesised suffix the whole item is the term. The
		-- old trailing `or error(...)` could never run and is dropped.
		local entry = umatch(term, "([^%(]+) %(") or term
		local link = full_link(entry)
		local IPA = get_IPA(entry, period)
		insert(output, "\n|-\n| " .. link .. " || " .. IPA)
	end

	insert(output, "\n|}")

	return concat(output)
end

return export
--Things we still need:
--Voicing of sigma around (after?) voiced stops. 
--Proper alerts for editors, especially on ambiguous vowels.