Module:tl-utilities

Hello, you have come here looking for the meaning of the word Module:tl-utilities. In DICTIOUS you will not only get to know all the dictionary meanings for the word Module:tl-utilities, but we will also tell you about its etymology, its characteristics and you will know how to say Module:tl-utilities in singular and plural. Everything you need to know about the word Module:tl-utilities you have here. The definition of the word Module:tl-utilities will help you to be more precise and correct when speaking or writing your texts. Knowing the definition of Module:tl-utilities, as well as those of other words, enriches your vocabulary and provides you with more and better linguistic resources.


local export = {}

local m_str_utils = require("Module:string utilities")
local m_table = require("Module:table")
local baybayin_encode_module = "Module:tl-bay_sc"

local lang = require("Module:languages").getByCode("tl")
local sc_Tglg = require("Module:scripts").getByCode("Tglg")

local rfind = m_str_utils.find
local rmatch = m_str_utils.match
local rsubn = m_str_utils.gsub
local rsplit = m_str_utils.split
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
local trim = mw.text.trim
local u = m_str_utils.char
local ulen = m_str_utils.len
local ulower = m_str_utils.lower
local uupper = m_str_utils.upper

-- Combining diacritics, produced from their code points with u() (m_str_utils.char).
local AC = u(0x0301) -- combining acute accent
local GR = u(0x0300) -- combining grave accent
local CFLEX = u(0x0302) -- combining circumflex
local TILDE = u(0x0303) -- combining tilde
local DIA = u(0x0308) -- combining diaeresis
local MACRON = u(0x0304) -- combining macron
local DOTOVER = u(0x0307) -- combining dot above

-- Building blocks that are concatenated into the patterns used throughout this module.
-- NOTE(review): V, NV, accent_c, ipa_stress_c and C are empty strings in this copy of the file although they are
-- spliced into patterns below. They look like character classes whose bracketed text was lost (e.g. V was presumably
-- "[" .. vowel .. "]") -- confirm against the canonical module before relying on them.
local vowel = "aeëəiou" -- vowel
local V = "" -- presumably the vowel character class -- TODO confirm
local NV = "" -- presumably the non-vowel character class -- TODO confirm
local accent = AC .. GR .. CFLEX .. MACRON -- the accent marks treated as stress/length diacritics
local accent_c = "" -- presumably the character class built from `accent` -- TODO confirm
local ipa_stress = "ˈˌ"
local ipa_stress_c = "" -- presumably the character class built from `ipa_stress` -- TODO confirm
local separator = accent .. ipa_stress .. "# .-"
local C = "" -- consonant (presumably a negated class built from `vowel` and `separator` -- TODO confirm)

-- Wrapper around rsubn() that hands back only the substituted string and drops the replacement count.
local function rsub(term, foo, bar)
	-- The enclosing parentheses truncate rsubn()'s multiple results to the first one.
	return (rsubn(term, foo, bar))
end

-- Keep applying rsub() with the same pattern and replacement until the text stops changing, then return it.
local function rsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		term = rsub(previous, foo, bar)
	until term == previous
	return term
end

-- Normalize `text` to NFD with toNFD(), then apply table-driven substitutions that, per the original comment, leave
-- ñ and ü in composed form. When `recompose_e_dia` is truthy a second substitution does the same for ë/Ë.
-- Returns the resulting string.
-- NOTE(review): the keys of both replacement tables are missing in this copy of the file (the bracketed key
-- expressions appear to have been stripped), so the constructors below are not valid Lua as shown. The pattern "."
-- may also have lost a trailing character class. Restore both from the canonical module.
local function decompose(text, recompose_e_dia)
	-- decompose everything but ñ and ü
	text = toNFD(text)
	text = rsub(text, ".", {
		 = "ñ",
		 = "Ñ",
		 = "ü",
		 = "Ü",
	})
	if recompose_e_dia then
		text = rsub(text, ".", {
			 = "ë",
			 = "Ë",
		})
	end
	return text
end

-- Fix capitalization but considers syllable breaks.
-- Lowercases `input`, then walks it position by position and uppercases a character when its uppercase form equals
-- the character of `caps_map` at the same position, shifted left by the number of syllable-break characters seen so
-- far (`syllbreak`). Returns the re-capitalized string.
-- Positions come from `#text` and `text:sub()`, i.e. they are byte positions.
-- NOTE(review): both rfind() patterns below are empty strings in this copy and `syllbreak_chars` is never referenced;
-- presumably the patterns were character classes built from `syllbreak_chars` -- confirm against the canonical module.
local function fix_capitalization(input, caps_map)
	local syllbreak = 0
	local text = ulower(input)
	local syllbreak_chars = ".7"
	
	for i=1, #text do
		local text_pre = text:sub(1, i-1)
		local text_current = text:sub(i,i)
		local text_post = text:sub(i+1)
		-- Character of the capitalization reference that lines up with position i once inserted breaks are discounted.
		local caps_current = caps_map:sub(i-syllbreak, i-syllbreak)
		if rfind(text_current, "") and not rfind(caps_current, "")then
			-- A break character present in `text` but not in `caps_map`: widen the offset between the two strings.
			syllbreak = syllbreak + 1
		elseif uupper(text_current) == caps_current then
			text = table.concat({text_pre, uupper(text_current), text_post})
		end
	end
	return text
end


-- Strip accent marks from `str`: decompose it (keeping e-diaeresis composed) and then drop whatever `accent_c`
-- matches directly after any character.
function export.remove_accents(str)
	local decomposed = decompose(str, "recompose e-dia")
	local stripped = rsub(decomposed, "(.)" .. accent_c, "%1")
	return stripped
end

-- Clean up Baybayin input: each run matched by the pattern below is transliterated with lang:transliterate() using
-- the Tglg script object and then post-processed (hyphen insertion, hyphens rewritten to "7", macron insertion).
-- Returns `text` with those runs replaced.
-- NOTE(review): the patterns "+", "()()" and "()" appear to have lost their character classes in this copy of the
-- file; they are left untouched here and must be restored from the canonical module.
function export.decode_baybayin(text)
	local text = rsub(text, "+", function(baybayin)
		-- `result` is declared local so the callback no longer leaks a global variable.
		local result = lang:transliterate(baybayin, sc_Tglg)
		result = rsub(result, "()()", "%1-%2")
		result = rsub(result, "%-", "7")
		result = rsub(result, "()", "%1" .. MACRON) -- No way to know stress in Baybayin. Disable for now.
		return result
	end)
	return text
end

-- "Align" syllabified respelling `syllab` to original spelling `spelling` by matching character-by-character, allowing
-- for extra syllable and accent markers in the syllabification and certain mismatches in the consonants. The goal is to
-- produce the appropriately syllabified version of the original spelling (the pagename) by matching characters in the
-- syllabified respelling to the original spelling, putting the syllable boundaries in the appropriate places in the
-- original spelling. As an example, given syllabified respelling 'a.ma.7ín' and original spelling 'amain', we would
-- like to produce 'a.ma.in'.
--
-- If we encounter an extra syllable marker (.), we allow and keep it. If we encounter an extra accent marker in the
-- syllabification, we drop it. We allow for mismatches in capitalization and for certain other mismatches, e.g. extra
-- glottal stops (written 7), h in respelling vs. g or j in the original, etc. If we can't match, we return nil
-- indicating the alignment failed.
-- Parameters: `syllab` is the syllabified respelling, `spelling` the original spelling (pagename).
-- Returns the aligned, NFC-normalized spelling with syllable dots, or nil (after logging via mw.log) on a mismatch.
-- NOTE(review): in this copy of the file the keys of `matching_chars`, the index expressions in syll_at(), spell_at(),
-- uspell_at() and matches(), and the rsplit() separators appear to have been stripped (presumably e.g.
-- `syll_chars[pos]`); the code is kept verbatim and must be restored from the canonical module.
function export.align_syllabification_to_spelling(syllab, spelling)
	local result = {}
	local function concat_result()
		-- Postprocess to remove dots (syllable boundaries) next to hyphens.
		return (toNFC(table.concat(result)):gsub("%.%-", "-"):gsub("%-%.", "-"))
	end
	-- Remove glottal stop (7) from respelling to simplify the code below, because it's never found in the original
	-- spelling. (FIXME: We should do the same for diacritics, but they're currently removed earlier, in
	-- syllabify_from_spelling(). We should probably get rid of the removal there and put it here.)
	syllab = decompose(syllab:gsub("ː", ""), "recompose e-dia"):gsub("7", "")
	spelling = decompose(spelling, "recompose e-dia")
	-- Split both strings into per-character arrays; the respelling is lowercased, the spelling keeps its case.
	local syll_chars = rsplit(ulower(syllab), "")
	local spelling_chars = rsplit(spelling, "")
	local i = 1 -- current position in syll_chars
	local j = 1 -- current position in spelling_chars
	local function matches(uci, ucj)
		-- Return true if a syllabified respelling character (uci) matches the corresponding spelling char (ucj).
		-- Both uci and ucj should be lowercase.
		-- Sound is at the key, values are the letters sound can match
		local matching_chars = {
			 = {"v"},
			 = {"i"},
			 = {"a", "e", "o", "u"},
			 = {"g", "j", "x"},
			 = {"e", "y"},
			 = {"g"},
			 = {"c", "j"},
			 = {"u"},
			 = {"f"},
			 = {"j", "c", "x", "z"},
			 = {"o"},
			 = {"u", "o"},
			 = {"i"}
		}

		-- NOTE(review): presumably this looked up the entry for `uci` before calling m_table.contains() -- confirm.
		return uci == ucj or (matching_chars and m_table.contains(matching_chars, ucj) and true) or false
	end
	local function silent_spelling_letter(ucj)
		-- Spelling characters that may have no counterpart in the respelling.
		return ucj == "h" or ucj == "'" or ucj == "-"
	end
	local function syll_at(pos)
		return syll_chars or ""
	end
	local function spell_at(pos)
		return spelling_chars or ""
	end
	local function uspell_at(pos)
		local c = spelling_chars
		return c and ulower(c) or ""
	end
	-- Each iteration consumes a respelling character, a spelling character, or both; any pair that no branch
	-- accepts aborts the alignment with nil.
	while i <= #syll_chars or j <= #spelling_chars do
		local uci = syll_at(i)
		local cj = spell_at(j)
		local ucj = uspell_at(j)

		if uci == "g" and syll_at(i - 1) == "n" and syll_at(i + 1) == "." and matches(syll_at(i + 2), ucj) and
			not matches(syll_at(i + 2), uspell_at(j + 1)) then
			-- As a special case, before checking whether the corresponding characters match, we have to skip an extra
			-- g in an -ng- sequence in the syllabified respelling if the corresponding spelling character matches the
			-- next respelling character (taking into account the syllable boundary). This is so that e.g.
			-- syll='ba.rang.gay' matches spelling='barangay'. Otherwise we will match the first respelling g against
			-- the spelling g and the second respelling g won't match. A similar case occurs with
			-- syll='E.vang.he.lis.ta' and spelling='Evangelista'. But we need an extra condition to not do this hack
			-- when syll='ba.rang.gay' matches spelling='baranggay'.
			i = i + 1
		elseif uci == "g" and ucj == "g" and uspell_at(j + 1) == TILDE  then
			-- Spelling has g followed by a combining tilde: copy both spelling characters for one respelling g.
			table.insert(result, cj)
			table.insert(result, uspell_at(j + 1))
			i = i + 1
			j = j + 2
		elseif uci == "f" and ucj == "p" and uspell_at(j + 1) == "h"  then
			-- Respelling f against spelling "ph": copy both spelling characters.
			table.insert(result, cj)
			table.insert(result, uspell_at(j + 1))
			i = i + 1
			j = j + 2
		elseif matches(uci, ucj) then
			table.insert(result, cj)
			i = i + 1
			j = j + 1
		elseif ucj == uspell_at(j - 1) and uci == "." and ucj ~= syll_at(i + 1) then
			-- See below. We want to allow for a doubled letter in spelling that is pronounced single, and preserve the
			-- doubled letter. But it's tricky in the presence of syllable boundaries on both sides of the doubled
			-- letter as well as doubled letters pronounced double. Specifically, there are three possibilities,
			-- exemplified by:
			-- (1) syll='Mal.lig', spelling='Mallig' -> 'Mal.lig';
			-- (2) syll='Ma.lig', spelling='Mallig' -> 'Ma.llig';
			-- (3) syll='Wil.iam', spelling='William' -> 'Will.iam'.
			-- If we copy the dot first, we get (1) and (2) right but not (3).
			-- If we copy the double letter first, we get (2) and (3) right but not (1).
			-- We choose to copy the dot first except in the situation exemplified by (3), where we copy the doubled
			-- letter first. The condition above handles (3) (the doubled letter matches against a dot) while not
			-- interfering with (1) (where the doubled letter also matches against a dot but the next letter in the
			-- syllabification is the same as the doubled letter, because the doubled letter is pronounced double).
			table.insert(result, cj)
			j = j + 1
		elseif silent_spelling_letter(ucj) and uci == "." and ucj ~= syll_at(i + 1) and
			not rfind(uspell_at(j + 1), V) then
			-- See below for silent h or apostrophe in spelling. This condition is parallel to the one directly above
			-- for silent doubled letters in spelling and handles the case of syllab='Abduramán', spelling='Abdurahman',
			-- which should be syllabified 'Ab.du.rah.man'. But we need a check to see that the next spelling character
			-- isn't a vowel, because in that case we want the silent letter to go after the period, e.g.
			-- syllab='Jumu7á', spelling='Jumu'ah' -> 'Ju.mu.'ah' (the 7 is removed above).
			table.insert(result, cj)
			j = j + 1
		elseif uci == "." then
			-- Extra syllable boundary in the respelling: keep it in the output.
			table.insert(result, uci)
			i = i + 1
		elseif ucj == uspell_at(j - 1) then
			-- A doubled letter in spelling that is pronounced single. Examples:
			-- * syllab='Ma.líg', spelling='Mallig' -> 'Ma.llig' (with l)
			-- * syllab='Lu.il.yér', spelling='Lhuillier' -> 'Lhu.ill.ier' (with l; a more complex example)
			-- * syllab='a.sa.la.mu a.lai.kum', spelling='assalamu alaikum' -> 'as.sa.la.mu a.lai.kum' (with s)
			-- * syllab='Jé.fer.son', spelling='Jefferson' -> 'Je.ffer.son' (with f)
			-- * syllab='Je.ma', spelling='Gemma' -> 'Ge.mma' (with m)
			-- * syllab='Ha.na', spelling='Hannah' -> 'Ha.nnah' (with n)
			-- * syllab='A.by', spelling='Abby' -> 'A.bby' (with b)
			-- * syllab='Ka.ba', spelling='Kaaba' -> 'Kaa.ba' (with a)
			-- * syllab='Fu.ji', spelling='Fujii' -> 'Fu.jii' (with i)
			table.insert(result, cj)
			j = j + 1
		elseif silent_spelling_letter(ucj) and not (ucj == "h" and rfind(uspell_at(j - 1), V) and rfind(uspell_at(j + 1), V)) then
			-- A silent h, apostrophe or hyphen in spelling. Examples:
			-- * syllab='adán', spelling='adhan' -> 'a.dhan'
			-- * syllab='Atanasya', spelling='Athanasia' -> 'A.tha.nas.ia'
			-- * syllab='Cýntiya', spelling='Cynthia' -> 'Cyn.thi.a'
			-- * syllab='Ermóhenes', spelling='Hermogenes' -> 'Her.mo.ge.nes'
			-- * syllab='Abduramán', spelling='Abdurahman' -> 'Ab.du.rah.man'
			-- * syllab='Jumu7á', spelling='Jumu'ah' -> 'Ju.mu.'ah'
			-- * syllab='pag7ibig', spelling='pag-ibig' -> 'pag-i.big'
			table.insert(result, cj)
			j = j + 1
		elseif uci == AC or uci == GR or uci == CFLEX or uci == DIA or uci == TILDE or uci == MACRON or
			uci == "y" or uci == "w" then
			-- skip character
			i = i + 1
		else
			-- non-matching character
			mw.log(("Syllabification alignment mismatch for pagename '%s' (position %s, character %s), syllabified respelling '%s' (position %s, character %s), aligned result so far '%s'"
				):format(spelling, j, ucj, syllab, i, uci, concat_result()))
			return nil
		end
	end
	-- NOTE(review): the loop above only finishes normally once both indices are past the end of their arrays (it has
	-- no `break`), so this check uses the negation of the loop condition and appears to be unreachable.
	if i <= #syll_chars or j <= #spelling_chars then
		-- left-over characters on one side or the other
		mw.log(("Syllabification alignment mismatch for pagename '%s' (%s), syllabified respelling '%s' (%s), aligned result so far '%s'"
			):format(
				spelling, j > #spelling_chars and "end of string" or ("position %s, character %s"):format(j, uspell_at(j)),
				syllab, i > #syll_chars and "end of string" or ("position %s, character %s"):format(i, syll_at(i)),
				concat_result()))
		return nil
	end
	return concat_result()
end

-- Return the result of matching `text` against the pattern below (a truthy match, or nil when nothing matches).
-- NOTE(review): the pattern is an empty string in this copy, which matches any string; going by the function name
-- it was presumably a character class covering the Baybayin script -- restore it from the canonical module.
function export.has_baybayin(text)
	return text:match("")
end

-- Collapse every run of whitespace into a single space, then drop one leading and one trailing space if present.
local function canon_spaces(text)
	local collapsed = rsub(text, "%s+", " ")
	local without_leading = rsub(collapsed, "^ ", "")
	local without_trailing = rsub(without_leading, " $", "")
	return without_trailing
end


-- Automatically syllabify `text` (a spelling or respelling) and return it with "." at the syllable boundaries.
-- Outline of the visible steps:
--   1. trim and canonicalize spaces, wrap Baybayin runs (decoded with export.decode_baybayin) in "<᜶" ... "᜶>";
--   2. decompose, remember the cased text in `origtext`, lowercase, and add "#" word-boundary markers;
--   3. replace digraphs with single placeholder characters, insert syllable dots through a long series of ordered
--      substitutions, then turn the placeholders back into digraphs;
--   4. restore capitalization with fix_capitalization(), restore hyphens by comparing against `pagename`,
--      re-encode Baybayin through the module named by `baybayin_encode_module`, and turn "|" back into ".".
-- `pagename` is only used by the hyphen-restoration step; that step indexes it, so it must be a string.
-- NOTE(review): many patterns below (e.g. "+", "(?)#(?)", "gu()", "()+") and the local V/NV/C definitions have
-- empty character classes in this copy of the file; they appear to have been stripped and are reproduced verbatim.
-- Restore them from the canonical module. The order of the substitutions is significant; do not reorder.
function export.syllabify_from_spelling(text, pagename)
	-- Auto syllabifications start --
	local vowel = vowel .. "ẃý" -- vowel
	local V = ""
	local NV = ""
	local C = "" -- consonant

	text = trim(text)
	text = canon_spaces(text)
	text = rsub(text, "+", function(baybayin)
		return "<᜶" .. export.decode_baybayin(baybayin) .. "᜶>"
	end)
	
	text = decompose(text, "recompose e-dia")

	-- Keep the cased version so capitalization can be put back after the lowercase processing below.
	local origtext = text
	text = string.lower(text)

	text = rsub(text, " ", "․ ")
	text = rsub(text, "$", "․")

	-- put # at word beginning and end and double ## at text/foot boundary beginning/end
	text = rsub(text, " | ", "# | #")
	text = "##" .. rsub(text, " ", "# #") .. "##"
	text = rsub_repeatedly(text, "(?)#(?)", "#")

	-- special word "mga" 
	text = rsub(text, "#mga#", "#m.ga#")

	-- Replace digraphs with single placeholder characters so the rules below can treat them as one consonant.
	text = rsub(text, "ng̃", "ŋ")
	text = rsub(text, "ng", "ŋ")
	text = rsub(text, "g̃", "ġ")
	text = rsub(text, "ch", "ĉ")
	text = rsub(text, "t_s", "ć")
	text = rsub(text, "sh", "ʃ")
	text = rsub(text, "gu()", "ǵ%1")
	text = rsub(text, "qu()", "ḱ%1")
	text = rsub(text, "r", "ɾ")
	text = rsub(text, "ɾɾ", "r")
	text = rsub(text, "ʔ", "7")

	-- double dot improvements
	text = rsub(text, "()%.y", "%1..y")
	text = rsub(text, "n%.k", "n..k")

	text = rsub_repeatedly(text, "#(" .. C .. "+)i()","#%1i.%2")
	text = rsub_repeatedly(text, "#(" .. C .. "+)u()","#%1u.%2")
	text = rsub_repeatedly(text, "(" .. C .. ")(%1)i()","%1%2.i%3")
	text = rsub_repeatedly(text, "(" .. C .. ")(%1)u()","%1%2.u%3")
	text = rsub_repeatedly(text, "(" .. C .. ")(" .. C .. ")i()","%1%2i.%3")
	text = rsub_repeatedly(text, "(" .. C .. ")(" .. C .. ")u()","%1%2u.%3")

	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*)(" .. C .. ")u()","%1%2.u%3")
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*)(" .. C .. ")i()","%1%2.i%3")
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*)u()","%1.u%2")
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*)o()","%1.ó%2")
	text = rsub(text, "a(" .. accent_c .. "*)o()","a%1ó%2")

	-- eu rules
	text = rsub_repeatedly(text, "()()("  .. accent_c .. "?)()("  .. accent_c .. "?)","%1%2%3.%4%5")

	text = rsub(text, "y(*)()","ý%1%2")
	text = rsub(text, "ý(*)()()","y%1%2%3")
	text = rsub(text, "ý(" .. V .. ")", "y%1")
	text = rsub(text, "w(?)()","ẃ%1%2")
	text = rsub(text, "ẃ(*)()()","w%1%2%3")
	text = rsub(text, "ẃ(" .. V .. ")","w%1")

	text = rsub(text, "(" .. V .. ")(" .. accent_c .. "?)ẃ()()" ,"%1%2w%3%4")
	text = rsub(text, "(" .. V .. ")(" .. accent_c .. "?)ẃ()()" ,"%1%2w%3%4")
	text = rsub(text, "(" .. V .. ")(" .. accent_c .. "?)ý()()" ,"%1%2y%3%4")
	text = rsub(text, "(" .. V .. ")(" .. accent_c .. "?)ý()()" ,"%1%2y%3%4")

	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*)(" .. C .. V .. ")", "%1.%2")

	-- "mb", "mp", "nd", "nk", "nt" combinations
	text = rsub_repeatedly(text, "(m)()()(" .. V .. ")", "%1%2.%3%4")
	text = rsub_repeatedly(text, "(n)()()(" .. V .. ")", "%1%2.%3%4")
	text = rsub_repeatedly(text, "(n)()()(" .. V .. ")", "%1%2.%3%4")
	text = rsub_repeatedly(text, "(n)()()(" .. V .. ")", "%1%2.%3%4")
	text = rsub_repeatedly(text, "(ŋ)()()(" .. V .. ")", "%1%2.%3%4")
	text = rsub_repeatedly(text, "()()()(" .. V .. ")", "%1%2.%3%4")
	text = rsub_repeatedly(text, "()()()(" .. V .. ")", "%1%2.%3%4")
	text = rsub_repeatedly(text, "()()(?)()(" .. V .. ")", "%1%2%3.%4%5")
	text = rsub_repeatedly(text, "(s)()()(" .. V .. ")", "%1%2.%3%4")

	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*" .. C .. ")(" .. C .. V .. ")", "%1.%2")
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*" .. C .. "+)(" .. C .. C .. V .. ")", "%1.%2")
	text = rsub_repeatedly(text, "(" .. C .. ")%.s()", "%1s.%2")

	-- Any aeëo, or stressed iu, should be syllabically divided from a following aeëo or stressed iu.
	text = rsub_repeatedly(text, "(" .. accent_c .. "*)()", "%1.%2")
	text = rsub_repeatedly(text, "(" .. accent_c .. "*)(" .. V .. accent_c .. ")", "%1.%2")
	text = rsub(text, "(" .. accent_c .. ")()", "%1.%2")
	text = rsub_repeatedly(text, "(" .. accent_c .. ")(" .. V .. accent_c .. ")", "%1.%2")
	text = rsub_repeatedly(text, "i(" .. accent_c .. "*)i", "i%1.i")
	text = rsub_repeatedly(text, "u(" .. accent_c .. "*)u", "u%1.u")

	-- Turn the single-character placeholders back into their digraphs.
	text = rsub(text, "ĉ", "ch")
	text = rsub(text, "ć", "ts")
	text = rsub(text, "ŋ", "ng")
	text = rsub(text, "ʃ", "sh")
	text = rsub(text, "ǵ.()", "g.u%1")
	text = rsub(text, "ǵ", "gu")
	text = rsub(text, "ġ", "g̃")
	text = rsub(text, "ḱ.()", "q.u%1")
	text = rsub(text, "ḱ", "qu")
	text = rsub(text, "r", "rr")
	text = rsub(text, "ɾ", "r")

	text = rsub_repeatedly(text, "(+)", ".")
	text = rsub(text, "?-?", "-")
	text = rsub(text, "()", "|%1")
	text = rsub(text, "()", "|%1")

	text = rsub(text, "()+", "%1")

	-- remove # symbols at word and text boundaries
	text = rsub_repeatedly(text, "(?)#(?)", "")
	text = rsub(text, "․", ".")
	
	text = rsub(text, "ẃ", "w")
	text = rsub(text, "ý", "y")

	-- Fix Capitalization --
	text = fix_capitalization(text, origtext)

	-- Fix hyphens --
	-- FIXME!!! Why are we relying on looking at the pagename here? This should not be happening.
	origtext = pagename

	-- Only attempt the restoration when text and pagename are identical once "|" and "-" are removed.
	if (table.concat(rsplit(origtext, "-")) == table.concat(rsplit(table.concat(rsplit(text, "|")), "-"))) then
		-- Number of "|" separators seen so far that have no counterpart in the pagename. Declared local: the
		-- original assigned it without `local`, which created a global variable.
		local syllbreak = 0
		for i=1, #text do
			if text:sub(i,i) == "|" then
				if origtext:sub(i-syllbreak, i-syllbreak) == "-" then
					-- The pagename has a hyphen at the corresponding position: put the hyphen back.
					text = table.concat({text:sub(1, i-1), "-", text:sub(i+1)})
				else
					syllbreak = syllbreak + 1
				end
			end
		end
	end
	
	-- Reencode Baybayin
	text = rsub(text, "(+)", function(baybayin)
		baybayin = baybayin:gsub("|", "/"):gsub("7", "")
		local result = require(baybayin_encode_module).transcribe(baybayin:gsub("|", "/"), false, false, false) 
		result = rsub(result, " ᜵ ", "|")
		return result
	end)

	-- FIXME! Hack -- up above we changed periods to vertical bars. The rest of the code expects periods so change
	-- them back. We should clean up the code above to leave the periods alone.
	return (text:gsub("|", "%."))
end

-- Syllabify `respelling` and align the outcome to `pagename`, returning whatever
-- export.align_syllabification_to_spelling() returns. If `pagename` is nil, the respelling is used in its place.
function export.syllabify_and_align(respelling, pagename)
	local spelling = pagename
	if spelling == nil then
		spelling = respelling
	end
	local syllabified = export.syllabify_from_spelling(respelling, spelling)
	return export.align_syllabification_to_spelling(syllabified, spelling)
end

-- Assimilates nasal endings in prefixes.
-- Joins `text1` (the prefix) and `text2` (the root) and returns the combined string.
-- `assimilation` may be nil, "none", "partial", or "total"; any other value raises an error.
-- A trailing nasal of `text1` is first rewritten to a marker (ŋ, ṃ, ṇ), the "partial"/"total" rules are applied to
-- the joined string, and the markers are finally turned back into "ng", "m", "n".
-- NOTE(review): most patterns below ("ng(*)$", "(*)()", "*()" ...) have empty character classes in this copy of the
-- file; they appear to have been stripped, so the exact assimilation rules cannot be read from here. Restore them
-- from the canonical module.
local function nasal_adjust(text1, text2, assimilation)
	local t1 = text1 
	local t2 = text2
	
	if assimilation ~= nil and not m_table.contains({"none", "partial", "total"}, assimilation) then
		error('Assimilation options can only be "none", "partial", or "total".')	
	end
	
	t2 = rsub(t2, "^ng", "ŋ")
	
	-- Mark the prefix-final nasal so the rules below can tell it apart from other nasals.
	t1 = rsub(t1, "ng(*)$", "ŋ%1")
	t1 = rsub(t1, "m(*)$", "ṃ%1")
	t1 = rsub(t1, "n(*)$", "ṇ%1")
	
	local result = t1 .. t2
	if assimilation == "partial" then
		result = rsub(result, "(*)()", "m%1%2")
		result = rsub(result, "(*)()", "n%1%2")
		result = rsub(result, "(*)()", "ŋ%1%2")
	elseif assimilation == "total" then
		result = rsub(result, "*()(" .. V .. ")%1(?)%2(" .. NV .. "+)(" .. V .. ")", "m%2m%3%2%4%5")
		result = rsub(result, "*()(?)(" .. V .. ")%1%2%3(" .. NV .. "+)(" .. V .. ")", "m%2%3m%2%3%4%5")
		result = rsub(result, "*()(" .. V .. ")%1(?)%2(" .. NV .. "+)(" .. V .. ")", "n%2n%3%2%4%5")
		result = rsub(result, "*()(?)(" .. V .. ")%1%2%3(" .. NV .. "+)(" .. V .. ")", "n%2%3n%2%3%4%5")
		result = rsub(result, "*()(" .. V .. ")r%2(" .. NV .. "+)(" .. V .. ")", "n%2n%2%3%4")
		result = rsub(result, "*()(" .. V .. ")%1(?)%2(" .. NV .. "+)(" .. V .. ")", "ŋ%2ŋ%3%2%4%5")
		result = rsub(result, "*()(?)(" .. V .. ")%1%2%3(" .. NV .. "+)(" .. V .. ")", "ŋ%2%3ŋ%2%3%4%5")
		result = rsub(result, "*()", "m")
		result = rsub(result, "*()", "n")
		result = rsub(result, "*()", "ŋ")
		result = rsub(result, "(*)()", "n%1%2")
	end
	-- Turn the markers back into ordinary spelling.
	result = rsub(result, "ŋ", "ng")
	result = rsub(result, "ṃ", "m")
	result = rsub(result, "ṇ", "n")
	return result
end

-- Adjusts d to r at the start of a root.
-- `text1` is what precedes the root (a prefix or reduplicated part), `text2` is the root, `d` selects the mode.
-- Returns `text2`, with its initial "d" possibly changed to "r".
-- Modes for `d`:
--   no value (default): if the consonant check below passes (per the original note: the "d" is not followed by
--     "d", "l" or "r" as the next consonant), "d" changes to "r" when it ends up between vowels
--   "between": "d" changes to "r" if between vowels (regardless of the next consonant)
--   "d": retain "d" as "d"
--   "r": force "d" to "r"
-- NOTE(review): the pattern in the consonant check is an empty string in this copy (presumably a character class
-- listing d, l, r), and V is an empty string at file level -- restore both from the canonical module.
local function d_r_adjust_root(text1, text2, d)
	local t1 = text1
	local t2 = text2
	
	-- Nothing to do unless the root starts with "d" and the caller has not asked to keep it.
	local t2_start_d = rmatch(t2, "^d")
	if not t2_start_d or d == "d" then
		return t2
	end
	
	local d_valid_consonant_check = string.sub(rsub(t2, V, ""), 2, 2) -- Get consonant after "d"
	d_valid_consonant_check = not rmatch(d_valid_consonant_check, "")

	if d_valid_consonant_check or d == "between" or d == "r" then
		local t1_end_vowel = rmatch(t1, V .. "$")
		local t2_d_before_vowel = rmatch(t2, "^d" .. V)

		if (d == "r") or (t1_end_vowel and t2_d_before_vowel) then
			t2 = rsub(t2, "^d", "r")
		end
	end
	
	return t2
end

-- Attach prefix `affix` to `root` and return the combined word.
-- A root starting with a vowel temporarily gets a leading "ʔ" marker (removed again before returning); a hyphen is
-- appended to the prefix when any condition in the `if options or ...` test holds; the root then goes through
-- d_r_adjust_root() and nasal_adjust(). If `root` contained uppercase letters, its original casing is put back at
-- the end of the result.
-- NOTE(review): every `options` below was presumably an indexed field of the options table (different fields for
-- the hyphen, d/r and assimilation settings), and the patterns "^" and "+" have lost their character classes;
-- the index expressions and classes appear to have been stripped from this copy. Restore from the canonical module.
local function add_prefix(root, affix, options)
	local hyphen = ''
	local result = ulower(root)
	local root_vowel_start = rfind(result, "^(" .. V .. ")")
	local affix_consonant_end = rmatch(affix, C .. "$")
	
	if root_vowel_start then
		result = "ʔ" .. result
		result = rsub(result, "^ʔ(.)%1", "ʔ%1ʔ%1")
	end
	
	if options or (root_vowel_start and affix_consonant_end) or (ulower(root) ~= root) or (not rmatch(root, "^")) then
		hyphen = '-'
	end
	affix = affix .. hyphen
	
	result = d_r_adjust_root(affix, result, options)
	result = nasal_adjust(affix, result, options)
	
	-- Restore the caller's capitalization of the root.
	if(ulower(root) ~= root) then
		result = rsub(result, ulower(root) .. "$", root)
	end
	
	result = rsub(result, "+", "-")
	result = rsub(result, "ʔ", "")
	return result
end

-- Insert infix `affix` into `root` and return the result.
-- When `options` is truthy or the root starts with one of `special_start_conditions`, the infix goes immediately
-- before the first match of V; otherwise it goes after the first byte of the root.
-- NOTE(review): the two `options` tests were presumably distinct indexed fields of the options table whose index
-- expressions were stripped from this copy, and V is an empty string at file level -- confirm against the
-- canonical module.
local function add_infix(root, affix, options)
	local result = root
	local new_affix = affix
	
	local first_vowel_idx = rfind(result, V)
	local special_start_conditions = {V, "ng", "ts", "ch", "sh"}
	local has_special_start = false
	
	-- Stop at the first start pattern that matches the beginning of the root.
	for idx, starting in ipairs(special_start_conditions) do
      has_special_start = has_special_start or rfind(result, "^" .. starting)
      if has_special_start then break end
    end
	
	if options then
		-- Hyphenate the infix; the leading hyphen is omitted when the match of V is at position 1.
		if first_vowel_idx ~= 1 then
			new_affix = "-" .. new_affix 
		end
		new_affix = new_affix .. "-"
	end
	
	if options or has_special_start then
		result = string.sub(result, 1, first_vowel_idx-1) .. new_affix .. string.sub(result, first_vowel_idx)
	else
		result = string.sub(result, 1, 1) .. new_affix .. string.sub(result, 2)
	end
		
	result = rsub(result, "ŋ", "ng")
	return result
end

-- Attach suffix `affix` to `root` and return the combined word.
-- Visible behaviour: the root is decomposed and recomposed (toNFC); a final "ng" is protected as "ŋ" during
-- processing; the suffix "ng" removes a root-final "n" after a vowel; for the suffixes "an", "in", "i" an "h" may be
-- prepended to the suffix, a root-final "d" after a vowel may become "r", and final e/o may become i/u.
-- NOTE(review): the different `options` tests below (glottal, d/r, "i", "u", hyphen) were presumably distinct
-- indexed fields of the options table, and the patterns "$", "()e(", "()o(" ... have lost their character classes;
-- both appear to have been stripped from this copy. Restore from the canonical module.
local function add_suffix(root, affix, options)
	local result = root
	local new_affix = affix
	local tl_native_verb_suffixes = {
		"an", "in", "i"
	}
	local glottal = options
	
	-- Allow diacritics on input
	result = decompose(result, true)
	if rmatch(result, V .. "$") then
		-- Anything other than an explicit `false` counts as a glottal ending here.
		glottal = not (glottal == false)
	end
	result = rsub(result, "$", "")
	result = toNFC(result)
	
	result = rsub(result, "ng$", "ŋ")
	if new_affix == "ng" then
		result = rsub(result, "(" .. V .. ")n$", "%1")
	elseif (m_table.contains(tl_native_verb_suffixes, new_affix)) then
		if rmatch(result, V .. "$") and not glottal  then
			new_affix = "h" .. new_affix
		end
		
		if rmatch(result, V .. "d$") and 
			rmatch(new_affix, "^" .. V) and 
			options ~= "d" and 
			not options then
				result = rsub(result, "(" .. V .. ")" .. "d$", "%1r")
			end
	
		-- Temporary "#" marks the start of the word for the patterns below; it is removed again afterwards.
		result = "#" .. result
		if options == nil then
			result = rsub(result, "()e(" .. C .. "?)$", "%1i%2")
		elseif options == "i" then
			result = rsub(result, "()ee(" .. C .. "?)$", "%1ii%2")
			result = rsub(result, "()e(" .. C .. "?)$", "%1i%2")
		end
		
		if options == nil then
			result = rsub(result, "()o(" .. C .. "?)$", "%1u%2")
		elseif options == "u" then
			result = rsub(result, "()oo(" .. C .. "?)$", "%1uu%2")
			result = rsub(result, "()o(" .. C .. "?)$", "%1u%2")
		end
		
		result = rsub(result, "#", "")
	end
	
	if options then
		new_affix = "-" .. new_affix
	end

	result = rsub(result, "ŋ", "ng")
	
	result = result .. new_affix
	
	return result
end

-- TODO
-- Prefix -- DONE
-- Consonant cluster cases
-- Infix -- DONE
-- Suffix + changing spellings -- DONE
-- Circumfix -- DONE
-- By word affixation
-- Hyphen addition
-- Nasal assimilation - DONE
-- Syllabify
-- Reduplication
-- Capitalization, hyphen on proper noun
-- Pronunciation doesn't match spelling of root, provide phonetic spellings
-- D/R change,
-- SY/DY/TS/CH/SH cases
-- /ng/ Cases
-- double o or uo
-- Metathesis (nl, w, y), iC-in-V pattern ipinasok -> inipasok, ihinanda -> inihanda, iinuwi -> iniuwi (vowels) 
-- Diacritics (optional)
-- Syncope
-- Baybayin?
-- Analyze word what affix

-- options = {
-- 		 = See nasal_adjust()
-- 		 = Word to be affixed
-- 		 = Force hyphen boolean
-- 		 = Hyphenated words should be considered as one unit if false
-- 		  = Keep consonant cluster infix
-- 		 = TODO: Metathesis if syncope
-- 		 = When adding suffix, add to prevent adding "h" at end of root ending with vowel
-- 		 = Return syllabification data
--		 = See dr_adjust()
-- 		 = When adding suffix, add to force ending "e" to become "i"
-- 		 = When adding suffix, add to force ending "o" to become "u"
-- 	}
-- Apply one or more affixes to a word inside `root` and return the affixed text.
-- `root` and `affix` must be strings (asserted); `options` must be a table or nil (nil becomes {}).
-- `affix` may hold several space-separated affixes. The kind of each is read from its hyphens: "-x-" is an infix,
-- "-x" a suffix, "x-" a prefix. Accents are removed from the final text with export.remove_accents(), and the
-- syllabified form from export.syllabify_and_align() is returned when the last `options` test is truthy.
-- NOTE(review): this copy of the file has lost its bracketed expressions: the keys of `affix_actions`, the
-- options-table field names (every bare `options` was presumably an indexed field), and the indices on `words`,
-- `hyph_words`, `affixes` and `affix_actions`. The code is not valid Lua as shown and must be restored from the
-- canonical module.
function export.add_affix(root, affix, options)
	assert(type(root) == "string", "Expected string for root")
	assert(type(affix) == "string", "Expected string for affix")
	
	if options == nil then
		options = {}
	elseif type(options) ~= "table" then
		error("Options parameter must be a table or nil.")
	end
	
	-- Dispatch table from affix type to the helper that applies it.
	local affix_actions = {
		 = add_prefix,
		 = add_infix,
		 = add_suffix
	}
	
	-- word_idx counts the hyphen-separated pieces visited so far; the option normalized below selects which one
	-- receives the affix (defaulting to 1).
	local word_idx = 1
	if tonumber(options) ~= nil and tonumber(options) >= 1 then
		options = tonumber(options)
	else
		options = 1
	end
	
	local words = rsplit(canon_spaces(root), " ")
	for i=1, #words do
		local hyph_words = rsplit(words, "-")
		
		if options == false then
			hyph_words = {words}
		end
			
		for j=1, #hyph_words do
			if (word_idx == options) then
				local affixes = rsplit(canon_spaces(affix), " ")
				-- Note: this loop variable shadows the outer `i`.
				for i=1, #affixes do
					local affix_type = ""
					local new_affix = affixes
					local has_beginning_hyphen = rfind(new_affix, "^-")
					local has_ending_hyphen = rfind(new_affix, "-$")
					
					if has_beginning_hyphen and has_ending_hyphen then
						affix_type = "infix"
					elseif has_beginning_hyphen then
						affix_type = "suffix"
					elseif has_ending_hyphen then
						affix_type = "prefix"
					end
					
					-- Strip the hyphens that only served to mark the affix type.
					new_affix = rsub(new_affix, "^-", "")
					new_affix = rsub(new_affix, "-$", "")
					
					hyph_words = affix_actions(hyph_words, new_affix, options)
				end
			end

			hyph_words = rsub(hyph_words, "ʔ", "")
			word_idx = word_idx + 1
		end
		words = table.concat(hyph_words, '-')
	end

	words = table.concat(words, " ")
	words = export.remove_accents(words)
	if options then
		return export.syllabify_and_align(words)
	end
	
	return words
end

-- Reduplicate part of a word inside `root` and return the resulting text with accents removed.
-- `syllcount` is either "all" (the word is repeated in full, joined by a hyphen) or a number of leading syllables
-- to repeat (nil defaults to 1); any other value raises an error. `options` must be a table or nil.
-- For a numeric count the word is syllabified with export.syllabify_and_align(), the leading syllables are
-- collected into `rdp_add` (the last one possibly trimmed and simplified), and the root goes through
-- d_r_adjust_root() before being appended.
-- NOTE(review): this copy of the file has lost its bracketed expressions: the options-table field names (every bare
-- `options` was presumably an indexed field), the indices on `words` and `rdp_syllabification`, and the character
-- class in the "()" pattern. The code is kept verbatim and must be restored from the canonical module.
-- NOTE(review): mw.logObject(rdp_syllabification) looks like leftover debugging output.
function export.reduplicate(root, syllcount, options)
	local syllable_count = syllcount
	if options == nil then
		options = {}
	elseif type(options) ~= "table" then
		error("Options parameter must be a table or nil.")
	end
	
	-- word_idx counts the words visited so far; the option normalized below selects which one is reduplicated.
	local word_idx = 1
	if tonumber(options) ~= nil and tonumber(options) >= 1 then
		options = tonumber(options)
	else
		options = 1
	end
	
	if syllable_count == nil then
		syllable_count = 1
	end
	
	local words = rsplit(canon_spaces(root), " ")
	for i=1, #words do
		if (word_idx == options) then
			if syllcount == "all" then
				words = words .. "-" .. words
			elseif tonumber(syllable_count) ~= nil then
				syllable_count = tonumber(syllable_count)
				local rdp_word = words
				-- Force a syllable break between adjacent vowels before syllabifying.
				local rdp_syllabification = export.syllabify_and_align(rsub(rdp_word, "(" .. V .. ")" .. "(" .. V .. ")", "%1.%2"))
				rdp_syllabification = rsub(rdp_syllabification, "()", "-.%1")
				rdp_syllabification = rsplit(rdp_syllabification, "%.")
				local rdp_add = ""
				mw.logObject(rdp_syllabification)
				
				for j=1, #rdp_syllabification do
					local rdp_syll = rdp_syllabification
					if j > syllable_count then
						break
					elseif j == syllable_count then
						-- Last syllable to copy: trim it unless the whole word is being repeated.
						if syllable_count ~= #rdp_syllabification then
							if rdp_syll:sub(-1) ~= "-" or syllable_count == 1 then
								rdp_syll = rsub(rdp_syll, "(".. V .. ").*", "%1")
							end
							
							rdp_syll = rsub(rdp_syll, "-$", "")
							
							if options ~= true then
								rdp_syll = rsub(rdp_syll, "(" .. C .. "+)(".. V .. ")", 
									function(consonants, vowel)
										-- Reduce a consonant run to its first byte, except for "ng", "ts" and
										-- runs containing "y"; "ch" is rewritten as "ts".
										if not m_table.contains({"ng", "ts", "ch"}, consonants)
											and not rmatch(consonants, "y") then
											consonants = consonants:sub(1,1)
										elseif consonants == "ch" then
											consonants = "ts"
										end
									
										return consonants .. vowel
									end
								)
							end
						end
					end
				
					rdp_add = rdp_add .. rdp_syll
				end
				
				if options or syllable_count > 1 or rdp_word ~= ulower(rdp_word) then
					rdp_add = rdp_add .. "-"
				end	
					
				if syllable_count == 1 then
					rdp_add = ulower(rdp_add)
				end
				
				rdp_word = d_r_adjust_root(rdp_add, rdp_word, options)
				words = rdp_add .. rdp_word
			else
				error('Syllable count must be numerical or "all".')
			end
		end
		word_idx = word_idx + 1
	end

	words = table.concat(words, " ")
	words = export.remove_accents(words)
	return words
end

-- Pang-angkop/Linker goes here.
-- Append the linker to `text` and return the result. `text` must be a string, otherwise an error is raised.
-- When the end-of-text test passes and the last letter is not uppercase (or `consider_case` is truthy), the linker
-- is added through export.add_affix(input_text, "-ng"); otherwise " na" is appended. The linker is uppercased when
-- the last letter of `text` is uppercase and `consider_case` is truthy.
-- NOTE(review): the pattern "$" in the rfind() call presumably had a character class in front of the anchor that
-- was stripped from this copy -- restore it from the canonical module.
-- NOTE(review): for an empty `text`, rmatch(text, "(.)$") presumably yields nil, which is then passed to ulower() --
-- confirm how that case should behave.
function export.add_linker(text, consider_case)
	if text == nil or type(text) ~= "string" then
		error("There should be an input parameter.")
	end
	
	local orig_text = export.remove_accents(text)
	local input_text = ulower(orig_text)
	local last_letter = rmatch(text, "(.)$")
	local is_last_caps = ulower(last_letter) ~= last_letter
	
	if(rfind(input_text, "$")) and (not is_last_caps or consider_case) then
		input_text = export.add_affix(input_text, "-ng")
	else
		input_text = input_text .. " na"
	end
	
	-- Fix capitalization with what already exists
	-- The linker is whatever follows the first #orig_text bytes of the processed text.
	local linker = input_text:sub(#orig_text + 1)
	input_text = orig_text .. ((is_last_caps and consider_case) and uupper(linker) or linker)
	
	return input_text
end

-- Ad-hoc smoke test: logs the results of several export.reduplicate() calls through mw.logObject().
-- Returns nothing.
-- NOTE(review): the key of the options table passed for "drama" is missing in this copy of the file (the bracketed
-- key appears to have been stripped), so the constructor is not valid Lua as shown.
function export.testing()
	mw.logObject({
		export.reduplicate("ikot", 1),
		export.reduplicate("ngiti", 1),
		export.reduplicate("bundok", 1),
		export.reduplicate("drama", 1, {
			 = true
		}),
		export.reduplicate("dating", 1),
		export.reduplicate("kain", 1),
		export.reduplicate("Tagalog", 1),
		export.reduplicate("sip-unin", 1),
		export.reduplicate("ilaw-trapiko", 3),
	})	
end

return export