local export = {}
local lang = require("Module:languages").getByCode("apc")
local sc = require("Module:scripts").getByCode("Arab")
local function shallow_copy(table)
local new = {}
for k, v in pairs(table) do
new[k] = v
end
return new
end
local function extended(table, with)
local new = shallow_copy(table)
for k, v in pairs(with) do
new[k] = v
end
return new
end
local function extended_arr(arr, with)
local new = shallow_copy(arr)
for _, v in ipairs(with) do
new[#new + 1] = v
end
return new
end
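-- Illustrative behaviour of these helpers:
--   extended({ a = 1 }, { b = 2 })       --> { a = 1, b = 2 }
--   extended_arr({ "x" }, { "y", "z" })  --> { "x", "y", "z" }
-- Both leave their inputs untouched (they work on a shallow_copy).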
local substitutions = {
= "ءا",
= "ء",
= "ء",
= "ء",
= "تش"
}
-- categories to be added to a page as if "{{cln|apc|terms with <text goes here>}}"
local cats = { -- meow
g = "/ɡ/",
q_for_g = "/ɡ/ spelled ⟨ق⟩",
p = "/p/",
v = "/v/",
ue = "/y/",
oe = "/ø/",
an = "adverbial -an",
re = "feminine -re",
nasalization = "nasalization",
msa_interdentals = "Modern Standard Arabic interdentals"
}
-- no real OOP here; a table of chainable helper methods is enough for this module
local function spell(t)
local methods = {
with = function(self, options)
return extended(self, options)
end,
cat = function(self, categories)
local new = shallow_copy(self)
if not new.categories then
new.categories = {}
end
new.categories = extended_arr(new.categories, categories)
return new
end,
spelled = function(self, spellings)
return extended_arr(self, spellings)
end,
emph = function(self, spellings)
if not spellings then
spellings = {}
end
local new = self:with { emphatic = true }
for i = #new, 1, -1 do
local upper = mw.ustring.upper(new[i])
if upper == new[i] then
table.remove(new, i) -- caseless spellings (digits etc.) have no uppercase "emphatic" form
else
new[i] = upper
end
end
return extended_arr(new, spellings)
end,
named = function(self, name)
return self:with { name = name }
end
}
return extended(t, methods):named(t[1]) -- canonical name defaults to the first listed spelling
end
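-- Illustrative chain, given the helpers above: spell { "t" }:emph { "ṭ" } yields a
-- segment whose array part is { "T", "ṭ" } (uppercased spelling plus the explicit
-- one), with name = "t" and emphatic = true; :cat { ... } attaches page categories
-- and :spelled { ... } adds extra accepted transliterations.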
-- table of reusable segment spellings
local sp = {
a = spell { "a" }:with { vowel = true },
A = spell { "A" }:with { vowel = true },
e = spell { "e" }:with { vowel = true },
i = spell { "i" }:with { vowel = true },
I = spell { "I" }:with { vowel = true }, -- mandatory tense
o = spell { "o" }:with { vowel = true },
oe = spell { "oe", "ö" }:with { vowel = true },
u = spell { "u" }:with { vowel = true },
U = spell { "U" }:with { vowel = true }, -- mandatory tense
ue = spell { "ue", "ü" }:with { vowel = true },
glottal_stop = spell { "2", "ʔ" },
h = spell { "h" },
ayn = spell { "3", "ʕ" }:with { pharyngeal = true },
heth = spell { "7", "ḥ", "ħ" }:with { pharyngeal = true },
q = spell { "q" }:with { uvular = true },
gh = spell { "gh", "8", "ḡ" }:with { uvular = true },
kh = spell { "kh", "x", "5", "ḵ", "ḫ" }:with { uvular = true },
g = spell { "g" }:cat { cats.g },
k = spell { "k" },
y = spell { "y" }:with { semivowel = true },
j = spell { "j" },
l = spell { "l" },
r = spell { "r" },
n = spell { "n" },
d = spell { "d" },
t = spell { "t" },
z = spell { "z" },
s = spell { "s" },
dh = spell { "dh" },
th = spell { "th" },
w = spell { "w" }:with { semivowel = true },
m = spell { "m" },
b = spell { "b" },
p = spell { "p" }:cat { cats.p },
v = spell { "v" }:cat { cats.v },
f = spell { "f" },
new = function(spellings)
return spell(spellings)
end,
}
local acceptable_spellings = {
["ء"] = {
sp.glottal_stop,
},
["ا"] = {
sp.a,
sp.A:with { affected = true }, -- for full-fus7a words or loanwords
sp.e, -- for lebanese spellings, esp of loanwords?
sp.n:cat { cats.an },
},
["ب"] = {
sp.b,
sp.p,
},
["ج"] = {
sp.j,
sp.g,
},
["د"] = {
sp.d,
sp.dh:spelled { "z" }:named("dh")
},
["ه"] = {
sp.h,
},
["ة"] = {
sp.t,
sp.e:named("fem_e"),
sp.a:named("fem_a"),
},
["و"] = {
sp.w,
sp.u,
sp.U,
sp.o,
sp.ue,
sp.oe,
},
["ز"] = {
sp.z,
sp.dh:with { affected = true }:cat { cats.msa_interdentals },
},
["ح"] = {
sp.heth,
},
["ط"] = {
sp.t:emph { "ṭ" },
},
["ي"] = {
sp.y,
sp.i,
sp.I,
sp.e,
},
["ى"] = {
sp.a,
},
["ك"] = {
sp.k,
sp.g,
},
["گ"] = { -- key is an assumption: a letter used only for /g/
sp.g,
},
["ل"] = {
sp.l,
},
["م"] = {
sp.m,
},
["ن"] = {
sp.n,
sp.new { "~" }:named("nasalization"):with { suprasegmental = true },
},
["س"] = {
sp.s,
},
["ع"] = {
sp.ayn,
},
["ف"] = {
sp.f,
sp.v,
},
["ڤ"] = {
sp.v,
},
["ص"] = {
sp.s:emph { "ṣ" },
},
["ق"] = {
sp.q:emph():with { affected = true },
sp.q,
sp.glottal_stop, -- for loanwords or alternative spellings?
sp.g:cat { cats.q_for_g }, -- for "bedouin" words if needed
},
["ر"] = {
sp.r,
sp.r:emph { "ṛ" },
},
["ش"] = {
sp.sh,
},
["ت"] = {
sp.t,
sp.th:spelled { "s" },
},
["ث"] = {
sp.s:spelled { "s" }:named("th"):with { affected = true }:cat { cats.msa_interdentals },
sp.t:named("th"),
},
["خ"] = {
sp.kh,
},
["ذ"] = {
sp.z:spelled { "z" }:named("dh"):with { affected = true }:cat { cats.msa_interdentals },
sp.d:named("dh"),
},
["ض"] = {
sp.d:named("dh"):emph(),
sp.z:named("dh"):emph():with { affected = true },
},
["ظ"] = {
sp.d:named("dh"):emph(),
sp.z:named("dh"):emph():with { affected = true },
},
["غ"] = {
sp.gh,
},
suprasegmentals = { -- key name reconstructed (assumption); markers that may follow any letter
sp.new { "*" }:named("force"):with { suprasegmental = true, erase = true },
sp.new { '"', "ˈ" }:named("stress"):with { suprasegmental = true },
sp.new { "." }:named("syllable"):with { suprasegmental = true },
sp.new { "^", "ˤ" }:named("emphatic"):with { suprasegmental = true },
sp.new { ":", "ː" }:named("length"):with { suprasegmental = true },
sp.new { "(" }:with { suprasegmental = true },
sp.new { ")" }:with { suprasegmental = true },
},
}
local spelling_to_segment_map = {}
for _ar_char, spellings in pairs(acceptable_spellings) do
for _, segment in ipairs(spellings) do
if not segment.suprasegmental then
for _, spelling in ipairs(segment) do
spelling_to_segment_map[spelling] = segment
end
end
end
end
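-- e.g. spelling_to_segment_map["7"], ["ḥ"] and ["ħ"] all point at the same heth
-- segment; suprasegmentals like "*" and "." are deliberately left out of the map.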
local ipa = {
nasalization = { phonemic = "̃", phonetic = "̃" },
emphatic = { phonemic = "ˤ", phonetic = "ʶ" }, -- dunno about uvularization but maybe (wish we could have variants here, uvular/velar/etc)
syllable = { phonemic = "", phonetic = "" },
stress = { phonemic = "ˈ", phonetic = "ˈ" },
length = { phonemic = "ː", phonetic = "ː" },
schwa = { phonemic = "(ᵊ)", phonetic = "(e)" }, -- could add round_schwa and unround_schwa with diff phonetic values
low_back_a = { phonemic = "a", phonetic = "ɑ" },
plain_a = { phonemic = "a", phonetic = "a" },
front_a = { phonemic = "a", phonetic = "æ" },
e = { phonemic = "e", phonetic = "e" },
lax_medial_i = { phonemic = "e", phonetic = "e" }, -- could be phonemic = "i"
tense_medial_i = { phonemic = "i", phonetic = "i" },
final_i = { phonemic = "i", phonetic = "i" },
vocalic_y = { phonemic = "j", phonetic = "i" },
o = { phonemic = "o", phonetic = "o" },
lax_medial_u = { phonemic = "o", phonetic = "o" }, -- could be phonemic = "u"
tense_medial_u = { phonemic = "u", phonetic = "u" },
final_u = { phonemic = "u", phonetic = "u" },
final_o = { phonemic = "o", phonetic = "o" }, -- used for word-final -o below
vocalic_w = { phonemic = "w", phonetic = "u" },
w = { phonemic = "w", phonetic = "w" },
m = { phonemic = "m", phonetic = "m" },
v = { phonemic = "v", phonetic = "v" },
f = { phonemic = "f", phonetic = "f" },
b = { phonemic = "b", phonetic = "b" },
p = { phonemic = "p", phonetic = "p" },
n = { phonemic = "n", phonetic = "n" },
z = { phonemic = "z", phonetic = "z" },
s = { phonemic = "s", phonetic = "s" },
d = { phonemic = "d", phonetic = "d" },
t = { phonemic = "t", phonetic = "t" },
dh = { phonemic = "ð", phonetic = "ð" },
th = { phonemic = "θ", phonetic = "θ" },
trilled_r = { phonemic = "r", phonetic = "r" },
flapped_r = { phonemic = "r", phonetic = "ɾ" },
l = { phonemic = "l", phonetic = "l" },
ch = { phonemic = "tʃ", phonetic = "tʃ" },
zh = { phonemic = "ʒ", phonetic = "ʒ" },
sh = { phonemic = "ʃ", phonetic = "ʃ" },
y = { phonemic = "j", phonetic = "j" },
g = { phonemic = "ɡ", phonetic = "ɡ" },
k = { phonemic = "k", phonetic = "k" },
gh = { phonemic = "ʁ", phonetic = "ʁ" }, -- could be phonetic = "ɣ"
kh = { phonemic = "χ", phonetic = "χ" }, -- could be phonetic = "x"
q = { phonemic = "q", phonetic = "q" },
ayn = { phonemic = "ʕ", phonetic = "ʕ" },
heth = { phonemic = "ħ", phonetic = "ħ" },
h = { phonemic = "h", phonetic = "h" },
glottal_stop = { phonemic = "ʔ", phonetic = "ʔ" },
}
for name, o in pairs(ipa) do
o.name = name
end
local function ifelse(condition, a, b)
if condition then
return a
end
return b
end
local function canonize_tr(tr, spellings)
local found_segment
for _, spelling in ipairs(spellings) do
-- XXX: is require() fine to call in hot-path code like this?
if require("Module:table").contains(spelling, tr) then
-- if a sub-array in spellings has no `name` field, then the first
-- listed spelling variant counts as the canonical name
found_segment = spelling
break
end
end
return found_segment
end
local function string_to_array(translit)
local translit_array = {}
local currently_polygraph = (
mw.ustring.match(mw.ustring.sub(translit, 1, 1), "%s")
or select(2, mw.ustring.gsub(translit, "%s", "")) % 2 == 1
)
for chars in string.gmatch(translit, "%S+") do
if currently_polygraph then
translit_array[#translit_array + 1] = chars
else
for ch in mw.ustring.gmatch(chars, ".") do -- iterate codepoints so spellings like "ḥ" stay whole
translit_array[#translit_array + 1] = ch
end
end
currently_polygraph = not currently_polygraph
end
return translit_array
end
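-- Illustrative splits: '"kil.me' has no spaces, so every character is its own token:
-- { '"', "k", "i", "l", ".", "m", "e" }. Spaces toggle polygraph chunks, e.g.
-- (hypothetical input) "kh ibz" --> { "kh", "i", "b", "z" }.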
local function vocalize(target, translit)
local error_needs_force
local previous_found_spelling
local found_segment
local result = {}
local ar_i = 1
local tr_i = 1
local b = {}
translit = string_to_array(translit)
while true do
local tr = translit[tr_i]
local ar = mw.ustring.sub(target, ar_i, ar_i)
b[#b + 1] = tr .. ar -- debug trail of (translit, Arabic) pairs; currently unused
local spellings = acceptable_spellings[ar]
if not spellings then
error("Unrecognized character " .. ar .. " in Arabic script")
end
previous_found_spelling = found_segment
found_segment = canonize_tr(tr, spellings) or canonize_tr(tr, acceptable_spellings.suprasegmentals)
-- ar: k, dh, a, b
-- tr: k, a, z, ", z, a, a, b
--
-- k k: found spelling -> k
-- k (a): not found spelling, vowel -> a
-- (dh) (z): found spelling -> fus7a dh
-- dh ("): found spelling, suprasegmental -> stress
-- dh (z): found spelling -> fus7a dh
-- dh (a): not found spelling, vowel -> a
-- (a) (a): found spelling, vowel -> a
-- (b) b: found spelling -> b
-- ar: k, l, m, ة
-- tr: k, i, l, ., m, e
--
-- k k: found spelling
-- k (i): not found spelling, vowel -> i
-- (l) (l): found spelling -> l
-- l (m): not found spelling -> m
-- l (e)
-- ar: k, dh, b
-- tr: ", k, a, z, ., z, a, b
--
-- k ": found spelling, suprasegmental -> stress
-- k (k): found spelling -> k
-- k (a): not found spelling, vowel -> a
-- (dh) (z): found spelling -> fus7a dh
-- dh (.): found spelling, suprasegmental -> syllable
-- dh (z): found spelling -> fus7a dh
-- dh (a): not found spelling, vowel -> a
-- (b) (b): found spelling -> b
-- ar: T, y, a, r, ة
-- tr: t, i, y, y, a, a, r, a
--
-- T t: not found spelling
-- T (i): not found spelling, vowel -> error
-- ar: T, y, a, r, ة
-- tr: t, *, i, y, y, a, a, r, a
--
-- T t: found spelling -> t
-- T (*): found spelling, force -> /
-- T (i): not found spelling, vowel -> i
-- (y) (y): found spelling -> y
-- y (y): found spelling -> y
-- y (a): not found spelling, vowel -> a
-- (a) (a): found spelling, vowel -> a
-- (r) (r): found spelling -> r
-- r (a): not found spelling, vowel -> a
-- (ة) a: not found spelling, allows nil -> pass
if not found_segment and not previous_found_spelling then
error(error_needs_force or "Unrecognized spelling")
elseif not found_segment then
-- XXX: currently we allow any suprasegmental character to behave like "*" in stopping error_needs_force from being raised lol
found_segment = spelling_to_segment_map[tr]
if not found_segment then
error("Unrecognized transliteration character " .. tr)
end
if found_segment.name == "emphatic" then
result[#result] = result[#result]:with { emphatic = true }
elseif not found_segment.erase then
result[#result + 1] = found_segment
end
-- error to possibly be thrown on next iteration
error_needs_force = (
"Unexpected transliteration of Arabic " .. ar .. ": " .. tr ..
". If this is intentional, spell it as " .. tr .. "*, with an asterisk."
)
if found_segment.vowel then
ar_i = ar_i + 1
end
tr_i = tr_i + 1
else
if found_segment.name == "emphatic" then
result[#result] = result[#result]:with { emphatic = true }
elseif not found_segment.erase then
result[#result + 1] = found_segment
end
if found_segment.vowel then
ar_i = ar_i + 1
end
tr_i = tr_i + 1
end
if tr_i > #translit or ar_i > mw.ustring.len(target) then
break
end
end
return result
end
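-- vocalize() returns an ordered array of the segment tables defined above, one per
-- output sound; "." and the stress mark are kept as segments, while "*" (erase) and
-- "^" (which only flags the previous segment as emphatic) are not appended.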
local function gather_categories(translit)
local clns = {}
for _, v in ipairs(translit) do
if v.categories then
for _, category in ipairs(v.categories) do
clns = "North Levantine Arabic terms with " .. category
end
end
end
return clns
end
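-- e.g. a token built with :cat { cats.p } contributes
-- "North Levantine Arabic terms with /p/" to the returned list.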
local function _bake_forwards_env_of(env, token, next_token, next_seg)
if token.name == "syllable" or token.name == "stress" then
env.position_in_coda = nil
env.after_pharyngeal = false
env.after_other_back = false
env.stressed = token.name == "stress"
end
if token.vowel then
if env.position_in_coda and env.position_in_coda > 0 then
error("Multiple vowels in a syllable")
end
env.position_in_coda = 0
end
if env.position_in_coda and not token.vowel then
env.position_in_coda = env.position_in_coda + 1
end
if env.position_in_coda and env.position_in_coda > 0 then
-- will be backfilled during the reverse pass
env.closed_syllable = true
end
if not env.position_in_coda and token.pharyngeal then
env.after_pharyngeal = true
end
if not env.position_in_coda and token.uvular then
env.after_other_back = true
end
-- not accounting for when h can back stuff, use |ban= or spell alif as AA if need to avoid imala
env.first_of_long = false
if next_token and next_token.name == token.name and next_token.vowel and token.vowel then
env.long_vowel = true
env.first_of_long = true
end
if not token.vowel then
env.long_vowel = false
end
if next_seg and next_seg.name == token.name and not next_seg.vowel and not token.vowel then
env.geminate = token.name
env.first_of_long = true
end
if token.vowel or token.name ~= env.geminate then
env.geminate = false
end
if token.name == "a" and not env.long_vowel and next_token and next_token.semivowel then
env.diphthong = next_token.name
end
if env.diphthong and token.name ~= "a" and not token.semivowel then
-- this probably leaves false positives for word-final ayy and aww, fixed below
env.diphthong = false
end
if token.emphatic then
env.emphatic = true
end
-- simplistic rule: any i stops emphasis spread
if token.name == "i" then
env.emphatic = false
end
end
local function _bake_backwards_env_of(env, token, next_token, trackers)
if next_token and next_token.name == token.name and next_token.vowel and token.vowel then
env.long_vowel = true
end
if env.emphatic or token.emphatic then
trackers.backwards_emphasis_spread = true
end
if token.name == "i" then
trackers.backwards_emphasis_spread = false
end
if env.closed_syllable then
trackers.in_closed_syllable = true
end
if env.diphthong and env.geminate then
trackers.consecutive_semivowel = true
env.diphthong = false
end
if env.diphthong and trackers.consecutive_semivowel then
env.diphthong = false
end
if token.vowel then
trackers.consecutive_semivowel = false
end
env.closed_syllable = trackers.in_closed_syllable
env.final_syllable = trackers.in_final_syllable
env.emphatic = trackers.backwards_emphasis_spread
if token.name == "syllable" then
trackers.in_final_syllable = false
trackers.in_closed_syllable = false
end
end
local function bake_environment(tokens)
local segment_envs = {}
local prev_env = {
final_syllable = false,
word_end = false,
position_in_coda = nil,
closed_syllable = false,
stressed = false,
emphatic = false,
after_pharyngeal = false,
after_other_back = false,
long_vowel = false,
geminate = false,
first_of_long = false,
diphthong = false,
}
for i, token in ipairs(tokens) do
local env = {}
for k, v in pairs(prev_env) do
env[k] = v
end
prev_env = env
local next_token = tokens[i + 1]
local next_seg = next_token
if next_seg and next_seg.suprasegmental then
next_seg = tokens[i + 2]
end
_bake_forwards_env_of(env, token, next_token, next_seg)
segment_envs[i] = env
end
local trackers = {
backwards_emphasis_spread = false,
in_final_syllable = true,
in_closed_syllable = false,
consecutive_semivowel = false,
}
for i = #segment_envs, 1, -1 do
local environment = segment_envs[i]
local token = tokens[i]
local next_token = tokens[i + 1]
_bake_backwards_env_of(environment, token, next_token, trackers)
end
-- set up word end
-- does not cover diphthongs which afaict is fine, this is more for short vowels anyway
for i = #segment_envs, 1, -1 do
if not tokens[i].vowel then
break
end
segment_envs[i].word_end = true
end
return segment_envs
end
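-- segment_envs[i] describes token i's surroundings: the forward pass fills
-- position_in_coda, stressed, emphatic (spreading rightwards), after_pharyngeal,
-- after_other_back and the long_vowel/geminate/diphthong flags; the backward pass
-- backfills closed_syllable, final_syllable and leftward emphasis spread, and the
-- last loop marks trailing vowels with word_end.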
local function make_ipa(tokens, segment_environments)
-- i want to stop this logic from blowing up too much
-- i only want to handle:
-- * interdentals: affected vs native with vs native without
-- * q: main glottal stop variant vs "druze, coastal syrian" q variant
-- * imala: variant with "chiefly lebanon" -- ignored around emphatics, which if needed for eg طاولة or
-- for coastal dialects can be done by explicitly specifying different options
-- * diphthongs: "chiefly lebanon, regional" option vs default monophthongized option (not sure if final-syllable rule would be too much to add)
-- * final vowels:
-- * -e > -i with "lebanon, regional"
-- * -i > -e with "regional, chiefly lebanon"
-- * other vowels:
-- * short i > short e, short u > short o in stressed syllables and non-final closed syllables
-- * regional lebanese iC# and uC# will be frustrating to implement here, not sure
-- * epenthetics: always present, in parentheses (prob can't do beqaa/qalamoun CV:CC unfortunately)
--
-- this will give false positives, which is what ban= will be for
-- (also i think automatic handling of variation in u/i would be too much here unfortunately)
-- should this include tracking for v, p, g, etc in order to offer variants where they're pronounced as f/b/k?
-- for now that can be manual i guess
local features = {
interdentals = {
affected = { replacement = { z = ipa.dh, s = ipa.th } },
native = { replacement = { d = ipa.dh, z = ipa.dh, t = ipa.th, s = ipa.th } },
},
epenthetics = {
absent = { replacement = { schwa = "" } }, -- epenthetic = null
},
fem = {
affected_plain_a = { replacement = { e = ipa.plain_a } },
affected_emphatic_a = { replacement = { e = ipa.low_back_a } },
},
q = {
native = {
-- Q and q = q
replacement = { glottal_stop = ipa.q },
},
affected_2 = {
replacement = { q = ipa.glottal_stop, emphatic = "" },
},
affected_k = {
-- Q = k like 2iktiSaad
replacement = { glottal_stop = ipa.k, emphatic = "" },
},
},
imala = {
only_plain = {
replacement = { front_a = ipa.e },
},
plain_or_back = {
replacement = { front_a = ipa.e, plain_a = ipa.e },
},
},
final_vowels = {
lebanese_i = {
-- final -e = -i
replacement = { e = ipa.i },
},
lebanese_lax = {
-- final -i = -e
replacement = { final_i = ipa.e, final_u = ipa.o },
},
},
diphthongs = {
absent_medial_w = {
replacement = {
plain_a = ipa.o,
front_a = ipa.o,
low_back_a = ipa.o,
w = ipa.length,
},
},
absent_medial_y = {
replacement = {
plain_a = ipa.e,
front_a = ipa.e,
low_back_a = ipa.e,
y = ipa.length,
},
},
absent_final_w = {
replacement = {
plain_a = ipa.o,
front_a = ipa.o,
low_back_a = ipa.o,
w = ipa.length,
},
},
absent_final_y = {
replacement = {
plain_a = ipa.e,
front_a = ipa.e,
low_back_a = ipa.e,
y = ipa.length,
},
},
},
-- TODO add options for french vowels
}
local accents = {
{
features.diphthongs.absent_medial_w,
features.diphthongs.absent_medial_y,
features.diphthongs.absent_final_w,
features.diphthongs.absent_final_y,
none_of = {
features.imala.only_plain,
features.imala.plain_or_back,
},
name = "broad_syria",
variants = {
{ features.q.affected_2 },
},
accent = { "chiefly", "Syria" },
},
{
features.imala.only_plain,
features.final_vowels.lebanese_lax,
none_of = {
features.diphthongs.absent_medial_w,
features.diphthongs.absent_medial_y,
features.diphthongs.absent_final_w,
features.diphthongs.absent_final_y,
},
name = "broad_lebanon",
accent = { "chiefly", "Lebanon" },
variants = {
{ features.q.affected_2 },
{ features.q.affected_k },
{ features.imala.plain_or_back, accent = { "regional" } },
{ features.final_vowels.lebanese_i, accent = { "regional" } },
{
features.diphthongs.absent_final_w,
features.diphthongs.absent_final_y,
}
},
},
{
features.interdentals.affected,
name = "affected",
accent = { "affected" },
variants = {
{
features.fem.affected_emphatic_a,
features.fem.affected_plain_a,
},
},
atop = { "broad_syria", "broad_lebanon" },
},
{
features.interdentals.native,
name = "broad_rural",
accent = { "!rural" },
atop = { "broad_syria", "broad_lebanon" }
},
{
features.q.native,
name = "druze",
accent = { "traditional <<Druze>>" },
atop = { "broad_syria", "broad_lebanon", "broad_rural" },
},
{
features.q.native,
name = "coastal_syria",
accent = { "coastal <<Syria>>" },
atop = { "broad_syria", "broad_lebanon" },
},
}
local ipa_chars = {
add = function(self, default, tables, previous)
if not default then
error("nil ipa char after " .. self.name)
end
self[#self + 1] = default
self:variant(tables, previous)
end,
variant = function(self, tables, previous)
for _, t in ipairs(tables or {}) do
if previous then
t[#t + 1] = #self - 1
end
t[#t + 1] = #self
end
end
}
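-- ipa_chars:add(ch, feats) appends the IPA character and records its index into every
-- feature table in `feats`; an accent's `replacement` map can then swap exactly those
-- positions later (e.g. features.imala.only_plain remembers where front_a landed so
-- the "chiefly Lebanon" group can turn it into ipa.e).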
for i, tok in ipairs(tokens) do
local env = segment_environments[i]
local prev_env = segment_environments[i - 1] or {}
if env.position_in_coda == 2 and not prev_env.diphthong and not env.geminate and not tok.semivowel then
ipa_chars:add(ipa.schwa, { features.epenthetics.absent })
end
if env.position_in_coda and env.position_in_coda > 0 and tok.vowel then
error("Multiple vowels in a syllable")
end
if tok.name == "syllable" then
ipa_chars:add(ipa.syllable)
elseif tok.name == "stress" then
ipa_chars:add(ipa.stress)
elseif tok.name == "(" then
ipa_chars:add("(")
elseif tok.name == ")" then
ipa_chars:add(")")
elseif tok.vowel and env.long_vowel and not env.first_of_long then
ipa_chars:add(ipa.length)
elseif tok.name == "q" then
ipa_chars:add(
ifelse(tok.affected, ipa.q, ipa.glottal_stop),
ifelse(
tok.affected,
{
features.q.native,
features.q.affected_k,
features.q.affected_2,
},
{ features.q.native }
)
)
elseif tok.name == "dh" then
ipa_chars:add(
ifelse(tok.affected, ipa.z, ipa.d),
ifelse(
tok.affected,
{ features.interdentals.affected, features.interdentals.native },
{ features.interdentals.native }
)
)
elseif tok.name == "th" then
ipa_chars:add(
ifelse(tok.affected, ipa.s, ipa.t),
ifelse(
tok.affected,
{ features.interdentals.affected, features.interdentals.native },
{ features.interdentals.native }
)
)
elseif env.diphthong and not tok.vowel then
local key = "absent_medial_"
if env.final_syllable then
key = "absent_final_"
end
ipa_chars:add(
ipa[tok.name],
{ features.diphthongs[key .. tok.name] }
)
elseif tok.name == "a" and env.long_vowel and not env.emphatic then
-- should plain_or_back just be back and not include plain? would that make it easier to generate the final display forms for the wiki?
ipa_chars:add(
ipa.front_a,
ifelse(
env.after_other_back,
{ features.imala.plain_or_back },
{
features.imala.only_plain,
features.imala.plain_or_back,
}
)
)
elseif tok.name == "a" and (env.after_pharyngeal or env.word_end) and not env.emphatic then
ipa_chars:add(ipa.plain_a, ifelse(env.long_vowel, { features.imala.plain_or_back }, {}))
if env.diphthong then
local key = "absent_medial_"
if env.final_syllable then
key = "absent_final_"
end
ipa_chars:variant {
features.diphthongs[key .. env.diphthong]
}
end
elseif tok.name == "A" or tok.name == "a" then
ipa_chars:add(
ifelse(
env.emphatic,
ipa.low_back_a,
ifelse(
env.diphthong == "w",
ipa.plain_a,
ipa.front_a
)
)
)
if env.diphthong then
local key = "absent_medial_"
if env.final_syllable then
key = "absent_final_"
end
ipa_chars:variant {
features.diphthongs[key .. env.diphthong]
}
end
elseif (
(tok.name == "i" or tok.name == "u")
and env.long_vowel
and env.closed_syllable
and #segment_environments >= i + 2
and segment_environments[i + 2].geminate
and tokens[i + 2].name ~= "syllable"
) then
-- add nothing -- this is specifically cooked up for cases like grippe
-- which i believe i have as /gri:pp/ (notice tense )
-- i'm also cutting out tok.name == "a" just in case even though i don't think
-- we have any false positives (eg this rule could erroneously produce
-- /mawe:dd/ or /mawa:dd/ if i allowed it to apply to a,
-- but i think the word is just /mawa:d/ /mawe:d/ anyway)
elseif tok.name == "i" and env.long_vowel then
ipa_chars:add(ipa.tense_medial_i)
elseif tok.name == "u" and env.long_vowel then
ipa_chars:add(ipa.tense_medial_u)
elseif tok.name == "I" then
ipa_chars:add(ipa.tense_medial_i)
elseif tok.name == "i" then
if not env.long_vowel and not env.word_end then
ipa_chars:add(
ifelse(
env.stressed or env.closed_syllable,
ipa.lax_medial_i,
ipa.tense_medial_i
)
)
elseif env.word_end then
ipa_chars:add(ipa.final_i, { features.final_vowels.lebanese_lax })
end
elseif tok.name == "e" and env.word_end then
ipa_chars:add(ipa.e, { features.final_vowels.lebanese_i })
elseif tok.name == "U" then
ipa_chars:add(ipa.tense_medial_u)
elseif tok.name == "u" then
if not env.long_vowel and not env.word_end then
ipa_chars:add(
ifelse(
env.stressed or env.closed_syllable,
ipa.lax_medial_u,
ipa.tense_medial_u
)
)
elseif env.word_end then
ipa_chars:add(ipa.final_u, { features.final_vowels.lebanese_lax })
end
elseif tok.name == "o" and env.word_end then
-- don't know of an -u dialect
ipa_chars:add(ipa.final_o)
elseif tok.name == "7" then
ipa_chars:add(ipa.heth)
elseif tok.name == "x" then
ipa_chars:add(ipa.kh)
elseif tok.name == "3" then
ipa_chars:add(ipa.ayn)
elseif tok.name == "2" then
ipa_chars:add(ipa.glottal_stop)
elseif tok.name == "c" then
ipa_chars:add(ipa.ch)
elseif tok.name == "8" then
ipa_chars:add(ipa.gh)
elseif tok.name == "j" then
ipa_chars:add(ipa.zh)
elseif tok.name == "r" then
ipa_chars:add(ifelse(env.geminate, ipa.trilled_r, ipa.flapped_r))
elseif tok.name == "fem_e" or tok.name == "fem_a" then
ipa_chars:add(
ifelse(
tok.name == "fem_e",
ipa.e,
ifelse(env.emphatic, ipa.low_back_a, ipa.plain_a)
),
ifelse(
env.emphatic,
{ features.fem.affected_emphatic_a },
{ features.fem.affected_plain_a }
)
)
elseif tok.semivowel and env.position_in_coda == 2 and not env.geminate then
ipa_chars:add(ipa[tok.name])
else
-- the mapping from token name to ipa is really sloppy, haven't checked if every token name has a corresponding entry in ipa table
ipa_chars:add(ipa[tok.name] or ipa[tok[1]] or error("no ipa: " .. tok.name))
end
if tok.emphatic then
ipa_chars:add(
ipa.emphatic,
ifelse(
tok.name == "q" and tok.affected,
{ features.q.affected_2 },
{}
)
)
end
end
local accent_groups = {}
for _, accent_features in ipairs(accents) do
local group = {variants = {}, accent = accent_features.accent}
accent_groups[accent_features.name] = group
local atop = {}
if accent_features.atop then
-- accent_name: broad_lebanon, broad_syria
for _, accent_name in ipairs(accent_features.atop) do
-- src_grp: data for broad_lebanon, data for broad_syria
for _, src_grp in ipairs(accent_groups[accent_name]) do
-- src_grp has `accent =` already so only extend with name
atop[#atop + 1] = extended(src_grp, { name = accent_name })
end
end
else
atop = { extended(ipa_chars, { name = "main", accent = {} }) }
end
for i, source_accent in ipairs(atop) do
local current_accent = accent_features.accent or {}
group[i] = extended(source_accent, { accent = source_accent.accent or {} })
-- group[i] = extended(
-- source_accent,
-- { accent = extended_arr(current_accent, source_accent.accent or {}) }
--)
local contributed = false
-- repl: (indices of imala), (indices of q)
for _, repl in ipairs(accent_features) do
-- idx: (index of an imala), (index of a q)
for _, idx in ipairs(repl) do
contributed = true
local ch = group[i][idx]
if repl.replacement then
group[i][idx] = repl.replacement[ch.name]
end
end
end
if not contributed and accent_features.none_of then
for _, variant in ipairs(accent_features.none_of) do
if #variant > 0 then
contributed = true
break
end
end
end
if contributed then
group[i].accent = extended_arr(group[i].accent, current_accent)
end
end
for _, variant_list in ipairs(accent_features.variants or {}) do
local use = false
for _, original in ipairs(group) do
local new = extended(
original,
{
-- comment this to not repeat/qualify the entire accent
-- for variants (eg "chiefly Lebanon, regional" vs
-- just "regional")
--
accent = extended_arr(
original.accent or {},
variant_list.accent or {}
)
-- accent = variant_list.accent or {}
}
)
for _, variant in ipairs(variant_list) do
if #variant > 0 then
use = true
end
for _, idx in ipairs(variant) do
local ch = original[idx]
if variant.replacement then
new[idx] = variant.replacement[ch.name]
end
end
end
if use then
group.variants[#group.variants + 1] = new
end
end
end
end
-- error(mw.dumpObject(accent_groups))
local pronunciations = {}
local seen_at_position = {}
for _, group in pairs(accent_groups) do
local g = {accent = group.accent}
for _, pronunciation in ipairs(group) do
local use = false
local sap_recursive = seen_at_position
for _, ch in ipairs(pronunciation) do
if not sap_recursive[ch] then
use = true
sap_recursive[ch] = {}
end
sap_recursive = sap_recursive[ch]
end
if use then
g[#g + 1] = pronunciation
end
end
for _, variant in ipairs(group.variants) do
local use = false
local sap_recursive = seen_at_position
for _, ch in ipairs(variant) do
if not sap_recursive[ch] then
use = true
sap_recursive[ch] = {}
end
sap_recursive = sap_recursive[ch]
end
if use then
g[#g + 1] = variant
end
end
if #g > 0 then
pronunciations[#pronunciations + 1] = g
end
end
return pronunciations
end
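-- Template-facing entry point. |1= is the Arabic spelling (defaults to the page name),
-- |tr= the transliteration, |q= an optional qualifier and |ban= a comma-separated list
-- of patterns; any generated pronunciation matching a |ban= pattern is dropped.
-- A hypothetical invocation (module name here is only illustrative):
-- {{#invoke:apc-IPA|show|كلمة|tr="kil.me}}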
function export.show(frame)
local params = {
[1] = { required = false },
["tr"] = { required = true },
["q"] = { required = false },
["ban"] = { required = false, sublist = true },
}
local args = require("Module:parameters").process(frame:getParent().args, params)
local target = args[1]
local translit = args.tr
local qual = args.q
if not target then
local title = mw.title.getCurrentTitle()
if title.nsText == "Template" then
target = "كلمة"
translit = '"kil.me'
else
target = title.subpageText
end
end
target = mw.ustring.gsub(target, ".", substitutions)
local tokens = vocalize(target, translit)
local pronunciations = make_ipa(tokens, bake_environment(tokens))
local formatted_groups = {}
for _, group in ipairs(pronunciations) do
local formatted_pronunciations = {}
for i, pronunciation in ipairs(group) do
local phonemes = {}
local phones = {}
for ch_i, ch in ipairs(pronunciation) do
if ch.phonemic or ch.phonetic then
phonemes[#phonemes + 1] = ch.phonemic or ch.phonetic
phones[#phones + 1] = ch.phonetic or ch.phonemic
else
-- bare string characters like "(" have no phonemic/phonetic fields
phonemes[#phonemes + 1] = ch
phones[#phones + 1] = ch
end
end
local phonemic = "/" .. table.concat(phonemes) .. "/"
local phonetic = ""
local pron = phonemic .. " " .. phonetic
local banden = false
for _, pattern in ipairs(args.ban or {}) do
if mw.ustring.find(pron, pattern) then
banden = true
end
end
if not banden then
formatted_pronunciations[#formatted_pronunciations + 1] = {
pron = phonemic .. " " .. phonetic,
a = pronunciation.accent,
}
end
end
if #formatted_pronunciations > 0 then
if #formatted_groups == 0 then
formatted_groups = require("Module:IPA").format_IPA_full {
lang = lang,
q = { qual },
items = formatted_pronunciations
}
else
formatted_groups = require("Module:IPA").format_IPA_multiple(
lang,
formatted_pronunciations
)
end
end
end
return (
require("Module:columns").create_list { column_count = 1, content = formatted_groups }
.. require("Module:utilities").format_categories(gather_categories(tokens), lang)
)
end
return export