local export = {}
local lang = require("Module:languages").getByCode("apc")
local sc = require("Module:scripts").getByCode("Arab")
local function shallow_copy(table)
local new = {}
for k, v in pairs(table) do
new[k] = v
end
return new
end
local function extended(table, with)
local new = shallow_copy(table)
for k, v in pairs(with) do
new[k] = v
end
return new
end
local function extended_arr(arr, with)
local new = shallow_copy(arr)
for _, v in ipairs(with) do
new[#new + 1] = v
end
return new
end
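-- Illustrative behaviour of these helpers:
--   extended({ a = 1 }, { b = 2 })       --> { a = 1, b = 2 }
--   extended_arr({ "x" }, { "y", "z" })  --> { "x", "y", "z" }
-- Both leave their inputs untouched (they work on a shallow_copy).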
local substitutions = {
= "ءا",
= "ء",
= "ء",
= "ء",
= "تش"
}
-- categories to be added to a page as if "{{cln|apc|terms with <text goes here>}}"
local cats = { -- meow
g = "/ɡ/",
q_for_g = "/ɡ/ spelled ⟨ق⟩",
p = "/p/",
v = "/v/",
ue = "/y/",
oe = "/ø/",
an = "adverbial -an",
re = "feminine -re",
nasalization = "nasalization",
msa_interdentals = "Modern Standard Arabic interdentals"
}
-- no real OOP here; a table of chainable helper methods is enough for this module
local function spell(t)
local methods = {
with = function(self, options)
return extended(self, options)
end,
cat = function(self, categories)
local new = shallow_copy(self)
if not new.categories then
new.categories = {}
end
new.categories = extended_arr(new.categories, categories)
return new
end,
spelled = function(self, spellings)
return extended_arr(self, spellings)
end,
emph = function(self, spellings)
if not spellings then
spellings = {}
end
local new = self:with { emphatic = true }
for i = #new, 1, -1 do
local upper = mw.ustring.upper(new[i])
if upper == new[i] then
table.remove(new, i) -- caseless spellings (digits etc.) have no uppercase "emphatic" form
else
new[i] = upper
end
end
return extended_arr(new, spellings)
end,
named = function(self, name)
return self:with { name = name }
end
}
return extended(t, methods):named(t[1]) -- canonical name defaults to the first listed spelling
end
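-- Illustrative chain, given the helpers above: spell { "t" }:emph { "ṭ" } yields a
-- segment whose array part is { "T", "ṭ" } (uppercased spelling plus the explicit
-- one), with name = "t" and emphatic = true; :cat { ... } attaches page categories
-- and :spelled { ... } adds extra accepted transliterations.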
-- table of reusable segment spellings
local sp = {
a = spell { "a" }:with { vowel = true },
A = spell { "A" }:with { vowel = true },
e = spell { "e" }:with { vowel = true },
i = spell { "i" }:with { vowel = true },
I = spell { "I" }:with { vowel = true }, -- mandatory tense
o = spell { "o" }:with { vowel = true },
oe = spell { "oe", "ö" }:with { vowel = true },
u = spell { "u" }:with { vowel = true },
U = spell { "U" }:with { vowel = true }, -- mandatory tense
ue = spell { "ue", "ü" }:with { vowel = true },
glottal_stop = spell { "2", "ʔ" },
h = spell { "h" },
ayn = spell { "3", "ʕ" }:with { pharyngeal = true },
heth = spell { "7", "ḥ", "ħ" }:with { pharyngeal = true },
q = spell { "q" }:with { uvular = true },
gh = spell { "gh", "8", "ḡ" }:with { uvular = true },
kh = spell { "kh", "x", "5", "ḵ", "ḫ" }:with { uvular = true },
g = spell { "g" }:cat { cats.g },
k = spell { "k" },
y = spell { "y" }:with { semivowel = true },
j = spell { "j" },
l = spell { "l" },
r = spell { "r" },
n = spell { "n" },
d = spell { "d" },
t = spell { "t" },
z = spell { "z" },
s = spell { "s" },
dh = spell { "dh" },
th = spell { "th" },
w = spell { "w" }:with { semivowel = true },
m = spell { "m" },
b = spell { "b" },
p = spell { "p" }:cat { cats.p },
v = spell { "v" }:cat { cats.v },
f = spell { "f" },
new = function(spellings)
return spell(spellings)
end,
}
local acceptable_spellings = {
["ء"] = {
sp.glottal_stop,
},
["ا"] = {
sp.a,
sp.A:with { affected = true }, -- for full-fus7a words or loanwords
sp.e, -- for lebanese spellings, esp of loanwords?
sp.n:cat { cats.an },
},
["ب"] = {
sp.b,
sp.p,
},
["ج"] = {
sp.j,
sp.g,
},
["د"] = {
sp.d,
sp.dh:spelled { "z" }:named("dh")
},
["ه"] = {
sp.h,
},
["ة"] = {
sp.t,
sp.e:named("fem_e"),
sp.a:named("fem_a"),
},
["و"] = {
sp.w,
sp.u,
sp.U,
sp.o,
sp.ue,
sp.oe,
},
["ز"] = {
sp.z,
sp.dh:with { affected = true }:cat { cats.msa_interdentals },
},
["ح"] = {
sp.heth,
},
["ط"] = {
sp.t:emph { "ṭ" },
},
["ي"] = {
sp.y,
sp.i,
sp.I,
sp.e,
},
["ى"] = {
sp.a,
},
["ك"] = {
sp.k,
sp.g,
},
["گ"] = { -- key is an assumption: a letter used only for /g/
sp.g,
},
["ل"] = {
sp.l,
},
["م"] = {
sp.m,
},
["ن"] = {
sp.n,
sp.new { "~" }:named("nasalization"):with { suprasegmental = true },
},
["س"] = {
sp.s,
},
["ع"] = {
sp.ayn,
},
["ف"] = {
sp.f,
sp.v,
},
["ڤ"] = {
sp.v,
},
["ص"] = {
sp.s:emph { "ṣ" },
},
["ق"] = {
sp.q:emph():with { affected = true },
sp.q,
sp.glottal_stop, -- for loanwords or alternative spellings?
sp.g:cat { cats.q_for_g }, -- for "bedouin" words if needed
},
["ر"] = {
sp.r,
sp.r:emph { "ṛ" },
},
["ش"] = {
sp.sh,
},
["ت"] = {
sp.t,
sp.th:spelled { "s" },
},
["ث"] = {
sp.s:spelled { "s" }:named("th"):with { affected = true }:cat { cats.msa_interdentals },
sp.t:named("th"),
},
["خ"] = {
sp.kh,
},
["ذ"] = {
sp.z:spelled { "z" }:named("dh"):with { affected = true }:cat { cats.msa_interdentals },
sp.d:named("dh"),
},
["ض"] = {
sp.d:named("dh"):emph(),
sp.z:named("dh"):emph():with { affected = true },
},
["ظ"] = {
sp.d:named("dh"):emph(),
sp.z:named("dh"):emph():with { affected = true },
},
["غ"] = {
sp.gh,
},
suprasegmentals = { -- key name reconstructed (assumption); markers that may follow any letter
sp.new { "*" }:named("force"):with { suprasegmental = true, erase = true },
sp.new { '"', "ˈ" }:named("stress"):with { suprasegmental = true },
sp.new { "." }:named("syllable"):with { suprasegmental = true },
sp.new { "^", "ˤ" }:named("emphatic"):with { suprasegmental = true },
sp.new { ":", "ː" }:named("length"):with { suprasegmental = true },
sp.new { "(" }:with { suprasegmental = true },
sp.new { ")" }:with { suprasegmental = true },
},
}
local spelling_to_segment_map = {}
for _ar_char, spellings in pairs(acceptable_spellings) do
for _, segment in ipairs(spellings) do
if not segment.suprasegmental then
for _, spelling in ipairs(segment) do
spelling_to_segment_map[spelling] = segment
end
end
end
end
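-- e.g. spelling_to_segment_map["7"], ["ḥ"] and ["ħ"] all point at the same heth
-- segment; suprasegmentals like "*" and "." are deliberately left out of the map.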
local ipa = {
nasalization = { phonemic = "̃", phonetic = "̃" },
emphatic = { phonemic = "ˤ", phonetic = "ʶ" }, -- dunno about uvularization but maybe (wish we could have variants here, uvular/velar/etc)
syllable = { phonemic = "", phonetic = "" },
stress = { phonemic = "ˈ", phonetic = "ˈ" },
length = { phonemic = "ː", phonetic = "ː" },
schwa = { phonemic = "(ᵊ)", phonetic = "(e)" }, -- could add round_schwa and unround_schwa with diff phonetic values
low_back_a = { phonemic = "a", phonetic = "ɑ" },
plain_a = { phonemic = "a", phonetic = "a" },
front_a = { phonemic = "a", phonetic = "æ" },
e = { phonemic = "e", phonetic = "e" },
lax_medial_i = { phonemic = "e", phonetic = "e" }, -- could be phonemic = "i"
tense_medial_i = { phonemic = "i", phonetic = "i" },
final_i = { phonemic = "i", phonetic = "i" },
vocalic_y = { phonemic = "j", phonetic = "i" },
o = { phonemic = "o", phonetic = "o" },
lax_medial_u = { phonemic = "o", phonetic = "o" }, -- could be phonemic = "u"
tense_medial_u = { phonemic = "u", phonetic = "u" },
final_u = { phonemic = "u", phonetic = "u" },
final_o = { phonemic = "o", phonetic = "o" }, -- used for word-final -o below
vocalic_w = { phonemic = "w", phonetic = "u" },
w = { phonemic = "w", phonetic = "w" },
m = { phonemic = "m", phonetic = "m" },
v = { phonemic = "v", phonetic = "v" },
f = { phonemic = "f", phonetic = "f" },
b = { phonemic = "b", phonetic = "b" },
p = { phonemic = "p", phonetic = "p" },
n = { phonemic = "n", phonetic = "n" },
z = { phonemic = "z", phonetic = "z" },
s = { phonemic = "s", phonetic = "s" },
d = { phonemic = "d", phonetic = "d" },
t = { phonemic = "t", phonetic = "t" },
dh = { phonemic = "ð", phonetic = "ð" },
th = { phonemic = "θ", phonetic = "θ" },
trilled_r = { phonemic = "r", phonetic = "r" },
flapped_r = { phonemic = "r", phonetic = "ɾ" },
l = { phonemic = "l", phonetic = "l" },
ch = { phonemic = "tʃ", phonetic = "tʃ" },
zh = { phonemic = "ʒ", phonetic = "ʒ" },
sh = { phonemic = "ʃ", phonetic = "ʃ" },
y = { phonemic = "j", phonetic = "j" },
g = { phonemic = "ɡ", phonetic = "ɡ" },
k = { phonemic = "k", phonetic = "k" },
gh = { phonemic = "ʁ", phonetic = "ʁ" }, -- could be phonetic = "ɣ"
kh = { phonemic = "χ", phonetic = "χ" }, -- could be phonetic = "x"
q = { phonemic = "q", phonetic = "q" },
ayn = { phonemic = "ʕ", phonetic = "ʕ" },
heth = { phonemic = "ħ", phonetic = "ħ" },
h = { phonemic = "h", phonetic = "h" },
glottal_stop = { phonemic = "ʔ", phonetic = "ʔ" },
}
for name, o in pairs(ipa) do
o.name = name
end
local function ifelse(condition, a, b)
if condition then
return a
end
return b
end
local function canonize_tr(tr, spellings)
local found_segment
for _, spelling in ipairs(spellings) do
-- XXX: is require() fine to call in hot-path code like this?
if require("Module:table").contains(spelling, tr) then
-- if a sub-array in spellings has no `name` field, then the first
-- listed spelling variant counts as the canonical name
found_segment = spelling
break
end
end
return found_segment
end
local function string_to_array(translit)
local translit_array = {}
local currently_polygraph = (
mw.ustring.match(mw.ustring.sub(translit, 1, 1), "%s")
or select(2, mw.ustring.gsub(translit, "%s", "")) % 2 == 1
)
for chars in string.gmatch(translit, "%S+") do
if currently_polygraph then
translit_array[#translit_array + 1] = chars
else
for ch in mw.ustring.gmatch(chars, ".") do -- iterate codepoints so spellings like "ḥ" stay whole
translit_array[#translit_array + 1] = ch
end
end
currently_polygraph = not currently_polygraph
end
return translit_array
end
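-- Illustrative splits: '"kil.me' has no spaces, so every character is its own token:
-- { '"', "k", "i", "l", ".", "m", "e" }. Spaces toggle polygraph chunks, e.g.
-- (hypothetical input) "kh ibz" --> { "kh", "i", "b", "z" }.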
local function vocalize(target, translit)
local error_needs_force
local previous_found_spelling
local found_segment
local result = {}
local ar_i = 1
local tr_i = 1
local b = {}
translit = string_to_array(translit)
while true do
local tr = translit[tr_i]
local ar = mw.ustring.sub(target, ar_i, ar_i)
b[#b + 1] = tr .. ar -- debug trail of (translit, Arabic) pairs; currently unused
local spellings = acceptable_spellings[ar]
if not spellings then
error("Unrecognized character " .. ar .. " in Arabic script")
end
previous_found_spelling = found_segment
found_segment = canonize_tr(tr, spellings) or canonize_tr(tr, acceptable_spellings.suprasegmentals)
-- ar: k, dh, a, b
-- tr: k, a, z, ", z, a, a, b
--
-- k k: found spelling -> k
-- k (a): not found spelling, vowel -> a
-- (dh) (z): found spelling -> fus7a dh
-- dh ("): found spelling, suprasegmental -> stress
-- dh (z): found spelling -> fus7a dh
-- dh (a): not found spelling, vowel -> a
-- (a) (a): found spelling, vowel -> a
-- (b) b: found spelling -> b
-- ar: k, l, m, ة
-- tr: k, i, l, ., m, e
--
-- k k: found spelling
-- k (i): not found spelling, vowel -> i
-- (l) (l): found spelling -> l
-- l (m): not found spelling -> m
-- l (e)
-- ar: k, dh, b
-- tr: ", k, a, z, ., z, a, b
--
-- k ": found spelling, suprasegmental -> stress
-- k (k): found spelling -> k
-- k (a): not found spelling, vowel -> a
-- (dh) (z): found spelling -> fus7a dh
-- dh (.): found spelling, suprasegmental -> syllable
-- dh (z): found spelling -> fus7a dh
-- dh (a): not found spelling, vowel -> a
-- (b) (b): found spelling -> b
-- ar: T, y, a, r, ة
-- tr: t, i, y, y, a, a, r, a
--
-- T t: not found spelling
-- T (i): not found spelling, vowel -> error
-- ar: T, y, a, r, ة
-- tr: t, *, i, y, y, a, a, r, a
--
-- T t: found spelling -> t
-- T (*): found spelling, force -> /
-- T (i): not found spelling, vowel -> i
-- (y) (y): found spelling -> y
-- y (y): found spelling -> y
-- y (a): not found spelling, vowel -> a
-- (a) (a): found spelling, vowel -> a
-- (r) (r): found spelling -> r
-- r (a): not found spelling, vowel -> a
-- (ة) a: not found spelling, allows nil -> pass
if not found_segment and not previous_found_spelling then
error(error_needs_force or "Unrecognized spelling")
elseif not found_segment then
-- XXX: currently we allow any suprasegmental character to behave like "*" in stopping error_needs_force from being raised lol
found_segment = spelling_to_segment_map[tr]
if not found_segment then
error("Unrecognized transliteration character " .. tr)
end
if found_segment.name == "emphatic" then
result[#result] = result[#result]:with { emphatic = true }
elseif not found_segment.erase then
result[#result + 1] = found_segment
end
-- error to possibly be thrown on next iteration
error_needs_force = (
"Unexpected transliteration of Arabic " .. ar .. ": " .. tr ..
". If this is intentional, spell it as " .. tr .. "*, with an asterisk."
)
if found_segment.vowel then
ar_i = ar_i + 1
end
tr_i = tr_i + 1
else
if found_segment.name == "emphatic" then
result[#result] = result[#result]:with { emphatic = true }
elseif not found_segment.erase then
result[#result + 1] = found_segment
end
if found_segment.vowel then
ar_i = ar_i + 1
end
tr_i = tr_i + 1
end
if tr_i > #translit or ar_i > mw.ustring.len(target) then
break
end
end
return result
end
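-- vocalize() returns an ordered array of the segment tables defined above, one per
-- output sound; "." and the stress mark are kept as segments, while "*" (erase) and
-- "^" (which only flags the previous segment as emphatic) are not appended.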
local function gather_categories(translit)
local clns = {}
for _, v in ipairs(translit) do
if v.categories then
for _, category in ipairs(v.categories) do
clns = "North Levantine Arabic terms with " .. category
end
end
end
return clns
end
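-- e.g. a token built with :cat { cats.p } contributes
-- "North Levantine Arabic terms with /p/" to the returned list.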
local function _bake_forwards_env_of(env, token, next_token, next_seg)
if token.name == "syllable" or token.name == "stress" then
env.position_in_coda = nil
env.after_pharyngeal = false
env.after_other_back = false
env.stressed = token.name == "stress"
end
if token.vowel then
if env.position_in_coda and env.position_in_coda > 0 then
error("Multiple vowels in a syllable")
end
env.position_in_coda = 0
end
if env.position_in_coda and not token.vowel then
env.position_in_coda = env.position_in_coda + 1
end
if env.position_in_coda and env.position_in_coda > 0 then
-- will be backfilled during the reverse pass
env.closed_syllable = true
end
if not env.position_in_coda and token.pharyngeal then
env.after_pharyngeal = true
end
if not env.position_in_coda and token.uvular then
env.after_other_back = true
end
-- not accounting for when h can back stuff, use |ban= or spell alif as AA if need to avoid imala
env.first_of_long = false
if next_token and next_token.name == token.name and next_token.vowel and token.vowel then
env.long_vowel = true
env.first_of_long = true
end
if not token.vowel then
env.long_vowel = false
end
if next_seg and next_seg.name == token.name and not next_seg.vowel and not token.vowel then
env.geminate = token.name
env.first_of_long = true
end
if token.vowel or token.name ~= env.geminate then
env.geminate = false
end
if token.name == "a" and not env.long_vowel and next_token and next_token.semivowel then
env.diphthong = next_token.name
end
if env.diphthong and token.name ~= "a" and not token.semivowel then
-- this probably leaves false positives for word-final ayy and aww, fixed below
env.diphthong = false
end
if token.emphatic then
env.emphatic = true
end
-- simplistic rule: any i stops emphasis spread
if token.name == "i" then
env.emphatic = false
end
end
local function _bake_backwards_env_of(env, token, next_token, trackers)
if next_token and next_token.name == token.name and next_token.vowel and token.vowel then
env.long_vowel = true
end
if env.emphatic or token.emphatic then
trackers.backwards_emphasis_spread = true
end
if token.name == "i" then
trackers.backwards_emphasis_spread = false
end
if env.closed_syllable then
trackers.in_closed_syllable = true
end
if env.diphthong and env.geminate then
trackers.consecutive_semivowel = true
env.diphthong = false
end
if env.diphthong and trackers.consecutive_semivowel then
env.diphthong = false
end
if token.vowel then
trackers.consecutive_semivowel = false
end
env.closed_syllable = trackers.in_closed_syllable
env.final_syllable = trackers.in_final_syllable
env.emphatic = trackers.backwards_emphasis_spread
if token.name == "syllable" then
trackers.in_final_syllable = false
trackers.in_closed_syllable = false
end
end
local function bake_environment(tokens)
local segment_envs = {}
local prev_env = {
final_syllable = false,
word_end = false,
position_in_coda = nil,
closed_syllable = false,
stressed = false,
emphatic = false,
after_pharyngeal = false,
after_other_back = false,
long_vowel = false,
geminate = false,
first_of_long = false,
diphthong = false,
}
for i, token in ipairs(tokens) do
local env = {}
for k, v in pairs(prev_env) do
env[k] = v
end
prev_env = env
local next_token = tokens[i + 1]
local next_seg = next_token
if next_seg and next_seg.suprasegmental then
next_seg = tokens[i + 2]
end
_bake_forwards_env_of(env, token, next_token, next_seg)
segment_envs[i] = env
end
local trackers = {
backwards_emphasis_spread = false,
in_final_syllable = true,
in_closed_syllable = false,
consecutive_semivowel = false,
}
for i = #segment_envs, 1, -1 do
local environment = segment_envs[i]
local token = tokens[i]
local next_token = tokens[i + 1]
_bake_backwards_env_of(environment, token, next_token, trackers)
end
-- set up word end
-- does not cover diphthongs which afaict is fine, this is more for short vowels anyway
for i = #segment_envs, 1, -1 do
if not tokens[i].vowel then
break
end
segment_envs[i].word_end = true
end
return segment_envs
end
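-- segment_envs[i] describes token i's surroundings: the forward pass fills
-- position_in_coda, stressed, emphatic (spreading rightwards), after_pharyngeal,
-- after_other_back and the long_vowel/geminate/diphthong flags; the backward pass
-- backfills closed_syllable, final_syllable and leftward emphasis spread, and the
-- last loop marks trailing vowels with word_end.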
local function make_ipa(tokens, segment_environments)
-- i want to stop this logic from blowing up too much
-- i only want to handle:
-- * interdentals: affected vs native with vs native without
-- * q: main glottal stop variant vs "druze, coastal syrian" q variant
-- * imala: variant with "chiefly lebanon" -- ignored around emphatics, which if needed for eg طاولة or
-- for coastal dialects can be done by explicitly specifying different options
-- * diphthongs: "chiefly lebanon, regional" option vs default monophthongized option (not sure if final-syllable rule would be too much to add)
-- * final vowels:
-- * -e > -i with "lebanon, regional"
-- * -i > -e with "regional, chiefly lebanon"
-- * other vowels:
-- * short i > short e, short u > short o in stressed syllables and non-final closed syllables
-- * regional lebanese iC# and uC# will be frustrating to implement here, not sure
-- * epenthetics: always present, in parentheses (prob can't do beqaa/qalamoun CV:CC unfortunately)
--
-- this will give false positives, which is what ban= will be for
-- (also i think automatic handling of variation in u/i would be too much here unfortunately)
-- should this include tracking for v, p, g, etc in order to offer variants where they're pronounced as f/b/k?
-- for now that can be manual i guess
local features = {
interdentals = {
affected = { replacement = { z = ipa.dh, s = ipa.th } },
native = { replacement = { d = ipa.dh, z = ipa.dh, t = ipa.th, s = ipa.th } },
},
epenthetics = {
absent = { replacement = { schwa = "" } }, -- epenthetic = null
},
fem = {
affected_plain_a = { replacement = { e = ipa.plain_a } },
affected_emphatic_a = { replacement = { e = ipa.low_back_a } },
},
q = {
native = {
-- Q and q = q
replacement = { glottal_stop = ipa.q },
},
affected_2 = {
replacement = { q = ipa.glottal_stop, emphatic = "" },
},
affected_k = {
-- Q = k like 2iktiSaad
replacement = { glottal_stop = ipa.k, emphatic = "" },
},
},
imala = {
only_plain = {
replacement = { front_a = ipa.e },
},
plain_or_back = {
replacement = { front_a = ipa.e, plain_a = ipa.e },
},
},
final_vowels = {
lebanese_i = {
-- final -e = -i
replacement = { e = ipa.i },
},
lebanese_lax = {
-- final -i = -e
replacement = { final_i = ipa.e, final_u = ipa.o },
},
},
diphthongs = {
absent_medial_w = {
replacement = {
plain_a = ipa.o,
front_a = ipa.o,
low_back_a = ipa.o,
w = ipa.length,
},
},
absent_medial_y = {
replacement = {
plain_a = ipa.e,
front_a = ipa.e,
low_back_a = ipa.e,
y = ipa.length,
},
},
absent_final_w = {
replacement = {
plain_a = ipa.o,
front_a = ipa.o,
low_back_a = ipa.o,
w = ipa.length,
},
},
absent_final_y = {
replacement = {
plain_a = ipa.e,
front_a = ipa.e,
low_back_a = ipa.e,
y = ipa.length,
},
},
},
-- TODO add options for french vowels
}
local accents = {
{
features.diphthongs.absent_medial_w,
features.diphthongs.absent_medial_y,
features.diphthongs.absent_final_w,
features.diphthongs.absent_final_y,
none_of = {
features.imala.only_plain,
features.imala.plain_or_back,
},
name = "broad_syria",
variants = {
{ features.q.affected_2 },
},
accent = { "chiefly", "Syria" },
},
{
features.imala.only_plain,
features.final_vowels.lebanese_lax,
none_of = {
features.diphthongs.absent_medial_w,
features.diphthongs.absent_medial_y,
features.diphthongs.absent_final_w,
features.diphthongs.absent_final_y,
},
name = "broad_lebanon",
accent = { "chiefly", "Lebanon" },
variants = {
{ features.q.affected_2 },
{ features.q.affected_k },
{ features.imala.plain_or_back, accent = { "regional" } },
{ features.final_vowels.lebanese_i, accent = { "regional" } },
{
features.diphthongs.absent_final_w,
features.diphthongs.absent_final_y,
}
},
},
{
features.interdentals.affected,
name = "affected",
accent = { "affected" },
variants = {
{
features.fem.affected_emphatic_a,
features.fem.affected_plain_a,
},
},
atop = { "broad_syria", "broad_lebanon" },
},
{
features.interdentals.native,
name = "broad_rural",
accent = { "!rural" },
atop = { "broad_syria", "broad_lebanon" }
},
{
features.q.native,
name = "druze",
accent = { "traditional <<Druze>>" },
atop = { "broad_syria", "broad_lebanon", "broad_rural" },
},
{
features.q.native,
name = "coastal_syria",
accent = { "coastal <<Syria>>" },
atop = { "broad_syria", "broad_lebanon" },
},
}
local ipa_chars = {
add = function(self, default, tables, previous)
if not default then
error("nil ipa char after " .. self.name)
end
self[#self + 1] = default
self:variant(tables, previous)
end,
variant = function(self, tables, previous)
for _, t in ipairs(tables or {}) do
if previous then
t[#t + 1] = #self - 1
end
t[#t + 1] = #self
end
end
}
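-- ipa_chars:add(ch, feats) appends the IPA character and records its index into every
-- feature table in `feats`; an accent's `replacement` map can then swap exactly those
-- positions later (e.g. features.imala.only_plain remembers where front_a landed so
-- the "chiefly Lebanon" group can turn it into ipa.e).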
for i, tok in ipairs(tokens) do
local env = segment_environments[i]
local prev_env = segment_environments[i - 1] or {}
if env.position_in_coda == 2 and not prev_env.diphthong and not env.geminate and not tok.semivowel then
ipa_chars:add(ipa.schwa, { features.epenthetics.absent })
end
if env.position_in_coda and env.position_in_coda > 0 and tok.vowel then
error("Multiple vowels in a syllable")
end
if tok.name == "syllable" then
ipa_chars:add(ipa.syllable)
elseif tok.name == "stress" then
ipa_chars:add(ipa.stress)
elseif tok.name == "(" then
ipa_chars:add("(")
elseif tok.name == ")" then
ipa_chars:add(")")
elseif tok.vowel and env.long_vowel and not env.first_of_long then
ipa_chars:add(ipa.length)
elseif tok.name == "q" then
ipa_chars:add(
ifelse(tok.affected, ipa.q, ipa.glottal_stop),
ifelse(
tok.affected,
{
features.q.native,
features.q.affected_k,
features.q.affected_2,
},
{ features.q.native }
)
)
elseif tok.name == "dh" then
ipa_chars:add(
ifelse(tok.affected, ipa.z, ipa.d),
ifelse(
tok.affected,
{ features.interdentals.affected, features.interdentals.native },
{ features.interdentals.native }
)
)
elseif tok.name == "th" then
ipa_chars:add(
ifelse(tok.affected, ipa.s, ipa.t),
ifelse(
tok.affected,
{ features.interdentals.affected, features.interdentals.native },
{ features.interdentals.native }
)
)
elseif env.diphthong and not tok.vowel then
local key = "absent_medial_"
if env.final_syllable then
key = "absent_final_"
end
ipa_chars:add(
ipa[tok.name],
{ features.diphthongs[key .. tok.name] }
)
elseif tok.name == "a" and env.long_vowel and not env.emphatic then
-- should plain_or_back just be back and not include plain? would that make it easier to generate the final display forms for the wiki?
ipa_chars:add(
ipa.front_a,
ifelse(
env.after_other_back,
{ features.imala.plain_or_back },
{
features.imala.only_plain,
features.imala.plain_or_back,
}
)
)
elseif tok.name == "a" and (env.after_pharyngeal or env.word_end) and not env.emphatic then
ipa_chars:add(ipa.plain_a, ifelse(env.long_vowel, { features.imala.plain_or_back }, {}))
if env.diphthong then
local key = "absent_medial_"
if env.final_syllable then
key = "absent_final_"
end
ipa_chars:variant {
features.diphthongs[key .. env.diphthong]
}
end
elseif tok.name == "A" or tok.name == "a" then
ipa_chars:add(
ifelse(
env.emphatic,
ipa.low_back_a,
ifelse(
env.diphthong == "w",
ipa.plain_a,
ipa.front_a
)
)
)
if env.diphthong then
local key = "absent_medial_"
if env.final_syllable then
key = "absent_final_"
end
ipa_chars:variant {
features.diphthongs[key .. env.diphthong]
}
end
elseif (
(tok.name == "i" or tok.name == "u")
and env.long_vowel
and env.closed_syllable
and #segment_environments >= i + 2
and segment_environments[i + 2].geminate
and tokens[i + 2].name ~= "syllable"
) then
-- add nothing -- this is specifically cooked up for cases like grippe
-- which i believe i have as /gri:pp/ (notice tense )
-- i'm also cutting out tok.name == "a" just in case even though i don't think
-- we have any false positives (eg this rule could erroneously produce
-- /mawe:dd/ or /mawa:dd/ if i allowed it to apply to a,
-- but i think the word is just /mawa:d/ /mawe:d/ anyway)
elseif tok.name == "i" and env.long_vowel then
ipa_chars:add(ipa.tense_medial_i)
elseif tok.name == "u" and env.long_vowel then
ipa_chars:add(ipa.tense_medial_u)
elseif tok.name == "I" then
ipa_chars:add(ipa.tense_medial_i)
elseif tok.name == "i" then
if not env.long_vowel and not env.word_end then
ipa_chars:add(
ifelse(
env.stressed or env.closed_syllable,
ipa.lax_medial_i,
ipa.tense_medial_i
)
)
elseif env.word_end then
ipa_chars:add(ipa.final_i, { features.final_vowels.lebanese_lax })
end
elseif tok.name == "e" and env.word_end then
ipa_chars:add(ipa.e, { features.final_vowels.lebanese_i })
elseif tok.name == "U" then
ipa_chars:add(ipa.tense_medial_u)
elseif tok.name == "u" then
if not env.long_vowel and not env.word_end then
ipa_chars:add(
ifelse(
env.stressed or env.closed_syllable,
ipa.lax_medial_u,
ipa.tense_medial_u
)
)
elseif env.word_end then
ipa_chars:add(ipa.final_u, { features.final_vowels.lebanese_lax })
end
elseif tok.name == "o" and env.word_end then
-- don't know of an -u dialect
ipa_chars:add(ipa.final_o)
elseif tok.name == "7" then
ipa_chars:add(ipa.heth)
elseif tok.name == "x" then
ipa_chars:add(ipa.kh)
elseif tok.name == "3" then
ipa_chars:add(ipa.ayn)
elseif tok.name == "2" then
ipa_chars:add(ipa.glottal_stop)
elseif tok.name == "c" then
ipa_chars:add(ipa.ch)
elseif tok.name == "8" then
ipa_chars:add(ipa.gh)
elseif tok.name == "j" then
ipa_chars:add(ipa.zh)
elseif tok.name == "r" then
ipa_chars:add(ifelse(env.geminate, ipa.trilled_r, ipa.flapped_r))
elseif tok.name == "fem_e" or tok.name == "fem_a" then
ipa_chars:add(
ifelse(
tok.name == "fem_e",
ipa.e,
ifelse(env.emphatic, ipa.low_back_a, ipa.plain_a)
),
ifelse(
env.emphatic,
{ features.fem.affected_emphatic_a },
{ features.fem.affected_plain_a }
)
)
elseif tok.semivowel and env.position_in_coda == 2 and not env.geminate then
ipa_chars:add(ipa[tok.name])
else
-- the mapping from token name to ipa is really sloppy, haven't checked if every token name has a corresponding entry in ipa table
ipa_chars:add(ipa[tok.name] or ipa[tok[1]] or error("no ipa: " .. tok.name))
end
if tok.emphatic then
ipa_chars:add(
ipa.emphatic,
ifelse(
tok.name == "q" and tok.affected,
{ features.q.affected_2 },
{}
)
)
end
end
local accent_groups = {}
for _, accent_features in ipairs(accents) do
local group = {variants = {}, accent = accent_features.accent}
accent_groups[accent_features.name] = group
local atop = {}
if accent_features.atop then
-- accent_name: broad_lebanon, broad_syria
for _, accent_name in ipairs(accent_features.atop) do
-- src_grp: data for broad_lebanon, data for broad_syria
for _, src_grp in ipairs(accent_groups[accent_name]) do
-- src_grp has `accent =` already so only extend with name
atop[#atop + 1] = extended(src_grp, { name = accent_name })
end
end
else
atop = { extended(ipa_chars, { name = "main", accent = {} }) }
end
for i, source_accent in ipairs(atop) do
local current_accent = accent_features.accent or {}
group[i] = extended(source_accent, { accent = source_accent.accent or {} })
-- group[i] = extended(
-- source_accent,
-- { accent = extended_arr(current_accent, source_accent.accent or {}) }
--)
local contributed = false
-- repl: (indices of imala), (indices of q)
for _, repl in ipairs(accent_features) do
-- idx: (index of an imala), (index of a q)
for _, idx in ipairs(repl) do
contributed = true
local ch = group[i][idx]
if repl.replacement then
group[i][idx] = repl.replacement[ch.name]
end
end
end
if not contributed and accent_features.none_of then
for _, variant in ipairs(accent_features.none_of) do
if #variant > 0 then
contributed = true
break
end
end
end
if contributed then
group[i].accent = extended_arr(group[i].accent, current_accent)
end
end
for _, variant_list in ipairs(accent_features.variants or {}) do
local use = false
for _, original in ipairs(group) do
local new = extended(
original,
{
-- comment this to not repeat/qualify the entire accent
-- for variants (eg "chiefly Lebanon, regional" vs
-- just "regional")
--
accent = extended_arr(
original.accent or {},
variant_list.accent or {}
)
-- accent = variant_list.accent or {}
}
)
for _, variant in ipairs(variant_list) do
if #variant > 0 then
use = true
end
for _, idx in ipairs(variant) do
local ch = original[idx]
if variant.replacement then
new[idx] = variant.replacement[ch.name]
end
end
end
if use then
group.variants[#group.variants + 1] = new
end
end
end
end
-- error(mw.dumpObject(accent_groups))
local pronunciations = {}
local seen_at_position = {}
for _, group in pairs(accent_groups) do
local g = {accent = group.accent}
for _, pronunciation in ipairs(group) do
local use = false
local sap_recursive = seen_at_position
for _, ch in ipairs(pronunciation) do
if not sap_recursive[ch] then
use = true
sap_recursive[ch] = {}
end
sap_recursive = sap_recursive[ch]
end
if use then
g[#g + 1] = pronunciation
end
end
for _, variant in ipairs(group.variants) do
local use = false
local sap_recursive = seen_at_position
for _, ch in ipairs(variant) do
if not sap_recursive[ch] then
use = true
sap_recursive[ch] = {}
end
sap_recursive = sap_recursive[ch]
end
if use then
g[#g + 1] = variant
end
end
if #g > 0 then
pronunciations[#pronunciations + 1] = g
end
end
return pronunciations
end
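-- Template-facing entry point. |1= is the Arabic spelling (defaults to the page name),
-- |tr= the transliteration, |q= an optional qualifier and |ban= a comma-separated list
-- of patterns; any generated pronunciation matching a |ban= pattern is dropped.
-- A hypothetical invocation (module name here is only illustrative):
-- {{#invoke:apc-IPA|show|كلمة|tr="kil.me}}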
function export.show(frame)
local params = {
[1] = { required = false },
["tr"] = { required = true },
["q"] = { required = false },
["ban"] = { required = false, sublist = true },
}
local args = require("Module:parameters").process(frame:getParent().args, params)
local target = args[1]
local translit = args.tr
local qual = args.q
if not target then
local title = mw.title.getCurrentTitle()
if title.nsText == "Template" then
target = "كلمة"
translit = '"kil.me'
else
target = title.subpageText
end
end
target = mw.ustring.gsub(target, ".", substitutions)
local tokens = vocalize(target, translit)
local pronunciations = make_ipa(tokens, bake_environment(tokens))
local formatted_groups = {}
for _, group in ipairs(pronunciations) do
local formatted_pronunciations = {}
for i, pronunciation in ipairs(group) do
local phonemes = {}
local phones = {}
for ch_i, ch in ipairs(pronunciation) do
if ch.phonemic or ch.phonetic then
phonemes[#phonemes + 1] = ch.phonemic or ch.phonetic
phones[#phones + 1] = ch.phonetic or ch.phonemic
else
-- bare string characters like "(" have no phonemic/phonetic fields
phonemes[#phonemes + 1] = ch
phones[#phones + 1] = ch
end
end
local phonemic = "/" .. table.concat(phonemes) .. "/"
local phonetic = ""
local pron = phonemic .. " " .. phonetic
local banden = false
for _, pattern in ipairs(args.ban or {}) do
if mw.ustring.find(pron, pattern) then
banden = true
end
end
if not banden then
formatted_pronunciations[#formatted_pronunciations + 1] = {
pron = phonemic .. " " .. phonetic,
a = pronunciation.accent,
}
end
end
if #formatted_pronunciations > 0 then
if #formatted_groups == 0 then
formatted_groups = require("Module:IPA").format_IPA_full {
lang = lang,
q = { qual },
items = formatted_pronunciations
}
else
formatted_groups = require("Module:IPA").format_IPA_multiple(
lang,
formatted_pronunciations
)
end
end
end
return (
require("Module:columns").create_list { column_count = 1, content = formatted_groups }
.. require("Module:utilities").format_categories(gather_categories(tokens), lang)
)
end
return export