-- This module is used by many Arabic headword-line templates.
-- Authors: Benwing, CodeCat
local ar_translit = require("Module:ar-translit")
local lang = require("Module:languages").getByCode("ar")
local export = {}
local pos_functions = {}
-- diacritics
-- Shorthand for building a string from Unicode code points.
local u = mw.ustring.char
local A = u(0x064E) -- fatḥa
local AN = u(0x064B) -- fatḥatān (fatḥa tanwīn)
local U = u(0x064F) -- ḍamma
local UN = u(0x064C) -- ḍammatān (ḍamma tanwīn)
local I = u(0x0650) -- kasra
local IN = u(0x064D) -- kasratān (kasra tanwīn)
local SK = u(0x0652) -- sukūn = no vowel
local SH = u(0x0651) -- šadda = gemination of consonants
local DAGGER_ALIF = u(0x0670)
-- Pattern character class matching any one of the diacritics above except
-- šadda. It must be a bracketed set: reorder_shadda() wraps it in "(...)" and
-- an empty string there would turn the capture into a position capture.
local DIACRITIC_ANY_BUT_SH = "[" .. A .. I .. U .. AN .. IN .. UN .. SK .. DAGGER_ALIF .. "]"
-- various letters and signs
local HAMZA = u(0x0621) -- hamza on the line (stand-alone hamza) = ء
local ALIF = u(0x0627) -- ʾalif = ا
local AMAQ = u(0x0649) -- ʾalif maqṣūra = ى
local TAM = u(0x0629) -- tāʾ marbūṭa = ة
-- common combinations
-- Pattern character class matching either ḍammatān (-un) or ḍamma (-u); used
-- as UNU .. "?$" to strip an optional final ending of that kind.
local UNU = "[" .. UN .. U .. "]"
-----------------------
-- Utility functions --
-----------------------
-- "If Not Empty": turn the empty string into nil so that blank template
-- arguments behave like missing ones. Every other value, including nil and
-- false, is returned unchanged.
local function ine(arg)
	if arg ~= "" then
		return arg
	end
	return nil
end
-- Convert the sequence LIST into a set: a table whose keys are the items of
-- LIST, each mapped to true, so that membership is a plain table lookup.
-- An empty LIST yields an empty table.
local function list_to_set(list)
	local set = {}
	for _, item in ipairs(list) do
		-- Key the entry by the item; assigning to `set` itself would throw
		-- the table away and return a bare boolean.
		set[item] = true
	end
	return set
end
-- Wrapper around mw.ustring.gsub() that hands back only the substituted
-- string, dropping the replacement count that gsub also returns.
function rsub(term, foo, bar)
	-- The surrounding parentheses truncate the call to its first result.
	return (mw.ustring.gsub(term, foo, bar))
end
-- Short alias for mw.ustring.find().
local rfind = mw.ustring.find
-- Strip wiki-link markup from TEXT, keeping only the displayed text:
-- "[[target|shown]]" becomes "shown" and "[[page]]" becomes "page".
function remove_links(text)
	-- Drop "[[target|": the opening brackets, then every character up to the
	-- first pipe that is neither a pipe nor a closing bracket, so an unpiped
	-- link is left for the next two substitutions.
	text = rsub(text, "%[%[[^|%]]*|", "")
	-- Remove whatever opening and closing brackets remain.
	text = rsub(text, "%[%[", "")
	text = rsub(text, "%]%]", "")
	return text
end
-- Move every shadda in TEXT in front of the diacritic that precedes it.
--
-- NFC normalisation, which MediaWiki applies to all Unicode strings, stores
-- shadda + short vowel (tanwīn vowels -an -in -un included) as
-- short vowel + shadda. That order is inconvenient for detecting endings:
-- the tracking code would, for example, miss the -un in سِتٌّ because the
-- shadda would follow it. This function undoes the reordering.
function reorder_shadda(text)
	local vowel_then_shadda = "(" .. DIACRITIC_ANY_BUT_SH .. ")" .. SH
	local shadda_then_vowel = SH .. "%1"
	return rsub(text, vowel_then_shadda, shadda_then_vowel)
end
-- Tracking functions
local trackfn = require("Module:debug").track
-- Record a tracking entry for PAGE, namespaced under "ar-headword/".
-- Always returns true.
function track(page)
	local tracking_page = "ar-headword/" .. page
	trackfn(tracking_page)
	return true
end
--[==[
Examples of what you can find by looking at what links to the given
pages:

[[Template:tracking/ar-headword/unvocalized]]
	all unvocalized pages
[[Template:tracking/ar-headword/unvocalized/pl]]
	all unvocalized pages where the plural is unvocalized,
	whether specified using pl=, pl2=, etc.
[[Template:tracking/ar-headword/unvocalized/head]]
	all unvocalized pages where the head is unvocalized
[[Template:tracking/ar-headword/unvocalized/head/nouns]]
	all nouns excluding proper nouns, collective nouns,
	singulative nouns where the head is unvocalized
[[Template:tracking/ar-headword/unvocalized/head/proper nouns]]
	all proper nouns where the head is unvocalized
[[Template:tracking/ar-headword/unvocalized/head/not proper nouns]]
	all words that are not proper nouns
	where the head is unvocalized
[[Template:tracking/ar-headword/unvocalized/adjectives]]
	all adjectives where any parameter is unvocalized;
	currently only works for heads,
	so equivalent to .../unvocalized/head/adjectives
[[Template:tracking/ar-headword/unvocalized-empty-head]]
	all pages with an empty head
[[Template:tracking/ar-headword/unvocalized-manual-translit]]
	all unvocalized pages with manual translit
[[Template:tracking/ar-headword/unvocalized-manual-translit/head/nouns]]
	all nouns where the head is unvocalized but has manual translit
[[Template:tracking/ar-headword/unvocalized-no-translit]]
	all unvocalized pages without manual translit
[[Template:tracking/ar-headword/i3rab]]
	all pages with any parameter containing i3rab
	of either -un, -u, -a or -i
[[Template:tracking/ar-headword/i3rab-un]]
	all pages with any parameter containing an -un i3rab ending
[[Template:tracking/ar-headword/i3rab-un/pl]]
	all pages where a form specified using pl=, pl2=, etc.
	contains an -un i3rab ending
[[Template:tracking/ar-headword/i3rab-u/head]]
	all pages with a head containing an -u i3rab ending
[[Template:tracking/ar-headword/i3rab/head/proper nouns]]
	(all proper nouns with a head containing i3rab
	of either -un, -u, -a or -i)

In general, the format is one of the following:

Template:tracking/ar-headword/FIRSTLEVEL
Template:tracking/ar-headword/FIRSTLEVEL/ARGNAME
Template:tracking/ar-headword/FIRSTLEVEL/POS
Template:tracking/ar-headword/FIRSTLEVEL/ARGNAME/POS

FIRSTLEVEL can be one of "unvocalized", "unvocalized-empty-head" or its
opposite "unvocalized-specified", "unvocalized-manual-translit" or its
opposite "unvocalized-no-translit", "i3rab", "i3rab-un", "i3rab-u",
"i3rab-a", or "i3rab-i".

ARGNAME is either "head" or an argument such as "pl", "f", "cons", etc.
This automatically includes arguments specified as head2=, pl3=, etc.

POS is a part of speech, lowercase and pluralized, e.g. "nouns",
"adjectives", "proper nouns", "collective nouns", etc. or
"not proper nouns", which includes all parts of speech but proper nouns.
]==]
-- Emit tracking entries describing FORM, the value of the argument ARGNAME.
--   argname:  "head" or an inflection argument prefix such as "pl"
--   form:     the Arabic text; wiki links are removed before inspection
--   translit: manual transliteration, or nil if none was supplied
--   pos:      optional part-of-speech name appended to the tracking pages
-- Two things are tracked: a final i3rab diacritic (-un, -u, -a or -i), and
-- forms that are empty or for which lang:transliterate() returns a false
-- value ("unvocalized").
function track_form(argname, form, translit, pos)
	form = reorder_shadda(remove_links(form))

	-- The helpers below are local closures; defining them without `local`
	-- would overwrite two global names on every call.

	-- Track PAGE on its own, per argument, per part of speech, and per
	-- argument and part of speech combined.
	local function dotrack(page)
		track(page)
		track(page .. "/" .. argname)
		if pos then
			track(page .. "/" .. pos)
			track(page .. "/" .. argname .. "/" .. pos)
			-- "tulajdonnevek" is the proper-noun part-of-speech name; every
			-- other part of speech is also tracked as "not proper nouns".
			if pos ~= "tulajdonnevek" then
				track(page .. "/not proper nouns")
				track(page .. "/" .. argname .. "/not proper nouns")
			end
		end
	end

	-- Track "i3rab" and "i3rab-TR" when FORM ends in the diacritic ARABIC.
	local function track_i3rab(arabic, tr)
		if rfind(form, arabic .. "$") then
			dotrack("i3rab")
			dotrack("i3rab-" .. tr)
		end
	end

	track_i3rab(UN, "un")
	track_i3rab(U, "u")
	track_i3rab(A, "a")
	track_i3rab(I, "i")

	if form == "" or not lang:transliterate(form) then
		dotrack("unvocalized")
		if form == "" then
			dotrack("unvocalized-empty-head")
		else
			dotrack("unvocalized-specified")
		end
		if translit then
			dotrack("unvocalized-manual-translit")
		else
			dotrack("unvocalized-no-translit")
		end
	end
end
-- The main entry point. FRAME is the module invocation frame; the template's
-- own arguments are read from frame:getParent().args. Builds the `data` table
-- (heads, transliterations, genders, inflections, categories), lets the
-- part-of-speech handler fill in the rest, and returns the result of
-- Module:headword's full_headword(data).
--
-- NOTE(review): throughout this function table accesses appear to have lost
-- their index expressions (`frame.args`, `args`, `data.translits`,
-- `pos_functions` are all used bare) and the entries of `params` have no
-- keys, which is not valid Lua. The intended indexes cannot be determined
-- from this file alone -- restore them from the upstream module before use.
function export.show(frame)
-- The error message refers to "parameter 1", so this presumably read the
-- first invocation argument -- TODO confirm.
local poscat = frame.args or error("Part of speech has not been specified. Please pass parameter 1 to the module invocation.")
-- Parameter specification; not referenced anywhere else in this function.
local params = {
= {list = "head", allow_holes = true, default = ""},
= {default = ""},
= {list = true, allow_holes = true},
}
local args = frame:getParent().args -- TODO: use a parameter-processing module here (NOTE(review): the link naming it is missing from this comment)
-- Gather parameters
local data = {lang = lang, pos_category = poscat, categories = {}, heads = {}, translits = {}, genders = {}, inflections = { enable_auto_translit = true }}
local head = args or args or ""
local translit = ine(args)
local i = 1
local irreg_translit = false
-- One iteration per head: record the head and its transliteration, note
-- whether the transliteration is irregular, and emit tracking entries.
-- `i` is incremented for the next head; presumably it was meant to build the
-- numbered argument names (head2=, tr2=, ...) -- TODO confirm.
while head do
table.insert(data.heads, head)
data.translits = translit
if ar_translit.irregular_translit(head, translit) then
irreg_translit = true
end
track_form("head", head, translit, poscat)
i = i + 1
head = ine(args)
translit = ine(args)
end
-- The category for irregular transliterations is currently disabled, so this
-- branch does nothing.
if irreg_translit then
----table.insert(data.categories, lang:getCanonicalName() .. " terms with irregular pronunciations")
end
-- Run the part-of-speech specific handler, if any.
if pos_functions then
pos_functions.func(args, data)
end
--[=[
NOTE(review): this comment originally listed three pages (their names are
missing here); presumably the tracking pages for "num", "head" and "g"
emitted just below -- confirm.
]=]
-- Track use of particular arguments. NOTE(review): which arguments each of
-- the three conditions tested is not recoverable from this file.
if args or args or args or args or args or args or args then
track("num")
end
if args then
track("head")
end
if args then
track("g")
end
return require("Module:headword").full_headword(data)
end
-- Get a list of inflections. See handle_infl() for meaning of ARGS, ARGPREF
-- and DEFGENDER.
--
-- For prefix "pl" the arguments read are pl, pltr, plg, plg2 for the first
-- form and pl2, pl2tr, pl2g, pl2g2 (then pl3..., etc.) for the following
-- ones. Collection stops at the first missing or empty form. Returns a
-- sequence of {term = ..., translit = ..., gender = ...} tables.
local function getargs(args, argpref, defgender)
	-- Gather parameters
	local forms = {}
	local form = ine(args[argpref])
	local translit = ine(args[argpref .. "tr"])
	local gender = ine(args[argpref .. "g"])
	local gender2 = ine(args[argpref .. "g2"])
	local i = 1
	while form do
		-- Explicit genders win; otherwise fall back to DEFGENDER, or to no
		-- gender at all when that is absent too.
		local genderlist = (gender or gender2) and {gender, gender2} or defgender and {defgender} or nil
		track_form(argpref, form, translit)
		table.insert(forms, {term = form, translit = translit, gender = genderlist})
		i = i + 1
		-- The index must depend on `i`; otherwise `form` never becomes nil
		-- and the loop cannot terminate.
		form = ine(args[argpref .. i])
		translit = ine(args[argpref .. i .. "tr"])
		gender = ine(args[argpref .. i .. "g"])
		gender2 = ine(args[argpref .. i .. "g2"])
	end
	return forms
end
-- Collect the inflections named by argument prefix ARGPREF from ARGS and, if
-- there are any, append them to data.inflections under the headword label
-- LABEL. With ARGPREF "pl", for instance, this picks up "pl", "pl2", etc.,
-- their transliterations "pltr", "pl2tr", etc., and optional genders "plg",
-- "plg2", "pl2g", "pl2g2", "pl3g", "pl3g2", etc.
--
-- DEFGENDER is optional: it is the gender attached to a form that specifies
-- none of its own; without it such forms get no gender. (This is used for
-- singulative forms of collective nouns, and collective forms of singulative
-- nouns, which have different gender from the base form(s).)
local function handle_infl(args, data, argpref, label, defgender)
	local infls = getargs(args, argpref, defgender)
	infls.label = label
	if #infls == 0 then
		-- Nothing was specified for this prefix.
		return
	end
	table.insert(data.inflections, infls)
end
-- Register a basic inflection (e.g. plural, feminine) and then its construct,
-- definite, oblique and informal variants, read from ARGPREF followed by
-- "cons", "def", "obl" and "inf". Passing blank strings for both ARGPREF and
-- LABEL handles the variants of the base form itself. The base inflection is
-- skipped when NOBASE is set or ARGPREF is blank.
local function handle_all_infl(args, data, argpref, label, nobase)
	if not (nobase or argpref == "") then
		handle_infl(args, data, argpref, label)
	end

	-- Variant labels are prefixed with LABEL and a space, unless LABEL is blank.
	local prefix = ""
	if label ~= "" then
		prefix = label .. " "
	end

	local variants = {
		{"cons", "construct state"},
		{"def", "definite state"},
		{"obl", "oblique"},
		{"inf", "informal"},
	}
	for _, variant in ipairs(variants) do
		handle_infl(args, data, argpref .. variant[1], prefix .. variant[2])
	end
end
-- Handle the plural of a noun. pl=- marks an uncountable noun: an
-- "usually uncountable" label and the uncountable-nouns category are added
-- instead of plural forms. Any other value is processed as ordinary plural
-- inflections (pl=, pl2=, ...).
local function handle_noun_plural(args, data)
	-- Compare the pl= argument, not the argument table itself.
	if args["pl"] == "-" then
		-- NOTE(review): the label previously ended in a stray "]", apparently
		-- left over from a glossary link whose target is unknown here; plain
		-- text is used, worded to match the category name below.
		table.insert(data.inflections, {label = "általában megszámlálhatatlan"})
		table.insert(data.categories, lang:getCanonicalName() .. " megszámlálhatatlan főnevek")
	else
		handle_infl(args, data, "pl", "többesszám")
	end
end
-- The gender codes this module recognizes, passed through list_to_set().
-- handle_gender() consults this value when deciding whether a gender is
-- acceptable.
local valid_genders = list_to_set(
{"m", "m-s", "m-pr", "m-s-pr", "m-np", "m-s-np",
"f", "f-s", "f-pr", "f-s-pr", "f-np", "f-s-np",
"m-d", "m-d-pr", "m-d-np",
"f-d", "f-d-pr", "f-d-np",
"m-p", "m-p-pr", "m-p-np",
"f-p", "f-p-pr", "f-p-np",
"d", "d-pr", "d-np",
"p", "p-pr", "p-np",
"pr", "np"
})
-- True when G is one of the gender codes "m", "m-pr" or "m-np"; false for
-- anything else, including nil.
local function is_masc_sg(g)
	if g == "m" or g == "m-pr" then
		return true
	end
	return g == "m-np"
end
-- True when G is one of the gender codes "f", "f-pr" or "f-np"; false for
-- anything else, including nil.
local function is_fem_sg(g)
	if g == "f" or g == "f-pr" then
		return true
	end
	return g == "f-np"
end
-- Handle gender in unnamed param 2 and a second gender in param g2, inserting
-- into the list of genders in data.genders. If the gender is unspecified,
-- default to DEFAULT, which may be omitted; with no gender at all, "?" is
-- recorded. An unrecognized gender raises an error.
--
-- For a lemma (NONLEMMA not set) the function goes on to detect multiple
-- genders and genders that are unexpected for the form of the noun. The
-- category insertions for those cases are currently commented out, so that
-- part has no visible effect.
local function handle_gender(args, data, default, nonlemma)
	local g = ine(args[2]) or default
	local g2 = ine(args["g2"])

	-- Append one gender to data.genders, validating it against the set of
	-- recognized codes.
	local function process_gender(gender)
		if not gender then
			table.insert(data.genders, "?")
		elseif valid_genders[gender] then
			table.insert(data.genders, gender)
		else
			error("Unrecognized gender: " .. gender)
		end
	end

	process_gender(g)
	if g2 then
		process_gender(g2)
	end

	if nonlemma then
		return
	end

	if g and g2 then
		--table.insert(data.categories, lang:getCanonicalName() .. " terms with multiple genders")
	elseif is_masc_sg(g) or is_fem_sg(g) then
		-- NOTE(review): the head is assumed to come from head= or, failing
		-- that, unnamed param 1 -- confirm the intended precedence.
		local head = ine(args["head"]) or ine(args[1])
		if head then
			-- Strip links, normalise shadda order and drop a final -un/-u.
			head = rsub(reorder_shadda(remove_links(head)), UNU .. "?$", "")
			-- The first word ends in tāʾ marbūṭa, whether or not more words
			-- follow it.
			local ends_with_tam = rfind(head, "^[^ ]*" .. TAM .. "$") or
				rfind(head, "^[^ ]*" .. TAM .. " ")
			if is_masc_sg(g) and ends_with_tam then
				--table.insert(data.categories, lang:getCanonicalName() .. " masculine terms with feminine ending")
			elseif is_fem_sg(g) and not ends_with_tam and
				not rfind(head, "[" .. ALIF .. AMAQ .. "]$") and
				not rfind(head, ALIF .. HAMZA .. "$") then
				--table.insert(data.categories, lang:getCanonicalName() .. " feminine terms lacking feminine ending")
			end
		end
	end
end
-- Part-of-speech functions
-- Handler whose func registers, in this order: the construct/definite/
-- oblique/informal variants of the base form, then the inflections read from
-- f, d, fd, cpl, pl and fpl (each with its own variants), and finally the
-- plain inflection read from el (labelled "elative").
-- NOTE(review): the assignment target carries no index, so this statement
-- replaces the whole pos_functions table rather than adding an entry to it;
-- presumably a part-of-speech key was lost -- confirm against upstream.
pos_functions = {
func = function(args, data)
handle_all_infl(args, data, "", "") -- handle cons, def, obl, inf
handle_all_infl(args, data, "f", "nőnem")
handle_all_infl(args, data, "d", "hímnem kettesszám")
handle_all_infl(args, data, "fd", "nőnem kettesszám")
handle_all_infl(args, data, "cpl", "közös többesszám")
handle_all_infl(args, data, "pl", "hímnem többesszám")
handle_all_infl(args, data, "fpl", "nőnem többesszám")
handle_infl(args, data, "el", "elative")
end
}
-- Register the inflections shared by singulative and collective nouns, in
-- display order: variants of the base form (cons, def, obl, inf), dual,
-- paucal, plural (or the uncountable marker), and the plural's variants.
function handle_sing_coll_noun_infls(args, data)
	handle_all_infl(args, data, "", "")
	local numbered = {
		{"d", "dualis"},
		{"pauc", "paucalis"},
	}
	for _, spec in ipairs(numbered) do
		handle_all_infl(args, data, spec[1], spec[2])
	end
	handle_noun_plural(args, data)
	-- The base plural was handled just above; add only its variants.
	handle_all_infl(args, data, "pl", "többesszám", "nobase")
end
-- Collective and singulative tracking code. FIXME: This is old and may not
-- be needed anymore. ARGS are the template arguments. COLLSING is either
-- "coll" or "sing" according to whether we're dealing with collective or
-- singulative nouns. OTHER is the other of the two possible values of
-- COLLSING. DEFGENDER is the default gender for nouns of this type --
-- "m" for collectives, "f" for singulatives.
function track_coll_sing(args, collsing, other, defgender)
	-- The noun's own gender is the unnamed param 2, as in handle_gender().
	local g = ine(args[2]) or defgender
	if g ~= defgender then
		track(collsing .. " n" .. defgender)
	end
	-- Gender of the counterpart form, e.g. singg= on a collective noun.
	local otherg = ine(args[other .. "g"])
	if otherg then
		track(other .. "g")
		if is_masc_sg(otherg) or is_fem_sg(otherg) then
			track(other .. "g/" .. otherg)
		else
			track(other .. "g/-")
		end
	end
end
-- Handler for collective nouns: forces the part-of-speech category to
-- "főnevek", adds the " kollektív főnevek" category and a "kollektív" label,
-- runs the collective/singulative tracking, records the gender (default "m"),
-- then the singulative counterpart and the shared noun inflections.
-- NOTE(review): the assignment target carries no index, so this statement
-- replaces the whole pos_functions table; presumably a part-of-speech key was
-- lost -- confirm against upstream.
pos_functions = {
func = function(args, data)
data.pos_category = "főnevek"
table.insert(data.categories, lang:getCanonicalName() .. " kollektív főnevek")
table.insert(data.inflections, {label = "kollektív"})
track_coll_sing(args, "coll", "sing", "m")
handle_gender(args, data, "m")
-- Handle sing= (the corresponding singulative noun) and singg= (its gender)
handle_infl(args, data, "sing", "szingulatív", "f")
handle_sing_coll_noun_infls(args, data)
end
}
-- Handler for singulative nouns; the mirror image of the collective handler:
-- category "főnevek", the " szingulatív főnevek" category and a "szingulatív"
-- label, tracking, gender (default "f"), the collective counterpart, and the
-- shared noun inflections.
-- NOTE(review): the assignment target carries no index, so this statement
-- replaces the whole pos_functions table; presumably a part-of-speech key was
-- lost -- confirm against upstream.
pos_functions = {
func = function(args, data)
data.pos_category = "főnevek"
table.insert(data.categories, lang:getCanonicalName() .. " szingulatív főnevek")
table.insert(data.inflections, {label = "szingulatív"})
track_coll_sing(args, "sing", "coll", "f")
handle_gender(args, data, "f")
-- Handle coll= (the corresponding collective noun) and collg= (its gender)
handle_infl(args, data, "coll", "kollektív", "m")
handle_sing_coll_noun_infls(args, data)
end
}
-- Register the inflections of an ordinary noun: variants of the base form
-- (cons, def, obl, inf); then, unless SINGONLY is set, the dual, the plural
-- (or the uncountable marker) and the plural's variants; finally the feminine
-- and masculine equivalents.
function handle_noun_infls(args, data, singonly)
	-- Local shorthand so that each inflection reads as a single short call.
	local function add(argpref, label, nobase)
		handle_all_infl(args, data, argpref, label, nobase)
	end

	add("", "")
	if not singonly then
		add("d", "dualis")
		handle_noun_plural(args, data)
		-- The base plural was handled just above; add only its variants.
		add("pl", "többesszám", "nobase")
	end
	add("f", "nőnem")
	add("m", "hímnem")
end
-- Handler that records the gender (no default) and the full set of noun
-- inflections.
-- NOTE(review): the assignment target carries no index, so this statement
-- replaces the whole pos_functions table; presumably a part-of-speech key was
-- lost -- confirm against upstream.
pos_functions = {
func = function(args, data)
handle_gender(args, data)
handle_noun_infls(args, data)
end
}
-- Handler that adds the " számnevek" category, then records the gender (no
-- default) and the full set of noun inflections.
-- NOTE(review): the assignment target carries no index, so this statement
-- replaces the whole pos_functions table; presumably a part-of-speech key was
-- lost -- confirm against upstream.
pos_functions = {
func = function(args, data)
table.insert(data.categories, lang:getCanonicalName() .. " számnevek")
handle_gender(args, data)
handle_noun_infls(args, data)
end
}
-- Handler that records the gender (no default) and the noun inflections with
-- SINGONLY set, i.e. without dual and plural.
-- NOTE(review): the assignment target carries no index, so this statement
-- replaces the whole pos_functions table; presumably a part-of-speech key was
-- lost -- confirm against upstream.
pos_functions = {
func = function(args, data)
handle_gender(args, data)
handle_noun_infls(args, data, "singular only")
end
}
-- Handler that records the gender (no default) and the inflection read from
-- f (labelled "feminine") together with its variants.
-- NOTE(review): the assignment target carries no index, so this statement
-- replaces the whole pos_functions table, and every entry of `params` below
-- has lost its key, which is not valid Lua. Neither the part-of-speech key
-- nor the parameter names can be recovered from this file -- restore them
-- from upstream.
pos_functions = {
params = {
= {},
= {},
= {}, = {}, = {}, = {},
= {}, = {}, = {}, = {},
= {}, = {}, = {}, = {},
= {}, = {}, = {}, = {},
= {}, = {}, = {}, = {},
= {}, = {}, = {}, = {},
= {}, = {}, = {}, = {},
= {}, = {}, = {}, = {},
= {}, = {}, = {}, = {},
= {}, = {}, = {}, = {},
= {}, = {}, = {}, = {},
= {}, = {}, = {}, = {},
= {}, = {}, = {}, = {},
= {}, = {}, = {}, = {},
= {}, = {}, = {}, = {},
= {}, = {}, = {}, = {},
= {}, = {}, = {}, = {},
},
func = function(args, data)
handle_gender(args, data)
handle_all_infl(args, data, "f", "feminine")
end
}
-- Handler that sets the part-of-speech category to "többes számú főnevek",
-- records the gender (default "p") as a non-lemma, and registers the
-- construct/definite/oblique/informal variants of the base form.
-- NOTE(review): the assignment target carries no index, so this statement
-- replaces the whole pos_functions table, and every entry of `params` below
-- has lost its key, which is not valid Lua. Neither the part-of-speech key
-- nor the parameter names can be recovered from this file -- restore them
-- from upstream.
pos_functions = {
params = {
= {},
= {},
= {}, = {}, = {}, = {},
= {}, = {}, = {}, = {},
= {}, = {}, = {}, = {},
= {}, = {}, = {}, = {},
= {}, = {}, = {}, = {},
= {}, = {}, = {}, = {},
= {}, = {}, = {}, = {},
= {}, = {}, = {}, = {},
= {}, = {}, = {}, = {},
= {}, = {}, = {}, = {},
= {}, = {}, = {}, = {},
= {}, = {}, = {}, = {},
= {}, = {}, = {}, = {},
= {}, = {}, = {}, = {},
= {}, = {}, = {}, = {},
= {}, = {}, = {}, = {},
},
func = function(args, data)
data.pos_category = "többes számú főnevek"
handle_gender(args, data, "p", "nonlemma")
handle_all_infl(args, data, "", "") -- handle cons, def, obl, inf
end
}
-- Handler that sets the part-of-speech category to "melléknévalakok",
-- processes the plural (or the pl=- uncountable marker), and records the
-- gender (default "f") as a non-lemma.
-- NOTE(review): the assignment target carries no index and the `params`
-- entries have lost their keys (not valid Lua); restore both from upstream.
-- NOTE(review): `type = boolean` refers to a variable named boolean, not the
-- string "boolean" -- confirm which was intended.
pos_functions = {
params = {
= {},
= {},
= {},
= { type = boolean },
},
func = function(args, data)
data.pos_category = "melléknévalakok"
handle_noun_plural(args, data)
handle_gender(args, data, "f", "nonlemma")
end
}
-- Handler that sets the part-of-speech category to "főnévalakok" and records
-- the gender (default "m-d") as a non-lemma.
-- NOTE(review): the assignment target carries no index and the `params`
-- entries have lost their keys (not valid Lua); restore both from upstream.
pos_functions = {
params = {
= {},
= {},
},
func = function(args, data)
data.pos_category = "főnévalakok"
handle_gender(args, data, "m-d", "nonlemma")
end
}
-- Handler that sets the part-of-speech category to "melléknévalakok" and
-- records the gender (default "m-p") as a non-lemma.
-- NOTE(review): the assignment target carries no index and the `params`
-- entries have lost their keys (not valid Lua); restore both from upstream.
pos_functions = {
params = {
= {},
= {},
},
func = function(args, data)
data.pos_category = "melléknévalakok"
handle_gender(args, data, "m-p", "nonlemma")
end
}
-- Handler that sets the part-of-speech category to "melléknévalakok" and
-- records the gender (default "m-d") as a non-lemma.
-- NOTE(review): the assignment target carries no index and the `params`
-- entries have lost their keys (not valid Lua); restore both from upstream.
pos_functions = {
params = {
= {},
= {},
},
func = function(args, data)
data.pos_category = "melléknévalakok"
handle_gender(args, data, "m-d", "nonlemma")
end
}
-- Handler that records the gender as a non-lemma, with no default gender and
-- without changing the part-of-speech category.
-- NOTE(review): the assignment target carries no index and the `params`
-- entries have lost their keys (not valid Lua); restore both from upstream.
pos_functions = {
params = {
= {},
= {},
},
func = function(args, data)
handle_gender(args, data, nil, "nonlemma")
end
}
-- The verb conjugation form names this module recognizes (I to XV, plus Iq to
-- IVq), passed through list_to_set(). Consulted by handle_conj_form().
local valid_forms = list_to_set(
{"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X", "XI", "XII",
"XIII", "XIV", "XV", "Iq", "IIq", "IIIq", "IVq"})
-- Read a verb conjugation form from ARGS and, when one is given, append a
-- label for it to data.inflections; an invalid form raises an error.
-- NOTE(review): three pieces of this function look damaged and cannot be
-- restored from this file alone:
--   * `ine(args)` passes the whole argument table; the argument that holds
--     the form is not identifiable here;
--   * `not valid_forms` tests the table itself, where the error message
--     suggests a lookup of `form` in it was intended;
--   * the label is the bare string "]", presumably the remains of a link
--     built from `form`.
-- Confirm all three against the upstream module.
local function handle_conj_form(args, data)
local form = ine(args)
if form then
if not valid_forms then
error("Invalid verb conjugation form " .. form)
end
table.insert(data.inflections, {label = ']'})
end
end
-- Handler that only records the verb conjugation form.
-- NOTE(review): the assignment target carries no index and the `params`
-- entry has lost its key (not valid Lua); restore both from upstream.
pos_functions = {
params = {
= {},
},
func = function(args, data)
handle_conj_form(args, data)
end
}
-- Handler for active participles: sets the part-of-speech category to
-- "participles", adds the " aktív participiumok" category, records the
-- conjugation form, and registers the variants of the base form followed by
-- the inflections read from f, d, fd, cpl, pl and fpl.
-- NOTE(review): the assignment target carries no index and the `params`
-- entry has lost its key (not valid Lua); restore both from upstream.
pos_functions = {
params = {
= {},
},
func = function(args, data)
data.pos_category = "participles"
table.insert(data.categories, lang:getCanonicalName() .. " aktív participiumok")
handle_conj_form(args, data)
handle_all_infl(args, data, "", "") -- handle cons, def, obl, inf
handle_all_infl(args, data, "f", "nőnem")
handle_all_infl(args, data, "d", "hímnem dualis")
handle_all_infl(args, data, "fd", "nőnem dualis")
handle_all_infl(args, data, "cpl", "közös többesszám")
handle_all_infl(args, data, "pl", "hímnem többesszám")
handle_all_infl(args, data, "fpl", "nőnem többesszám")
end
}
-- Handler for passive participles: sets the part-of-speech category to
-- "participles", adds the " passzív participiumok" category, records the
-- conjugation form, and registers the variants of the base form followed by
-- the inflections read from f, d, fd, cpl, pl and fpl.
-- NOTE(review): the assignment target carries no index and the `params`
-- entry has lost its key (not valid Lua); restore both from upstream.
-- As written this is the last assignment to pos_functions in the file, so it
-- is the table that export.show ends up using.
pos_functions = {
params = {
= {},
},
func = function(args, data)
data.pos_category = "participles"
table.insert(data.categories, lang:getCanonicalName() .. " passzív participiumok")
handle_conj_form(args, data)
handle_all_infl(args, data, "", "") -- handle cons, def, obl, inf
handle_all_infl(args, data, "f", "nőnem")
handle_all_infl(args, data, "d", "hímnem dualis")
handle_all_infl(args, data, "fd", "nőnem dualis")
handle_all_infl(args, data, "cpl", "közös többesszám")
handle_all_infl(args, data, "pl", "hímnem többesszám")
handle_all_infl(args, data, "fpl", "nőnem többesszám")
end
}
return export
-- For Vim, so we get 4-space tabs
-- vim: set ts=4 sw=4 noet: