Modül:ar-alfabeçeviri

Merhaba, buraya Modül:ar-alfabeçeviri kelimesinin anlamını aramaya geldiniz. DICTIOUS'da Modül:ar-alfabeçeviri kelimesinin tüm sözlük anlamlarını bulmakla kalmayacak, aynı zamanda etimolojisini, özelliklerini ve Modül:ar-alfabeçeviri kelimesinin tekil ve çoğul olarak nasıl söylendiğini de öğreneceksiniz. Modül:ar-alfabeçeviri kelimesi hakkında bilmeniz gereken her şey burada. Modül:ar-alfabeçeviri kelimesinin tanımı, konuşurken veya metinlerinizi yazarken daha kesin ve doğru olmanıza yardımcı olacaktır. XXX'in ve diğer kelimelerin tanımını bilmek, kelime dağarcığınızı zenginleştirir ve size daha fazla ve daha iyi dilsel kaynaklar sağlar.
Modül belgelemesi


-- Yazarlar: Benwing, ZxxZxxZ, Atitarev

local export = {}

local U = mw.ustring.char
local rfind = mw.ustring.find
local rsubn = mw.ustring.gsub
local rmatch = mw.ustring.match
local rsplit = mw.text.split
local gcodepoint = mw.ustring.gcodepoint

-- version of rsubn() that discards all but the first return value
function rsub(term, foo, bar)
	local retval = rsubn(term, foo, bar)
	return retval
end

local zwnj = U(0x200c) -- zero-width non-joiner
local alif_madda = U(0x622)
local alif_hamza_below = U(0x625)
local alif = U(0x627)
local taa_marbuuTa = U(0x629)
local laam = U(0x644)
local waaw = U(0x648)
local alif_maqSuura = U(0x649)
local yaa = U(0x64A)
local fatHataan = U(0x64B)
local Dammataan = U(0x64C)
local kasrataan = U(0x64D)
local fatHa = U(0x64E)
local Damma = U(0x64F)
local kasra = U(0x650)
local shadda = U(0x651)
local sukuun = U(0x652)
local dagger_alif = U(0x670)
local alif_waSl = U(0x671)
--local zwj = U(0x200d) -- zero-width joiner
--local lrm = U(0x200e) -- left-to-right mark
--local rlm = U(0x200f) -- right-to-left mark

local tt = {
	-- consonants
	="b", ="t", ="s̱", ="c", ="ḥ", ="ḫ",
	="d", ="ẕ", ="r", ="z", ="s", ="ş",
	="ṣ", ="ḍ", ="ṭ", ="ẓ", ="ʿ", ="ġ",
	="f", ="ḳ", ="k", ="k", ="l", ="m", ="n",
	="h",
	-- tāʾ-i merbūṭa (özel) - always after a fátḥa (a), silent at the end of
	-- an utterance, "t" in ʾiḍāfa or with pronounced tanwīn. We catch
	-- most instances of tāʾ marbūṭa before we get to this stage.
	="t", -- tāʾ marbūṭa = ة
	-- control characters
	="-", -- ZWNJ (zero-width non-joiner)
	-- ="", -- ZWJ (zero-width joiner)
	-- rare letters
	="p", ="ç", ="v", ="v", ="g", ="g", ="ḳ",
	-- semivowels or long vowels, alif, hamza, special letters
	="ā", -- ʾelif
	-- hemzeli harfler
	="ʾ", -- hemzeli elif
	="ʾ", -- hemze elifin altında
	="ʾ", -- hemzeli vav
	="ʾ", -- hemzeli yā
	="ʾ", -- hemze yerde
	-- long vowels
	="v", --"ū" after ḍamma (u) and not before diacritic
	="y", --"ī" after kasra (i) and not before diacritic
	="ā", -- ʾalif maqṣūra
	="ʾā", -- ʾalif madda
	= "", -- hemze-i vaṣl
	 = "ā", -- ʾalif xanjariyya = dagger ʾalif (Koranic diacritic)
	-- short vowels, šádda and sukūn
	="an", -- fetḥaten
	="un", --ḍammeten
	="in", -- kesreten
	="a", -- fetḥa
	="u", -- ḍamme
	="i", -- kesre
	-- şedde - çift konsonant
	="", --sukūn - no vowel
	-- ligatures
	="lā",
	="llāh",
	-- taṭvīl
	="", -- taṭvīl, sessiz
	-- rakamlar
	="1", ="2", ="3", ="4", ="5",
	="6", ="7", ="8", ="9", ="0",
	-- punctuation (leave on separate lines)
	="?", -- soru işareti
	='“', -- tırnak işareti
	='”', -- tırnak işareti
	=".", -- decimal point
	=",", -- thousands separator
	="%", -- percent sign
	=",", -- comma
	=";" -- semicolon
}

local sun_letters = "تثدذرزسشصضطظلن"
local ince_harfler = "ءإأبتثجدذزسشفكلمنوؤهيئة"
local kalin_harfler = "حخرصضطظعغق"
-- For use in implementing sun-letter assimilation of ال (al-)
local ttsun1 = {}
local ttsun2 = {}
local ttsun3 = {}
for cp in gcodepoint(sun_letters) do
	local ch = U(cp)
	ttsun1 = tt
	ttsun2 = tt .. "-" .. ch
	table.insert(ttsun3, tt)
end
-- For use in implementing elision of al-
local sun_letters_tr = table.concat(ttsun3, "")

local consonants_needing_vowels = "بتثجحخدذرزسشصضطظعغفقكڪلمنهپچڤگڨڧأإؤئءةﷲ"
-- consonants on the right side; includes alif madda
local rconsonants = consonants_needing_vowels .. "ويآ"
-- consonants on the left side; does not include alif madda
local lconsonants = consonants_needing_vowels .. "وي"
-- Arabic semicolon, comma, question mark; taṭwīl; period, exclamation point,
-- single quote for bold/italic
local punctuation = "؟،؛" .. "ـ" .. ".!'"
local numbers = "١٢٣٤٥٦٧٨٩٠"

local before_diacritic_checking_subs = {
	------------ transformations prior to checking for diacritics --------------
	-- convert llh for allāh into ll+shadda+dagger-alif+h
	{"لله", "للّٰه"},
	-- shadda+short-vowel (including tanwīn vowels, i.e. -an -in -un) gets
	-- replaced with short-vowel+shadda during NFC normalisation, which
	-- MediaWiki does for all Unicode strings; however, it makes the
	-- transliteration process inconvenient, so undo it.
	{"()" .. shadda, shadda .. "%1"},
	-- ignore alif jamīla (otiose alif in 3pl verb forms)
	--     #1: handle ḍamma + wāw + alif (final -ū)
	{Damma .. waaw .. alif, Damma .. waaw},
	--     #2: handle wāw + sukūn + alif (final -w in -aw in defective verbs)
	--     this must go before the generation of w, which removes the waw here.
	{waaw .. sukuun .. alif, waaw .. sukuun},
	-- ignore final alif or alif maqṣūra following fatḥatan (e.g. in accusative
	-- singular or words like عَصًا "stick" or هُذًى "guidance"; this is called
	-- tanwin nasb)
	{fatHataan .. "", fatHataan},
	-- same but with the fatḥatan placed over the alif or alif maqṣūra
	-- instead of over the previous letter (considered a misspelling but
	-- common)
	{"" .. fatHataan, fatHataan},
	-- tāʾ marbūṭa should always be preceded by fatḥa, alif, alif madda or
	-- dagger alif; infer fatḥa if not
	{"()" .. taa_marbuuTa, "%1" .. fatHa .. taa_marbuuTa},
	-- similarly for alif between consonants, possibly marked with shadda
	-- (does not apply to initial alif, which is silent when not marked with
	-- hamza, or final alif, which might be pronounced as -an)
	{"(" .. shadda .. "?)" .. alif .. "()",
		"%1" .. fatHa .. alif .. "%2"},
	-- infer fatḥa in case of non-fatḥa + alif/alif-maqṣūra + dagger alif
	{"()(" .. dagger_alif .. ")", "%1" .. fatHa .. "%2"},
	-- infer kasra in case of hamza-under-alif not + kasra
	{alif_hamza_below .. "()", alif_hamza_below .. kasra .. "%1"},
	-- ignore dagger alif placed over regular alif or alif maqṣūra
	{"()" .. dagger_alif, "%1"},

	----------- rest of these concern definite article alif-lām ----------
	-- in kasra/ḍamma + alif + lam, make alif into hamzatu l-waṣl, so we
	-- handle cases like بِالتَّوْفِيق (bi-t-tawfīq) correctly
	{"()" .. alif .. laam, "%1" .. alif_waSl .. laam},
	-- al + consonant + shadda (only recognize word-initially if regular alif): remove shadda
	{"^(" .. alif .. fatHa .. "?" .. laam .. ")" .. shadda, "%1"},
	{"%s(" .. alif .. fatHa .. "?" .. laam .. ")" .. shadda, " %1"},
	{"(" .. alif_waSl .. fatHa .. "?" .. laam .. ")" .. shadda, "%1"},
	-- handle l- hamzatu l-waṣl or word-initial al-
	{"^" .. alif .. fatHa .. "?" .. laam, "el-"},
	{"%s" .. alif .. fatHa .. "?" .. laam, " el-"},
	-- next one for bi-t-tawfīq
	{"()" .. alif_waSl .. fatHa .. "?" .. laam, "%1-l-"},
	-- next one for remaining hamzatu l-waṣl (at beginning of word)
	{alif_waSl .. fatHa .. "?" .. laam, "l-"},
	-- special casing if the l in al- has a shadda on it (as in الَّذِي "that"),
	-- so we don't mistakenly double the dash
	{"l%-" .. shadda, "ll"},
	-- implement assimilation of sun letters
	{"l%-", ttsun2},
}

-- Transliterate the word(s) in TEXT. LANG (the language) and SC (the script)
-- are ignored. OMIT_I3RAAB means leave out final short vowels (ʾiʿrāb).
-- GRAY_I3RAAB means render transliterate short vowels (ʾiʿrāb) in gray.
-- FORCE_TRANSLIT causes even non-vocalized text to be transliterated
-- (normally the function checks for non-vocalized text and returns nil,
-- since such text is ambiguous in transliteration).
function export.tr(text, lang, sc, omit_i3raab, gray_i3raab, force_translit)
	-- make it possible to call this function from a template
	if type(text) == "table" then
		local function f(x) return (x ~= "") and x or nil end
		text, lang, sc, omit_i3raab, force_translit =
			f(text.args), f(text.args), f(text.args), f(text.args), f(text.args)
	end

	for _, sub in ipairs(before_diacritic_checking_subs) do
		text = rsub(text, sub, sub)
	end

	if not force_translit and not has_diacritics(text) then
		return nil
	end
	
	------------ transformations after checking for diacritics --------------
	-- Replace plain alif with hamzatu l-waṣl when followed by fatḥa/ḍamma/kasra.
	-- Must go after handling of initial al-, which distinguishes alif-fatḥa
	-- from alif w/hamzatu l-waṣl. Must go before generation of ū and ī, which
	-- eliminate the ḍamma/kasra.
	text = rsub(text, alif .. "()", alif_waSl .. "%1")
	-- ḍamma + waw not followed by a diacritic is ū, otherwise w
	text = rsub(text, Damma .. waaw .. "()", "ū%1")
	text = rsub(text, Damma .. waaw .. "$", "ū")
	-- kasra + yaa not followed by a diacritic (or ū from prev step) is ī, otherwise y
	text = rsub(text, kasra .. yaa .. "()", "ī%1")
	text = rsub(text, kasra .. yaa .. "$", "ī")
	-- convert shadda to double letter.
	text = rsub(text, "(.)" .. shadda, "%1%1")
	if not omit_i3raab and gray_i3raab then -- show ʾiʿrāb grayed in transliteration
		-- decide whether to gray out the t in ﺓ. If word begins with al- or l-, yes.
		-- Otherwise, no if word ends in a/i/u, yes if ends in an/in/un.
		text = rsub(text, "^(a?l%-+)" .. taa_marbuuTa .. "()",
			'%1<span style="color: #888888">t</span>%2')
		text = rsub(text, "(%sa?l%-+)" .. taa_marbuuTa .. "()",
			'%1<span style="color: #888888">t</span>%2')
		text = rsub(text, taa_marbuuTa .. "()", "t%1")
		text = rsub(text, taa_marbuuTa .. "()",
			'<span style="color: #888888">t</span>%1')
		text = rsub(text, ".", {
			 = '<span style="color: #888888">an</span>',
			 = '<span style="color: #888888">in</span>',
			 = '<span style="color: #888888">un</span>'
		})
		text = rsub(text, "()%s", {
			 = '<span style="color: #888888">a</span> ',
			 = '<span style="color: #888888">i</span> ',
			 = '<span style="color: #888888">u</span> '
		})
		text = rsub(text, "$", {
			 = '<span style="color: #888888">a</span>',
			 = '<span style="color: #888888">i</span>',
			 = '<span style="color: #888888">u</span>'
		})
		text = rsub(text, '</span><span style="color: #888888">', "")
	elseif omit_i3raab then -- omit ʾiʿrāb in transliteration
		text = rsub(text, "", "")
		text = rsub(text, "%s", " ")
		text = rsub(text, "$", "")
	end
	-- tāʾ marbūṭa should not be rendered by -t if word-final even when
	-- ʾiʿrāb (desinential inflection) is shown; instead, use (t) before
	-- whitespace, nothing when final; but render final -ﺍﺓ and -ﺁﺓ as -āh,
	-- consistent with Wehr's dictionary
	text = rsub(text, "()" .. taa_marbuuTa .. "$", "%1h")
	-- Ignore final tāʾ marbūṭa (it appears as "a" due to the preceding
	-- short vowel). Need to do this after graying or omitting word-final
	-- ʾiʿrāb.
	text = rsub(text, taa_marbuuTa .. "$", "")
	if not omit_i3raab then -- show ʾiʿrāb in transliteration
		text = rsub(text, taa_marbuuTa .. "%s", "(t) ")
	else
		-- When omitting ʾiʿrāb, show all non-absolutely-final instances of
		-- tāʾ marbūṭa as (t), with trailing ʾiʿrāb omitted.
		text = rsub(text, taa_marbuuTa, "(t)")
	end
	-- tatwīl should be rendered as - at beginning or end of word. It will
	-- be rendered as nothing in the middle of a word (FIXME, do we want
	-- this?)
	text = rsub(text, "^ـ", "-")
	text = rsub(text, "%sـ", " -")
	text = rsub(text, "ـ$", "-")
	text = rsub(text, "ـ%s", "- ")
	-- ince harfler
	text = rsub(text, "()()", "%1" .. "e")
	text = rsub(text, "()()", "%1" .. "ü")
	text = rsub(text, "()()", "%1" .. "en")
	text = rsub(text, "()()", "%1" .. "ün")
	text = rsub(text, "()(e)()", "%1" .. fatHa .. "%3")
	text = rsub(text, "()(ü)()", "%1" .. Damma .. "%3")
	text = rsub(text, "()(en)()", "%1" .. fatHataan .. "%3")
	text = rsub(text, "()(ün)()", "%1" .. Dammataan .. "%3")
	-- Now convert remaining Arabic chars according to table.
	text = rsub(text, ".", tt)
	text = rsub(text, "()(ā)", "ā")
	text = rsub(text, "aā", "ā")
	-- Implement elision of al- after a final vowel. We do this
	-- conservatively, only handling elision of the definite article rather
	-- than elision in other cases of hamzat al-waṣl (e.g. form-I imperatives
	-- or form-VII and above verbal nouns) partly because elision in
	-- these cases isn't so common in MSA and partly to avoid excessive
	-- elision in case of words written with initial bare alif instead of
	-- properly with hamzated alif. Possibly we should reconsider.
	-- At the very least we currently don't handle elision of الَّذِي (allaḏi)
	-- correctly because we special-case it to appear without the hyphen;
	-- perhaps we should reconsider that.
	text = rsub(text, "('* +'*)a(%-)",
		"%1%2")
	if gray_i3raab then
		text = rsub(text, "('*</span>'* +'*)a(%-)",
			"%1%2")
	end
	-- Special-case the transliteration of allāh, without the hyphen
	text = rsub(text, "^(a?)l%-lāh", "%1llāh")
	text = rsub(text, "(%sa?)l%-lāh", "%1llāh")

	return text
end

local has_diacritics_subs = {
	-- FIXME! What about lam-alif ligature?
	-- remove punctuation and shadda
	-- must go before removing final consonants
	{"", ""},
	-- Remove consonants at end of word or utterance, so that we're OK with
	-- words lacking iʿrāb (must go before removing other consonants).
	-- If you want to catch places without iʿrāb, comment out the next two lines.
	{"$", ""},
	{"%s", " "},
	-- remove consonants (or alif) when followed by diacritics
	-- must go after removing shadda
	-- do not remove the diacritics yet because we need them to handle
	-- long-vowel sequences of diacritic + pseudo-consonant
	{"()", "%1"},
	-- the following two must go after removing consonants w/diacritics because
	-- we only want to treat vocalic wāw/yā' in them (we want to have removed
	-- wāw/yā' followed by a diacritic)
	-- remove ḍamma + wāw
	{Damma .. waaw, ""},
	-- remove kasra + yā'
	{kasra .. yaa, ""},
	-- remove fatḥa/fatḥatan + alif/alif-maqṣūra
	{"", ""},
	-- remove diacritics
	{"", ""},
	-- remove numbers, hamzatu l-waṣl, alif madda
	{"", ""},
	-- remove non-Arabic characters
	{"[^" .. U(0x0600) .. "-" .. U(0x06FF) .. U(0x0750) .. "-" .. U(0x077F) ..
			 U(0x08A0) .. "-" .. U(0x08FF) .. U(0xFB50) .. "-" .. U(0xFDFF) ..
			 U(0xFE70) .. "-" .. U(0xFEFF) .. "]", ""}
}

function has_diacritics(text)
	for _, sub in ipairs(has_diacritics_subs) do
		text = rsub(text, sub, sub)
	end
	return #text == 0
end

-- Return true if transliteration TR is an irregular transliteration of
-- ARABIC. Return false if ARABIC can't be transliterated. For purposes of
-- establishing regularity, hyphens are ignored and word-final tāʾ marbūṭa
-- can be transliterated as "(t)", "" or "t".
function export.irregular_translit(arabic, tr)
	if not arabic or arabic == "" or not tr or tr == "" then
		return false
	end
	local regtr = export.tr(arabic)
	if not regtr or regtr == tr then
		return false
	end
	local arwords = rsplit(arabic, " ")
	local regwords = rsplit(regtr, " ")
	local words = rsplit(tr, " ")
	if #regwords ~= #words or #regwords ~= #arwords then
		return true
	end
	for i=1,#regwords do
		local regword = regwords
		local word = words
		local arword = arwords
		-- Resolve final (t) in auto-translit to t, h or nothing
		if rfind(regword, "%(t%)$") then
			regword = rfind(word, "āh$") and rsub(regword, "%(t%)$", "h") or
				rfind(word, "t$") and rsub(regword, "%(t%)$", "t") or
				rsub(regword, "%(t%)$", "")
		end
		-- Resolve clitics + short a + alif-lām, which may get auto-transliterated
		-- to contain long ā, to short a if the manual translit has it; note
		-- that currently in cases with assimilated l, the auto-translit will
		-- fail, so we won't ever get here and don't have to worry about
		-- auto-translit l against manual-translit assimilated char.
		local clitic_chars = "^" -- separate line to avoid L2R display weirdness
		if rfind(arword, clitic_chars .. fatHa .. "?" .. laam) and rfind(word, "^a%-") then
			regword = rsub(regword, "^()ā", "%1a")
		end
		-- Ignore hyphens when comparing
		if rsub(regword, "%-", "") ~= rsub(word, "%-", "") then
			return true
		end
	end
	return false
end

return export

-- For Vim, so we get 4-space tabs
-- vim: set ts=4 sw=4 noet: