Modül:grc-araçlar

Modül belgelemesi
Bu belgeleme Modül:grc-araçlar/belge (düzenle | geçmiş) sayfasından yansıtılmaktadır. Arayüz düzenleyicilerinin deney yapabilmeleri için ayrıca Modül:grc-araçlar/deneme tahtası sayfası kullanılabilir.
Bu modül şu Lua modüllerini kullanıyor:
local export = {}

local m_alfabearaclari = require("Modül:alfabe araçları")
local m_baglantilar = require("Modül:bağlantılar")
local lang = require("Modül:diller").getirKodaGore("grc")
local sc = require("Modül:alfabeler").getirKodaGore("polytonic")

local m_data = mw.loadData("Modül:grc-araçlar/veri")
-- Lookup tables and pattern fragments taken from the data module.
local groups, diacritic_order, conversions =
	m_data.groups, m_data.diacritic_order, m_data.conversions
local diacritics, diacritic = m_data.diacritics, m_data.diacritic
-- Individual entries of the diacritics table, cached under short names.
local macron, breve = diacritics.macron, diacritics.breve
local spacing_macron, spacing_breve =
	diacritics.spacing_macron, diacritics.spacing_breve
local rough, smooth = diacritics.rough, diacritics.smooth
local diaeresis = diacritics.diaeresis
local acute, grave, circumflex =
	diacritics.acute, diacritics.grave, diacritics.circum
local subscript = diacritics.subscript
local combining_diacritic = m_data.combining_diacritic

-- Byte-level Lua pattern matching one UTF-8 encoded character: a lead byte
-- (NUL via %z, ASCII, or a multi-byte lead \194-\244) followed by any number
-- of continuation bytes. Meant for the plain string library, not mw.ustring.
local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*"
-- Byte-level pattern matching one two-byte character with lead byte \206 or
-- \207, i.e. U+0380-U+03FF: the Greek and Coptic block
-- excluding first line of Greek and Coptic block: ͰͱͲͳʹ͵Ͷͷͺͻͼͽ;Ϳ
local basic_Greek = "[\206-\207][\128-\191]"

-- Unicode-aware string functions from mw.ustring, cached as locals.
local find, match, gmatch =
	mw.ustring.find, mw.ustring.match, mw.ustring.gmatch
local sub, gsub = mw.ustring.sub, mw.ustring.gsub
-- Normalization helpers: compose (NFC) and decompose (NFD).
local toNFC, decompose = mw.ustring.toNFC, mw.ustring.toNFD

-- Character property lookup; it starts out empty.
local info = {}

-- One property table is shared by every character of a class. That saves
-- space and allows classes to be compared by table identity.
local vowel_t = {
	vowel = true,
}
-- Iota and upsilon carry the same flags but are kept as two distinct tables.
local iota_t = {
	vowel = true,
	offglide = true,
}
local upsilon_t = {
	vowel = true,
	offglide = true,
}
-- No flags are needed here; the table exists only to be compared against.
local rho_t = {}
-- local consonant_t = {}
local diacritic_t = {
	diacritic = true,
}
-- Same flag as diacritic_t, but a separate table so that equality
-- comparisons can single it out.
local breathing_t = {
	diacritic = true,
}

-- Registers the property table t in info for every given character.
--   characters: either a string, which is split into UTF-8 characters with
--               UTF8_char, or a sequence whose elements are used as keys.
--   t:          the (shared) property table stored for each character.
local function add_info(characters, t)
	if type(characters) == "string" then
		for character in string.gmatch(characters, UTF8_char) do
			-- Index by character; assigning to info itself would discard the table.
			info[character] = t
		end
	else
		for _, character in ipairs(characters) do
			info[character] = t
		end
	end
end

do
	-- Each entry pairs a set of characters with the property table they share.
	-- Entries are registered from top to bottom.
	local registrations = {
		{ { macron, breve,
				diaeresis,
				acute, grave, circumflex,
				subscript,
			}, diacritic_t },
		{ {rough, smooth}, breathing_t },
		{ "ΑΕΗΟΩαεηοω", vowel_t },
		{ "Ιι", iota_t },
		{ "Υυ", upsilon_t },
		-- { "ΒΓΔΖΘΚΛΜΝΞΠΡΣΤΦΧΨϜϘϺϷͶϠβγδζθκλμνξπρσςτφχψϝϙϻϸͷϡ", consonant_t },
		{ "Ρρ", rho_t },
	}
	for _, entry in ipairs(registrations) do
		add_info(entry[1], entry[2])
	end
end

-- Shared empty table handed out for any key that info does not contain,
-- so that lookups in info never yield nil.
local not_recognized = {}

local function fallback_info(t, key)
	return not_recognized
end

setmetatable(info, { __index = fallback_info })

local sparseConcat = require("Modül:table").sparseConcat

local checkType = require "libraryUtil".checkType

-- Builds an argument checker bound to funcName: the returned function
-- forwards its arguments to checkType with funcName prepended.
local function _check(funcName)
	local function check(argIndex, arg, expectType, nilOk)
		return checkType(funcName, argIndex, arg, expectType, nilOk)
	end
	return check
end

-- Calls func once for every match of UTF8_char in str, in order,
-- passing the matched text as the only argument.
local function forEach(str, func)
	local next_char = string.gmatch(str, UTF8_char)
	local char = next_char()
	while char do
		func(char)
		char = next_char()
	end
end

-- Appends chars to list[index] (creating the entry if it is empty), then
-- returns text with its first #chars bytes removed.
--   list:  table of strings being built up.
--   index: key in list to append to.
--   chars: string to append; raises an error if nil.
--   text:  remaining input, assumed to start with chars.
local function add(list, index, chars, text)
	if not chars then
		error("The function add cannot act on a nil character.")
	end
	if list[index] then
		list[index] = list[index] .. chars
	else
		list[index] = chars
	end
	-- Basic string function works here: #chars is a byte count, and
	-- string.sub also counts bytes.
	return text:sub(#chars + 1)
end

-- Passes term to the script-utilities tagging function together with this
-- module's language and script objects; face is forwarded unchanged.
function export.tag(term, face)
	local tag_text = m_alfabearaclari.etiket_yazi
	return tag_text(term, lang, sc, face)
end

-- Builds a link for term through the links module, using this module's
-- language and script objects. alt and tr are passed in the data table
-- (tr under the key "c"); face is forwarded as the second argument.
function export.link(term, face, alt, tr)
	local link_data = {
		sozcuk = term,
		alt = alt,
		dil = lang,
		alf = sc,
		c = tr,
	}
	return m_baglantilar.tam_bag(link_data, face)
end

-- Links term through the links module's dil_bag function, passing only the
-- term, this module's language object and the optional alt text.
local function linkNoTag(term, alt)
	local link_data = {
		sozcuk = term,
		dil = lang,
		alt = alt,
	}
	return m_baglantilar.dil_bag(link_data)
end

-- Convert spacing to combining diacritics, and nonstandard to standard polytonic Greek.
-- The text is decomposed first; every match of UTF8_char is then looked up in
-- the conversions table. Only the converted string is returned.
function export.standardDiacritics(text)
	local converted = string.gsub(decompose(text), UTF8_char, conversions)
	return converted
end

--[=[	This function arranges diacritics in the following order:
			1. macron or breve
			2. breathings or diaeresis
			3. acute, circumflex, or grave
			4. iota subscript
		(The wikilink naming the module that uses this function was lost.)
		
		The position of each diacritic is looked up in diacritic_order.
		A second diacritic for an already occupied position does not raise
		an error: it is inserted next to the first one and the page is
		tracked. The error that used to be raised is kept commented out.
		
		Takes a string of diacritics; returns the reordered string.
]=]
local function reorderDiacriticSequence(diacritics)
	local output = {}
	forEach(diacritics,
		function (diacritic)
			-- NOTE(review): a diacritic missing from diacritic_order gives a
			-- nil index, and the assignment below then raises an error.
			local index = diacritic_order[diacritic]
			if not output[index] then
				output[index] = diacritic
			else
				-- Place breve after macron.
				if diacritic == breve then
					index = index + 1
				end
				-- The following might have odd results when there
				-- are three or more diacritics.
				table.insert(output, index, diacritic)
				-- Record the page instead of raising the error below.
				require("Modül:debug").track("grc-utils/too many diacritics")
				--[[
				local m_templates = require("Module:grc-araçlar/şablonlar")
				error("There are two diacritics, " ..
						m_templates.addDottedCircle(output[index]) .. " and " ..
						m_templates.addDottedCircle(diacritic) ..
						" that belong in the same position. There should be only one."
				)
				--]]
			end
		end)
	-- output may have gaps for unused positions; sparseConcat is used to join it.
	return sparseConcat(output)
end

-- Decomposes text and passes every run of two or more combining diacritics
-- through reorderDiacriticSequence. Returns only the resulting string
-- (the substitution count from gsub is dropped by the parentheses).
function export.reorderDiacritics(text)
	return (gsub(decompose(text),
		combining_diacritic .. combining_diacritic .. "+",
		reorderDiacriticSequence))
end

--[=[
		This breaks a word into meaningful "tokens", which are
		individual letters or diphthongs with their diacritics.
		(The wikilinks naming the modules that use this function were lost.)
		
		The text is decomposed before it is scanned. Returns a sequence of
		token strings. Diacritics found on a character that should not carry
		them are reported with mw.log; no error is raised for them.
--]=]
local function make_tokens(text)
	local tokens, prev_info = {}, {}
	-- vowel_count counts the vowels seen in a row without an intervening
	-- diacritic or other character; it decides whether a vowel can join
	-- the previous one as the second half of a diphthong.
	local token_i, vowel_count = 1, 0
	local prev
	for character in string.gmatch(decompose(text), UTF8_char) do
		local curr_info = info[character]
		-- Split vowels between tokens if not a diphthong.
		if curr_info.vowel then
			vowel_count = vowel_count + 1
			if prev and (not (vowel_count == 2 and curr_info.offglide and prev_info.vowel)
					-- υυ → υ, υ
					-- ιυ → ι, υ
					or prev_info.offglide and curr_info == upsilon_t) then
				token_i = token_i + 1
				if prev_info.vowel then
					vowel_count = 1
				end
			elseif vowel_count == 2 then
				vowel_count = 0
			end
			tokens[token_i] = (tokens[token_i] or "") .. character
		elseif curr_info.diacritic then
			vowel_count = 0
			tokens[token_i] = (tokens[token_i] or "") .. character
			if prev_info.diacritic or prev_info.vowel then
				if character == diaeresis then
					-- Split the diphthong in the current token if a diaeresis was found:
					-- the first letter, then the second letter plus any diacritics.
					local previous_vowel, vowel_with_diaeresis =
						string.match(tokens[token_i],
							"^(" .. basic_Greek .. ")(" .. basic_Greek .. ".+)")
					if previous_vowel then
						tokens[token_i], tokens[token_i + 1] = previous_vowel, vowel_with_diaeresis
						token_i = token_i + 1
					end
				end
			elseif prev_info == rho_t then
				if curr_info ~= breathing_t then
					mw.log(string.format("The character %s in %s should not have the accent %s on it.",
						prev, text, require("Modül:grc-araçlar/şablonlar").addDottedCircle(character)))
				end
			elseif prev then
				mw.log("The character " .. prev .. " cannot have a diacritic on it.")
			else
				-- A diacritic at the very start has no preceding character to
				-- name; concatenating the nil prev would raise an error.
				mw.log("The diacritic at the start of " .. text .. " has no character to attach to.")
			end
		else
			vowel_count = 0
			if prev then
				token_i = token_i + 1
			end
			tokens[token_i] = (tokens[token_i] or "") .. character
		end
		prev = character
		prev_info = curr_info
	end
	return tokens
end

-- Token lists already computed, keyed by the decomposed form of the text.
local cache = {}

-- Returns the token list for text (see make_tokens), computing it only once
-- per distinct decomposed text. The same table is returned on every call
-- for that text, so callers should treat it as read-only.
function export.tokenize(text)
	local decomposed = decompose(text)
	if not cache[decomposed] then
		cache[decomposed] = make_tokens(text)
	end
	return cache[decomposed]
end

--[=[	Places diacritics in the following order:
			1. breathings or diaeresis
			2. acute, circumflex, or grave
			3. macron or breve
			4. iota subscript
		(The wikilink naming the module that uses this function was lost.)
		
		Assumes groups[1] to groups[4] are patterns for, in this order,
		macron/breve, breathing/diaeresis, accent and iota subscript.
		TODO confirm against the data module.
		
		The text is first passed through standardDiacritics. Reordering and
		the conversion of combining macron and breve to their spacing forms
		only happen when the text contains a macron or breve. The result is
		returned in NFC.		]=]
function export.pronunciationOrder(text)
	text = export.standardDiacritics(text)
	
	if find(text, groups[1]) then
	
		text = gsub(text,
			diacritic .. diacritic .. "+",
			function(sequence)
				-- Put breathing and diaeresis first, then accents, then macron or breve
				return table.concat{
					match(sequence, groups[2]) or "",
					match(sequence, groups[3]) or "",
					match(sequence, groups[1]) or "",
					match(sequence, groups[4]) or ""
				}
			end)
		
		text = gsub(text, macron, spacing_macron) -- combining to spacing macron
		text = gsub(text, breve, spacing_breve) -- combining to spacing breve
	end
	
	return toNFC(text)
end


-- Returns a table of any ambiguous vowels in the text, language-tagged.
--   text:  string to examine; raises an error if missing or not a string.
--   noTag: if true, the bare vowel letters are returned instead of tagged
--          vowel-plus-diacritics strings.
-- Returns two tables: the list of ambiguous vowels in order of occurrence,
-- and a set whose keys are the lowercase vowel letters that were found.
-- A token counts as ambiguous when it is a single α, ι or υ (either case)
-- whose diacritics include none of macron, breve, circumflex or iota
-- subscript.
-- NOTE(review): the two patterns below were reconstructed from the variable
-- names and their use here; confirm them against the upstream module.
function export.findAmbig(text, noTag)
	if (not text) or type(text) ~= "string" then
		error("The input to function findAmbig is nonexistent or not a string")
	end
	
	-- Any diacritic that settles the length of the vowel it stands on.
	local lengthDiacritic = "[" .. macron .. breve .. circumflex .. subscript .. "]"
	-- One vowel of ambiguous length followed only by diacritics.
	local aiu_diacritic = "^([" .. "ΑαΙιΥυ" .. "])(" .. diacritic .. "*)$"
	
	-- breaks the word into units
	local output, vowels = {}, {}
	for _, token in ipairs(export.tokenize(text)) do
		if not find(token, m_data.consonant) then
			local vowel, diacritics = match(
				token,
				aiu_diacritic
			)
			
			if vowel and (diacritics == "" or
					not find(diacritics, lengthDiacritic)) then
				local diacriticked_vowel
				if not noTag then
					diacriticked_vowel = export.tag(vowel .. diacritics)
				else
					diacriticked_vowel = vowel
				end
				
				table.insert(output, diacriticked_vowel)
				
				-- Lists the vowel letters that are ambiguous, for categorization purposes.
				vowels[mw.ustring.lower(vowel)] = true
			end
		end
	end
		
	return output, vowels
end

return export
Modül:grc-araçlar

Enciclo

Wikious

Sapientia

Scientia

Boobota

Anandapedia

Sagapedia

Wikithot