Module:zh-sortkey

Bonjour, vous êtes venu ici pour chercher la signification du mot Module:zh-sortkey. Dans DICTIOUS, vous trouverez non seulement toutes les significations du dictionnaire pour le mot Module:zh-sortkey, mais vous apprendrez également son étymologie, ses caractéristiques et comment dire Module:zh-sortkey au singulier et au pluriel. Tout ce que vous devez savoir sur le mot Module:zh-sortkey est ici. La définition du mot Module:zh-sortkey vous aidera à être plus précis et correct lorsque vous parlerez ou écrirez vos textes. Connaître la définition de Module:zh-sortkey, ainsi que celles d'autres mots, enrichit votre vocabulaire et vous fournit des ressources linguistiques plus nombreuses et de meilleure qualité.

La documentation pour ce module peut être créée à Module:zh-sortkey/Documentation

-- Table of public functions; it is returned at the end of the module.
local export = {}

-- Namespace text of the page currently being processed. log() compares it
-- with "Module" to decide whether to emit debug output.
local namespace = mw.title.getCurrentTitle().nsText

-- Local alias for Scribunto's mw.ustring.sub; the functions below call it
-- with character positions (i, j) to pull single characters or ranges out of
-- the text.
local substring = mw.ustring.sub

-- Debug helper: forwards its arguments to mw.log, but only when the current
-- page's namespace text is "Module". On any other page it does nothing.
local function log(...)
	if namespace ~= "Module" then
		return
	end

	mw.log(...)
end

--[[
	The number of characters or ideographic sequences that must follow each
	ideographic description character (IDC).

	Keys are the IDCs U+2FF0..U+2FFB in code point order; the value is the
	arity of the operator (two components, or three for the two "middle"
	operators).
]]
local IDchars = {
	["⿰"] = 2, -- U+2FF0 left to right
	["⿱"] = 2, -- U+2FF1 above to below
	["⿲"] = 3, -- U+2FF2 left to middle and right
	["⿳"] = 3, -- U+2FF3 above to middle and below
	["⿴"] = 2, -- U+2FF4 full surround
	["⿵"] = 2, -- U+2FF5 surround from above
	["⿶"] = 2, -- U+2FF6 surround from below
	["⿷"] = 2, -- U+2FF7 surround from left
	["⿸"] = 2, -- U+2FF8 surround from upper left
	["⿹"] = 2, -- U+2FF9 surround from upper right
	["⿺"] = 2, -- U+2FFA surround from lower left
	["⿻"] = 2, -- U+2FFB overlaid

	--[[
	-- in future perhaps: https://www.unicode.org/L2/L2018/18012-irgn2273-four-new-idcs.pdf
	-- NOTE(review): the original keys of these four entries are not
	-- recoverable from the source; U+2FFC..U+2FFF is an assumption based on
	-- the arities (2, 2, 1, 1). Confirm before enabling.
	["⿼"] = 2,
	["⿽"] = 2,
	["⿾"] = 1,
	["⿿"] = 1,
	--]]
}

--[[
	Returns the index in the string where the ideographic description sequence
	(IDS) that starts at index i ends, or nil if the sequence is incomplete
	(the string ends before every expected component has been found).
	Recurses whenever another ideographic description character (IDC) is found
	in a component position.

	text:   the string being scanned.
	IDchar: the IDC located at index i of text.
	i:      character index of IDchar in text.

	Returns nil as well when any argument is missing or when IDchar is not a
	key of IDchars.
]]
local function findEndOfIDS(text, IDchar, i)
	if not (text and IDchar and i) then
		return nil
	end

	-- Number of components expected after the current IDC.
	local components = IDchars[IDchar]
	if not components then
		return nil
	end

	local j = i

	for _ = 1, components do
		j = j + 1

		local char = substring(text, j, j)

		if char == "" then
			-- Ran off the end of the text before the sequence was complete.
			return nil
		elseif IDchars[char] then
			-- The component is itself an IDS; jump to its last character.
			j = findEndOfIDS(text, char, j)
			if not j then
				-- Propagate the failure instead of doing arithmetic on nil.
				return nil
			end
		end
	end

	return j
end

--[=[
	The sortkey data modules handle two sets of codepoints, each split into
	pages of 500 codepoints.
	The first set runs from U+3400 to U+9FEA (data modules 001 to 056), then
	there is a gap of 90134 codepoints. The second set runs from U+20000 to
	U+2EBE0 (data modules 057 to 177).

	Maps the first codepoint of each set to the number of its first data
	module.
]=]
local moduleStart = {
	[0x3400] = 1,
	[0x20000] = 57,
}

--[=[
	Looks up a codepoint in the paged sortkey data modules.

	codepoint:    the codepoint to look up (number).
	start:        first codepoint of the set that codepoint belongs to; must
	              be a key of moduleStart.
	returnModule: if truthy, return the name of the data module instead of
	              loading it.

	Returns the data module name (string) when returnModule is truthy.
	Otherwise returns the entry stored for codepoint in that data module, or
	nil if the module could not be loaded or has no such entry.
]=]
local function getFromModule(codepoint, start, returnModule)
	local firstModule = moduleStart[start]
	if not firstModule then
		error("getFromModule: unrecognized start codepoint " .. tostring(start))
	end

	-- Floor explicitly so that "%03d" always receives an integral value.
	local moduleName = string.format(
		"Module:zh-sortkey/data/%03d",
		math.floor((codepoint - start) / 500) + firstModule
	)
--	log(codepoint .. ": data module: " .. moduleName)

	if returnModule then
		return moduleName
	end

	-- A missing data module is an expected condition, so the load error is
	-- swallowed and reported to the caller as nil.
	local success, data = pcall(mw.loadData, moduleName)

	if success then
--		log("success! ... " .. codepoint .. ": " .. tostring(data[codepoint]))
		return data[codepoint]
	end

--	log("failure: " .. codepoint .. " (" .. mw.ustring.char(codepoint) .. ")")
	return nil
end

--[[
	Returns the sortkey data for a single character.

	char:         a string (its first codepoint is used) or a codepoint number.
	returnModule: if truthy, return the name of the data module that covers
	              the codepoint instead of the data itself.

	Returns whatever getFromModule returns for codepoints in U+3400..U+9FEA or
	U+20000..U+2EBE0, and nil for codepoints outside those ranges.
	Raises an error if char is neither a non-empty string nor a number.
]]
function export.getData(char, returnModule)
	if type(char) == "string" then
		char = mw.ustring.codepoint(char)
		if not char then
			-- Empty string: there is no codepoint to look up.
			error("getData must operate on a single character or codepoint.")
		end
	elseif type(char) ~= "number" then
		-- The type name must be the string "number"; comparing against the
		-- bare identifier `number` (an undefined global, i.e. nil) rejected
		-- every numeric argument.
		error("getData must operate on a single character or codepoint.")
	end

--	log(char, mw.ustring.char(char))

	-- Between first code point of the CJK Unified Ideographs Extension A block
	-- and last assigned code point of the CJK Unified Ideographs block.
	if char >= 0x3400 and char <= 0x9FEA then
		return getFromModule(char, 0x3400, returnModule)
	-- In the Supplementary Ideographic Plane, between the first code point of
	-- the CJK Unified Ideographs Extension B block and the last assigned code
	-- point of the CJK Unified Ideographs Extension F block.
	elseif char >= 0x20000 and char <= 0x2EBE0 then
		return getFromModule(char, 0x20000, returnModule)
	else
--		log("not in range: " .. char .. " (" .. mw.ustring.char(char) .. ")")
	end

	return nil
end

--[=[
	Builds a sortkey for text by replacing each character with the value
	returned by export.getData, falling back to the character itself when no
	data is found.

	text: the string to convert.
	lang: optional language code; if given and not one of the allowed codes
	      below, text is returned unchanged.
	sc:   optional script code; if given and not "Hani", text is returned
	      unchanged.

	Returns the sortkey as a string.
]=]
function export.makeSortKey(text, lang, sc)
	local allowed_langs = {
		zh = true,
		ja = true,
		ryu = true,
		vi = true,
		za = true,
	}
	if lang and not allowed_langs[lang] then
		return text
	end

	if sc and sc ~= "Hani" then
		return text
	end

	local sort = {}

	-- The text is not modified inside the loop, so measure it once.
	local length = mw.ustring.len(text)

	local i = 1
	while i <= length do
		local character = substring(text, i, i)
		--[=[
			If we encounter an ideographic description character (IDC),
			find out if it begins a valid ideographic description sequence (IDS).

			If the IDS is valid and a sortkey for it is listed in
			[[Module:zh-sortkey/data/unsupported]], then insert
			the sortkey, and move to the next character after the
			IDS.

			Otherwise, insert the IDC into the sortkey and move to the next
			character after the IDC.

			If the IDS is valid and no sortkey for it is found, track it.
		]=]
		if IDchars[character] then
			local j = findEndOfIDS(text, character, i)
			local IDS, data
			if j then
				IDS = substring(text, i, j)
				data = mw.loadData("Module:zh-sortkey/data/unsupported")[IDS]
			end
			if not data then
				if IDS then
					require("Module:debug").track("zh-sortkey/IDS-without-sortkey")
					mw.log("ideographic description sequence without sortkey: '"
						.. IDS .. "'")
				else
					require("Module:debug").track("zh-sortkey/invalid-IDS")
					mw.log("invalid ideographic description sequence at the beginning of '"
						.. substring(text, i) .. "'")
				end
			end
			if IDS and data then
				table.insert(sort, data)
				-- Skip the rest of the IDS; the increment below moves past
				-- its last character.
				i = j
			else
				table.insert(sort, character)
			end
		else
			table.insert(sort, export.getData(character) or character)
		end
		i = i + 1
	end

	return table.concat(sort)
end

-- Hand the table of public functions (getData, makeSortKey) to the caller.
return export