Module:User:Theknightwho/UCA

Hello, you have come here looking for the meaning of the word Module:User:Theknightwho/UCA. In DICTIOUS you will not only get to know all the dictionary meanings for the word Module:User:Theknightwho/UCA, but we will also tell you about its etymology, its characteristics and you will know how to say Module:User:Theknightwho/UCA in singular and plural. Everything you need to know about the word Module:User:Theknightwho/UCA you have here. The definition of the word Module:User:Theknightwho/UCA will help you to be more precise and correct when speaking or writing your texts. Knowing the definition of Module:User:Theknightwho/UCA, as well as those of other words, enriches your vocabulary and provides you with more and better linguistic resources.

This is a private module sandbox of Theknightwho, for their own experimentation. Items in this module may be added and removed at Theknightwho's discretion; do not rely on this module's stability.


local char = string.char
local concat = table.concat
local floor = math.floor
local insert = table.insert
local ipairs = ipairs
local split = require("Module:string utilities").split
local sub = string.sub
local tonumber = tonumber

--[==[local skipped_ranges = {}
do
	local hex = require("hex").to_hex
	local udata = require("Module:User:Theknightwho/UnicodeData.txt")
	
	local last = 0
	for line in udata:gmatch("+") do
		local cp = line:match("^%x+")
		local a = cp
		if cp then
			cp = tonumber(cp, 16)
			if cp - last > 0x1000 then
				insert(skipped_ranges, {hex(last), hex(cp - 1)})
			end
			last = cp
		end
	end
	return skipped_ranges
end]==]



local ducet = require("Module:User:Theknightwho/UCA/DUCET")

-- List of {first, last} codepoint pairs, each labelled with the Unicode block(s)
-- it covers. export.weights selects the entries belonging to the requested
-- plane. Presumably these are the ranges whose collation weights are derived
-- implicitly rather than listed in the DUCET data -- TODO confirm.
local implicit_ranges = {
	{0x3400, 0x4DBF}, -- CJK Unified Ideographs Extension A
	{0x4E00, 0x9FFF}, -- CJK Unified Ideographs
	{0xAC00, 0xD7AF}, -- Hangul Syllables
	{0xD800, 0xF8FF}, -- Surrogates, Private Use Area
	{0x12550, 0x12F8F}, -- Unassigned
	{0x13460, 0x143FF}, -- Unassigned
	{0x14680, 0x167FF}, -- Unassigned
	{0x17000, 0x1AFEF}, -- Tangut, Tangut Components, Khitan Small Script, Tangut Supplement, Unassigned
	{0x1B170, 0x1BBFF}, -- Nushu, Unassigned
	{0x1BCB0, 0x1CEFF}, -- Unassigned
	{0x1DAB0, 0x1DEFF}, -- Unassigned
	{0x20000, 0x2A6DF}, -- CJK Unified Ideographs Extension B
	{0x2A700, 0x2B73F}, -- CJK Unified Ideographs Extension C
	{0x2B740, 0x2B81F}, -- CJK Unified Ideographs Extension D
	{0x2B820, 0x2CEAF}, -- CJK Unified Ideographs Extension E
	{0x2CEB0, 0x2EBEF}, -- CJK Unified Ideographs Extension F
	{0x2EBF0, 0x2EE5F}, -- CJK Unified Ideographs Extension I
	{0x30000, 0x3134F}, -- CJK Unified Ideographs Extension G
	{0x31350, 0x323AF}, -- CJK Unified Ideographs Extension H
}

local export = {}

do
	-- Maps each byte that has a dedicated Lua escape sequence, or that must be
	-- escaped inside a double-quoted string literal, to its escaped spelling.
	-- The keys cover bytes 0x07-0x0D plus the double quote and the backslash.
	local escapes = {
		["\a"] = "\\a", ["\b"] = "\\b", ["\t"] = "\\t",
		["\n"] = "\\n", ["\v"] = "\\v", ["\f"] = "\\f",
		["\r"] = "\\r", ["\""] = "\\\"", ["\\"] = "\\\\"
	}

	-- Implicit-weight ranges of the plane being processed; assigned by
	-- export.weights on every call.
	local ranges
	
	-- Converts a hexadecimal weight string (0000-FFFF) into two bytes, most
	-- significant byte first, e.g. "FFFD" becomes "\255\253".
	-- @param w  string of hexadecimal digits
	-- @return   two-character string holding the high and low byte
	-- Values above 0xFFFF make string.char raise, as the high byte no longer
	-- fits in one character.
	local function base_256(w)
		w = tonumber(w, 16)
		-- The quotient must be floored explicitly: string.char rejects
		-- non-integers on Lua 5.3+, and Lua 5.1 converts them in a
		-- platform-dependent way that may round instead of truncate.
		return char(floor(w / 0x100)) .. char(w % 0x100)
	end
	
	-- Parses one line of the DUCET data and, if it describes a single
	-- codepoint in the requested plane with exactly one collation element,
	-- writes that element into `output` as 6 one-character strings.
	-- @param line    one line of the DUCET text
	-- @param plane   plane number (0x0 to 0x10) being generated
	-- @param output  table of one-character strings, filled in place
	-- Lines that do not match (comments, directives, contractions, expansions)
	-- are silently skipped.
	-- NOTE(review): the two patterns below were reconstructed from the
	-- allkeys.txt line format ("XXXX ; [.AAAA.BBBB.CCCC] # name") -- confirm
	-- against the actual data module.
	local function process_line(line, plane, output)
		-- Get the codepoint field (everything before the first ";"), and return if not found.
		local cp = line:match("^(%x[%x ]-)%s*;")
		if not cp then
			return
		end
		-- If there is more than one codepoint (i.e. a contraction), skip the line. (TODO)
		if cp:find(" ", 1, true) then
			-- TODO
			return
		end
		-- Check this is the correct plane, and return if not. Planes range from 0x0 to 0x10 (17 in total), and each has 0x10000 characters, from U+(X)0000 to U+(X)FFFF.
		cp = tonumber(cp, 16)
		if floor(cp / 0x10000) ~= plane then
			return
		end
		-- Normalize codepoint by removing the plane.
		cp = cp % 0x10000
		-- Each collation element becomes a 6-byte record: the marker character
		-- ("." or "*"), the first two weights as 2 bytes each (e.g. 0xFFFD is
		-- "\255" and "\253"), and the final weight as 1 byte, because it only
		-- ranges from 0x0000 to 0x001F.
		local weights = {}
		for var, w1, w2, w3 in line:gmatch("%[([.*])(%x+)%.(%x+)%.(%x+)%]") do
			insert(weights, var .. base_256(w1) .. base_256(w2) .. char(tonumber(w3, 16)))
		end
		-- Only a single collation element is supported. Expansions (more than
		-- one, TODO) and lines with none are skipped, so nothing is indexed
		-- on a missing record.
		if #weights ~= 1 then
			-- TODO
			return
		end
		weights = weights[1]
		-- Records are laid out back to back, 6 bytes per codepoint, which
		-- matches the 0x10000 * 6 slots that export.weights fills.
		local offset = cp * 6
		for i = 1, #weights do
			output[offset + i] = sub(weights, i, i)
		end
	end
	
	-- Builds the weight table for one Unicode plane and returns it as text
	-- escaped so that it can be pasted between double quotes as a Lua string
	-- literal.
	-- @param plane  plane number (0x0 to 0x10); compared numerically, so it
	--               must be a number
	-- @return       string of 0x60000 escaped bytes: one 6-byte record per
	--               codepoint of the plane, zero-filled where the DUCET data
	--               supplies no single collation element
	-- Raises an error if `plane` is nil or false.
	function export.weights(plane)
		if not plane then
			error("Please enter a plane.")
		end
		-- Collate the implicit ranges for this plane (if any). The plane of a
		-- range is taken from its first codepoint.
		ranges = {}
		for _, range in ipairs(implicit_ranges) do
			if floor(range[1] / 0x10000) == plane then
				insert(ranges, range)
			end
		end
		local output = {}
		-- Feed the DUCET text to the parser one non-empty line at a time.
		for line in ducet:gmatch("[^\r\n]+") do
			process_line(line, plane, output)
		end
		-- Fill in any blanks with zeroes (0x10000 codepoints * 6 bytes each).
		for i = 1, 0x60000 do
			output[i] = output[i] or "\0"
		end
		-- Escape from the end backwards, so that output[i + 1] has already
		-- been processed when it is inspected.
		for i = #output, 1, -1 do
			local b = output[i]:byte()
			if b > 0x7E then
				-- 127-255 always take three decimal digits, so no padding is needed.
				output[i] = "\\" .. b
			elseif b < 0x07 or b > 0x0D and b < 0x20 then
				-- A short decimal escape followed by a literal digit would be
				-- read as one longer escape, so pad it to three digits.
				local nxt = output[i + 1]
				if nxt and nxt:match("^%d$") then
					b = ("%03d"):format(b)
				end
				output[i] = "\\" .. b
			else
				-- Named escapes, the quote and the backslash; anything else is kept as-is.
				output[i] = escapes[output[i]] or output[i]
			end
		end
		return concat(output)
	end
end

return export