local export = {}
local error = error
local find = string.find
local gline = require("Module:string/gline")
local gsub = string.gsub
local insert = table.insert
local match = string.match
local split = require("Module:string utilities").split
local sub = string.sub
local tonumber = tonumber
local trim = require("Module:string utilities").trim
local type = type
local u = require("Module:string/char")
local unpack = unpack
-- Loads the two subpages "/1" and "/2" of the UnicodeData.txt module page and
-- returns their contents joined by a single newline.
-- NOTE(review): presumably the file is split in two because of page-size
-- limits, and each subpage returns a string - confirm against the pages.
local function get_UnicodeData()
	local root = "Module:User:Theknightwho/UnicodeData.txt"
	local first_part = require(root .. "/1")
	local second_part = require(root .. "/2")
	return first_part .. "\n" .. second_part
end
-- Converts a hexadecimal codepoint string (e.g. "0041") into whatever `u`
-- (Module:string/char) returns for that codepoint number.
local function code_to_char(code)
	local codepoint = tonumber(code, 16)
	return u(codepoint)
end
-- Converts a space-separated list of hexadecimal codepoints (e.g.
-- "0041 0300") into the corresponding string of characters.
--
-- Each run of hex digits, together with any spaces that follow it, is
-- replaced by the result of code_to_char, so the separators are dropped.
-- The outer parentheses discard gsub's second return value (the match count).
local function mapping_to_chars(str)
	-- "%x+" matches a run of hexadecimal digits; the capture must be a hex
	-- string because code_to_char parses it with base 16.
	return (gsub(str, "(%x+) *", code_to_char))
end
-- Returns an iterator over the data lines of a Unicode Character Database
-- style text file.
--
-- Parameters:
--   data: the file contents, passed to `gline` to be iterated line by line.
--   ...:  optional field numbers (1-based). If given, each iteration returns
--         field 1 (the codepoint) followed by only the requested fields, in
--         the order given. If omitted, every field is returned.
--
-- For each line, anything from the first "#" onwards is discarded as a
-- comment, the remainder is trimmed, and blank lines are skipped. The line is
-- then split into fields on ";" (with surrounding whitespace).
--
-- Field 1 is returned as a number, or as a two-element table {first, last}
-- of numbers if the line gives a range ("XXXX..YYYY"). Every other field is
-- returned as a trimmed string, or nil if it is empty.
--
-- Raises "Bad line: <line>" if the codepoint field is not valid hexadecimal,
-- or if a range has more than two parts.
local function iterate_data_lines(data, ...)
	local iter = gline(data)
	local keys_n, keys = select("#", ...)
	if keys_n > 0 then
		-- The codepoint field is always returned first.
		keys, keys_n = {1, ...}, keys_n + 1
	end
	return function()
		local line
		repeat
			line = iter()
			if line == nil then
				return
			end
			local comment = find(line, "#", nil, true)
			if comment then
				line = sub(line, 1, comment - 1)
			end
			line = trim(line)
		until #line > 0
		local fields = split(line, "%s*;%s*", true)
		-- Record the field count now: empty fields are set to nil below, and
		-- the length operator is not reliable on a table with holes.
		local fields_n = #fields
		local cp = fields[1]
		-- Codepoint is a range.
		if find(cp, "..", nil, true) then
			cp = split(cp, "..", true, true)
			local cp_len = #cp
			if cp_len > 2 then
				error("Bad line: " .. line)
			end
			for i = 1, cp_len do
				cp[i] = tonumber(cp[i], 16) or error("Bad line: " .. line)
			end
			fields[1] = cp
		else
			fields[1] = tonumber(cp, 16) or error("Bad line: " .. line)
		end
		for i = 2, fields_n do
			local field = trim(fields[i])
			fields[i] = field ~= "" and field or nil
		end
		if not keys then
			-- Explicit bounds so that nil (empty) fields do not truncate the
			-- returned list.
			return unpack(fields, 1, fields_n)
		end
		local specific_fields = {}
		for i = 1, keys_n do
			specific_fields[i] = fields[keys[i]]
		end
		return unpack(specific_fields, 1, keys_n)
	end
end
-- Stores `val` in `data` for one codepoint or for a whole range.
--
-- Parameters:
--   data: the table to write into (modified in place).
--   cp:   either a single codepoint number, or a table {first, last} giving
--         an inclusive range of codepoints.
--   val:  the value to store under each codepoint key.
local function insert_codepoints(data, cp, val)
	if type(cp) ~= "table" then
		data[cp] = val
		return
	end
	for i = cp[1], cp[2] do
		data[i] = val
	end
end
-- Builds a table mapping codepoints to their decomposition, read from field 6
-- of the UnicodeData text.
--
-- A decomposition that does not start with a "<tag>" is always included. One
-- that does start with a "<tag>" is only included if `include_compatibility`
-- is truthy, in which case the tag is dropped and the rest is converted.
-- Codepoints with an empty decomposition field are skipped.
function export.Decomposition_Mapping(include_compatibility)
	local mappings = {}
	for codepoint, decomposition in iterate_data_lines(get_UnicodeData(), 6) do
		if decomposition then
			local tag, tagged_mapping = match(decomposition, "^<(.*)> ?(.*)")
			-- Choose which text (if any) should be converted for this entry.
			local source
			if not tag then
				source = decomposition
			elseif include_compatibility then
				source = tagged_mapping
			end
			if source ~= nil then
				insert_codepoints(mappings, codepoint, mapping_to_chars(source))
			end
		end
	end
	return mappings
end
-- Builds a table mapping codepoints to the numeric value of the second field
-- of DerivedCombiningClass.txt, leaving out entries whose value is 0.
function export.DerivedCombiningClass()
	local classes = {}
	for codepoint, raw_class in iterate_data_lines(require("Module:Unicode data/raw/DerivedCombiningClass.txt")) do
		local combining_class = tonumber(raw_class)
		-- Only non-zero values are stored.
		if combining_class ~= 0 then
			insert_codepoints(classes, codepoint, combining_class)
		end
	end
	return classes
end
-- Changes_When_NFKC_Casefolded: 10554
-- Full_Composition_Exclusion: 1120
-- NFC_QC: 1252
-- NFD_QC: 13253
-- NFKC_QC: 5096
-- NFKD_QC: 17085
-- NFKC_CF: 10554
-- NFKC_SCF: 10516
-- Builds a table for one property from DerivedNormalizationProps.txt.
--
-- Parameters:
--   f: the property name to select; only lines whose second field equals it
--      are used.
--
-- Returns a table mapping each matching codepoint to the line's third field,
-- or to `true` if the line has no third field.
function export.DerivedNormalizationProps(f)
	local props = {}
	for codepoint, property, value in iterate_data_lines(require("Module:Unicode data/raw/DerivedNormalizationProps.txt")) do
		if property == f then
			-- Lines without a value are stored as a plain flag.
			if not value then
				value = true
			end
			insert_codepoints(props, codepoint, value)
		end
	end
	return props
end
return export