-- This is a private module sandbox of Theknightwho, for their own experimentation. Items in this module may be added and removed at Theknightwho's discretion; do not rely on this module's stability.
local m_unicode = require("Module:Unicode data")
local m_uni_alias = require("Module:Unicode data/aliases")
local Array = require("Module:array")
local char_to_script = require("Module:scripts").charToScript
local concat = table.concat
local get_block_range = m_unicode.get_block_range
local get_category_long_name = m_unicode.get_category_long_name
local get_script_alias = m_unicode.get_script_alias
local html_create = mw.html.create
local insert = table.insert
local is_assigned = m_unicode.is_assigned
local lookup_category = m_unicode.lookup_category
local lookup_script = m_unicode.lookup_script
local max = math.max
local min = math.min
local new_title = mw.title.new
local process_params = require("Module:parameters").process
local safe_require = require("Module:utilities").safe_require
local u = require("Module:string utilities").char
local general_category_data = require("Module:Unicode data/category")
local general_category_aliases = general_category_data.long_names
local script_data = require("Module:Unicode data/scripts")
local export = {}
local Unicode_version = "16.0"
-- Reports whether a block is "large", i.e. spans more than 0x1000 codepoints (1/16 of a plane).
-- `block_start` and `block_end` are the first and last codepoints of the block (both inclusive).
local function is_large_block(block_start, block_end)
	local codepoint_count = block_end - (block_start - 1)
	return codepoint_count > 0x1000
end
-- Works out, from a page name, which Unicode block (and optionally which sub-range) a page describes, so that parameters can be generated automatically.
-- Recognised formats are "Appendix:Unicode/Block name" and, for large blocks only, "Appendix:Unicode/Block name/X000", where "X000" is the first codepoint of a sublist of (up to) 0x1000 characters.
-- `title` - optional page name; when omitted (or when no title object can be made from it) the current page is used.
-- Returns nothing if the page does not follow either format. Otherwise returns a table with the keys:
-- `name` - the block name
-- `block_start` - the first codepoint in the block
-- `block_end` - the last codepoint in the block
-- and, for a range subpage only:
-- `range_start` - the first codepoint in the range
-- `range_end` - the last codepoint in the range
local function parse_page_name(title)
	local page = title and new_title(title) or mw.title.getCurrentTitle()
	-- Only pages in namespace 100 are considered.
	if page.namespace ~= 100 then
		return
	end
	local parent_text = page.baseText
	-- Anything whose parent is not "Unicode" itself can only be a range subpage (Appendix:Unicode/Block name/0000).
	local on_range_subpage = parent_text ~= "Unicode"
	local block_name
	if not on_range_subpage then
		-- Appendix:Unicode/Block name.
		block_name = page.subpageText
	else
		local parent_page = new_title(parent_text, 100)
		-- The block page must itself be a direct subpage of Appendix:Unicode.
		if parent_page.baseText ~= "Unicode" then
			return
		end
		block_name = parent_page.subpageText
	end
	local first, last = get_block_range(block_name)
	if not (first and last) then
		return
	end
	local info = {
		name = block_name,
		block_start = first,
		block_end = last
	}
	if not on_range_subpage then
		return info
	end
	local suffix = page.subpageText
	local range_start = tonumber(suffix, 16)
	-- Must be a hex number.
	if not range_start then
		return
	end
	-- Must be written as an uppercase codepoint padded to at least 4 digits.
	if suffix ~= ("%04X"):format(range_start) then
		return
	end
	-- Must be within the block's range.
	if not (range_start >= first and range_start <= last) then
		return
	end
	-- Must be a large block.
	if not is_large_block(first, last) then
		return
	end
	-- Must be the start of the block or a X000 codepoint.
	local offset = range_start % 0x1000
	if range_start ~= first and offset ~= 0 then
		return
	end
	info.range_start = range_start
	-- The range runs to the end of its 0x1000-aligned chunk, capped at the end of the block.
	info.range_end = min(range_start - offset + 0xFFF, last)
	return info
end
-- Collects per-codepoint data for every codepoint in the inclusive range `block_start` to `block_end`.
-- `filterer` - optional predicate called with each codepoint; when given, only codepoints for which it returns a truthy value are included.
-- Returns an array (ascending by codepoint) of tables with the keys `cp`, `aliases`, `name`, `script`, `category`, `image` and `image_emoji`.
local function get_data_for_code_point_range(block_start, block_end, filterer)
	-- Each item is fetched through the matching `lookup_<item>` function of Module:Unicode data.
	-- `lookup_category` and `lookup_script` are used elsewhere in this file; NOTE(review): confirm the module also exports `lookup_name`, `lookup_image` and `lookup_image_emoji`.
	local items = { "name", "script", "category", "image", "image_emoji" }
	local cps = {}
	for cp = block_start, block_end do
		if not filterer or filterer(cp) then
			local data = {cp = cp}
			-- NOTE(review): assumes the aliases module is a table keyed by codepoint (the caller iterates `data.aliases` with ipairs) - confirm.
			data.aliases = m_uni_alias[cp]
			for _, item in ipairs(items) do
				-- Store each property under its own key rather than replacing `data` wholesale.
				data[item] = m_unicode["lookup_" .. item](cp)
			end
			insert(cps, data)
		end
	end
	return cps
end
-- Template entry point: renders a wikitable listing every Unicode block that lies entirely within a codepoint range.
-- Reads two required numeric parameters (hex allowed) from the parent frame: 1 = first codepoint, 2 = last codepoint.
-- Returns the table as a wikitext string with one row per block: start, end, and a link to the block's page.
function export.block_list_t(frame)
	local required_num_param = {required = true, type = "number", allow_hex = true}
	local args = process_params(frame:getParent().args, {
		[1] = required_num_param,
		[2] = required_num_param,
	})
	local start_codepoint, end_codepoint = args[1], args[2]
	local result = {
		"{| class=\"wikitable\" style=\"width: 100%;\"\n! width=\"10%;\" | Start\n! width=\"10%;\" | End\n ! Block name\n"
	}
	for _, name, block_start, block_end in m_unicode.enum_blocks() do
		-- Only blocks fully contained in the requested range are listed.
		if (block_start >= start_codepoint) and (block_end <= end_codepoint) then
			-- Block pages live at "Appendix:Unicode/Block name"; the block name is both the link target suffix and the label.
			insert(result, (
				"|-\n|U+%04X\n|U+%04X\n|[[Appendix:Unicode/%s|%s]]\n"
			):format(block_start, block_end, name, name))
		end
	end
	insert(result, "|}")
	return concat(result)
end
export.show_blocks = export.block_list_t
-- Determines whether every assigned codepoint from `page_data.block_start` to `page_data.block_end` yields the same result from `lookup_func`.
-- Returns that common result if so; returns nothing as soon as two assigned codepoints disagree. Unassigned codepoints are skipped.
local function get_shared_value(page_data, lookup_func)
	local shared
	for codepoint = page_data.block_start, page_data.block_end do
		if is_assigned(codepoint) then
			local current = lookup_func(codepoint)
			if shared == nil then
				-- Nothing recorded yet: adopt this codepoint's result as the candidate.
				shared = current
			elseif current ~= shared then
				return
			end
		end
	end
	return shared
end
-- Builds a navigation wikilink to `target` whose label is `display` accompanied by an arrow.
-- `left_arrow` - when truthy the label is "⟵ display" (previous-page link); otherwise it is "display ⟶" (next-page link).
-- Returns the wikilink as a string.
-- NOTE(review): the format string previously contained no specifiers, so all three arguments were ignored; the piped-link form below is a reconstruction - confirm the exact separator between arrow and label.
local function navlink(target, display, left_arrow)
	return ("[[%s|%s %s]]"):format(
		target,
		left_arrow and "⟵" or display,
		left_arrow and display or "⟶"
	)
end
-- Produces the navigation link to a neighbouring block's page, addressed relative to the current page.
-- `block_name` - name of the neighbouring block; when falsy an empty string is returned.
-- `left_arrow` - passed through to `navlink` to choose the arrow side.
-- `subpage` - when truthy, the relative path climbs one extra level ("../../Block name" instead of "../Block name").
local function block_navlink(block_name, left_arrow, subpage)
	if not block_name then
		return ""
	end
	local climb = subpage and "../" or ""
	local target = ("%s../%s"):format(climb, block_name)
	return navlink(target, block_name, left_arrow) or ""
end
-- Produces the navigation link to a sibling range subpage, addressed as "../XXXX" where XXXX is `range_start` in hex.
-- The label reads "U+XXXX to U+YYYY". If either `range_start` or `range_end` is falsy, an empty string is returned instead.
-- `left_arrow` - passed through to `navlink` to choose the arrow side.
local function subpage_navlink(range_start, range_end, left_arrow)
	if not (range_start and range_end) then
		return ""
	end
	local target = ("../%04X"):format(range_start)
	local label = ("U+%04X to U+%04X"):format(range_start, range_end)
	return navlink(target, label, left_arrow) or ""
end
-- Template entry point: builds the header for a Unicode block page or range subpage.
-- The output is a navigation table (previous/next block, plus previous/next range on range subpages), a descriptive sentence, an optional list of range subpages for large blocks, categories, and a templatestyles tag.
-- Raises an error if `parse_page_name` cannot interpret the page name.
-- NOTE(review): several lines below look as if bracketed text was stripped from this copy (table keys in the parameter spec, index expressions, and wikilink markup inside string literals). They are flagged individually; confirm against the upstream module before relying on this function.
function export.char_list_header_t(frame)
local parent = frame:getParent()
-- Arguments come from the parent frame, unless there is no parent or the parent's title is the current page, in which case this frame's own arguments are used.
-- NOTE(review): the two entries in the parameter spec below have no keys, which is not valid Lua; the keys were presumably lost (one of them is likely "pagename", since `.pagename` is read from the result) - confirm.
local pagename = process_params((parent and parent:getTitle() ~= mw.title.getCurrentTitle().fullText and parent or frame).args, {
= {demo = true},
= true,
}).pagename
local page_data = parse_page_name(pagename)
if not page_data then
-- NOTE(review): the message appears truncated (a link target seems to be missing before the "]").
error("Page is not a valid subpage of ].")
end
local name = page_data.name
-- Walk the block list in order to find the blocks immediately before and after this one.
-- `prev_block` keeps the last name seen before the match; `next_block` is the first name seen after it.
local prev_block, next_block, found
for _, block_name in m_unicode.enum_blocks() do
if block_name == name then
found = true
elseif found then
next_block = block_name
break
else
prev_block = block_name
end
end
local block_start, block_end = page_data.block_start, page_data.block_end
local range_start, range_end = page_data.range_start, page_data.range_end
-- `page_type` is "range subpage" or "large block" for large blocks, and false for blocks that are not large.
local page_type = is_large_block(block_start, block_end) and (
range_start and "range subpage" or "large block"
)
-- Heading cell containing the block name as an h2.
local heading = html_create("td")
:addClass("unicode-header-heading")
:tag("h2")
:wikitext(name)
:done()
-- On range subpages the heading spans both navigation rows.
if page_type == "range subpage" then
heading = heading:attr("rowspan", 2)
end
-- First row: previous-block link, heading, next-block link.
-- The third argument to `block_navlink` adds an extra "../" when linking from a range subpage.
local tbl = html_create("table")
:addClass("unicode-header-table")
:tag("tr")
:tag("td")
:addClass("unicode-nav-button")
:addClass("unicode-nav-button-left")
:wikitext(block_navlink(prev_block, true, page_type == "range subpage"))
:done()
:node(heading)
:tag("td")
:addClass("unicode-nav-button")
:addClass("unicode-nav-button-right")
:wikitext(block_navlink(next_block, false, page_type == "range subpage"))
:allDone()
-- Second row (range subpages only): links to the previous and next ranges within the block.
-- For the first range the start argument is false, and for the last range the end argument is false; `subpage_navlink` returns "" in both cases.
if page_type == "range subpage" then
tbl = tbl:tag("tr")
:tag("td")
:addClass("unicode-nav-button")
:addClass("unicode-nav-button-left")
:wikitext(subpage_navlink(
range_start ~= block_start and max(block_start, range_start - 0x1000),
range_start - 1,
true
))
:done()
:tag("td")
:addClass("unicode-nav-button")
:addClass("unicode-nav-button-right")
:wikitext(subpage_navlink(
range_end + 1,
range_end ~= block_end and min(block_end, range_end + 0x1000),
false
))
:allDone()
end
local text = html_create():node(tbl)
-- Descriptive sentence: which code points the page lists and which block they belong to.
local div = html_create("div"):wikitext("This page lists ")
if page_type == "range subpage" then
div = div:wikitext(("code points U+%04X to U+%04X from "):format(range_start, range_end))
else
div = div:wikitext("the characters in ")
end
-- NOTE(review): five arguments are passed but only three specifiers are visible in the format string, and the first argument is a number matched against what is now "%s"; link markup consuming the first two arguments was presumably stripped - confirm.
div = div:wikitext(("the block of the ] standard (version %s), which covers code points from U+%04X to U+%04X."):format(
block_start, name, Unicode_version, block_start, block_end
))
-- The div is attached here; the wikitext calls on `div` further down presumably still show up in the output because the same node object is extended - verify.
text:node(div)
-- Collect properties whose value is the same for every assigned codepoint in the block.
-- Each entry is {link page, property name, short value, long value or link}.
local properties = {}
local general_category = get_shared_value(page_data, lookup_category)
if general_category then
insert(properties, {
"Unicode character property",
"General_Category",
general_category,
get_category_long_name(general_category)
})
end
local script = get_shared_value(page_data, lookup_script)
if script then
local property = {
"Unicode script",
"Script",
script,
}
local alias = get_script_alias(script)
local script_obj = require("Module:scripts").getByCode(script)
-- NOTE(review): the format string "]" has no specifiers, so both arguments are ignored as written; a piped wikilink was presumably intended - confirm.
insert(property, script_obj and ("]"):format(script_obj:getWikipediaArticle(), alias) or alias)
insert(properties, property)
end
if #properties > 0 then
div = div:wikitext(" All assigned characters in this block have ")
local list = {}
for i = 1, #properties do
-- NOTE(review): `properties` is assigned without indexing by `i`, and `property` is passed whole to format/wikitext below; index expressions (`properties[i]`, `property[n]`) were presumably stripped - confirm.
local property = properties
insert(list, tostring(html_create()
:wikitext("the ")
:tag("code")
:wikitext(("]"):format(property, property))
:done()
:wikitext(" value of")
:tag("code")
:wikitext(property)
:done()
:wikitext(" (")
:tag("code")
:wikitext(property)
:done()
:wikitext(")")
))
end
div = div:wikitext(mw.text.listToText(list))
end
-- NOTE(review): a "." is appended unconditionally; when no shared properties were found, the preceding sentence already ends in "." - check for a doubled full stop.
div = div:wikitext(".")
-- Large block main page: list one link per 0x1000-aligned chunk that overlaps the block.
if page_type == "large block" then
local list = html_create("ul")
-- `r_start` begins at the 0x1000-aligned boundary at or below the block start; `n` counts the chunks.
local r_start, n = block_start - block_start % 0x1000, 0
while r_start <= block_end do
n = n + 1
local r_end = r_start + 0xFFF
-- Clamp each chunk to the block's own bounds.
local actual_start, actual_end = max(block_start, r_start), min(block_end, r_end)
-- NOTE(review): the format string "]" has no specifiers, so the five arguments are ignored as written; link markup was presumably stripped - confirm.
list = list:tag("li")
:wikitext(("]"):format(
actual_start, actual_start, u(actual_start), actual_end, u(actual_end))
)
:done()
r_start = r_end + 1
end
text = text:tag("div")
:wikitext(("Due to the size of the block, the list has been split across %s pages:"):format(n))
:node(list)
:done()
end
-- Final output: rendered HTML, then the categories "Unicode blocks" and "<name> block" (with `name` passed as the third argument to `format_categories`), then the templatestyles tag.
return tostring(text) .. require("Module:utilities").format_categories({
"Unicode blocks", name .. " block"},
nil,
name
) .. frame:extensionTag("templatestyles", nil, {src = "Module:character list/styles.css"})
end
-- Alias under which the same function is also exported.
export.show_header = export.char_list_header_t
-- Template entry point: renders a sortable wikitable with one row per assigned codepoint in a range.
-- The range comes from a block argument, from explicit start/end arguments, or (failing both) from the page name via `parse_page_name`.
-- Raises an error if none of these yields a range.
-- NOTE(review): many lines below look as if bracketed text was stripped from this copy (table keys, index expressions, and wikilink markup inside string literals). They are flagged individually; confirm against the upstream module before relying on this function.
function export.char_list_t(frame)
local parent = frame:getParent()
local num_param = {type = "number", allow_hex = true}
-- Arguments come from the parent frame, unless there is no parent or the parent's title is the current page, in which case this frame's own arguments are used.
-- NOTE(review): every entry in the parameter spec below has lost its key, which is not valid Lua. From later use, the keys are presumably two numeric range arguments, "block" and "pagename" - confirm.
local args = process_params((parent and parent:getTitle() ~= mw.title.getCurrentTitle().fullText and parent or frame).args, {
= num_param,
= num_param,
= {convert = function(block_name, err)
local block_start, block_end = get_block_range(block_name)
if not (block_start and block_end) then
err("Invalid Unicode block specified")
end
return {
block_start = block_start,
block_end = block_end
}
end},
= {demo = true}
})
local result = {}
-- Resolve the range. Priority: converted block argument, then explicit start/end, then the page name.
local block, range_start, range_end = args.block
if block then
range_start, range_end = block.block_start, block.block_end
-- NOTE(review): `args and args` / `args, args` look like stripped index expressions (presumably the two numeric arguments) - confirm.
elseif args and args then
range_start, range_end = args, args
else
local page_data = parse_page_name(args.pagename)
if not page_data then
error("Must give a Unicode block or character range")
end
-- On a range subpage use the sub-range; otherwise use the whole block.
range_start = page_data.range_start or page_data.block_start
range_end = page_data.range_end or page_data.block_end
end
-- Returns the wikitext for the "Character" cell of one codepoint.
local function present_codepoint(codepoint)
-- Unprintable characters get a placeholder label rather than a rendered sample.
if not m_unicode.is_printable(codepoint) then
local character = u(codepoint)
local text = "<small>(unprintable)</small>"
-- NOTE(review): when the character is a valid page title the string "]" is returned and `text` is unused on that path; a wikilink built from `character` and `text` was presumably stripped - confirm.
if new_title(character) then
return "]"
else
return text
end
end
local link_target = m_unicode.get_entry_title(codepoint)
-- Display as a numeric character reference, prefixed with a dotted circle for combining characters.
local display = ("%s&#%u;"):format(m_unicode.is_combining(codepoint) and "◌" or "", codepoint)
-- Whitespace is wrapped in "]" and "[" so its extent is visible.
if m_unicode.is_whitespace(codepoint) then
display = "]" .. display .. "["
end
-- The expression on the following lines is the return value (Lua does not end `return` at a newline).
-- NOTE(review): the first alternative "]" has no specifiers, so linked characters would render as "]" as written; link markup was presumably stripped - confirm.
return
(link_target and "]"
or "<!-- %s --><span class=\"character-sample %s\">%s</span>"):format(
link_target or "", char_to_script(codepoint), display
)
end
-- Gather data for assigned codepoints only.
local cps = get_data_for_code_point_range(range_start, range_end, is_assigned)
local emoji_image_exists = false
-- Image data modules are named by the range start divided by 0x1000, formatted as three hex digits.
local submodule = math.floor(range_start / 0x1000)
local image_module = ("Module:Unicode data/images/%03X"):format(submodule)
local emoji_image_module = ("Module:Unicode data/emoji images/%03X"):format(submodule)
-- The emoji column is shown only if the emoji image module loads and at least one codepoint has an emoji image.
if safe_require(emoji_image_module) then
for _, data in ipairs(cps) do
if data.image_emoji then
emoji_image_exists = true
break
end
end
end
-- Table header.
insert(result, [=[
{| class="wikitable sortable"
! width="12%" data-sort-type="number" | Code point
]=]
)
-- NOTE(review): in the two long strings below, `=] .. image_module .. ]` sits inside the string literal, so the module names are not interpolated as written; link markup and long-bracket delimiters were presumably stripped - confirm.
if emoji_image_exists then
insert(result, [=[
! width="5%" | Text-style image<br><sup>=] .. image_module .. ]</sup>
! width="5%" | Emoji-style image<br><sup>=] .. emoji_image_module .. ]</sup>
]=]
)
else
insert(result, [=[
! width="5%" | Image<br><sup>=] .. image_module .. ]</sup>
]=]
)
end
insert(result, [=[
! width="5%" | Character
]=]
)
-- The category and script columns are shown only when the values are not uniform across the listed codepoints.
-- NOTE(review): `cps.category` and `cps.script` index the array itself by name; an index such as `cps[1]` was presumably stripped - confirm.
local all_with_same_general_category = Array(cps)
:all(function(data) return data.category == cps.category end)
local all_with_same_script = Array(cps)
:all(function(data) return data.script == cps.script end)
-- NOTE(review): the two optional header cells below contain only "]"; their link text was presumably stripped - confirm.
if not all_with_same_general_category then
insert(result, " ! ]\n")
end
if not all_with_same_script then
insert(result, " ! ]\n")
end
insert(result, " ! Name\n")
-- One table row per codepoint.
for _, data in ipairs(cps) do
local alt_names = ""
local cp = data.cp
-- Build the alternative-names suffix from the codepoint's aliases, grouped by alias type.
if data.aliases then
-- NOTE(review): the five keys of this table were lost; from the reads below they are presumably alternate, control, correction, figment and abbreviation - confirm.
local aliases = {
= {},
= {},
= {},
= {},
= {},
}
-- NOTE(review): `insert(aliases], ...)` is not valid Lua; an index by the alias type and by the alias name (e.g. fields of `info`) was presumably stripped - confirm.
for _, info in ipairs(data.aliases) do
insert(aliases], "<small>" .. info .. "</small>")
end
for _, name in ipairs(aliases.alternate) do
alt_names = alt_names .. (" aka %s"):format(name)
end
if #aliases.control > 0 then
alt_names = alt_names .. "; control character name: " .. concat(aliases.control, " or ")
end
for _, name in ipairs(aliases.correction) do
alt_names = alt_names .. ("<br/>Corrected name: %s"):format(name)
end
for _, name in ipairs(aliases.figment) do
alt_names = alt_names .. ("<br/>Figment name: %s"):format(name)
end
if #aliases.abbreviation > 0 then
alt_names = alt_names .. " (" .. concat(aliases.abbreviation, ", ") .. ")"
end
end
-- Image cells are empty strings when no image is recorded.
-- NOTE(review): the "]" format strings below have no specifiers, so the image names are ignored as written; file-link markup was presumably stripped - confirm.
local current_image, current_image_emoji
if data.image then
current_image = ("]"):format(data.image)
else
current_image = ""
end
if emoji_image_exists then
if data.image_emoji then
current_image_emoji = ("]"):format(data.image_emoji)
else
current_image_emoji = ""
end
end
-- Row start: anchor id "U-XXXX", decimal sort value, "U+XXXX (decimal)" label, then the image cell.
insert(result, (
" |- id=\"U-%04X\"\n" ..
" | data-sort-value=\"%u\" | U+%04X <small>(%u)</small>\n" ..
" | %s \n"
):format(
cp, cp, cp, cp,
current_image
))
if emoji_image_exists then
insert(result, (
" | %s \n"
):format(
current_image_emoji
))
end
-- Character cell.
-- NOTE(review): the format string has one specifier but two arguments are supplied, so the category text is unused here; also `general_category_aliases` is called with `:gsub` although it is assigned from `long_names` at the top of the file - an index by `data.category` was presumably stripped - confirm.
insert(result, (
" | %s \n"
):format(
present_codepoint(cp),
data.category .. "<br />(" ..
general_category_aliases:gsub("_", " ") ..
")"
))
-- Optional general category cell: short code plus long name with underscores replaced by spaces.
-- NOTE(review): same presumably-stripped index on `general_category_aliases` as above; note also that `gsub` returns two values and is the last argument here.
if not all_with_same_general_category then
insert(result, (" | %s<br />(%s) \n")
:format(
data.category,
general_category_aliases:gsub("_", " ")))
end
-- Optional script cell.
-- NOTE(review): `script_data.aliases` is passed whole; an index by `data.script` was presumably stripped - confirm.
if not all_with_same_script then
insert(result, (" | %s<br />(%s) \n")
:format(
data.script,
script_data.aliases))
end
-- Name cell: the character name passed through mw.text.nowiki, followed by any alternative names.
insert(result, (" | <small>%s</small>%s\n")
:format(
mw.text.nowiki(data.name),
alt_names))
end
insert(result,
" |}"
)
-- Append the result of Module:TemplateStyles called with the character info stylesheet.
insert(result, require("Module:TemplateStyles")("Template:character info/style.css"))
return concat(result)
end
-- Alias under which the same function is also exported.
export.show = export.char_list_t
return export