Module:Unicode data/patterns

Hello, you have come here looking for the meaning of the word Module:Unicode data/patterns. In DICTIOUS you will not only get to know all the dictionary meanings for the word Module:Unicode data/patterns, but we will also tell you about its etymology, its characteristics and you will know how to say Module:Unicode data/patterns in singular and plural. Everything you need to know about the word Module:Unicode data/patterns you have here. The definition of the word Module:Unicode data/patterns will help you to be more precise and correct when speaking or writing your texts. Knowing the definition of Module:Unicode data/patterns, as well as those of other words, enriches your vocabulary and provides you with more and better linguistic resources.

Generates character-set contents for Lua patterns or regular expressions, to be put inside set notation: [...] or [^...].

{{#invoke:Unicode data/patterns|make_pattern|module=Module:Unicode data submodule|value=value to look for in singles and ranges}}

Pattern for Latin script as defined by Unicode
{{#invoke:Unicode data/patterns|make_pattern|module=scripts|value=Latn}}: ⅎⁱⁿℲªºꟓA-Za-zÀ-ÖØ-öø-ʸˠ-ˤᴀ-ᴥᴬ-ᵜᵢ-ᵥᵫ-ᵷᵹ-ᶾḀ-ỿₐ-ₜK-ÅⅠ-ↈⱠ-ⱿꜢ-ꞇꞋ-ꟍꟐ-ꟑꟕ-Ƛꟲ-ꟿꬰ-ꭚꭜ-ꭤꭦ-ꭩff-stA-Za-z𐞀-𐞅𐞇-𐞰𐞲-𐞺𝼀-𝼞𝼥-𝼪
Pattern for "Common" script as defined by Unicode
{{#invoke:Unicode data/patterns|make_pattern|module=scripts|value=Zyyy}}: ·،㋿〆ーꧏ؛۝؟฿ʹ㇯×÷࣢;🟰󠀁᳓ـ꤮𝒻᳡𝒢꭛𝕆΅゠ᳺ᠅჻؅ -@[-`{-©«-¹»-¿ʹ-˟˥-˩ˬ-˿।-॥࿕-࿘᛫-᛭᜵-᜶᠂-᠃ᳩ-ᳬᳮ-ᳳᳵ-᳷ -​‎-⁤⁦-⁰⁴-⁾₀-₎₠-⃀℀-℥℧-℩ℬ-ℱℳ-⅍⅏-⅟↉-↋←-␩⑀-⑊①-⟿⤀-⭳⭶-⮕⮗-⯿⸀-⹝⿰-〄〈-〠〰-〷〼-〿゛-゜・-ー㆐-㆟㇀-㇥㈠-㉟㉿-㋏㍘-㏿䷀-䷿꜀-꜡ꞈ-꞊꠰-꠹꭪-꭫﴾-﴿︐-︙︰-﹒﹔-﹦﹨-﹫!-@[-`{-・゙-゚¢-₩│-○-�𐄀-𐄂𐄇-𐄳𐄷-𐄿𐆐-𐆜𐇐-𐇼𐋡-𐋻𛲠-𛲣𜰀-𜳹𜴀-𜺳𜽐-𜿃𝀀-𝃵𝄀-𝄦𝄩-𝅦𝅪-𝅺𝆃-𝆄𝆌-𝆩𝆮-𝇪𝋀-𝋓𝋠-𝋳𝌀-𝍖𝍠-𝍸𝐀-𝑔𝑖-𝒜𝒞-𝒟𝒥-𝒦𝒩-𝒬𝒮-𝒹𝒽-𝓃𝓅-𝔅𝔇-𝔊𝔍-𝔔𝔖-𝔜𝔞-𝔹𝔻-𝔾𝕀-𝕄𝕊-𝕐𝕒-𝚥𝚨-𝟋𝟎-𝟿𞱱-𞲴𞴁-𞴽🀀-🀫🀰-🂓🂠-🂮🂱-🂿🃁-🃏🃑-🃵🄀-🆭🇦-🇿🈁-🈂🈐-🈻🉀-🉈🉐-🉑🉠-🉥🌀-🛗🛜-🛬🛰-🛼🜀-🝶🝻-🟙🟠-🟫🠀-🠋🠐-🡇🡐-🡙🡠-🢇🢐-🢭🢰-🢻🣀-🣁🤀-🩓🩠-🩭🩰-🩼🪀-🪉🪏-🫆🫎-🫜🫟-🫩🫰-🫸🬀-🮒🮔-🯹󠀠-󠁿
Pattern for titlecase letters as defined by Unicode
{{#invoke:Unicode data/patterns|make_pattern|module=category|value=Lt}}: ǅǈǋǲᾼῌῼᾈ-ᾏᾘ-ᾟᾨ-ᾯ

-- Table of functions exposed by this module; it is returned at the end of the file.
local export = {}
-- Array constructor; the objects it creates supply the :insert and :concat
-- methods used throughout this module.
local Array = require "Module:array"

local unpack = unpack or table.unpack -- Lua 5.2 compatibility

-- Formats a code point as a hexadecimal numeric character reference,
-- zero-padded to at least four uppercase hex digits
-- (for example 0x41 becomes "&#x0041;").
local function numeric_character_reference(code_point)
	local template = "&#x%04X;"
	return string.format(template, code_point)
end

-- Groups every code point range of a data module by the value assigned to it.
--
-- data_module: table with a `singles` map (code point -> value) and a
--   `ranges` list whose entries are { low, high, value }.
-- Returns a table mapping each value to an Array of { low, high } pairs;
--   a single code point is stored as { code_point, code_point }.
-- Note: indexing the returned table with a key it does not yet hold creates
--   and stores an empty Array under that key.
function export.all_ranges_per_value(data_module)
	local value_to_ranges = setmetatable({}, {
		__index = function(self, key)
			local value = Array()
			-- Store the new Array under its key so later lookups return the
			-- same list; assigning to `self` would only rebind the local.
			rawset(self, key, value)
			return value
		end,
	})

	for code_point, value in pairs(data_module.singles) do
		value_to_ranges[value]:insert { code_point, code_point }
	end

	for _, range in ipairs(data_module.ranges) do
		local low, high, value = unpack(range)
		value_to_ranges[value]:insert { low, high }
	end

	return value_to_ranges
end

-- Collects the code point ranges of a data module that carry one given value.
--
-- data_module: table with a `singles` map (code point -> value) and a
--   `ranges` list whose entries are { low, high, value }.
-- value_to_find: the value to look for.
-- Returns an Array of { low, high } pairs: matching singles first (in `pairs`
--   order, each as { code_point, code_point }), then matching ranges in list
--   order.
function export.ranges_per_value(data_module, value_to_find)
	local matches = Array()

	-- Keeps the inclusive range first..last when its value is the wanted one.
	local function keep_if_match(first, last, found)
		if found == value_to_find then
			matches:insert { first, last }
		end
	end

	for single, found in pairs(data_module.singles) do
		keep_if_match(single, single, found)
	end

	for _, entry in ipairs(data_module.ranges) do
		keep_if_match(unpack(entry))
	end

	return matches
end

-- Sorts a list of { low, high } ranges in place, ascending by low code point.
local function sort_ranges(ranges)
	table.sort(ranges, function (a, b)
		-- Compare the lower bounds: the range tables themselves have no
		-- ordering, so comparing `a < b` directly raises an error.
		return a[1] < b[1]
	end)
end

-- Makes a pattern suitable to put inside [...] or [^...]
-- in a Lua pattern or regular expression.
--
-- ranges: list of { low, high } code point pairs, emitted in list order.
-- char_ref: when truthy, each code point is written as a numeric character
--   reference; otherwise as the character itself via mw.ustring.char.
-- Returns the concatenated string: "low-high" for each range, or just the
--   one character when low equals high. No escaping is applied to characters
--   that are special inside a set.
local function make_pattern(ranges, char_ref)
	local output = Array()

	-- Renders one code point in the representation selected by char_ref.
	local function encode(code_point)
		if char_ref then
			return numeric_character_reference(code_point)
		end
		return mw.ustring.char(code_point)
	end

	for _, range in ipairs(ranges) do
		local low, high = range[1], range[2]
		output:insert(encode(low))
		-- A one-character range needs no "-high" part.
		if low ~= high then
			output:insert "-"
			output:insert(encode(high))
		end
	end

	return output:concat()
end

-- Strips the code points U+0000-U+001F from a list of { low, high } ranges,
-- modifying both the list and the affected range tables in place.
-- Treats all characters U+0000-U+001F as invalid in wikitext, but only some are.
--
-- A range lying entirely inside U+0000-U+001F is removed; a range that starts
-- there but ends above it has its lower bound raised to U+0020. Every
-- offending range is handled, so the list does not have to be sorted.
local function sanitize_ranges(ranges)
	-- Walk backwards so table.remove never shifts an entry not yet visited.
	for i = #ranges, 1, -1 do
		local range = ranges[i]
		if 0 <= range[1] and range[1] <= 0x1F then
			if 0 <= range[2] and range[2] <= 0x1F then
				table.remove(ranges, i)
			else
				range[1] = 0x20
			end
		end
	end
end

-- Entry point for {{#invoke:Unicode data/patterns|make_pattern|module=...|value=...}}.
--
-- frame.args.module: name of the submodule of Module:Unicode data to load.
-- frame.args.value: value to look for in that submodule's singles and ranges.
-- Raises an error when either argument is missing.
-- Returns the set contents for the matching code points as literal
--   characters (not character references), after sanitize_ranges has run.
function export.make_pattern(frame)
	local args = frame.args

	local submodule = args.module
		or error("Provide name of submodule of Module:Unicode data in |module= parameter.")
	local wanted = args.value
		or error("Provide value to search for in |value= parameter.")

	local data = require("Module:Unicode data/" .. submodule)
	local ranges = export.ranges_per_value(data, wanted)

	sanitize_ranges(ranges)

	return make_pattern(ranges, false)
end

-- Builds a wikitext bullet list with one set pattern per value found in a
-- submodule of Module:Unicode data.
--
-- frame.args.module: name of the submodule to load; an error is raised when
--   it is missing.
-- Returns one "\n* value: <code>pattern</code>" item per value, in the order
--   produced by sortedPairs from Module:table, with every code point written
--   as a numeric character reference.
function export.show_all_patterns(frame)
	local submodule = frame.args.module
		or error("Provide name of submodule of Module:Unicode data in |module=.")
	local data = require("Module:Unicode data/" .. submodule)
	local grouped = export.all_ranges_per_value(data)

	-- Order each value's ranges before any of them is rendered.
	for _, group in pairs(grouped) do
		sort_ranges(group)
	end

	local sorted_pairs = require "Module:table".sortedPairs
	local items = Array()
	for value, group in sorted_pairs(grouped) do
		local heading = "\n* " .. value .. ": "
		local pattern = make_pattern(group, true)
		items:insert(heading .. "<code>" .. pattern .. "</code>")
	end

	return items:concat()
end

-- Hand the table of public functions to whoever loads this module.
return export