-- Documentation for this module can be created at Módulo:utilities/doc
local export = {}
local data = mw.loadData("Module:utilities/data")
local notneeded = data.notneeded
local neededhassubpage = data.neededhassubpage
-- A helper function to escape magic characters in a string.
-- Magic characters: ^$()%.[]*+-?
-- When the first argument is a table it is treated as a frame object and the
-- string to escape is read from its first positional argument.
-- Returns only the escaped string; the substitution count from gsub is dropped.
function export.pattern_escape(text)
	if type(text) == "table" then
		text = text.args[1]
	end
	-- Each magic character is prefixed with "%". The outer parentheses truncate
	-- gsub's (string, count) result to the string alone.
	return (mw.ustring.gsub(text, "([%^$()%%.%[%]*+%-?])", "%%%1"))
end
-- A helper function to resolve HTML entities into plaintext.
-- Iterates over entities in a string, and uses the MW decode function. Selectively uses the decodeNamedEntities parameter to save memory where possible.
-- Returns the text with every "&...;" sequence passed through mw.text.decode.
function export.get_entities(text)
	for entity in text:gmatch("&[^;]+;") do
		-- mw.text.decode without decodeNamedEntities already handles numeric
		-- entities and the five basic named ones, so the full named-entity table
		-- is requested only for other named entities.
		if entity:match("^&[^#][^;]+;$")
			and entity ~= "&lt;"
			and entity ~= "&gt;"
			and entity ~= "&amp;"
			and entity ~= "&quot;"
			and entity ~= "&nbsp;"
		then
			text = text:gsub(export.pattern_escape(entity), function(cap1) return mw.text.decode(cap1, true) end)
		else
			text = text:gsub(export.pattern_escape(entity), mw.text.decode)
		end
	end
	return text
end
-- A helper function to convert plaintext into HTML entities where these match the characters given in set.
-- By default, this resolves any pre-existing entities into plaintext first, to allow mixed input and to avoid accidental double-conversion. This can be turned off with the raw parameter.
-- text: the string to encode.
-- set:  passed unchanged to mw.text.encode as its character set argument.
-- raw:  if truthy, text is encoded as-is without decoding existing entities.
function export.make_entities(text, set, raw)
	if not raw then
		text = export.get_entities(text)
	end
	return mw.text.encode(text, set)
end
-- A helper function to strip wiki markup, giving the plaintext of what is displayed on the page.
-- Returns the trimmed text after removing strip markers, HTML tags, link
-- syntax, bold/italic quotes and soft hyphens, and decoding HTML entities.
function export.get_plaintext(text)
	-- Remove strip markers and HTML tags.
	text = mw.text.unstrip(text)
		:gsub("<[^<>]+>", "")

	-- Parse internal links for the display text.
	text = text:gsub("%[%[([^%]]+)%]%]",
		function(capture)
			-- These aren't real links.
			local falsePositives = {"Category", "File", "Image"}
			for _, falsePositive in ipairs(falsePositives) do
				if capture:match("^" .. falsePositive .. ":") then return "" end
			end
			-- For piped links keep only the part after the first pipe.
			capture = capture:match("|(.+)") or capture
			return capture
		end)

	-- Parse external links for the display text.
	text = text:gsub("%[([^%]]+)%]",
		function(capture)
			-- Keep the label that follows the URL; a bracketed span without a
			-- URL and label is removed entirely.
			return capture:match("https?://[^%s%]]+%s([^%]]+)") or ""
		end)

	-- Strip bold, italics and soft hyphens.
	text = text
		:gsub("('*)'''(.-'*)'''", "%1%2")
		:gsub("('*)''(.-'*)''", "%1%2")
		-- U+00AD SOFT HYPHEN, written as UTF-8 byte escapes so that the
		-- invisible character cannot be lost when the source is copied.
		:gsub("\194\173", "")

	-- Get any HTML entities.
	-- Note: don't decode URL percent encoding, as it shouldn't be used in display text and may cause problems if % is used.
	text = export.get_entities(text)

	return mw.text.trim(text)
end
-- Performs a gsub in which PATTERN is matched literally: it is escaped with
-- export.pattern_escape before being handed to mw.ustring.gsub.
-- May be called from another module as plain_gsub(text, pattern, replacement),
-- or with a frame object as the first argument, in which case the three values
-- are read from the frame's positional arguments 1, 2 and 3.
-- Returns only the resulting string when called with a frame; otherwise returns
-- everything mw.ustring.gsub returns (string and substitution count).
-- NOTE(review): REPLACEMENT is not escaped, so "%" in it keeps its gsub meaning.
function export.plain_gsub(text, pattern, replacement)
	local invoked = false

	if type(text) == "table" then
		invoked = true

		if text.args then
			local frame = text

			local params = {
				[1] = {},
				[2] = {},
				[3] = { allow_empty = true },
			}

			local args = require("Module:parameters").process(frame.args, params)

			text = args[1]
			pattern = args[2]
			replacement = args[3]
		else
			error("If the first argument to plain_gsub is a table, it should be a frame object.")
		end
	else
		if not ( type(pattern) == "string" or type(pattern) == "number" ) then
			error("The second argument to plain_gsub should be a string or a number.")
		end

		if not ( type(replacement) == "string" or type(replacement) == "number" ) then
			error("The third argument to plain_gsub should be a string or a number.")
		end
	end

	pattern = export.pattern_escape(pattern)

	if invoked then
		-- Assigning to a single variable drops the substitution count so that
		-- only the string is returned to the wikitext caller.
		text = mw.ustring.gsub(text, pattern, replacement)
		return text
	else
		return mw.ustring.gsub(text, pattern, replacement)
	end
end
-- Format the categories with the appropriate sort key. CATEGORIES is a list of
-- category names.
-- LANG is an object encapsulating a language; if nil, the object for
--   language code 'und' (undetermined) will be used.
-- SORT_KEY is placed in the category invocation, and indicates how the
--   page will sort in the respective category. Normally this should be nil,
--   and a default sort key based on the subpage name (the part after the
--   colon) will be used.
-- SORT_BASE lets you override the default sort key used when SORT_KEY is
--   nil. Normally, this should be nil, and a language-specific default sort
--   key is computed from the subpage name (e.g. for Russian this converts
--   Cyrillic ё to a string consisting of Cyrillic е followed by U+10FFFF,
--   so that effectively ё sorts after е instead of the default Wikimedia
--   sort, which (I think) is based on Unicode sort order and puts ё after я,
--   the last letter of the Cyrillic alphabet).
-- FORCE_OUTPUT forces normal output in all namespaces. Normally, nothing
--   is output if the page isn't in the main, Appendix:, Reconstruction: or
--   Citations: namespaces.
-- Builds the wikitext category links for CATEGORIES and returns them
-- concatenated into a single string. Returns "" when the current page is not in
-- an allowed namespace (and is not Wiktionary:Sandbox) and FORCE_OUTPUT is not set.
-- SC is passed through to LANG's makeSortKey together with the sort base.
-- Raises an error if LANG is a table that lacks a getCode field.
-- Note: CATEGORIES is modified in place when a redundant SORT_KEY is detected
-- (a tracking category is appended to it).
function export.format_categories(categories, lang, sort_key, sort_base, force_output, sc)
	if type(lang) == "table" and not lang.getCode then
		error("The second argument to format_categories should be a language object.")
	end

	local title_obj = mw.title.getCurrentTitle()
	-- NOTE(review): the namespace numbers were lost from this copy of the module;
	-- the values below are the English Wiktionary numbers for the namespaces
	-- named in the trailing comment. Confirm them for the wiki this runs on.
	local allowedNamespaces = {
		[0] = true, [100] = true, [114] = true, [118] = true -- (main), Appendix, Citations, Reconstruction
	}

	if force_output or allowedNamespaces[title_obj.namespace] or title_obj.prefixedText == "Wiktionary:Sandbox" then
		local SUBPAGENAME = title_obj.subpageText

		if not lang then
			lang = require("Module:languages").getByCode("und")
		end

		if sort_key ~= "-" then
			-- Generate a default sort key
			sort_base = lang:makeSortKey(sort_base or SUBPAGENAME, sc)

			if sort_key and sort_key ~= "" then
				-- Gather some statistics regarding sort keys
				if mw.ustring.upper(sort_key) == sort_base then
					table.insert(categories, "Sort key tracking/redundant")
				end
			else
				sort_key = sort_base
			end

			-- If the sortkey is empty, remove it.
			-- Leave the sortkey if it is equal to the page name, because it still
			-- might be different from DEFAULTSORT and therefore have an effect.
			if sort_key == "" then
				sort_key = nil
			end
		-- If the sort key is "-", bypass the process of generating a sort key altogether. This is desirable when categorising (e.g.) translation requests, as the pages to be categorised are always in English/Translingual.
		else
			sort_key = mw.ustring.upper(sort_base or SUBPAGENAME)
		end

		local out_categories = {}
		for key, cat in ipairs(categories) do
			-- NOTE(review): the link text was lost from this copy; the canonical
			-- "Category:" namespace name is assumed here.
			out_categories[key] = "[[Category:" .. cat .. (sort_key and "|" .. sort_key or "") .. "]]"
		end

		return table.concat(out_categories, "")
	else
		return ""
	end
end
-- Returns a hidden <span id="catfix"> element whose class encodes the
-- language's canonical name and whose content is a non-breaking space tagged
-- with the language and script via Module:script utilities.
-- lang: a language object. If missing or not a table, the problem is tracked
--       through Module:debug and nil is returned.
-- sc:   optional script object. If absent, a script code is looked up in
--       data.catfix_scripts and resolved through Module:scripts.
-- Raises an error if lang has no usable getCanonicalName or if sc has no getCode.
function export.catfix(lang, sc)
	if not lang then
		require("Module:debug").track("catfix/no lang")
		return nil
	elseif type(lang) ~= "table" then
		require("Module:debug").track("catfix/lang not table")
		return nil
	end

	-- Check that the method exists before calling it, so that a table that is
	-- not a language object produces the descriptive error below rather than
	-- an "attempt to call a nil value" error.
	local canonicalName = lang.getCanonicalName and lang:getCanonicalName()
		or error('The first argument to the function "catfix" should be a language object from Module:languages.')

	if sc and not sc.getCode then
		error('The second argument to the function "catfix" should be a script object from Module:scripts.')
	end

	-- To add script classes to links on pages created by category boilerplate templates.
	if not sc then
		-- presumably catfix_scripts maps language codes to script codes — verify
		-- against Module:utilities/data.
		sc = data.catfix_scripts[lang:getCode()]
		if sc then
			sc = require("Module:scripts").getByCode(sc)
		end
	end

	return "<span id=\"catfix\" style=\"display:none;\" class=\"CATFIX-" .. mw.uri.anchorEncode(canonicalName) .. "\">" ..
		require("Module:script utilities").tag_text("&nbsp;", lang, sc, nil) ..
		"</span>"
end
-- Template entry point for export.catfix.
-- Reads the parent frame's arguments: parameter 1 is a language code, and the
-- script code may be given as parameter 2 or as sc= (2 is an alias of sc).
-- Raises an error if the script code is not recognised by Module:scripts; an
-- unrecognised language code is handed to Module:languages' err function.
function export.catfix_template(frame)
	local params = {
		[1] = {},
		[2] = { alias_of = "sc" },
		["sc"] = {},
	}

	local args = require("Module:parameters").process(frame:getParent().args, params)

	local m_languages = require("Module:languages")
	local lang = m_languages.getByCode(args[1]) or m_languages.err(args[1], 1)

	local sc = args.sc
	if sc then
		sc = require("Module:scripts").getByCode(sc) or error('The script code "' .. sc .. '", provided in the second parameter, is not valid.')
	end

	return export.catfix(lang, sc)
end
-- Not exporting because it is not used yet.
-- Classifies a date taken from frame.args as "past", "present" or "future"
-- relative to the current time.
-- NOTE(review): the argument positions were lost from this copy of the module;
-- year = args[1], month = args[2], day = args[3] is assumed — confirm before use.
-- The month may be an English month name or a number from 1 to 12 (numeric
-- strings are accepted, since frame arguments arrive as strings).
-- Raises an error if the month is not recognised.
local function getDateTense(frame)
	local name_num_mapping = {
		January = 1, February = 2, March = 3, April = 4, May = 5, June = 6,
		July = 7, August = 8, September = 9, October = 10, November = 11, December = 12,
	}

	local raw_month = frame.args[2]
	local month = name_num_mapping[raw_month] or tonumber(raw_month)
	if not (month and month >= 1 and month <= 12 and month % 1 == 0) then
		error("Unrecognised month: " .. tostring(raw_month))
	end

	-- os.time defaults the hour field to 12 (noon), so a window of half a day
	-- on either side of the result covers the whole calendar day.
	local date = os.time({year = frame.args[1], day = frame.args[3], month = month})
	local today = os.time()
	local diff = os.difftime(date, today)
	local daylength = 24 * 3600

	if diff < -daylength / 2 then
		return "past"
	elseif diff > daylength / 2 then
		return "future"
	else
		return "present"
	end
end
-- Produces the anchor id for a sense via Module:senseid.
-- If called with invoke, the first argument is a frame object: the language
-- code and the string are read from the parent frame's positional arguments 1
-- and 2, and an opening <li class="senseid"> tag carrying the id is returned.
-- If called by a module, the first argument is a language object and the bare
-- id is returned.
-- Raises an error if lang is a table that is neither a frame nor a language
-- object, or if str is not a string or a number.
function export.make_id(lang, str)
	local invoked = false

	if type(lang) == "table" then
		if lang.args then
			invoked = true

			local frame = lang

			local params = {
				[1] = {},
				[2] = {},
			}

			local args = require("Module:parameters").process(frame:getParent().args, params)

			local langCode = args[1]
			str = args[2]

			local m_languages = require("Module:languages")

			lang = m_languages.getByCode(langCode) or m_languages.err(langCode, 1)
		elseif not lang.getCanonicalName then
			error("The first argument to make_id should be a language object.")
		end
	end

	if not ( type(str) == "string" or type(str) == "number" ) then
		error("The second argument to make_id should be a string or a number.")
	end

	local id = require("Module:senseid").anchor(lang, str)

	if invoked then
		return '<li class="senseid" id="' .. id .. '">'
	else
		return id
	end
end
return export