Módulo:String

Documentación del módulo
Este módulo proporciona acceso a funciones básicas sobre cadenas (strings).
La mayoría de estas funciones se pueden invocar con parámetros con nombre, sin nombre o una mezcla de ambos. Si se usan parámetros con nombre, hay que tener en cuenta que MediaWiki elimina los espacios en blanco iniciales y finales del valor del parámetro. Según el uso previsto, puede ser conveniente tanto conservarlos como eliminarlos.
Algunas funciones admiten parámetros con patrones Lua, que son una especie de expresiones regulares. Véase el Manual de patrones Ustring.
Esta documentación está transcluida desde Módulo:String/doc.
Los editores pueden experimentar en la zona de pruebas de este módulo.
Por favor, añade las categorías e interwikis a la subpágina de documentación. Subpáginas de este módulo.
local export = {}
local unpack = unpack or table.unpack
-- Taken from en.wiktionary.org Module:string and Module:string utilities.

-- Lua pattern matching a single UTF-8 encoded character: one lead byte (ASCII
-- or \194-\244) followed by any continuation bytes (\128-\191). A Lua 5.1
-- pattern cannot include a literal null byte, so NUL is written as %z.
local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*"
local pattern_escape = require("Módulo:String/escapar")

local mw = mw
local string = string
local table = table
local ustring = mw.ustring

local byte = string.byte
local char = string.char
local concat = table.concat
local find = string.find
local format = string.format
local gmatch = string.gmatch
local gsub = string.gsub
local insert = table.insert
local len = string.len
local load_data = mw.loadData
local lower = string.lower
local match = string.match
local next = next
local reverse = string.reverse
local select = select
local sort = table.sort
local sub = string.sub
local tonumber = tonumber
local tostring = tostring
local type = type
local ucodepoint = ustring.codepoint
local ufind = ustring.find
local ugcodepoint = ustring.gcodepoint
local ugmatch = ustring.gmatch
local ugsub = ustring.gsub
local ulower = ustring.lower
local umatch = ustring.match
local unpack = unpack
local upper = string.upper
local usub = ustring.sub
local uupper = ustring.upper
--- Splits a UTF-8 string into an array with one entry per character.
-- The width of each character is taken from its lead byte; the input is not
-- validated, so malformed UTF-8 yields arbitrary slices.
-- @param str string to split
-- @return table array of single-character strings, in order
function export.explode_utf8(str)
	local byte = string.byte
	local sub = string.sub

	local str_len = #str
	local text = {}
	local n, i = 1, 0

	while n <= str_len do
		local b = byte(str, n)
		-- Bytes below 0xC0 are treated as one-byte characters; otherwise the
		-- lead byte range gives the sequence width.
		local width = b < 0xC0 and 1 or b < 0xE0 and 2 or b < 0xF0 and 3 or 4
		i = i + 1
		text[i] = sub(str, n, n + width - 1)
		n = n + width
	end

	return text
end

--- Converts a position in a UTF-8 string between byte and character units.
-- For example, iterate_utf8("字典", 2, "char") returns 4, because character 2
-- begins at byte 4. The string is walked byte by byte (forwards for a positive
-- `pos`, backwards for a negative one) and validated as UTF-8 on the way.
--
-- `init_from_type` / `init_to_type` let a caller resume from an already known
-- pair of equivalent positions instead of starting at the edge of the string.
-- They are not checked against each other; mismatched values give wrong results.
--
-- @param text string to walk
-- @param pos position to convert, in `from_type` units (0 is returned as is)
-- @param from_type "char" or "byte"; anything other than "char" means "byte"
-- @param init_from_type optional starting position in `from_type` units
-- @param init_to_type optional starting position in the other unit
-- @return the equivalent position in the other unit
-- Raises an error if an invalid UTF-8 sequence is encountered.
local function iterate_utf8(text, pos, from_type, init_from_type, init_to_type)
	-- Position 0 is always valid and never changes.
	if pos == 0 then
		return pos
	end

	local to_type
	if from_type == "char" then
		to_type = "byte"
	else
		from_type = "byte"
		to_type = "char"
	end

	-- Positive positions iterate forwards; negative positions iterate backwards.
	local iterate_val = pos > 0 and 1 or -1
	local text_len = text:len()

	local function not_utf8()
		-- Level 2 attributes the error to the line that detected the problem.
		error("String " .. text .. " is not UTF-8.", 2)
	end

	-- Counters for both units. The initial values are moved back by one step so
	-- that a match on the initial position itself is still detected.
	local c = {}
	c[from_type] = init_from_type and init_from_type ~= 0 and init_from_type - iterate_val or 0
	c[to_type] = init_to_type and init_to_type ~= 0 and init_to_type - iterate_val or 0

	while true do
		-- Absolute index of the byte examined in this step.
		local index
		if pos > 0 then
			index = c.byte + 1
		else
			index = text_len + c.byte
		end
		local b = text:byte(index)

		local trail, cp, min, leading_byte
		if not b then
			-- Position byte doesn't exist, so iterate the return value and return it.
			return c[to_type] + iterate_val
		elseif b < 0x80 then
			-- 1-byte codepoint, 00-7F.
			trail, cp, min, leading_byte = 0, b, 0, true
		elseif b < 0xc0 then
			-- A trailing byte.
			leading_byte = false
		elseif b < 0xc2 then
			-- An overlong encoding for a 1-byte codepoint.
			not_utf8()
		elseif b < 0xe0 then
			-- 2-byte codepoint, C2-DF.
			trail, cp, min, leading_byte = 1, b - 0xc0, 0x80, true
		elseif b < 0xf0 then
			-- 3-byte codepoint, E0-EF.
			trail, cp, min, leading_byte = 2, b - 0xe0, 0x800, true
		elseif b < 0xf4 then
			-- 4-byte codepoint, F0-F3.
			trail, cp, min, leading_byte = 3, b - 0xf0, 0x10000, true
		elseif b == 0xf4 then
			-- 4-byte codepoint, F4. Make sure it doesn't decode to over U+10FFFF.
			-- A missing second byte is reported by the trailing-byte check below.
			local second = text:byte(index + 1)
			if second and second > 0x8f then
				not_utf8()
			end
			trail, cp, min, leading_byte = 3, 4, 0x100000, true
		else
			-- Codepoint over U+10FFFF, or invalid byte.
			not_utf8()
		end

		-- Check subsequent bytes for multibyte codepoints.
		if leading_byte then
			local from, to = index + 1, index + trail
			for trailing_byte = from, to do
				b = text:byte(trailing_byte)
				if not b or b < 0x80 or b > 0xbf then
					not_utf8()
				end
				cp = cp * 0x40 + b - 0x80
			end
			local next_byte = text:byte(to + 1)
			if next_byte and next_byte >= 0x80 and next_byte <= 0xbf then
				-- Too many trailing bytes.
				not_utf8()
			elseif cp < min then
				-- Overlong encoding.
				not_utf8()
			end
		end

		-- Every byte advances the byte counter; only lead bytes advance the
		-- character counter.
		c.byte = c.byte + iterate_val
		if leading_byte then
			c.char = c.char + iterate_val
		end

		if c[from_type] == pos then
			return c[to_type]
		end
	end
end

--- Replaces the discouraged sequences listed for a script.
-- Reads `sc._rawData.normalizationFixes.from` (a list of patterns) and the
-- parallel `to` list; the i-th pattern is replaced by the i-th replacement, or
-- removed when there is none.
-- @param text string to fix
-- @param sc script object exposing `_rawData.normalizationFixes`
-- @return the fixed text
function export.corregirSecuenciasIncorrectas(text, sc)
	local fixes = sc._rawData.normalizationFixes
	if not (fixes and fixes.from) then
		return text
	end
	local to = fixes.to
	for i, from in ipairs(fixes.from) do
		local repl
		if type(to) == "table" then
			repl = to[i] or ""
		else
			-- NOTE(review): a non-table `to` is used for every pattern; confirm
			-- against the script data whether that shape ever occurs.
			repl = to or ""
		end
		-- Parentheses keep only the string result of gsub.
		text = (export.gsub(text, from, repl))
	end
	return text
end

--- Implements a modified form of Unicode normalization for scripts whose data
-- overrides the default combining class of some characters.
-- For every overridden character, the characters whose default class lies
-- between its new class (exclusive) and its default class (inclusive) are
-- collected, and any run of them standing immediately before the character is
-- moved after it.
-- @param text already normalized string
-- @param sc script object exposing `_rawData.normalizationFixes.combiningClasses`
-- @return the adjusted text
local function fixNormalization(text, sc)
	local fixes = sc._rawData.normalizationFixes
	local combiningClassFixes = fixes and fixes.combiningClasses
	if not combiningClassFixes then
		return text
	end

	local charsToFix = concat(require("Módulo:tabla").keysToList(combiningClassFixes))
	-- An empty set "[]" is a malformed pattern, and without any of the
	-- affected characters there is nothing to reorder.
	if charsToFix == "" or not export.match(text, "[" .. charsToFix .. "]") then
		return text
	end

	local codepoint, u = mw.ustring.codepoint, mw.ustring.char
	-- Obtain the list of default combining classes.
	local combiningClasses = mw.loadData("Módulo:scripts/datos/combiningClasses")

	for charToFix, newCombiningClass in pairs(combiningClassFixes) do
		local defaultClass = combiningClasses[codepoint(charToFix)]
		-- Characters without a default class cannot be compared; skip them.
		if defaultClass then
			local intermediaryChars = {}
			for character, combiningClass in pairs(combiningClasses) do
				if newCombiningClass < combiningClass and combiningClass <= defaultClass then
					insert(intermediaryChars, u(character))
				end
			end
			-- Swap the character with any intermediary characters that are
			-- immediately before it.
			if #intermediaryChars > 0 then
				text = export.gsub(text, "([" .. concat(intermediaryChars) .. "]+)(" .. charToFix .. ")", "%2%1")
			end
		end
	end
	return text
end


--- Builds a string from a code point by delegating to mw.ustring.char.
-- @param cod code point passed through unchanged
-- @return whatever mw.ustring.char returns for `cod`
function export.char(cod)
	local uchar = mw.ustring.char
	return uchar(cod)
end

--- Normalizes to NFD, then applies the script's normalization fixes if a
-- script object is given.
-- @param cod string to normalize
-- @param sc optional script object
-- @return the normalized string
function export.toNFD(cod, sc)
	local normalized = mw.ustring.toNFD(cod)
	if not sc then
		return normalized
	end
	return fixNormalization(normalized, sc)
end

--- Normalizes to NFC, then applies the script's normalization fixes if a
-- script object is given.
-- @param cod string to normalize
-- @param sc optional script object
-- @return the normalized string
function export.toNFC(cod, sc)
	local normalized = mw.ustring.toNFC(cod)
	if not sc then
		return normalized
	end
	return fixNormalization(normalized, sc)
end

--- Normalizes to NFKD, then applies the script's normalization fixes if a
-- script object is given.
-- @param cod string to normalize
-- @param sc optional script object
-- @return the normalized string
function export.toNFKD(cod, sc)
	local normalized = mw.ustring.toNFKD(cod)
	if not sc then
		return normalized
	end
	return fixNormalization(normalized, sc)
end

--- Normalizes to NFKC, then applies the script's normalization fixes if a
-- script object is given.
-- @param cod string to normalize
-- @param sc optional script object
-- @return the normalized string
function export.toNFKC(cod, sc)
	local normalized = mw.ustring.toNFKC(cod)
	if not sc then
		return normalized
	end
	return fixNormalization(normalized, sc)
end

--- Converts a character position in `text` to the equivalent byte position.
-- @param text string
-- @param c character position
-- @return byte position computed by iterate_utf8
function export.charsToBytes(text, c)
	local byte_pos = iterate_utf8(text, c, "char")
	return byte_pos
end

--- Converts a byte position in `text` to the equivalent character position.
-- Raises an error when the byte at `pos` is a continuation byte (0x80-0xBF),
-- i.e. when `pos` points into the middle of a character.
-- @param text string
-- @param pos byte position
-- @return character position computed by iterate_utf8
function export.bytesToChars(text, pos)
	local b = text:byte(pos)
	local is_continuation = b and b >= 0x80 and b <= 0xbf
	if is_continuation then
		error("Byte " .. pos .. " is not a leading byte.")
	end
	local char_pos = iterate_utf8(text, pos, "byte")
	return char_pos
end

--- Decides whether a pattern can be run with the stock byte-based string
-- library instead of the much more expensive ustring library.
-- Returns two values: a possibly rewritten pattern, and a boolean that is true
-- when the returned pattern is "simple" (safe for the string library) and
-- false when it is "complex" (must be run with ustring).
-- @param text the subject string (only inspected for non-ASCII bytes)
-- @param pattern the Lua pattern
-- @param plain if true the pattern is literal, hence always simple
-- @return pattern, is_simple
local function patternSimplifier(text, pattern, plain)
	local insert, concat = table.insert, table.concat
	-- If `plain` is set, then the pattern is treated as literal (so is always simple). Only used by find.
	if plain then
		return pattern, true
	-- If none of these are present, then the pattern has to be simple.
	-- NOTE(review): string.match has no `plain` argument, so the last clause
	-- treats "()" as a position-capture pattern, which always matches. This
	-- shortcut is therefore never taken and every pattern goes through the full
	-- scan below. It is kept as is because the scan catches cases (such as
	-- "[^]]") that the shortcut tests would miss.
	elseif not (
		pattern:match("%[.-[\128-\255].-%]") or
		pattern:match("[\128-\255][%*%+%?%-]") or
		pattern:match("%%[acdlpsuwxACDLPSUWXZ]") or
		pattern:match("%[%^[^%]]+%]") or
		pattern:match("%.[^%*%+%-]") or
		pattern:match("%.$") or
		pattern:match("%%b.?[\128-\255]") or
		pattern:match("()", 1, true)
	) then
		return pattern, true
	end
	-- Otherwise, the pattern could go either way.
	-- Build up the new pattern in a table, then concatenate at the end. We do it this way, as occasionally entries get modified along the way.
	local new_pattern = {}
	local L, pos, b = pattern:len(), 0, nil
	local char_, next_char

	-- `escape` and `balanced` are counters, which ensure the effects of % or %b (respectively) are distributed over the following bytes.
	-- `set` is a boolean that states whether the current byte is in a charset.
	-- `capture` keeps track of how many layers of capture groups the position is in, while `captures` keeps a tally of how many groups have been detected (due to the string library limit of 32).
	local escape, set, balanced, capture, captures = 0, false, 0, 0, 0

	while pos < L do
		pos = pos + 1
		b = pattern:byte(pos)
		if escape > 0 then escape = escape - 1 end
		if balanced > 0 then balanced = balanced - 1 end
		char_ = next_char or pattern:sub(pos, pos)
		next_char = pattern:sub(pos + 1, pos + 1)
		if escape == 0 then
			if char_ == "%" then
				-- Apply % escape.
				if next_char == "." or next_char == "%" or next_char == "[" or next_char == "]" then
					escape = 2
					if balanced > 0 then balanced = balanced + 1 end
				-- These charsets make the pattern complex.
				elseif next_char:match("[acdlpsuwxACDLPSUWXZ]") then
					return pattern, false
				-- This is "%b".
				elseif next_char == "b" then
					balanced = 4
				end
			-- Enter or leave a charset.
			elseif char_ == "[" then
				set = true
			elseif char_ == "]" then
				set = false
			elseif char_ == "(" then
				capture = capture + 1
			elseif char_ == ")" then
				if capture > 0 and set == false and balanced == 0 then
					captures = captures + 1
					capture = capture - 1
				end
			end
		end

		-- Multibyte char.
		if b > 0x7f then
			-- If followed by "*", "+" or "-", then 2-byte chars can be converted into charsets. However, this is not possible with 3 or 4-byte chars, as the charset would be too permissive, because if the trailing bytes were in a different order then this could be a different valid character.
			if next_char == "*" or next_char == "+" or next_char == "-" then
				local prev_pos = pattern:byte(pos - 1)
				if prev_pos and prev_pos > 0xc1 and prev_pos < 0xe0 then
					-- The previous entry is the lead byte: open the set before it.
					new_pattern[#new_pattern] = "[" .. new_pattern[#new_pattern]
					insert(new_pattern, char_ .. "]")
				else
					return pattern, false
				end
			-- If followed by "?", add "?" after each byte.
			elseif next_char == "?" then
				insert(new_pattern, char_ .. "?")
				local check_pos, check_b, i = pos, pattern:byte(pos), #new_pattern
				-- Walk back over the continuation bytes up to the lead byte.
				while check_b and check_b < 0xc0 do
					check_pos = check_pos - 1
					check_b = pattern:byte(check_pos)
					i = i - 1
					new_pattern[i] = new_pattern[i] .. "?"
				end
				pos = pos + 1
				next_char = pattern:sub(pos + 1, pos + 1)
			-- If in a charset or used in "%b", then the pattern is complex.
			elseif set or balanced > 0 then
				return pattern, false
			else
				insert(new_pattern, char_)
			end
		elseif char_ == "." then
			-- "*", "+", "-" are always okay after ".", as they don't care how many bytes a char has.
			if set or next_char == "*" or next_char == "+" or next_char == "-" or escape > 0 then
				insert(new_pattern, char_)
			-- If followed by "?", make sure "?" is after the leading byte of the UTF-8 char pattern, then skip forward one.
			elseif next_char == "?" then
				insert(new_pattern, "[%z\1-\127\194-\244]?[\128-\191]*")
				pos = pos + 1
				next_char = pattern:sub(pos + 1, pos + 1)
			-- If used with "%b", pattern is complex.
			elseif balanced > 0 then
				return pattern, false
			-- Otherwise, add the UTF-8 char pattern.
			else
				insert(new_pattern, "[%z\1-\127\194-\244][\128-\191]*")
			end
		-- Negative charsets are always complex, unless the text has no UTF-8 chars.
		elseif char_ == "[" and next_char == "^" and escape == 0 and text:match("[\128-\255]") then
			return pattern, false
		-- "()" matches the position unless escaped or used with "%b", so always necessitates ustring (as we need it to match the char position, not the byte one).
		elseif char_ == "(" and next_char == ")" and balanced == 0 and escape == 0 and text:match("[\128-\255]") then
			return pattern, false
		else
			insert(new_pattern, char_)
		end
	end
	if captures > 32 then
		return pattern, false
	else
		pattern = concat(new_pattern)
		return pattern, true
	end
end

--- Returns the length of `text` in characters.
-- Pure-ASCII text is answered with the byte length; otherwise the byte length
-- is converted to a character count (which also validates the UTF-8).
-- @param text string
-- @return number of characters
function export.len(text)
	local len_bytes = text:len()
	-- No byte above 0x7F means every character is exactly one byte.
	if not text:match("[\128-\255]") then
		return len_bytes
	end
	return iterate_utf8(text, len_bytes, "byte")
end

--- Length of a string in CHARACTERS; faster than mw.ustring.len.
-- Counts every byte that is not a UTF-8 continuation byte (0x80-0xBF). The
-- input is not validated, so each invalid non-continuation byte counts as one
-- character.
-- @param text string, or a number (measured through its string form)
-- @return number of characters
function export.ulen(text)
	if type(text) == "number" then
		return len(text)
	end
	-- Remove the continuation bytes; what remains is one byte per character.
	local without_continuation = gsub(text, "[\128-\191]+", "")
	return #text - #without_continuation
end

--- Character-based substring, equivalent to mw.ustring.sub but using the byte
-- library where possible.
-- @param text string
-- @param i_char first character (may be negative, counting from the end)
-- @param j_char optional last character (may be negative)
-- @return the substring
function export.sub(text, i_char, j_char)
	-- Pure ASCII: character and byte positions coincide.
	if not text:match("[\128-\255]") then
		return text:sub(i_char, j_char)
	end
	local i_byte, j_byte
	if j_char then
		if i_char > 0 and j_char > 0 then
			if j_char < i_char then return "" end
			i_byte = iterate_utf8(text, i_char, "char")
			-- Resume from i_char instead of rescanning from the start.
			j_byte = iterate_utf8(text, j_char + 1, "char", i_char, i_byte) - 1
		elseif i_char < 0 and j_char < 0 then
			if j_char < i_char then return "" end
			j_byte = iterate_utf8(text, j_char + 1, "char") - 1
			i_byte = iterate_utf8(text, i_char, "char", j_char, j_byte)
		-- For some reason, usub with i=0, j=0 returns the same result as for i=1, j=1, while string.sub always returns "". However, usub does return "" with i=1, j=0. As such, we need to adjust j_char to 1 if i_char is either 0, or negative with a magnitude greater than the length of the string.
		elseif j_char == 0 then
			i_byte = iterate_utf8(text, i_char, "char")
			if i_byte == 0 or -i_byte > text:len() then j_char = 1 end
			j_byte = iterate_utf8(text, j_char + 1, "char") - 1
		else
			i_byte = iterate_utf8(text, i_char, "char")
			j_byte = iterate_utf8(text, j_char + 1, "char") - 1
		end
	else
		i_byte = iterate_utf8(text, i_char, "char")
	end
	return text:sub(i_byte, j_byte)
end

do
	-- Decodes one UTF-8 sequence given its bytes. Returns the code point and
	-- the number of bytes consumed. The subtracted constants remove the marker
	-- bits of the lead and continuation bytes in a single step. The bytes are
	-- not validated.
	local function get_codepoint(b1, b2, b3, b4)
		if b1 < 128 then
			return b1, 1
		elseif b1 < 224 then
			return 0x40 * b1 + b2 - 0x3080, 2
		elseif b1 < 240 then
			return 0x1000 * b1 + 0x40 * b2 + b3 - 0xE2080, 3
		end
		return 0x40000 * b1 + 0x1000 * b2 + 0x40 * b3 + b4 - 0x3C82080, 4
	end

	-- Width in bytes of the sequence introduced by lead byte `b`.
	local function sequence_width(b)
		return b < 128 and 1 or b < 224 and 2 or b < 240 and 3 or 4
	end

	--- Returns the code points of characters i..j of `str`, like
	-- mw.ustring.codepoint. `j` defaults to `i`, and -1 means "to the end".
	-- Negative positions are delegated to mw.ustring.codepoint.
	-- @param str string (a number is handled with string.byte)
	-- @param i first character, default 1
	-- @param j last character, default i
	-- @return one code point per character in the range
	function export.codepoint(str, i, j)
		if type(str) == "number" then
			return byte(str, i, j)
		end
		-- The right-hand side is evaluated before either variable is assigned.
		-- #str (bytes) is a safe upper bound for the number of characters.
		i, j = i or 1, j == -1 and #str or j or i or 1
		if i == 1 and j == 1 then
			if str == "" then
				return
			end
			return (get_codepoint(byte(str, 1, 4)))
		elseif i < 0 or j < 0 then
			return ucodepoint(str, i, j) -- FIXME
		end
		local n, nb, ret, nr = 0, 1, {}, 0
		while n < j do
			n = n + 1
			if n < i then
				-- Skip characters before the requested range.
				local b = byte(str, nb)
				if not b then
					break
				end
				nb = nb + sequence_width(b)
			else
				local b1, b2, b3, b4 = byte(str, nb, nb + 3)
				if not b1 then
					break
				end
				nr = nr + 1
				local add
				ret[nr], add = get_codepoint(b1, b2, b3, b4)
				nb = nb + add
			end
		end
		return unpack(ret, 1, nr)
	end

	--- Iterator over the code points of characters i..j of `str`, like
	-- mw.ustring.gcodepoint. Negative positions (other than j == -1) are
	-- delegated to mw.ustring.gcodepoint.
	-- @param str string
	-- @param i first character, default 1
	-- @param j last character, default (or -1): end of the string
	-- @return iterator function yielding one code point per call
	function export.gcodepoint(str, i, j)
		i, j = i or 1, j ~= -1 and j or nil
		if i < 0 or j and j < 0 then
			return ugcodepoint(str, i, j) -- FIXME
		end
		local n, nb = 1, 1
		-- Advance to the byte offset of character i.
		while n < i do
			local b = byte(str, nb)
			if not b then
				break
			end
			nb = nb + sequence_width(b)
			n = n + 1
		end

		return function()
			if j and n > j then
				return nil
			end
			n = n + 1
			local b1, b2, b3, b4 = byte(str, nb, nb + 3)
			if not b1 then
				return nil
			end
			local ret, add = get_codepoint(b1, b2, b3, b4)
			nb = nb + add
			return ret
		end
	end
end

--- Lowercases `text`, using string.lower for pure-ASCII input and
-- mw.ustring.lower otherwise.
-- @param text string
-- @return lowercased string
function export.lower(text)
	if not text:match("[\128-\255]") then
		return text:lower()
	end
	return mw.ustring.lower(text)
end

--- Uppercases `text`, using string.upper for pure-ASCII input and
-- mw.ustring.upper otherwise.
-- @param text string
-- @return uppercased string
function export.upper(text)
	if not text:match("[\128-\255]") then
		return text:upper()
	end
	return mw.ustring.upper(text)
end

--- Returns the position (in characters, not bytes) of the first occurrence of
-- the pattern in the text, followed by any captures. Uses string.find when
-- possible, otherwise mw.ustring.find.
-- @param text subject; anything that is not a string yields nil
-- @param pattern Lua pattern, or literal text when `plain` is set
-- @param init_char character position at which to start searching
-- @param plain if true, `pattern` is plain text rather than a pattern
-- @return start and end character positions plus up to nine captures, or nil
function export.find(text, pattern, init_char, plain)
	if type(text) ~= "string" then
		return nil
	end
	local simple
	pattern, simple = patternSimplifier(text, pattern, plain)
	if not simple then
		return mw.ustring.find(text, pattern, init_char, plain)
	end
	-- Pure ASCII: byte and character positions coincide.
	if not text:match("[\128-\255]") then
		return text:find(pattern, init_char, plain)
	end

	-- The pattern is simple but multibyte characters are present, so init_char
	-- is converted into bytes for string.find, and the returned positions are
	-- converted back into characters.
	local init_byte = init_char and iterate_utf8(text, init_char, "char")
	local byte1, byte2, c1, c2, c3, c4, c5, c6, c7, c8, c9 = text:find(pattern, init_byte, plain)

	-- If string.find returned nil, then return nil.
	if not (byte1 and byte2) then
		return nil
	end

	-- Get first return value. If we have a positive init_char, we can save resources by resuming at that point.
	local char1, char2
	if (not init_char) or init_char > 0 then
		char1 = iterate_utf8(text, byte1, "byte", init_byte, init_char)
	else
		char1 = iterate_utf8(text, byte1, "byte")
	end

	-- If byte1 and byte2 are the same, don't bother running iterate_utf8 twice. Otherwise, resume iterate_utf8 from byte1 to find char2.
	if byte1 == byte2 then
		char2 = char1
	else
		char2 = iterate_utf8(text, byte2, "byte", byte1, char1)
	end

	return unpack{char1, char2, c1, c2, c3, c4, c5, c6, c7, c8, c9}
end

--- Returns the text matched by the pattern (or its captures). Uses
-- string.match when possible, otherwise mw.ustring.match.
-- @param text subject; anything that is not a string yields nil
-- @param pattern Lua pattern
-- @param init character position at which to start searching
-- @return the match or captures, or nil
function export.match(text, pattern, init)
	if type(text) ~= "string" then
		return nil
	end
	local simple
	pattern, simple = patternSimplifier(text, pattern)
	if not simple then
		return mw.ustring.match(text, pattern, init)
	end
	-- string.match takes a byte offset; convert only when the text actually
	-- contains multibyte characters.
	if init and text:find("[\128-\255]") then
		init = iterate_utf8(text, init, "char")
	end
	return text:match(pattern, init)
end

--- Match iterator: string.gmatch when the pattern is simple, otherwise
-- mw.ustring.gmatch.
-- @param text subject string
-- @param pattern Lua pattern
-- @return iterator over the matches
function export.gmatch(text, pattern)
	local simplified, is_simple = patternSimplifier(text, pattern)
	if not is_simple then
		return mw.ustring.gmatch(text, simplified)
	end
	return text:gmatch(simplified)
end

--- Global substitution: string.gsub when the pattern is simple, otherwise
-- mw.ustring.gsub.
-- @param text subject string
-- @param pattern Lua pattern
-- @param repl replacement (string, table or function)
-- @param n optional maximum number of replacements
-- @return the new string and the number of replacements made
function export.gsub(text, pattern, repl, n)
	local simplified, is_simple = patternSimplifier(text, pattern)
	if not is_simple then
		return ugsub(text, simplified, repl, n)
	end
	return text:gsub(simplified, repl, n)
end

--- Like export.gsub, but the second result is a boolean telling whether at
-- least one replacement was made.
-- @param text subject string
-- @param pattern Lua pattern
-- @param repl replacement (string, table or function)
-- @param n optional maximum number of replacements
-- @return the new string, and true if anything was replaced
function export.gsubb(text, pattern, repl, n)
	local simplified, is_simple = patternSimplifier(text, pattern)
	local result, replacements
	if not is_simple then
		result, replacements = ugsub(text, simplified, repl, n)
	else
		result, replacements = text:gsub(simplified, repl, n)
	end
	local changed = replacements > 0
	return result, changed
end

--- Applies export.gsub repeatedly until the text stops changing.
-- @param text subject string
-- @param pattern Lua pattern
-- @param repl replacement
-- @return the text after the last pass
function export.gsub_rep(text, pattern, repl)
	local previous
	repeat
		previous = text
		-- Only the first result (the string) is kept.
		text = export.gsub(previous, pattern, repl)
	until text == previous
	return text
end

--- Removes superfluous whitespace: leading, trailing and repeated.
-- When `pattern` is a string, that pattern is stripped instead of whitespace:
-- consecutive occurrences are collapsed to the first one, and one occurrence is
-- removed from the start and from the end.
-- @param str string to clean
-- @param pattern optional Lua pattern to strip instead of whitespace
-- @return the cleaned string
function export.strip(str, pattern)
	if type(pattern) ~= "string" then
		str = export.gsub(str, "%s+", " ")
		str = export.gsub(str, "^ ", "")
		str = export.gsub(str, " $", "")
	else
		-- Lua patterns cannot quantify a group, so "(p)+" would mean "p followed
		-- by a literal plus sign". Collapse repeats by replacing two adjacent
		-- occurrences with the first until nothing changes.
		str = export.gsub_rep(str, "(" .. pattern .. ")" .. pattern, "%1")
		str = export.gsub(str, "^" .. pattern, "")
		str = export.gsub(str, pattern .. "$", "")
	end

	return str
end

--- Reimplementation of mw.ustring.split() that includes any capturing groups
-- of the splitting pattern in the result. This works like Python's re.split(),
-- except that it keeps Lua's behavior when the split pattern is empty
-- (advancing one character at a time; Python returns the whole remainder).
-- @param str string to split
-- @param pattern Lua pattern to split on
-- @return array of pieces, interleaved with the pattern's captures
function export.split(str, pattern)
	local ret = {}
	-- (.-) corresponds to (.*?) in Python or Perl; () captures the
	-- current position after matching.
	pattern = "(.-)" .. pattern .. "()"
	-- Positions used below are character positions, so the end of the string
	-- must be measured in characters too. mw.ustring.len returns nil for
	-- invalid UTF-8, in which case the byte length is used.
	local str_len = mw.ustring.len(str) or #str
	local start = 1
	while true do
		-- Did we reach the end of the string?
		if start > str_len then
			insert(ret, "")
			return ret
		end
		-- match() returns all captures as multiple return values;
		-- we need to insert into a table to get them all.
		local captures = {export.match(str, pattern, start)}
		-- If no match, add the remainder of the string.
		if #captures == 0 then
			insert(ret, export.sub(str, start))
			return ret
		end
		local newstart = table.remove(captures)
		-- Special case: If we don't advance by any characters, then advance
		-- by one character; this avoids an infinite loop, and makes splitting
		-- by an empty string work the way mw.ustring.split() does. If we
		-- reach the end of the string this way, return immediately, so we
		-- don't get a final empty string.
		if newstart == start then
			insert(ret, export.sub(str, start, start))
			table.remove(captures, 1)
			start = start + 1
			if start > str_len then
				return ret
			end
		else
			insert(ret, table.remove(captures, 1))
			start = newstart
		end
		-- Insert any captures from the splitting pattern.
		for _, x in ipairs(captures) do
			insert(ret, x)
		end
	end
end

--- Changes the case of the first letter of `text`.
-- If the text begins with a wikilink, the first letter of the link's display
-- text is re-cased and the link is returned in piped form.
-- @param text string to re-case
-- @param dolower true to lowercase the first letter, otherwise uppercase it
-- @return the re-cased text
local function uclcfirst(text, dolower)
	-- Actual function to re-case the first letter.
	local function douclcfirst(text)
		local first_letter = export.sub(text, 1, 1)
		first_letter = dolower and export.lower(first_letter) or export.upper(first_letter)
		return first_letter .. export.sub(text, 2)
	end
	-- If there's a link at the beginning, re-case the first letter of the
	-- link text. This pattern matches both piped and unpiped links.
	-- If the link is not piped, the second capture (linktext) will be empty.
	local link, linktext, remainder = export.match(text, "^%[%[([^|%]]+)%|?(.-)%]%](.*)$")
	if link then
		return "[[" .. link .. "|" .. douclcfirst(linktext ~= "" and linktext or link) .. "]]" .. remainder
	end
	return douclcfirst(text)
end

--- Uppercases the first letter of `text` (or of a leading link's display text).
-- @param text string
-- @return the re-cased text
function export.ucfirst(text)
	local to_lower = false
	return uclcfirst(text, to_lower)
end

--- Lowercases the first letter of `text` (or of a leading link's display text).
-- @param text string
-- @return the re-cased text
function export.lcfirst(text)
	local to_lower = true
	return uclcfirst(text, to_lower)
end

do
	-- Named-entity table, loaded on first use.
	local entities

	-- Decodes the digits of a numeric entity in the given base. Returns nil
	-- (leaving the entity untouched) when the digits are invalid for the base
	-- or the value is above U+10FFFF.
	local function decode_numeric_entity(code, pattern, base)
		local cp = match(code, pattern) and tonumber(code, base)
		return cp and cp < 0x110000 and export.char(cp) or nil
	end

	-- gsub callback. `hash` is "#" for numeric entities, `x` is "x"/"X" for
	-- hexadecimal ones, and `code` is the remaining text before ";".
	local function decode_entity(hash, x, code)
		if hash == "#" then
			-- Decide the base explicitly: an `and/or` chain would retry a
			-- malformed decimal entity (e.g. "&#12a;") as hexadecimal.
			if x == "" then
				return decode_numeric_entity(code, "^%d+$")
			end
			return decode_numeric_entity(code, "^%x+$", 16)
		end
		entities = entities or load_data("Módulo:datos/entidades")
		-- For a named entity, a leading "x" captured separately is part of the
		-- name (e.g. "xi").
		return entities[x .. code]
	end

	--- Decodes HTML entities (numeric and named) in `str`.
	-- Sequences that cannot be decoded are left unchanged.
	-- @param str string possibly containing entities
	-- @return the decoded string (plus gsub's replacement count when any "&"
	-- was present)
	function export.decode_html(str)
		return find(str, "&", 1, true) and
			gsub(str, "&(#?)([xX]?)([%w\128-\255]+);", decode_entity) or str
	end
end

--- Escapes "risky" characters as HTML entities, to reduce the chance of their
-- being misinterpreted.
-- Text made only of spacing characters has those characters escaped, since
-- they generally need it to be processed properly by MediaWiki; any other text
-- has a fixed set of punctuation escaped.
-- @param text string to encode
-- @return the result of mw.text.encode
function export.encode_html(text)
	local has_visible_char = mw.ustring.match(text, "%S")
	local charset = has_visible_char and "!#%%&*+/:;<=>?@_{|}" or "%s"
	return mw.text.encode(text, charset)
end

--- Counts the occurrences of `pattern` in `text` using the byte-based
-- string.gsub.
-- @param text string or number to search
-- @param pattern string or number; a Lua pattern unless `plain` is set
-- @param plain if true, `pattern` is escaped and matched literally
-- @return number of matches
-- Raises an error when `text` or `pattern` is neither a string nor a number.
function export.count(text, pattern, plain)
	if not (type(text) == "string" or type(text) == "number") then
		error('The first argument to the function "count" must be a string or a number, not a ' .. type(text) .. '.')
	end

	if not (type(pattern) == "string" or type(pattern) == "number") then
		error('The second argument to the function "count" must be a string or a number, not a ' .. type(pattern) .. '.')
	end

	if plain then
		pattern = pattern_escape(pattern)
	end

	-- The second result of gsub is the number of replacements made.
	local _, count = gsub(text, pattern, "")

	return count
end

--- Collects the matches of `pattern` in `text` into an array. When the
-- pattern has several captures, only the first value of each match is stored.
-- @param text subject string
-- @param pattern ustring pattern
-- @return array of matches, in order of appearance
function export.matchToArray(text, pattern)
	local matches = {}
	local i = 0
	for match in mw.ustring.gmatch(text, pattern) do
		i = i + 1
		matches[i] = match
	end

	return matches
end

--[=[Iterator similar to gmatch, which also returns the index of the match
together with the match, as ipairs() does.

	Invoke thus:

		for i, whole_match in require("Module:string").imatch(text, pattern) do

		end

	or

		for i, capture1, capture2 in require("Module:string").imatch(text, pattern) do

		end

	For example, this code
		for i, whole_match in require("Module:string").imatch("a b c", "%a") do
			mw.log(i, whole_match)
		end
	will log
		1	a
		2	b
		3	c

	pos: position at which to start searching (default: the beginning)
	plain: if true, the pattern is matched as plain text
	use_basic_Lua_function: if true, use the byte-based string library instead
	of mw.ustring
]=]
function export.imatch(text, pattern, pos, plain, use_basic_Lua_function)
	local i = 0
	pos = pos or 0
	-- Wrap the pattern in a capture so the whole match is returned, unless it
	-- already has parentheses. A plain needle must not be wrapped: the
	-- parentheses would be searched for literally.
	if not plain and not string.find(pattern, "%b()") then
		pattern = "(" .. pattern .. ")"
	end
	local find = use_basic_Lua_function and string.find or mw.ustring.find
	local sub = use_basic_Lua_function and string.sub or mw.ustring.sub
	return function()
		local return_values = { find(text, pattern, pos, plain) }
		local start, stop = return_values[1], return_values[2]
		-- Stop when nothing matches. A match reported before `pos` means the
		-- start position was clamped to the end of the text, i.e. the same
		-- empty match at the end is being found again.
		if not start or (pos > 0 and start < pos) then
			return nil
		end
		i = i + 1
		-- Always advance, even past an empty match, to avoid looping forever.
		if stop < start then
			pos = start + 1
		else
			pos = stop + 1
		end
		if return_values[3] == nil then
			-- No captures (plain search): return the matched text.
			return i, sub(text, start, stop)
		end
		-- Skip the first two returned values, which are the indices of the
		-- whole match.
		return i, unpack(return_values, 3)
	end
end

--- Iterator over the matches of the module's UTF-8 character pattern in `s`,
-- yielding the match index and the match (see export.imatch).
-- @param s string to iterate over
-- @return iterator function
function export.UTF8iter(s)
	local iterator = export.imatch(s, UTF8_char)
	return iterator
end

--- Reverses a string: every multibyte match of the module's UTF-8 character
-- pattern is byte-reversed first, then the whole string is byte-reversed,
-- which restores the byte order inside each of those matches.
-- @param s string to reverse
-- @return the reversed string
function export.reverse(s)
	local function flip_multibyte(ch)
		if #ch > 1 then
			return ch:reverse()
		end
		-- false tells gsub to keep the original match.
		return false
	end
	local prepared = s:gsub(UTF8_char, flip_multibyte)
	return prepared:reverse()
end


--- Rewrites every byte of `s` as a backslash followed by its three-digit
-- decimal value (e.g. "A" becomes "\065").
-- @param s string to escape
-- @return the escaped string
function export.escapebytes(s)
	local function escape_one(ch)
		return format('\\%03d', byte(ch))
	end
	local escaped = gsub(s, '.', escape_one)
	return escaped
end


--- OLD FUNCTIONS (template entry points taking a frame).

--- Template wrapper around export.sub.
-- NOTE(review): the argument positions (1 = text, 2 = start, 3 = end) are
-- reconstructed; confirm against the calling templates.
-- @param frame frame whose args hold the text and the character positions
-- @return the substring, or nil when the text or the start is missing
function export.plantilla_sub(frame)
	local s = frame.args[1]
	local i1 = tonumber(frame.args[2])
	local i2 = tonumber(frame.args[3])
	if not s or not i1 then
		return nil
	end
	return export.sub(s, i1, i2)
end

--- Returns the first N characters of a string.
-- NOTE(review): argument positions (1 = text, 2 = count) are reconstructed;
-- confirm against the calling templates.
-- @param frame frame whose args hold the text and the character count
-- @return the prefix, or "" when the text or a positive count is missing
function export.left(frame)
  local s = frame.args[1]
  local idx = tonumber(frame.args[2])
  if (not s) or (not idx) or idx < 1 then
    return ""
  end
  return usub(s, 1, idx)
end

--- Returns the last N characters of a string.
-- NOTE(review): argument positions (1 = text, 2 = count) are reconstructed;
-- confirm against the calling templates.
-- @param frame frame whose args hold the text and the character count
-- @return the suffix, or "" when the text or a positive count is missing
function export.right(frame)
  local s = frame.args[1]
  local laenge = tonumber(frame.args[2])
  if (not s) or (not laenge) or laenge < 1 then
    return ""
  end
  -- A negative start position counts from the end of the string.
  return usub(s, -laenge, -1)
end

--- Returns a substring whose start is counted from the end of the string.
-- The start defaults to 1 (the last character) and the length to 1.
-- NOTE(review): argument positions (1 = text, 2 = start from the end,
-- 3 = length) are reconstructed; confirm against the calling templates.
-- @param frame frame whose args hold the text, the start and the length
-- @return the substring, or "" when it would fall outside the string
function export.subrev(frame)
  local s = frame.args[1]
  if not s then
    return ""
  end
  local zlang = export.ulen(s)
  local von = tonumber(frame.args[2])
  if (not von) or von < 1 then
    von = 1
  end
  if von > zlang then
    return ""
  end
  -- Convert "von-th character from the end" into a position from the start.
  von = zlang - von + 1
  local laenge = tonumber(frame.args[3])
  if (not laenge) or laenge < 1 then
    laenge = 1
  end
  local bis = von + laenge - 1
  if (bis > zlang) then
    return ""
  end
  return usub(s, von, bis)
end

--- Removes the last N characters of a string.
-- NOTE(review): argument positions (1 = text, 2 = count) are reconstructed;
-- confirm against the calling templates.
-- @param frame frame whose args hold the text and the number of characters to cut
-- @return the shortened text; the text itself when the count is missing or
-- below 1; "" when the count covers the whole text
function export.crop(frame)
  local s = frame.args[1]
  local cut = tonumber(frame.args[2])
  if (not s) or (not cut) or (cut < 1) then
    return s
  end
  local laenge = export.ulen(s)
  -- A zero or negative end position would count from the end of the string
  -- and return text that should have been removed.
  if cut >= laenge then
    return ""
  end
  return usub(s, 1, laenge - cut)
end

--- Removes the first N characters of a string.
-- NOTE(review): argument positions (1 = text, 2 = count) are reconstructed;
-- confirm against the calling templates.
-- @param frame frame whose args hold the text and the number of characters to cut
-- @return the shortened text; the text itself when the count is missing or
-- below 1
function export.cropleft(frame)
  local s = frame.args[1]
  local cut = tonumber(frame.args[2])
  if (not s) or (not cut) or (cut < 1) then
    return s
  end
  return usub(s, cut + 1, -1)
end

return export
Módulo:String

Separar Módulo:String en sílabas

Listado de errores ortográficos de Módulo:String

A continuación puedes ir al enlace que te lleva a una lista con los errores ortográficos más generalizados, para que los tomes en consideración y sepas cómo no cometerlos. Sin más que agregar, aquí tienes el listado de errores ortográficos de Módulo:String.

Enciclo

Wikious

Sapientia

Scientia

Boobota

Anandapedia

Sagapedia

Wikithot