--[[
NOTE: This module works by using recursive backtracking to build a node tree, which can then be traversed as necessary.
Because it is called by a number of high-use modules, it has been optimised for speed using a profiler, since it is used to scrape data from large numbers of pages very quickly. To that end, it rolls some of its own methods in cases where this is faster than using a function from one of the standard libraries. Please DO NOT "simplify" the code by removing these, since you are almost guaranteed to slow things down, which could seriously impact performance on pages which call this module hundreds or thousands of times.
It has also been designed to emulate the native parser's behaviour as much as possible, which in some cases means replicating bugs or unintuitive behaviours in that code; these should not be "fixed", since it is important that the outputs are the same. Most of these originate from deficient regular expressions, which can't be used here, so the bugs have to be manually reintroduced as special cases (e.g. onlyinclude tags being case-sensitive and whitespace intolerant, unlike all other tags). If any of these are fixed, this module should also be updated accordingly.
]]
local export = {}
local require = require
local require_when_needed = require("Module:require when needed")
local m_parser = require("Module:parser")
local m_str_utils = require("Module:string utilities")
local mw = mw
local mw_title = mw.title
local mw_uri = mw.uri
local string = string
local table = table
local Parser, Node = m_parser.new()
local anchor_encode = mw_uri.anchorEncode
local build_template -- Defined below.
local class_else_type = m_parser.class_else_type
local concat = table.concat
local decode_entities = m_str_utils.decode_entities
local encode_entities = m_str_utils.encode_entities
local encode_uri = mw_uri.encode
local find = string.find
local format = string.format
local getmetatable = getmetatable
local gsub = string.gsub
local html_create = mw.html.create
local insert = table.insert
local ipairs = ipairs
local is_internal_title = require_when_needed("Module:pages", "is_internal_title")
local is_node = m_parser.is_node
local load_data = mw.loadData
local lower = string.lower
local make_title = mw_title.makeTitle -- unconditionally adds the specified namespace prefix
local match = string.match
local new_frame -- Defined below.
local new_node = Node.new
local new_title = mw_title.new -- specified namespace prefix is only added if the input doesn't contain one
local next = next
local pairs = pairs
local parse -- Defined below.
local pattern_escape = m_str_utils.pattern_escape
local pcall = pcall
local php_trim = m_str_utils.php_trim
local rep = string.rep
local replacement_escape = m_str_utils.replacement_escape
local reverse = string.reverse
local scribunto_param_key = m_str_utils.scribunto_param_key
local select = select
local setmetatable = setmetatable
local sorted_pairs = require_when_needed("Module:table", "sortedPairs")
local split = m_str_utils.split
local sub = string.sub
local table_len = require_when_needed("Module:table", "length")
local title_equals = mw_title.equals
local toNFC = mw.ustring.toNFC
local tonumber = tonumber
local tostring = m_parser.tostring
local tostring_node = Node.__tostring
local type = type
local umatch = mw.ustring.match
local unpack = unpack
local uupper = m_str_utils.upper
local data = load_data("Module:template parser/data")
local magic_words = load_data("Module:data/magic words")
local parser_extension_tags = load_data("Module:data/parser extension tags")
local php_htmlspecialchars_data = data.php_htmlspecialchars
local valid_attribute_name = data.valid_attribute_name
local INF = math.huge
local NEG_INF = -INF
-- Real frame and title of the page being rendered, fetched once at load.
local current_frame = mw.getCurrentFrame()
local page_title = mw_title.getCurrentTitle()
-- Whether the current page's namespace supports subpages; relative template
-- names ("/foo", "../foo") are only resolved when it does. mw.site.namespaces
-- is keyed by namespace ID/name, so it must be indexed by the current
-- namespace before the flag can be read.
local namespace_has_subpages = mw.site.namespaces[page_title.namespace].hasSubpages
local raw_pagename = page_title.fullText
local template_memo = {}
local template_name_memo = {}
local template_prefixed_name_memo = {}
local parser_variable_memo = {}
local parser_function_memo = {}
------------------------------------------------------------------------------------
--
-- Helper functions
--
------------------------------------------------------------------------------------
-- Emulates PHP's htmlspecialchars(). With `compat` set, only & " < > are
-- replaced (ENT_COMPAT); otherwise ' is replaced as well (ENT_QUOTES).
-- Replacements are looked up in `php_htmlspecialchars_data`, which is
-- presumably a character -> entity map (defined in the data module).
-- Returns only the escaped string (the gsub count is discarded).
local function php_htmlspecialchars(str, compat)
	return (gsub(str, compat and "[&\"<>]" or "[&\"'<>]", php_htmlspecialchars_data))
end
-- Normalizes a string for use in comparisons which emulate PHP's equals
-- operator, which coerces certain strings to numbers: those within the range
-- -2^63 to 2^63 - 1 which don't have decimal points or exponents are coerced
-- to integers, while any others are coerced to doubles if possible; otherwise,
-- they remain as strings. PHP and Lua have the same precision for doubles, but
-- Lua's integer precision range is -2^53 + 1 to 2^53 - 1. Any integers within
-- Lua's precision, as well as all doubles, are simply coerced to numbers, but
-- PHP integers outside of Lua's precision are emulated as normalized strings,
-- with leading 0s and any + sign removed. The `not_long` flag is used for the
-- second comparator if the first did not get normalized to a long integer, as
-- PHP will only coerce strings to integers if it's possible for both
-- comparators.
--
-- Returns one of:
--   str                                  (not numeric / out of integer range)
--   num, "number"                        (safe integer or double)
--   digits, "long integer", num          (integer beyond Lua's precision)
local function php_normalize_string(str, not_long)
	local num = tonumber(str)
	-- Must be a number that isn't inf, NaN or hexadecimal. The empty capture
	-- makes `match` return a position instead of building a substring.
	if not num or num == INF or num == NEG_INF or num ~= num or match(str, "^%s*[+-]?0[xX]()") then
		return str
	-- If `not_long` is set or within Lua's precision, return as a number.
	elseif not_long or num < 9007199254740992 and num > -9007199254740992 then
		return num, "number"
	end
	local sign, str_no_0 = match(str, "^%s*([+-]?)0*(%d+)$")
	-- If it doesn't match the pattern for a long integer, return as a double.
	if not str_no_0 then
		return num, "number"
	end
	-- Otherwise, check if it's a long integer. 2^63 is 9223372036854775808, so
	-- slice off the last 15 digits and deal with the two parts separately. If
	-- the integer value would be too high/low, return as a string. (At least
	-- 16 digits are guaranteed here, since |num| >= 2^53.)
	local high = tonumber(sub(str_no_0, 1, -16))
	if high > 9223 then
		return str
	elseif high == 9223 then
		local low = tonumber(sub(str_no_0, -15))
		-- Range is -2^63 to 2^63 - 1 (not symmetrical).
		if low > 372036854775808 or low == 372036854775808 and sign ~= "-" then
			return str
		end
	end
	return (sign == "+" and "" or sign) .. str_no_0, "long integer", num
end
-- Loose string comparison in the manner of PHP's == operator: identical
-- strings are equal, and numeric-looking strings are compared by value after
-- being normalized with php_normalize_string.
local function php_string_equals(str1, str2)
	if str1 == str2 then
		return true
	end
	local norm1, kind1, num1 = php_normalize_string(str1)
	if norm1 == str2 then
		return true
	end
	if kind1 == "long integer" then
		-- Compare as long integers if the second operand is one too,
		-- otherwise fall back to the double value of the first.
		local norm2, kind2 = php_normalize_string(str2)
		if kind2 == "number" then
			return norm2 == num1
		end
		return norm2 == norm1
	elseif kind1 == "number" then
		-- The first operand isn't a long integer, so the second must not
		-- be treated as one either.
		return norm1 == php_normalize_string(str2, true)
	end
	return false
end
------------------------------------------------------------------------------------
--
-- Nodes
--
------------------------------------------------------------------------------------
-- NOTE(review): presumably layer bookkeeping keys that Module:parser strips
-- when a layer becomes a node - confirm against that module.
Node.keys_to_remove = {"handler", "head", "pattern", "route", "step"}
-- Expands `obj` against `frame` if it is a node; any other value (e.g. a
-- plain string) is handed back unchanged.
local function expand(obj, frame)
	if is_node(obj) then
		return obj:expand(frame) or obj
	end
	return obj
end
export.expand = expand
-- Default expansion: expand each child in order and concatenate the results.
-- Each child must be indexed individually; expanding `self` here would
-- recurse forever.
function Node:expand(frame)
	local output = {}
	for i = 1, #self do
		output[i] = expand(self[i], frame)
	end
	return concat(output)
end
local Wikitext = Node:new_class("wikitext")
-- Builds a wikitext value from `this`, collapsing it where possible:
--   * a non-table is returned as-is;
--   * a one-element table is unwrapped to that element;
--   * a table of plain strings/numbers is concatenated into one string;
--   * anything else becomes a wikitext node.
-- force_node ensures the output will always be a node.
function Wikitext:new(this, force_node)
	if type(this) ~= "table" then
		return force_node and new_node(self, {this}) or this
	elseif #this == 1 then
		local this1 = this[1]
		return force_node and not is_node(this1) and new_node(self, this) or this1
	end
	-- concat fails if any element is a node (table), in which case the
	-- children must be kept separate.
	local success, str = pcall(concat, this)
	if success then
		return force_node and new_node(self, {str}) or str
	end
	return new_node(self, this)
end
local Parameter = Node:new_class("parameter")
-- First value is the parameter name.
-- Second value is the parameter's default value.
-- Any additional values are ignored: e.g. "{{{a|b|c}}}" is parameter "a" with default value "b" (*not* "b|c").
function Parameter:new(this)
	local this2 = this[2]
	-- A default that was parsed as a "key=value" argument is really plain
	-- text containing "=", so re-insert the "=" and flatten it to wikitext.
	if class_else_type(this2) == "argument" then
		insert(this2, 2, "=")
		this2 = Wikitext:new(this2)
	end
	return new_node(self, {this[1], this2})
end
-- Serializes the parameter back to "{{{name|default}}}" wikitext.
function Parameter:__tostring()
	local output = {}
	for i = 1, #self do
		output[i] = tostring(self[i])
	end
	return "{{{" .. concat(output, "|") .. "}}}"
end
-- Iterator step over the parameter's children. Only the name (1) and the
-- default (2) are ever yielded; returns the child, the node and the new index.
function Parameter:next(i)
	i = i + 1
	if i <= 2 then
		return self[i], self, i
	end
end
-- Returns the expanded parameter name, normalized with scribunto_param_key.
function Parameter:get_name(frame)
	return scribunto_param_key(expand(self[1], frame))
end
-- Returns the expanded default value. If no default was given, returns the
-- literal "{{{name}}}" text, which is what an unfilled parameter renders as.
function Parameter:get_default(frame)
	local default = self[2]
	if default ~= nil then
		return expand(default, frame)
	end
	return "{{{" .. expand(self[1], frame) .. "}}}"
end
-- Expands the parameter against `frame`: the supplied argument if there is
-- one, else the default, else the literal "{{{name}}}" text.
function Parameter:expand(frame)
	local name, result = expand(self[1], frame)
	if frame then
		local args = frame.args
		if args then
			-- Argument tables are keyed by normalized keys (see
			-- template_argument_table), so normalize the name the same way.
			result = args[scribunto_param_key(name)]
		end
	end
	-- Parameter in use.
	if result ~= nil then
		return result
	end
	result = self[2]
	-- Default.
	if result ~= nil then
		return expand(result, frame)
	end
	return "{{{" .. name .. "}}}"
end
local Argument = Node:new_class("argument")
-- Serializes a named argument as "key=value".
function Argument:__tostring()
	return tostring(self[1]) .. "=" .. tostring(self[2])
end
-- Expands the key and the value separately and rejoins them with "=".
function Argument:expand(frame)
	return expand(self[1], frame) .. "=" .. expand(self[2], frame)
end
local Template = Node:new_class("template")
-- Serializes the template back to "{{name|arg|...}}" wikitext.
function Template:__tostring()
	local output = {}
	for i = 1, #self do
		output[i] = tostring(self[i])
	end
	return "{{" .. concat(output, "|") .. "}}"
end
-- Looks up the magic word entry for `chunk`. An exact match is tried first;
-- failing that, the uppercased form is tried, which only counts if the entry
-- is not flagged as case-sensitive. Returns nil if there is no match.
local function retrieve_magic_word_data(chunk)
	local mgw_data = magic_words[chunk]
	if mgw_data then
		return mgw_data
	end
	local normalized = uupper(chunk)
	mgw_data = magic_words[normalized]
	if mgw_data and not mgw_data.case_sensitive then
		return mgw_data
	end
end
-- Works out how the title object `title` has to be written inside {{ }} in
-- order to transclude it, adding a namespace prefix only where one is needed.
-- Throws if `title` is not an internal title.
local function get_template_invocation_name(title)
	if not is_internal_title(title) then
		error("Template invocations require a valid page title, which cannot contain an interwiki prefix.")
	end
	local ns = title.namespace
	-- Outside the template namespace the prefix is mandatory; mainspace is
	-- written with a bare leading colon.
	if ns == 0 then
		return ":" .. title.text
	elseif ns ~= 10 then
		return title.prefixedText
	end
	local text = title.text
	local colon_pos = find(text, ":", 1, true)
	if colon_pos == nil then
		-- A colon-less name only clashes with parser variables.
		local word = retrieve_magic_word_data(text)
		if word and word.parser_variable then
			return title.prefixedText
		end
		return text
	end
	-- Text before the first colon could be read as a parser function or a
	-- transclusion modifier, in which case "Template:" is required.
	local word = retrieve_magic_word_data(sub(text, 1, colon_pos - 1))
	local clashes = word and (word.parser_function or word.transclusion_modifier)
	if not clashes then
		-- The prefix is also needed when the unprefixed text would resolve
		-- to a different page (e.g. "Template:Category:Foo" can't be
		-- shortened to "Category:Foo").
		local check = new_title(text, 10)
		if check and title_equals(title, check) then
			return text
		end
	end
	return title.prefixedText
end
export.getTemplateInvocationName = get_template_invocation_name
-- Follows `title` to its redirect target, if it has an internal one;
-- otherwise returns `title` itself (for an interwiki target, the redirect
-- page itself is what gets transcluded, so the name must not be normalized).
-- Unless `force_transclusion` is set, isRedirect is checked first (inside
-- pcall) so that redirectTarget is only read for actual redirects; if that
-- check throws, redirectTarget is used as the fallback.
local function resolve_redirect(title, force_transclusion)
	local follow = force_transclusion
	if not follow then
		local index = getmetatable(title).__index
		local ok, is_redirect = pcall(index, title, "isRedirect")
		follow = not ok or is_redirect
	end
	if not follow then
		return title
	end
	local target = title.redirectTarget
	if is_internal_title(target) and target then
		return target
	end
	return title
end
-- Splits an expanded template name into its leading modifiers and the final
-- name, and classifies it.
--   name               expanded text of the first template chunk
--   has_args           whether the template has any arguments
--   fragment           if truthy, keep the title's "#fragment" on the result
--   force_transclusion passed through to resolve_redirect
-- Returns:
--   chunks, "parser function", arg1   (arg1 is the text after the colon)
--   chunks, "parser variable"
--   chunks, "template"
--   nil                               (invalid name)
-- `chunks` is an array of "MODIFIER:" strings followed by the normalized name.
local function parse_template_name(name, has_args, fragment, force_transclusion)
	local chunks, colon, start, n, p = {}, find(name, ":", 1, true), 1, 0, 0
	while colon do
		-- Pattern applies PHP ltrim.
		local mgw_data = retrieve_magic_word_data(match(sub(name, start, colon - 1), "[^%z\t-\v\r ].*") or "")
		if not mgw_data then
			break
		end
		local priority = mgw_data.priority
		-- Modifiers must come in strictly increasing priority; anything else
		-- is either a parser function or the start of the name proper.
		if not (priority and priority > p) then
			local pf = mgw_data.parser_function and mgw_data.name or nil
			if pf then
				n = n + 1
				chunks[n] = pf .. ":"
				return chunks, "parser function", sub(name, colon + 1)
			end
			break
		end
		n = n + 1
		chunks[n] = mgw_data.name .. ":"
		start, p = colon + 1, priority
		colon = find(name, ":", start, true)
	end
	if start > 1 then
		name = sub(name, start)
	end
	name = php_trim(name)
	-- Parser variables can only take SUBST:/SAFESUBST: as modifiers.
	if not has_args and p <= 1 then
		local mgw_data = retrieve_magic_word_data(name)
		local pv = mgw_data and mgw_data.parser_variable and mgw_data.name or nil
		if pv then
			n = n + 1
			chunks[n] = pv
			return chunks, "parser variable"
		end
	end
	-- Handle relative template names.
	if namespace_has_subpages then
		-- If the name starts with "/", it's treated as a subpage of the
		-- current page. Final slashes are trimmed, but this can't affect
		-- the intervening slash (e.g. {{///}} refers to "{{PAGENAME}}/").
		local initial = sub(name, 1, 1)
		if initial == "/" then
			name = raw_pagename .. (match(name, "^/.*[^/]") or "/")
		-- If it starts with "../", trim it and any that follow, and go up
		-- that many subpage levels. Then, treat any additional text as
		-- a subpage of that page; final slashes are trimmed.
		elseif initial == "." and sub(name, 2, 3) == "./" then
			-- `tail` is the position just past the run of "../".
			local tail = 4
			while sub(name, tail, tail + 2) == "../" do
				tail = tail + 3
			end
			-- Retain an initial "/".
			name = sub(name, tail - 1)
			-- Trim the relevant number of subpages from the pagename.
			local pagename, i = reverse(raw_pagename), 0
			for _ = 1, (tail - 1) / 3 do
				i = find(pagename, "/", i + 1, true)
				-- Fail if there aren't enough slashes.
				if not i then
					return nil
				end
			end
			-- Add the subpage text; since the intervening "/" is retained
			-- in `name`, it can be trimmed along with any other final
			-- slashes (e.g. {{..///}} refers to "{{BASEPAGENAME}}".)
			name = reverse(sub(pagename, i + 1)) .. (match(name, "^.*[^/]") or "")
		end
	end
	local title = new_title(name, 10)
	if not is_internal_title(title) then
		return nil
	end
	-- If `fragment` is set, save the original title's fragment, since it
	-- won't carry through to any redirect targets.
	if fragment then
		fragment = title.fragment
	end
	title = resolve_redirect(title, force_transclusion)
	local chunk = get_template_invocation_name(title)
	-- Set the fragment (if applicable).
	if fragment then
		chunk = chunk .. "#" .. fragment
	end
	chunks[n + 1] = chunk
	return chunks, "template"
end
-- Normalize the template name, check it's a valid template, then memoize the
-- results, using false for invalid titles. Parser functions need to have the
-- first argument extracted from the title, as it comes after the colon. Note:
-- force_transclusion avoids incrementing the expensive parser function count by
-- forcing transclusion instead. This should only be used when there is a real
-- risk that the expensive parser function limit of 500 will be hit.
--
-- Returns `normalized_name, subclass[, pf_arg1]`, where subclass is one of
-- "template", "parser variable" or "parser function"; returns nil if the
-- name is invalid. All memo tables are keyed by the expanded raw name.
local function process_name(self, frame, force_transclusion)
	local name = expand(self[1], frame)
	local has_args, norm = #self > 1
	if not has_args then
		norm = parser_variable_memo[name]
		if norm then
			return norm, "parser variable"
		end
	end
	norm = template_name_memo[name]
	if norm then
		local pf_arg1 = parser_function_memo[name]
		return norm, pf_arg1 and "parser function" or "template", pf_arg1
	elseif norm == false then
		return nil
	end
	local chunks, subclass, pf_arg1 = parse_template_name(name, has_args, nil, force_transclusion)
	-- Fail if invalid.
	if not chunks then
		template_name_memo[name] = false
		return nil
	end
	local chunk1 = chunks[1]
	-- Fail on SUBST:.
	if chunk1 == "SUBST:" then
		template_name_memo[name] = false
		return nil
	-- Any modifiers are ignored.
	elseif subclass == "parser function" then
		local pf = chunks[#chunks]
		template_name_memo[name] = pf
		parser_function_memo[name] = pf_arg1
		return pf, "parser function", pf_arg1
	end
	-- Ignore SAFESUBST:, and treat MSGNW: as a parser function with the pagename as its first argument (ignoring any RAW: that comes after).
	if chunks[chunk1 == "SAFESUBST:" and 2 or 1] == "MSGNW:" then
		pf_arg1 = chunks[#chunks]
		local pf = "MSGNW:"
		template_name_memo[name] = pf
		parser_function_memo[name] = pf_arg1
		return pf, "parser function", pf_arg1
	end
	-- Ignore any remaining modifiers, as they've done their job.
	local output = chunks[#chunks]
	if subclass == "parser variable" then
		parser_variable_memo[name] = output
	else
		template_name_memo[name] = output
	end
	return output, subclass
end
-- Returns the normalized name of the template (nil if invalid). The extra
-- values returned by process_name are dropped.
function Template:get_name(frame, force_transclusion)
	local normalized = process_name(self, frame, force_transclusion)
	return normalized
end
-- Builds the argument table for children `start`..#self of a template node.
-- Named arguments ("k=v") are stored under their normalized key with the
-- value trimmed; all others are numbered sequentially from 1, untrimmed.
local function template_argument_table(self, start, frame)
	local template_args, implicit = {}, 0
	for i = start, #self do
		local arg = self[i]
		if class_else_type(arg) == "argument" then
			template_args[scribunto_param_key(expand(arg[1], frame))] = php_trim(expand(arg[2], frame))
		else
			implicit = implicit + 1
			template_args[implicit] = expand(arg, frame) -- Not trimmed.
		end
	end
	return template_args
end
-- Returns the template's arguments as a table, or nil if the name is
-- invalid. Parser variables have none; templates get key/value handling via
-- template_argument_table; parser functions get a plain array in which the
-- first entry is the text after the colon.
function Template:get_arguments(frame)
	local name, subclass, pf_arg1 = process_name(self, frame)
	if name == nil then
		return nil
	elseif subclass == "parser variable" then
		return {}
	elseif subclass == "template" then
		return template_argument_table(self, 2, frame)
	end
	local pf_args = {pf_arg1}
	for i = 2, #self do
		pf_args[i] = expand(self[i], frame) -- Not trimmed.
	end
	return pf_args
end
-- Lua implementations of parser functions, keyed by canonical name plus
-- colon (the form produced by process_name). Each takes the template node,
-- the first argument (the text after the colon) and the frame.
local parser_functions = {}
-- {{#if:test|then|else}}: picks child 2 if the trimmed test is non-empty,
-- otherwise child 3; a missing branch gives "".
parser_functions["#IF:"] = function(self, arg1, frame)
	local result = self[php_trim(expand(arg1, frame)) ~= "" and 2 or 3]
	return result and php_trim(expand(result, frame)) or ""
end
-- {{#ifeq:a|b|then|else}}: compares the two operands (entity-decoded and
-- trimmed) using PHP's loose string equality, then picks child 3 or 4.
parser_functions["#IFEQ:"] = function(self, arg1, frame)
	local arg2 = self[2]
	local result = self[php_string_equals(
		php_trim(decode_entities(expand(arg1, frame))),
		arg2 and php_trim(decode_entities(expand(arg2, frame))) or ""
	) and 3 or 4]
	return result and php_trim(expand(result, frame)) or ""
end
-- {{#iferror:test|error|no error}}. Emulates the native regex:
-- "/<(?:strong|span|p|div)\s(?:[^\s>]*\s+)*?class=\"(?:[^\"\s>]*\s+)*?error(?:\s[^\">]*)?\"/"
-- i.e. an opening strong/span/p/div tag whose class attribute contains the
-- word "error".
parser_functions["#IFERROR:"] = function(self, arg1, frame)
	local arg1, head, tag = php_trim(expand(arg1, frame)), 1
	while true do
		-- Loose match for a candidate tag name followed by whitespace; the
		-- exact name is checked below. `head` moves past each candidate.
		tag, head = match(arg1, "<([dps][ipt]?[arv]?[no]?n?g?)%s()", head)
		if not tag then -- No error.
			local result = self[3]
			-- Defaults to arg1 if arg3 is not supplied, which has already
			-- been expanded and trimmed.
			return result and php_trim(expand(result, frame)) or arg1
		elseif ( -- Error.
			(tag == "strong" or tag == "span" or tag == "p" or tag == "div") and
			match(arg1, "^[^>]-%f[^%s]class=\"[^\">]-%f[^%s\"]error%f[%s\"][^\">]-\"", head)
		) then
			local result = self[2]
			return result and php_trim(expand(result, frame)) or ""
		end
	end
end
-- Existence check behind #ifexist. Scribunto already deals with the special
-- cases for Special: and Media: titles.
local function do_ifexist(title)
	-- Reading title.isExternal first avoids incrementing the expensive
	-- function count for titles that can't exist locally.
	if title and not title.isExternal then
		return title.exists
	end
	return false
end
-- {{#ifexist:title|then|else}}: picks child 2 if the page exists, otherwise
-- child 3. The check runs under pcall, so a failure (e.g. an exceeded
-- expensive function limit) counts as "doesn't exist".
parser_functions["#IFEXIST:"] = function(self, arg1, frame)
	local success, result = pcall(do_ifexist, new_title(php_trim(expand(arg1, frame))))
	result = self[success and result and 2 or 3]
	return result and php_trim(expand(result, frame)) or ""
end
-- {{#ifexpr:expr|then|else}}: picks child 2 if the expression is non-zero,
-- child 3 if it is zero or empty. If evaluation fails, the error message is
-- returned instead.
-- NOTE(review): the original condition was lost from this copy of the file;
-- this version delegates evaluation to the real frame's #expr - confirm
-- against the live module.
parser_functions["#IFEXPR:"] = function(self, arg1, frame)
	-- TODO: Lua-based implementation.
	local value = current_frame:callParserFunction("#expr", expand(arg1, frame))
	if find(value, "class=\"error\"", 1, true) then
		return value
	end
	local result = self[(value ~= "" and tonumber(value) ~= 0) and 2 or 3]
	return result and php_trim(expand(result, frame)) or ""
end
-- Calls function `func` of module `mod` with `frame` and returns its output
-- as a string. Intended to be run under pcall by the #invoke handler.
local function do_invoke(mod, func, frame)
	-- FIXME: why does require(mod) cause a protected metatable error?
	local result, i = {require(mod)[func](frame)}, 0
	-- Concatenate the return values, matching mw.executeFunction. Values
	-- are stringified in place, stopping at the first nil.
	while true do
		i = i + 1
		local v = result[i]
		if v == nil then
			-- Exactly one value: return it directly without concat.
			return i == 2 and result[1] or concat(result)
		end
		result[i] = tostring(v)
	end
end
-- {{#invoke:module|function|args...}}: runs the module function in an
-- emulated child frame. If the call fails, falls back to preprocessing the
-- original wikitext with the real frame.
parser_functions["#INVOKE:"] = function(self, arg1, frame)
	-- Get the module, function, arguments and child frame, then use pcall
	-- to call the function. Only the parts specific to the invoke are
	-- protected, to avoid catching parser-internal errors.
	local mod, func, child_frame = "Module:" .. php_trim(expand(arg1, frame)), self[2]
	if func then
		func = php_trim(expand(func, frame))
	end
	local args = template_argument_table(self, 3, frame)
	if frame then
		-- `frame` may be an emulated frame (getTitle method) or a bare
		-- {title =, args =} table.
		local get_title = frame.getTitle
		child_frame = new_frame(mod, args, get_title and get_title(frame) or frame.title or current_frame:getTitle(), frame.args or {})
	else
		child_frame = new_frame(mod, args, current_frame:getTitle(), {})
	end
	-- Ensure mw.getCurrentFrame returns child_frame for the duration of the
	-- invoke.
	local mw_get_current_frame = mw.getCurrentFrame
	mw.getCurrentFrame = function()
		return child_frame
	end
	local success, result = pcall(do_invoke, mod, func, child_frame)
	mw.getCurrentFrame = mw_get_current_frame
	-- If the module threw an error, fallback to real preprocessing so that
	-- the error can be caught and assigned an ID by Scribunto, which
	-- enables traceback, categorization etc. Scribunto uses
	-- Validator::cleanUp( strval( $result ) ) to ensure that all outputs
	-- are valid UTF-8, and outputs in normalization form C; non-UTF-8
	-- strings are repaired with replacement characters (U+FFFD) as
	-- necessary, which is non-trivial. toNFC can check validity and outputs
	-- the correct form, but returns nil if the input (i.e. the module
	-- output) is non-UTF-8. Since this should be rare, it's fine to use the
	-- fallback for such strings as well (for now, but ideally this should
	-- be avoided). TODO.
	-- FIXME: preprocess needs to be by a real frame object.
	return success and toNFC(result) or current_frame:preprocess(self:__tostring())
end
-- {{#switch:value|case=result|case|...|#default=result|fallback}}.
-- Cases without "=" fall through to the next case that has a result. If
-- nothing matches, the result is the final argument if it has no "=",
-- otherwise the #default result, otherwise "".
parser_functions["#SWITCH:"] = function(self, arg1, frame)
	arg1 = php_trim(decode_entities(expand(arg1, frame)))
	local found, default, default_found, last_arg, last_arg_had_no_equals
	for i = 2, #self do
		local arg = self[i]
		if class_else_type(arg) == "argument" then
			-- A preceding fall-through case matched.
			if found then
				return php_trim(expand(arg[2], frame))
			end
			local test = php_trim(decode_entities(expand(arg[1], frame)))
			if php_string_equals(arg1, test) then
				return php_trim(expand(arg[2], frame))
			end
			last_arg_had_no_equals = false
			-- "#default" is matched case-insensitively.
			if default_found or match(test, "^#[Dd][Ee][Ff][Aa][Uu][Ll][Tt]()$") then
				default = arg[2]
				default_found = false
			end
		else
			last_arg_had_no_equals = true
			arg = expand(arg, frame)
			last_arg = arg
			arg = php_trim(decode_entities(arg))
			if php_string_equals(arg1, arg) then
				found = true
			elseif match(arg, "^#[Dd][Ee][Ff][Aa][Uu][Ll][Tt]()$") then
				default_found = true
			end
		end
	end
	if last_arg_had_no_equals then
		return php_trim(last_arg)
	elseif default == nil then
		return ""
	end
	return php_trim(expand(default, frame))
end
-- {{#tag:name|content|attr=value|...}}: builds the tag as text. If `name` is
-- a known parser extension tag, the text is preprocessed with the real
-- frame; otherwise it is returned as-is. Without a content argument, a
-- self-closing tag is produced.
parser_functions["#TAG:"] = function(self, arg1, frame)
	local name = lower(php_trim(expand(arg1, frame)))
	local content = self[2]
	if not content then
		local tag = "<" .. name .. "/>"
		return parser_extension_tags[name] and current_frame:preprocess(tag) or tag
	end
	content = expand(content, frame)
	local attributes, n = {}, 0
	for i = 3, #self do
		local arg = self[i]
		-- Only "key=value" arguments become attributes.
		if class_else_type(arg) == "argument" then
			n = n + 1
			local v = php_trim(expand(arg[2], frame))
			-- One pair of surrounding quotes is stripped from the value;
			-- a value consisting only of "" or '' is the empty string.
			attributes[n] = " " .. php_htmlspecialchars(php_trim(expand(arg[1], frame))) .. "=\"" .. (
				(v == "\"\"" or v == "''") and "" or
				php_htmlspecialchars(match(v, "^[\"'](.+)[\"']$") or v, true)
			) .. "\""
		end
	end
	local tag = "<" .. name .. concat(attributes) .. ">" .. content .. "</" .. name .. ">"
	return parser_extension_tags[name] and current_frame:preprocess(tag) or tag
end
-- parser_functions = function(self, arg1, frame)
-- TODO
--end
-- parser_functions = function(self, arg1, frame)
-- TODO
-- end
-- parser_functions = function(self, arg1, frame)
-- TODO
-- end
-- parser_functions = function(self, arg1, frame)
-- TODO
-- end
-- Values of parser variables which are resolved in Lua, keyed by canonical
-- name. Anything not listed is preprocessed by the real frame.
local parser_variables = {}
parser_variables["!"] = "|"
parser_variables["="] = "="
-- FIXME: calculate on demand.
parser_variables["PAGENAME"] = page_title.text
-- Fetches, parses and expands the template `name` with arguments `args`.
-- Parsed templates are memoized by `name`. A page with no content expands
-- to a link to the page ("[[:Template:Foo]]").
-- TODO: isIncludable and loop checks.
local function parse_then_expand(name, args)
	local template, prefixed_name = template_memo[name]
	if template == nil then
		local title = new_title(name, 10)
		if not title then
			error("Invalid template title: " .. tostring(name))
		end
		prefixed_name = title.prefixedText
		local content = title:getContent()
		template = content and parse(content, true) or Wikitext:new("[[:" .. prefixed_name .. "]]")
		template_memo[name] = template
		template_prefixed_name_memo[name] = prefixed_name
	else
		prefixed_name = template_prefixed_name_memo[name]
	end
	-- Use the expand helper rather than the method, since `template` is a
	-- plain string when Wikitext:new collapses its input.
	return expand(template, {title = prefixed_name, args = args})
end
-- BIG TODO: manual template expansion.
-- Expands the template against `frame`:
--   * invalid name: re-emitted as literal "{{...}}" with expanded children;
--   * parser variable: Lua value if known, else real preprocessing;
--   * parser function: Lua implementation if known, else real preprocessing;
--   * template: parsed and expanded with its arguments.
function Template:expand(frame)
	local name, subclass, pf_arg1 = process_name(self, frame)
	if name == nil then
		local output = {}
		for i = 1, #self do
			output[i] = expand(self[i], frame)
		end
		return "{{" .. concat(output, "|") .. "}}"
	elseif subclass == "parser variable" then
		return parser_variables[name] or current_frame:preprocess("{{" .. name .. "}}")
	elseif subclass == "parser function" then
		local parser_function = parser_functions[name]
		if parser_function then
			return parser_function(self, pf_arg1, frame)
		end
		-- `name` already ends in ":", so this rebuilds "{{name:arg1|...}}".
		local output = {name .. pf_arg1}
		for i = 2, #self do
			output[i] = expand(self[i], frame)
		end
		return current_frame:preprocess("{{" .. concat(output, "|") .. "}}")
	end
	-- Only build an argument table if there are arguments.
	return parse_then_expand(name, self[2] and self:get_arguments(frame) or nil)
end
local Tag = Node:new_class("tag")
-- Serializes the tag, with escaped attributes, as either "<name attrs/>" or
-- "<name attrs>content</name>".
function Tag:__tostring()
	-- FIXME: ensure the same order as given in self.attributes, as `next` is
	-- unpredictable.
	local open_tag, attributes, n = {"<", self.name}, self:get_attributes(), 2
	for k, v in next, attributes do
		n = n + 1
		open_tag[n] = " " .. php_htmlspecialchars(k) .. "=\"" .. php_htmlspecialchars(v, true) .. "\""
	end
	if self.self_closing then
		return concat(open_tag) .. "/>"
	end
	return concat(open_tag) .. ">" .. concat(self) .. "</" .. self.name .. ">"
end
-- Returns the tag's attributes as a name -> value table. The raw attribute
-- string in self.attributes is parsed on first use and replaced by the
-- resulting table, so later calls return the same table.
function Tag:get_attributes()
	local raw = self.attributes
	if not raw then
		self.attributes = {}
		return self.attributes
	elseif type(raw) == "table" then
		return raw
	end
	-- Drop the final "/" of a self-closing tag.
	if sub(raw, -1) == "/" then
		raw = sub(raw, 1, -2)
	end
	local attributes, head = {}, 1
	-- Semi-manual implementation of the native regex. Space characters are
	-- tab, LF, FF, CR and space.
	while true do
		-- Name: first character may be "=", the rest may not.
		local name, loc = match(raw, "([^\t\n\f\r />][^\t\n\f\r /=>]*)()", head)
		if not name then
			break
		end
		head = loc
		local v
		loc = match(raw, "^[\t\n\f\r ]*=[\t\n\f\r ]*()", head)
		if loc then
			head = loc
			-- Either "", '' or the value ends on a space/at the end. Missing
			-- end quotes are repaired by closing the value at the end.
			v, loc = match(raw, "^\"([^\"]*)\"?()", head)
			if not v then
				v, loc = match(raw, "^'([^']*)'?()", head)
				if not v then
					v, loc = match(raw, "^([^\t\n\f\r >]*)()", head)
				end
			end
			head = loc
		end
		-- valid_attribute_name is a pattern matching a valid attribute name.
		-- Defined in the data due to its length - see there for more info.
		if umatch(name, valid_attribute_name) then
			-- Sanitizer applies PHP strtolower (ASCII-only). Whitespace runs
			-- in the value are collapsed to a single space before trimming.
			attributes[lower(name)] = v and decode_entities(
				php_trim((gsub(v, "[\t\n\r ]+", " ")))
			) or ""
		end
	end
	self.attributes = attributes
	return attributes
end
-- Tags are expanded by serializing them and handing the text to the real
-- frame's preprocessor; `frame` is not used.
function Tag:expand(frame)
	local wikitext = self:__tostring()
	return current_frame:preprocess(wikitext)
end
local Heading = Node:new_class("heading")
-- Creates a heading node from `this`. If it has several children which can
-- all be concatenated, they are merged into one string child, carrying over
-- the level, section and index fields.
function Heading:new(this)
	local ok, joined = false, nil
	if #this > 1 then
		ok, joined = pcall(concat, this)
	end
	if not ok then
		return new_node(self, this)
	end
	local merged = {joined}
	merged.level = this.level
	merged.section = this.section
	merged.index = this.index
	return new_node(self, merged)
end
-- Serializes the heading with `level` equals signs on either side.
function Heading:__tostring()
	local marker = rep("=", self.level)
	local inner = tostring_node(self)
	return marker .. inner .. marker
end
do
	-- The generic node expansion, captured before Heading.expand is defined.
	local expand_node = Node.expand

	-- Returns the expanded heading text, or nil if it contains a newline.
	-- Expanded names can contain "\n" (e.g. inside nowiki tags), which makes
	-- the heading fail, although the native parser still counts it as a
	-- heading for the purpose of section numbers.
	local function validate_name(self, frame)
		local text = expand_node(self, frame)
		if not find(text, "\n", 1, true) then
			return text
		end
		return nil
	end

	-- Trimmed heading name, or nil if the heading is invalid.
	function Heading:get_name(frame)
		local name = validate_name(self, frame)
		if name == nil then
			return nil
		end
		return php_trim(name) or nil
	end

	-- Anchor for the heading, or nil if the heading is invalid.
	-- FIXME: account for anchor disambiguation.
	function Heading:get_anchor(frame)
		local name = validate_name(self, frame)
		if name == nil then
			return nil
		end
		return decode_entities(anchor_encode(name)) or nil
	end

	-- Expanded contents wrapped in `level` equals signs on either side.
	function Heading:expand(frame)
		local marker = rep("=", self.level)
		local body = expand_node(self, frame)
		return marker .. body .. marker
	end
end
------------------------------------------------------------------------------------
--
-- Frame
--
------------------------------------------------------------------------------------
do
-- Creates a parser value object: a table with an expand() method which runs
-- `callback` and remembers the result. A nil/false result is not remembered,
-- so the callback runs again on the next call in that case.
local function new_callback_parser_value(callback)
	local cached
	local parser_value = {}
	parser_value.expand = function(_)
		if cached then
			return cached
		end
		cached = callback()
		return cached
	end
	return parser_value
end
-- frame:getArgument(): returns a parser value which expands to the argument
-- called `name`. `opt` is either the name itself or a table with a `name`
-- field.
local function get_argument(self, opt)
	local name = type(opt) == "table" and opt.name or opt
	return new_callback_parser_value(function()
		return self.args[name]
	end)
end
-- frame:newParserValue(): returns a parser value which expands to the
-- preprocessed form of the given text. `opt` is either the text itself or a
-- table with a `text` field.
local function new_parser_value(self, opt)
	local text = opt
	if type(opt) == "table" then
		text = opt.text or opt
	end
	local function preprocess_text()
		return self:preprocess(text)
	end
	return new_callback_parser_value(preprocess_text)
end
-- frame:newTemplateParserValue(): returns a parser value which expands the
-- template described by `opt` (a table with a required `title` field).
local function new_template_parser_value(self, opt)
	if type(opt) ~= "table" then
		error("frame:newTemplateParserValue: the first parameter must be a table")
	end
	if opt.title == nil then
		error("frame:newTemplateParserValue: a title is required")
	end
	local function expand_it()
		return self:expandTemplate(opt)
	end
	return new_callback_parser_value(expand_it)
end
-- Converts an argument value to a string: booleans become "1"/"", numbers
-- are stringified, and strings are kept (trimmed only if `named` is set).
-- Any other type throws, with `name` and key `k` in the message.
local function scribunto_param_value(name, k, v, named)
	local type_v = type(v)
	if type_v == "string" then
		return named and php_trim(v) or v
	end
	if type_v == "number" then
		return tostring(v)
	end
	if type_v == "boolean" then
		if v then
			return "1"
		end
		return ""
	end
	error(name .. ": invalid type " .. type_v .. " for arg '" .. k .. "'", 3)
end
-- Validates and normalizes an argument table passed in from Lua. `name` is
-- the calling method's name, used in error messages. Returns a new table
-- with normalized keys and stringified values; throws on invalid key/value
-- types, or if two keys normalize to the same key.
local function check_args(name, args)
	local output_args, seen = {}, {}
	for k, v in pairs(args) do
		local type_k = type(k)
		if type_k ~= "string" and type_k ~= "number" then
			error(name .. ": arg keys must be strings or numbers, " .. type_k .. " given", 3)
		end
		-- When calling back into PHP, keys which can be normalized to
		-- numbers without trimming are treated as implicit arguments, while
		-- any others are named arguments, e.g. [1] and ["1"] are treated as
		-- an implicit argument 1 ("{{foo|bar}}"), whereas [" 1 "] is a named
		-- argument 1 ("{{foo|1=bar}}"). This affects whether the value is
		-- trimmed.
		k = scribunto_param_key(k, true)
		if seen[k] then
			error("Collision for key " .. k .. " in frame argument table")
		end
		seen[k] = true
		if type(k) == "number" then
			-- Implicit argument.
			output_args[k] = scribunto_param_value(name, k, v)
		else
			-- Named argument; normalize with trimming to get the actual key.
			output_args[scribunto_param_key(k)] = scribunto_param_value(name, k, v, true)
		end
	end
	return output_args
end
-- frame:expandTemplate{title =, args =}: expands the named template with
-- the given arguments. A title object in the main namespace is given a
-- leading ":" so that it isn't resolved in the template namespace.
local function expand_template(self, opt)
	if type(opt) ~= "table" then
		error("frame:expandTemplate: the first parameter must be a table")
	end
	local title = opt.title
	if title == nil then
		error("frame:expandTemplate: a title is required")
	end
	if type(title) == "table" and title.namespace == 0 then
		title = ":" .. tostring(title)
	else
		title = tostring(title)
	end
	local args = opt.args
	if args == nil then
		return parse_then_expand(title, {})
	end
	if type(args) ~= "table" then
		error("frame:expandTemplate: args must be a table")
	end
	return parse_then_expand(title, check_args("frame:expandTemplate", args))
end
-- Checks a parser function name and returns it as a string. `name_type` can
-- be supplied if type(name) is already known. Throws if the name is nil, or
-- is neither a string nor a number.
local function normalize_parser_function_name(name, name_type)
	name_type = name_type or type(name)
	if name == nil then
		error("frame:callParserFunction: a function name is required", 3)
	end
	if name_type == "string" then
		return name
	end
	if name_type == "number" then
		return tostring(name)
	end
	error("frame:callParserFunction: function name must be a string or number", 3)
end
-- frame:callParserFunction(name, args...) / frame:callParserFunction{name =,
-- args =}. If the function has a Lua implementation, it is called with the
-- arguments as an array (named arguments as Argument nodes); otherwise the
-- call goes to the real frame.
-- NOTE(review): the lookup key was lost from this copy of the file; the name
-- is resolved through the magic word data here, mirroring
-- parse_template_name - confirm against the live module.
local function call_parser_function(self, name, args, ...)
	-- TODO: handle colon in name.
	local name_type, func_name, no_varargs = type(name)
	if name_type == "table" then
		func_name = normalize_parser_function_name(name.name)
		args = name.args
		no_varargs = true
	else
		func_name = normalize_parser_function_name(name, name_type)
	end
	local mgw_data = retrieve_magic_word_data(func_name)
	local parser_function = mgw_data and mgw_data.parser_function and parser_functions[mgw_data.name .. ":"]
	if not parser_function then
		return current_frame:callParserFunction(name, args, ...)
	elseif type(args) ~= "table" then
		args = no_varargs and {args} or {args, ...}
	end
	-- Collect the normalized keys, rejecting invalid keys and collisions.
	local keys, values, n = {}, {}, 0
	for k, v in pairs(args) do
		local type_k = type(k)
		if type_k ~= "string" and type_k ~= "number" then
			error("frame:callParserFunction: arg keys must be strings or numbers, " .. type_k .. " given", 2)
		end
		k = scribunto_param_key(k, true)
		if values[k] ~= nil then
			error("Collision for key " .. k .. " in frame argument table")
		end
		values[k] = v
		n = n + 1
		keys[n] = k
	end
	-- `pairs` order is unspecified, so sort: numbered keys ascending, then
	-- named keys alphabetically.
	table.sort(keys, function(a, b)
		local type_a, type_b = type(a), type(b)
		if type_a ~= type_b then
			return type_a == "number"
		end
		return a < b
	end)
	local args_array = {}
	for i = 1, n do
		local k = keys[i]
		local v = scribunto_param_value("frame:callParserFunction", k, values[k])
		if type(k) == "number" then
			-- Implicit argument; the key is ignored, since callParserFunction in PHP calls array_merge, which rebases all numbered keys.
			args_array[i] = v
		else
			-- Named argument.
			args_array[i] = Argument:new{k, v}
		end
	end
	-- Implementations read branches from index 2 onwards and take the first
	-- argument separately.
	return parser_function(args_array, args_array[1], self)
end
-- frame:extensionTag(): currently just forwards to the real frame.
local function extension_tag(self, name, content, args)
	-- TODO: construct Tag object and expand.
	local real_frame = current_frame
	return real_frame:extensionTag(name, content, args)
end
-- frame:preprocess(): parses the text and expands it in the context of this
-- frame. `opt` is either the text itself or a table with a `text` field.
local function preprocess(self, opt)
	if type(opt) == "table" then
		opt = opt.text
	end
	-- The frame itself is passed (not its args), since expansion reads
	-- `frame.args`; the helper is used because the parsed result may not be
	-- a node.
	return expand(parse(tostring(opt), true), self)
end
-- frame:argumentPairs(): iterates over the frame's arguments.
local function argument_pairs(self)
	local frame_args = self.args
	return pairs(frame_args)
end
-- Builds a table that exposes the Scribunto frame interface.
-- title: the value returned by frame:getTitle().
-- args: the argument table, exposed through the proxy table frame.args.
-- parent_title, ...: values describing the parent frame; they are stored and
-- handed back to new_frame by getParent and newChild.
function new_frame(title, args, parent_title, ...)
	local frame = {
		argumentPairs = argument_pairs,
		callParserFunction = call_parser_function,
		expandTemplate = expand_template,
		extensionTag = extension_tag,
		getArgument = get_argument,
		newParserValue = new_parser_value,
		newTemplateParserValue = new_template_parser_value,
		preprocess = preprocess,
	}
	local parents, args_mt = {parent_title, ...}, {}
	-- Look up the key as given; if that finds nothing, retry with the key
	-- converted to a number (if it was a string) or to a string (otherwise),
	-- so that e.g. 1 and "1" refer to the same argument.
	function args_mt:__index(k)
		local arg = args[k]
		if arg == nil then
			k = (type(k) == "string" and tonumber or tostring)(k)
			if k ~= nil then
				arg = args[k]
			end
		end
		return arg
	end
	-- Iteration goes over the underlying argument table, not the proxy.
	function args_mt.__pairs()
		return pairs(args)
	end
	function args_mt.__ipairs()
		return ipairs(args)
	end
	frame.args = setmetatable({}, args_mt)
	-- Returns a frame built from the stored parent values, or nil if this
	-- frame has no parent.
	function frame:getParent()
		if parent_title then
			return new_frame(unpack(parents))
		end
		return nil
	end
	function frame:getTitle()
		return title
	end
	-- Returns a new frame with the title and arguments given in `opt`. The
	-- title defaults to this frame's title, and the arguments to an empty
	-- table. Raises if `opt` or `opt.args` is not a table.
	function frame:newChild(opt)
		if type(opt) ~= "table" then
			error("frame:newChild: the first parameter must be a table", 2)
		end
		local title, self_title = opt.title, self:getTitle()
		if title == nil then
			title = self_title
		else
			title = tostring(title)
		end
		local args = opt.args
		if args == nil then
			args = {}
		elseif type(args) ~= "table" then
			error("frame:newChild: args must be a table", 2)
		else
			args = check_args("frame:newChild", args)
		end
		return new_frame(title, args, unpack(parents))
	end
	return frame
end
end
------------------------------------------------------------------------------------
--
-- Parser
--
------------------------------------------------------------------------------------
-- Returns the substring of the text between offsets `i` and `j`, both relative
-- to the current head. `i` defaults to 0 and `j` defaults to `i`, so a call
-- with no arguments returns the character at the head.
function Parser:read(i, j)
	i = i or 0
	local base = self.head
	return sub(self.text, base + i, base + (j or i))
end
-- Moves the head forward by `n` characters. If `n` is omitted, the value of
-- `step` is used instead, falling back to 1 if that is also unset.
function Parser:advance(n)
self.head = self.head + (n or self.step or 1)
end
-- Moves the head to the absolute position `head`, and clears `nxt` so that no
-- previously buffered token is used by the next consume() call.
function Parser:jump(head)
self.head = head
self.nxt = nil
end
-- Sets the pattern that consume() searches for, and clears `nxt`, since a
-- token buffered under the old pattern is no longer valid.
function Parser:set_pattern(pattern)
local layer = self
layer.pattern = pattern
layer.nxt = nil
end
-- Reads the next token and passes it to the current handler, returning
-- whatever the handler returns.
-- If a token was buffered in `nxt` by an earlier call, that token is used.
-- Otherwise the text is searched for the current pattern, starting at the
-- head, and the length of the chosen token is stored in `step`.
function Parser:consume()
local layer = self
local this = layer.nxt
if this then
layer.nxt = nil
else
local text, head = self.text, self.head
local loc1, loc2 = find(text, layer.pattern, head)
-- A match at the head is the token. If nothing matched, loc2 is nil, so the
-- token is everything from the head onwards.
if loc1 == head or not loc1 then
this = sub(text, head, loc2)
else
-- Otherwise the token is the text before the match, and the match itself
-- is buffered for the next call.
this = sub(text, head, loc1 - 1)
layer.nxt = sub(text, loc1, loc2)
end
end
layer.step = #this
return layer.handler(self, this)
end
-- Template or parameter.
-- Parsed by matching the opening braces innermost-to-outermost (ignoring lone closing braces). Parameters {{{ }}} take priority over templates {{ }} where possible, but a double closing brace will always result in a closure, even if there are 3+ opening braces.
-- For example, "{{{{foo}}}}" (4) is parsed as a parameter enclosed by single braces, and "{{{{{foo}}}}}" (5) is a parameter inside a template. However, "{{{{{foo }} }}}" is a template inside a parameter, due to "}}" forcing the closure of the inner node.
do
-- Handlers.
local handle_name
local handle_argument
-- Route function for the body of a template/parameter: opens a sublayer
-- handled by handle_name, then emits `inner_node` (if given) as its first item.
local function do_template_or_parameter(self, inner_node)
self:push_sublayer(handle_name)
-- NOTE(review): the pattern literal is empty in this copy; presumably a
-- character class was lost - confirm against the upstream module.
self:set_pattern("")
-- If a node has already been parsed, nest it at the start of the new
-- outer node (e.g. when parsing "{{{{foo}}bar}}", the template "{{foo}}"
-- is parsed first, since it's the innermost, and becomes the first
-- node of the outer template).
if inner_node then
self:emit(inner_node)
end
end
-- Handler for the first sublayer of a template/parameter. On first use it
-- replaces itself with the dispatcher returned by self:switch, then forwards
-- the call to it.
-- NOTE(review): every entry in the table below begins with a bare "=", i.e.
-- the bracketed keys are missing in this copy, so the constructor is not valid
-- Lua as written. The pattern literals passed to set_pattern are also empty.
-- Restore both from the upstream module.
function handle_name(self, ...)
handle_name = self:switch(handle_name, {
= Parser.heading_block,
= Parser.tag,
= Parser.wikilink_block,
= Parser.braces,
-- Closes the current sublayer as a Wikitext node and hands over to
-- handle_argument for what follows.
= function(self)
self:emit(Wikitext:new(self:pop_sublayer()))
self:push_sublayer(handle_argument)
self:set_pattern("")
end,
-- A second "}" closes the whole layer; a lone "}" is emitted as text.
= function(self)
if self:read(1) == "}" then
self:emit(Wikitext:new(self:pop_sublayer()))
return self:pop()
end
self:emit("}")
end,
= Parser.fail_route,
= Parser.emit
})
return handle_name(self, ...)
end
-- Handler for the argument sublayers of a template/parameter. On first use it
-- replaces itself with the dispatcher returned by self:switch, then forwards
-- the call to it.
-- NOTE(review): every entry in the table below begins with a bare "=", i.e.
-- the bracketed keys are missing in this copy, so the constructor is not valid
-- Lua as written. The pattern literals passed to set_pattern are also empty.
-- Restore both from the upstream module.
function handle_argument(self, ...)
-- Closes the current sublayer and emits it. If a key was stored earlier, the
-- value is wrapped in an Argument node together with that key, and the stored
-- key is cleared.
local function emit_argument(self)
local arg = Wikitext:new(self:pop_sublayer())
local layer = self
local key = layer.key
if key then
arg = Argument:new{key, arg}
layer.key = nil
end
self:emit(arg)
end
handle_argument = self:switch(handle_argument, {
-- Once a key has been stored, the heading block is looked for with "="
-- as the second argument; before that, with "==".
= function(self)
return self:heading_block("\n", self.key and "=" or "==")
end,
= Parser.tag,
-- Stores the sublayer parsed so far as the argument's key, then starts a new
-- sublayer for the value.
= function(self)
local key = Wikitext:new(self:pop_sublayer())
self.key = key
self:push_sublayer(handle_argument)
self:set_pattern("")
end,
= Parser.wikilink_block,
= Parser.braces,
-- Emits the finished argument and starts a sublayer for the next one.
= function(self)
emit_argument(self)
self:push_sublayer(handle_argument)
self:set_pattern("")
end,
-- A second "}" emits the final argument and closes the whole layer; a lone
-- "}" is emitted as text.
= function(self)
if self:read(1) == "}" then
emit_argument(self)
return self:pop()
end
self:emit("}")
end,
= Parser.fail_route,
= Parser.emit
})
return handle_argument(self, ...)
end
-- Parses a run of opening braces at the head into nested Template/Parameter
-- nodes, working from the innermost pair outwards, and emits the result.
-- Returns the number of opening braces left unmatched, plus `failed`, which is
-- true if an attempt to parse a body failed (and is nil otherwise).
function Parser:template_or_parameter()
local text, head, node_to_emit, failed = self.text, self.head
-- Comments/tags interrupt the brace count.
local braces = match(text, "^{+()", head) - head
self:advance(braces)
while true do
-- Each pass parses one body; the node from the previous pass (if any) is
-- nested at the start of the new one.
local success, node = self:try(do_template_or_parameter, node_to_emit)
-- Fail means no "}}" or "}}}" was found, so emit any remaining
-- unmatched opening braces before any templates/parameters that
-- were found.
if not success then
self:emit(rep("{", braces))
failed = true
break
-- If there are 3+ opening and closing braces, it's a parameter.
elseif braces >= 3 and self:read(2) == "}" then
self:advance(3)
braces = braces - 3
node = Parameter:new(node)
-- Otherwise, it's a template.
else
self:advance(2)
braces = braces - 2
node = Template:new(node)
end
-- Record where the node starts (after the braces still unmatched) and its
-- raw source text.
local index = head + braces
node.index = index
node.raw = sub(text, index, self.head - 1)
node_to_emit = node
-- Terminate once not enough braces remain for further matches.
if braces == 0 then
break
-- Emit any stray opening brace before any matched nodes.
elseif braces == 1 then
self:emit("{")
break
end
end
if node_to_emit then
self:emit(node_to_emit)
end
return braces, failed
end
end
-- Tag.
do
local end_tags = data.end_tags
-- Handlers.
local handle_start
local handle_tag
-- Route function for a tag: installs handle_start as the handler, records the
-- head position as the layer's index, sets the pattern and advances.
local function do_tag(self)
local layer = self
layer.handler, layer.index = handle_start, self.head
-- NOTE(review): the pattern literal is empty in this copy; presumably a
-- character class was lost - confirm against the upstream module.
self:set_pattern("")
self:advance()
end
-- Returns true if the (lowercase) tag name `this` is one whose tags are
-- skipped in the current mode: "includeonly" when the text is being
-- transcluded, and "noinclude" or "onlyinclude" when it is not.
local function is_ignored_tag(self, this)
	local transcluded = self.transcluded
	if not transcluded then
		return this == "noinclude" or this == "onlyinclude"
	end
	return this == "includeonly"
end
-- Finishes an ignored tag: searches for the next ">" from `head` onwards
-- (plain search). If there is none, the route fails; otherwise the head jumps
-- to it, and the layer is popped and returned with `ignored` set.
local function ignored_tag(self, text, head)
local loc = find(text, ">", head, true)
if not loc then
return self:fail_route()
end
self:jump(loc)
local tag = self:pop()
tag.ignored = true
return tag
end
-- First handler for a tag: receives the token following "<" and decides
-- whether it starts a valid tag. Closing tags only succeed here if they are
-- ignored tags; everything else about closing tags fails the route.
-- NOTE(review): in this copy the pattern "^+" has lost its character class and
-- `end_tags` is not indexed by the tag name, so the validity check below cannot
-- reject unknown names - restore both from the upstream module.
function handle_start(self, this)
if this == "/" then
local text, head = self.text, self.head + 1
local this = match(text, "^+", head)
if this and is_ignored_tag(self, lower(this)) then
head = head + #this
-- The name must not be followed directly by "/".
if not match(text, "^/", head) then
return ignored_tag(self, text, head)
end
end
return self:fail_route()
elseif this == "" then
return self:fail_route()
end
-- Tags are only case-insensitive with ASCII characters.
local raw_name = this
this = lower(this)
local end_tag_pattern = end_tags
if not end_tag_pattern then -- Validity check.
return self:fail_route()
end
local layer = self
local text, head = self.text, self.head + layer.step
-- The name must not be followed directly by "/".
if match(text, "^/", head) then
return self:fail_route()
elseif is_ignored_tag(self, this) then
return ignored_tag(self, text, head)
-- If an onlyinclude tag is not ignored (and cannot be active since it
-- would have triggered special handling earlier), it must be plaintext.
elseif this == "onlyinclude" then
return self:fail_route()
elseif this == "noinclude" or this == "includeonly" then
layer.ignored = true -- Ignored block.
-- The name as written is kept, since handle_tag checks its case.
layer.raw_name = raw_name
end
layer.name, layer.handler, layer.end_tag_pattern = this, handle_tag, end_tag_pattern
self:set_pattern(">")
end
-- Second handler for a tag: reads the attributes up to ">", then looks for the
-- end tag. Fails the route if the text ends before ">", or if no end tag is
-- found and the tag is not one that tolerates being unclosed.
function handle_tag(self, this)
if this == "" then
return self:fail_route()
elseif this ~= ">" then
-- Anything before ">" is stored as the raw attribute text.
self.attributes = this
return
elseif self:read(-1) == "/" then
self.self_closing = true
return self:pop()
end
local text, head, layer = self.text, self.head + 1, self
local loc1, loc2 = find(text, layer.end_tag_pattern, head)
if loc1 then
-- Emit the text between the start tag and the end tag, if there is any.
if loc1 > head then
self:emit(sub(text, head, loc1 - 1))
end
self:jump(loc2)
return self:pop()
-- noinclude and includeonly will tolerate having no closing tag, but
-- only if given in lowercase. This is due to a preprocessor bug, as
-- it uses a regex with the /i (case-insensitive) flag to check for
-- end tags, but a simple array lookup with lowercase tag names when
-- looking up which tags should tolerate no closing tag (exact match
-- only, so case-sensitive).
elseif layer.ignored then
local raw_name = layer.raw_name
if raw_name == "noinclude" or raw_name == "includeonly" then
self:jump(#text)
return self:pop()
end
end
return self:fail_route()
end
-- Handles "<" at the head. HTML comments are skipped, as is the text between
-- "</onlyinclude>" and the next "<onlyinclude>" when onlyinclude mode is
-- active; in both cases, skipping runs to the end of the text if no terminator
-- is found. Anything else is tried as a tag: on failure "<" is emitted as
-- text, and on success a Tag node is emitted unless the tag is ignored.
function Parser:tag()
-- HTML comment.
if self:read(1, 3) == "!--" then
local text = self.text
self:jump(select(2, find(text, "-->", self.head + 4, true)) or #text)
-- onlyinclude tags (which must be lowercase with no whitespace).
elseif self.onlyinclude and self:read(1, 13) == "/onlyinclude>" then
local text = self.text
self:jump(select(2, find(text, "<onlyinclude>", self.head + 14, true)) or #text)
else
local success, tag = self:try(do_tag)
if not success then
self:emit("<")
elseif not tag.ignored then
-- The end tag pattern is only needed during parsing, so it is removed
-- before the node is created.
tag.end_tag_pattern = nil
self:emit(Tag:new(tag))
end
end
end
end
-- Heading.
-- The preparser assigns each heading a number, which is used for things like section edit links. The preparser will only do this for heading blocks which aren't nested inside templates, parameters and parser tags. In some cases (e.g. when template blocks contain untrimmed newlines), a preparsed heading may not be treated as a heading in the final output. That does not affect the preparser, however, which will always count sections based on the preparser heading count, since it can't know what a template's final output will be.
do
-- Handlers.
local handle_start
local handle_body
local handle_possible_end
-- Route function for a heading: installs handle_start as the handler, records
-- the head position as the layer's index, counts the run of "=" at the head,
-- stores the count in `level` and advances past the run.
local function do_heading(self)
local layer, head = self, self.head
layer.handler, layer.index = handle_start, head
-- NOTE(review): the pattern literal is empty in this copy; presumably a
-- character class was lost - confirm against the upstream module.
self:set_pattern("")
-- Comments/tags interrupt the equals count.
local eq = match(self.text, "^=+()", head) - head
layer.level = eq
self:advance(eq)
end
-- Route function used to test whether the heading ends at the current
-- position: installs handle_possible_end as the handler.
local function do_heading_possible_end(self)
local layer = self
layer.handler = handle_possible_end
-- NOTE(review): the pattern literal is empty in this copy; presumably a
-- character class was lost - confirm against the upstream module.
self:set_pattern("")
end
-- First handler for a heading, used for the token directly after the opening
-- run of equals signs. On first use it replaces itself with the dispatcher
-- returned by self:switch, then forwards the call to it.
-- NOTE(review): every entry in the switch table below begins with a bare "=",
-- i.e. the bracketed keys are missing in this copy, so the constructor is not
-- valid Lua as written. The pattern literals passed to set_pattern are also
-- empty. Restore both from the upstream module.
function handle_start(self, ...)
-- ===== is "=" as an L2; ======== is "==" as an L3 etc.
-- Handles a heading line made up only of equals signs: the run is split
-- into the signs that set the level and an excess which becomes the text.
local function newline(self)
local layer = self
local eq = layer.level
if eq <= 2 then
return self:fail_route()
end
-- Calculate which equals signs determine the heading level.
local level_eq = eq - (2 - eq % 2)
level_eq = level_eq > 12 and 12 or level_eq
-- Emit the excess.
self:emit(rep("=", eq - level_eq))
layer.level = level_eq / 2
return self:pop()
end
-- Tries to read the rest of the line as trailing material after the heading.
-- If that attempt fails, the line is handled as a normal heading body;
-- otherwise it is handled as an equals-only heading.
local function whitespace(self)
local success, possible_end = self:try(do_heading_possible_end)
if success then
self:emit(Wikitext:new(possible_end))
local layer = self
layer.handler = handle_body
self:set_pattern("")
return self:consume()
end
return newline(self)
end
handle_start = self:switch(handle_start, {
= whitespace,
= newline,
= whitespace,
= newline,
= function(self)
-- Emit any excess = signs once we know it's a conventional heading. Up till now, we couldn't know if the heading is just a string of = signs (e.g. ========), so it wasn't guaranteed that the heading text starts after the 6th.
local layer = self
local eq = layer.level
if eq > 6 then
self:emit(1, rep("=", eq - 6))
layer.level = 6
end
layer.handler = handle_body
self:set_pattern("")
return self:consume()
end
})
return handle_start(self, ...)
end
-- Handler for the body of a heading. On first use it replaces itself with the
-- dispatcher returned by self:switch, then forwards the call to it.
-- NOTE(review): every entry in the table below begins with a bare "=", i.e.
-- the bracketed keys are missing in this copy, so the constructor is not valid
-- Lua as written. Restore them from the upstream module.
function handle_body(self, ...)
handle_body = self:switch(handle_body, {
= Parser.fail_route,
= Parser.tag,
-- A run of equals signs: either part of the heading text, or the closing run.
= function(self)
-- Comments/tags interrupt the equals count.
local eq = match(self.text, "^=+", self.head)
local eq_len = #eq
self:advance(eq_len)
local success, possible_end = self:try(do_heading_possible_end)
-- If the attempt succeeds, the run is not the end of the heading, so it is
-- emitted as text along with what the attempt collected.
if success then
self:emit(eq)
self:emit(Wikitext:new(possible_end))
return self:consume()
end
-- Otherwise the run closes the heading. The level is the smaller of the
-- opening and closing counts; surplus signs on either side become text.
local layer = self
local level = layer.level
if eq_len > level then
self:emit(rep("=", eq_len - level))
elseif level > eq_len then
layer.level = eq_len
self:emit(1, rep("=", level - eq_len))
end
return self:pop()
end,
= Parser.wikilink_block,
-- Braces are parsed with fail_on_unclosed_braces set, so an unclosed brace
-- block makes the heading fail.
= function(self, this)
return self:braces("{", true)
end,
= Parser.fail_route,
= Parser.emit
})
return handle_body(self, ...)
end
-- Handler used while testing whether a heading ends at the current position.
-- In this handler, popping the layer signals that the heading continues, and
-- failing the route signals that it ends (see how callers treat `success`).
-- On first use it replaces itself with the dispatcher returned by self:switch.
-- NOTE(review): every entry in the table below begins with a bare "=", i.e.
-- the bracketed keys are missing in this copy, and the pattern "^+()$" has lost
-- its character class. Restore both from the upstream module.
function handle_possible_end(self, ...)
handle_possible_end = self:switch(handle_possible_end, {
= Parser.fail_route,
-- Comments are skipped; anything else here, or an unterminated comment, pops.
= function(self)
if self:read(1, 3) ~= "!--" then
return self:pop()
end
local head = select(2, find(self.text, "-->", self.head + 4, true))
if not head then
return self:pop()
end
self:jump(head)
end,
= Parser.fail_route,
-- Tokens matching the pattern are emitted; any other token pops.
= function(self, this)
if not match(this, "^+()$") then
return self:pop()
end
self:emit(this)
end
})
return handle_possible_end(self, ...)
end
-- Tries to parse a heading at the head. On success, the heading is given the
-- next section number, emitted as a Heading node, and parsing continues with
-- the next token. On failure, "=" is emitted as plain text.
function Parser:heading()
	local ok, layer = self:try(do_heading)
	if not ok then
		self:emit("=")
		return
	end
	local number = self.section + 1
	layer.section = number
	self.section = number
	self:emit(Heading:new(layer))
	return self:consume()
end
end
------------------------------------------------------------------------------------
--
-- Block handlers
--
------------------------------------------------------------------------------------
-- Block handlers.
-- These are blocks which can affect template/parameter parsing, since they're also parsed by Parsoid at the same time (even though they aren't processed until later).
-- All blocks (including templates/parameters) can nest inside each other, but an inner block must be closed before the outer block which contains it. This is why, for example, the wikitext "{{template| ] }}" will process correctly, since the wikilink block is closed before the template closure. It makes no difference whether the block will be treated as valid or not when it's processed later on, so "{{template| ] }}" would also work, even though "]" is not a valid wikilink.
-- Note that nesting also affects pipes and equals signs, in addition to block closures.
-- These blocks can be nested to any degree, so "{{template| ] }}" will not work, since only one of the three wikilink blocks has been closed. On the other hand, "{{template| ] ]] ]] }}" will work.
-- All blocks are implicitly closed by the end of the text, since their validity is irrelevant at this stage.
-- Language conversion block.
-- Opens with "-{" and closes with "}-". However, templates/parameters take priority, so "-{{" is parsed as "-" followed by the opening of a template/parameter block (depending on what comes after).
-- Note: Language conversion blocks aren't actually enabled on the English Wiktionary, but Parsoid still parses them at this stage, so they can affect the closure of outer blocks: e.g. "]" is not a valid wikilink block, since the "]]" falls inside the new language conversion block.
do
--Handler.
local handle_language_conversion_block
-- Route function for a language conversion block: installs
-- handle_language_conversion_block as the handler.
local function do_language_conversion_block(self)
local layer = self
layer.handler = handle_language_conversion_block
-- NOTE(review): the pattern literal is empty in this copy; presumably a
-- character class was lost - confirm against the upstream module.
self:set_pattern("")
end
-- Handler for the inside of a language conversion block. On first use it
-- replaces itself with the dispatcher returned by self:switch, then forwards
-- the call to it.
-- NOTE(review): every entry in the table below begins with a bare "=", i.e.
-- the bracketed keys are missing in this copy, so the constructor is not valid
-- Lua as written. Restore them from the upstream module.
function handle_language_conversion_block(self, ...)
handle_language_conversion_block = self:switch(handle_language_conversion_block, {
= Parser.heading_block,
= Parser.tag,
= Parser.wikilink_block,
= Parser.braces,
-- "}-" closes the block; a "}" not followed by "-" is emitted as text.
= function(self)
if self:read(1) == "-" then
self:emit("}-")
self:advance()
return self:pop()
end
self:emit("}")
end,
= Parser.pop,
= Parser.emit
})
return handle_language_conversion_block(self, ...)
end
-- Handles an opening brace at the head. Two or more braces are parsed as a
-- template/parameter; a single brace is emitted as text. In either case, if
-- the brace is preceded by "-" and is left over as a lone brace, a language
-- conversion block is parsed and emitted as a Wikitext node.
-- If `fail_on_unclosed_braces` is set, an unclosed template/parameter fails
-- the current route.
function Parser:braces(this, fail_on_unclosed_braces)
local language_conversion_block = self:read(-1) == "-"
if self:read(1) == "{" then
local braces, failed = self:template_or_parameter()
-- Headings will fail if they contain an unclosed brace block.
if failed and fail_on_unclosed_braces then
return self:fail_route()
-- Language conversion blocks cannot begin "-{{", but can begin
-- "-{{{" iff parsed as "-{" + "{{".
elseif not (language_conversion_block and braces == 1) then
return self:consume()
end
else
self:emit(this)
if not language_conversion_block then
return
end
self:advance()
end
self:emit(Wikitext:new(self:get(do_language_conversion_block)))
end
end
--[==[
Headings
Opens with "\n=" (or "=" at the start of the text), and closes with "\n" or the end of the text. Note that it doesn't matter whether the heading will fail to process due to a premature newline (e.g. if there are no closing signs), so at this stage the only thing that matters for closure is the newline or end of text.
Note: Heading blocks are only parsed like this if they occur inside a template, since they do not iterate the preparser's heading count (i.e. they aren't proper headings).
Note 2: if directly inside a template argument with no previous equals signs, a newline followed by a single equals sign is parsed as an argument equals sign, not the opening of a new L1 heading block. This does not apply to any other heading levels. As such, {{template|key\n=}}, {{template|key\n=value}} or even {{template|\n=}} will successfully close, but {{template|key\n==}}, {{template|key=value\n=more value}}, {{template\n=}} etc. will not, since in the latter cases the "}}" would fall inside the new heading block.
]==]
do
--Handler.
local handle_heading_block
-- Route function for a heading block: installs handle_heading_block as the
-- handler.
local function do_heading_block(self)
local layer = self
layer.handler = handle_heading_block
-- NOTE(review): the pattern literal is empty in this copy; presumably a
-- character class was lost - confirm against the upstream module.
self:set_pattern("")
end
-- Handler for the inside of a heading block. On first use it replaces itself
-- with the dispatcher returned by self:switch, then forwards the call to it.
-- NOTE(review): every entry in the table below begins with a bare "=", i.e.
-- the bracketed keys are missing in this copy, so the constructor is not valid
-- Lua as written. Restore them from the upstream module.
function handle_heading_block(self, ...)
handle_heading_block = self:switch(handle_heading_block, {
-- Processes the newline, then closes the block.
= function(self)
self:newline()
return self:pop()
end,
= Parser.tag,
= Parser.wikilink_block,
= Parser.braces,
= Parser.pop,
= Parser.emit
})
return handle_heading_block(self, ...)
end
-- Processes a newline, then parses heading blocks for as long as the text at
-- the head equals `this` followed by `nxt` (which defaults to "="). Each block
-- is emitted as a Wikitext node.
function Parser:heading_block(this, nxt)
self:newline()
this = this .. (nxt or "=")
-- Offset of the last character of the opening sequence, relative to the head.
local loc = #this - 1
while self:read(0, loc) == this do
self:advance()
self:emit(Wikitext:new(self:get(do_heading_block)))
end
end
end
-- Wikilink block.
-- Opens with "[[" and closes with "]]".
do
-- Handler.
local handle_wikilink_block
-- Route function for a wikilink block: installs handle_wikilink_block as the
-- handler.
local function do_wikilink_block(self)
local layer = self
layer.handler = handle_wikilink_block
-- NOTE(review): the pattern literal looks truncated in this copy ("{]" is
-- not a complete character class) - confirm against the upstream module.
self:set_pattern("{]")
end
-- Handler for the inside of a wikilink block. On first use it replaces itself
-- with the dispatcher returned by self:switch, then forwards the call to it.
-- NOTE(review): the table keys are missing or truncated in this copy (entries
-- begin with a bare "=", and one with an unterminated string), so the
-- constructor is not valid Lua as written. Restore them from the upstream
-- module.
function handle_wikilink_block(self, ...)
handle_wikilink_block = self:switch(handle_wikilink_block, {
= Parser.heading_block,
= Parser.tag,
= Parser.wikilink_block,
-- "]]" closes the block; a lone "]" is emitted as text.
"] = function(self)
if self:read(1) == "]" then
self:emit("]]")
self:advance()
return self:pop()
end
self:emit("]")
end,
= Parser.braces,
= Parser.pop,
= Parser.emit
})
return handle_wikilink_block(self, ...)
end
-- Handles "[" at the head. A single bracket is emitted as plain text. A double
-- bracket is emitted as "[[", after which the wikilink block that follows is
-- parsed and emitted as a Wikitext node.
function Parser:wikilink_block()
	if self:read(1) ~= "[" then
		self:emit("[")
		return
	end
	self:emit("[[")
	self:advance(2)
	self:emit(Wikitext:new(self:get(do_wikilink_block)))
end
end
-- Lines which only contain comments, " " and "\t" are eaten, so long as
-- they're bookended by "\n" (i.e. not the first or last line).
-- NOTE(review): both patterns below begin with "^*", which suggests a leading
-- character class was lost in this copy - confirm against the upstream module.
function Parser:newline()
local text, head = self.text, self.head
while true do
-- Skip over any run of consecutive comments that follows.
repeat
local loc = match(text, "^*<!%-%-()", head + 1)
if not loc then
break
end
loc = select(2, find(text, "-->", loc, true))
head = loc or head
until not loc
-- Fail if no comments found.
if head == self.head then
break
end
-- The comments must be followed by a newline for the line to be skipped.
head = match(text, "^*()\n", head + 1)
if not head then
break
end
self:jump(head)
end
self:emit("\n")
end
do
-- Handlers.
local handle_start
local main_handler
-- If `transcluded` is true, then the text is checked for a pair of
-- onlyinclude tags. If these are found (even if they're in the wrong
-- order), then the start of the page is treated as though it is preceded
-- by a closing onlyinclude tag.
-- Note 1: unlike other parser extension tags, onlyinclude tags are case-
-- sensitive and cannot contain whitespace.
-- Note 2: onlyinclude tags *can* be implicitly closed by the end of the
-- text, but the hard requirement above means this can only happen if
-- either the tags are in the wrong order or there are multiple onlyinclude
-- blocks.
-- Route function for the top level of a parse: installs handle_start as the
-- handler and resets the section counter. If `transcluded` is set, the parser
-- is flagged as transcluded and the text is checked for onlyinclude tags.
local function do_parse(self, transcluded)
local layer = self
layer.handler = handle_start
-- "." makes the first token a single character, which handle_start inspects.
self:set_pattern(".")
self.section = 0
if not transcluded then
return
end
self.transcluded = true
local text = self.text
-- Both tags must be present somewhere, in either order (plain searches).
if find(text, "</onlyinclude>", 1, true) then
local head = find(text, "<onlyinclude>", 1, true)
if head then
self.onlyinclude = true
-- Start parsing directly after the first "<onlyinclude>" (13 characters).
self:jump(head + 13)
end
end
end
-- If the first character is "=", try parsing it as a heading.
-- Handler for the very first token of the text. It switches the layer over to
-- main_handler, then either tries a heading (if the token is "=") or
-- continues with the next token.
function handle_start(self, this)
local layer = self
layer.handler = main_handler
-- NOTE(review): the pattern literal is empty in this copy; presumably a
-- character class was lost - confirm against the upstream module.
self:set_pattern("")
if this == "=" then
return self:heading()
end
return self:consume()
end
-- Handler for top-level text. On first use it replaces itself with the
-- dispatcher returned by self:switch, then forwards the call to it.
-- NOTE(review): every entry in the table below begins with a bare "=", i.e.
-- the bracketed keys are missing in this copy, so the constructor is not valid
-- Lua as written. Restore them from the upstream module.
function main_handler(self, ...)
main_handler = self:switch(main_handler, {
-- After a newline, "=" may start a heading.
= function(self)
self:newline()
if self:read(1) == "=" then
self:advance()
return self:heading()
end
end,
= Parser.tag,
-- Two or more braces start a template/parameter; a single one is plain text.
= function(self)
if self:read(1) == "{" then
self:template_or_parameter()
return self:consume()
end
self:emit("{")
end,
= Parser.pop,
= Parser.emit
})
return main_handler(self, ...)
end
-- Parses `text` into a node tree and returns the root Wikitext node.
-- `text` must be a string or a number (which is converted to a string); any
-- other type raises an error. `transcluded` is passed through to do_parse.
function export.parse(text, transcluded)
	local text_type = type(text)
	if text_type == "number" then
		text = tostring(text)
	elseif text_type ~= "string" then
		error("bad argument #1 (string expected, got " .. text_type .. ")")
	end
	-- Only the second value returned by Parser:parse is wanted.
	local _, tree = Parser:parse{
		text = text,
		node = {Wikitext, true},
		route = {do_parse, transcluded}
	}
	return tree
end
parse = export.parse
end
do
-- Iterator step: calls `iter` until it yields a template node, and returns
-- that node, or nil once `iter` is exhausted.
local function next_template(iter)
	local node
	repeat
		node = iter()
	until node == nil or class_else_type(node) == "template"
	return node
end
-- Returns an iterator over the template nodes found in `text`, for use in a
-- generic for loop. The text is parsed as transcluded unless `not_transcluded`
-- is set.
function export.find_templates(text, not_transcluded)
return next_template, parse(text, not not_transcluded):__pairs("next_node")
end
end
do
local link_parameter_1 = data.template_link_param_1
local link_parameter_2 = data.template_link_param_2
-- Generate a link. If the target title doesn't have a fragment, use "#top"
-- (which is an implicit anchor at the top of every page), as this ensures
-- self-links still display as links, since bold display is distracting and
-- unintuitive for template links.
-- `title` is a title object (its `fragment` and `prefixedText` fields are
-- read), and `display` is the text to show.
local function link_page(title, display)
local fragment = title.fragment
if fragment == "" then
fragment = "top"
end
-- NOTE(review): the format string is just "]" in this copy, so both
-- arguments are discarded; the link markup has presumably been lost - restore
-- it from the upstream module.
return format(
"]",
encode_uri(title.prefixedText .. "#" .. fragment, "WIKI"),
display
)
end
-- pf_arg1 or pf_arg2 may need to be linked if a given parser function
-- treats them as a pagename. If a key exists in `namespace`, the value is
-- the namespace for the page: if not 0, then the namespace prefix will
-- always be added to the input (e.g. {{#invoke:}} can only target the
-- Module: namespace, so inputting "Template:foo" gives
-- "Module:Template:foo", and "Module:foo" gives "Module:Module:foo").
-- However, this isn't possible with mainspace (namespace 0), so prefixes
-- are respected. make_title handles all of this automatically.
-- Returns `pagename` as a link if `namespace` is given and the resulting title
-- is a valid internal title; otherwise returns `pagename` unchanged.
local function finalize_arg(pagename, namespace)
	if namespace ~= nil then
		local title = make_title(namespace, pagename)
		if title and is_internal_title(title) then
			return link_page(title, pagename)
		end
	end
	return pagename
end
-- Builds the linked form of a template name for display. Returns `name`
-- unchanged if parse_template_name cannot parse it.
-- NOTE(review): this block has lost most of its bracketed text in this copy:
-- `chunks`, `rawchunks`, `magic_words`, `args`, `link` and
-- `parser_extension_tags` are used without their indexes, the format strings
-- are reduced to "]", and the encode_uri call inside the loop is not valid Lua.
-- Restore it from the upstream module before making any other change.
local function render_title(name, args)
-- parse_template_name returns a table of transclusion modifiers plus
-- the normalized template/magic word name, which will be used as link
-- targets. The third return value pf_arg1 is the first argument of
-- a parser function, which comes after the colon (e.g. "foo" in
-- "{{#IF:foo|bar|baz}}"). This means the first entry in args (i.e. the
-- first argument that comes after a pipe) is actually argument 2, and so
-- on. Note: the
-- second parameter of parse_template_name checks if there are any
-- arguments, since parser variables cannot take arguments (e.g.
-- {{CURRENTYEAR}} is a parser variable, but {{CURRENTYEAR|foo}}
-- transcludes "Template:CURRENTYEAR"). In such cases, the returned
-- table explicitly includes the "Template:" prefix in the template
-- name. The third parameter instructs it to retain any fragment in the
-- template name in the returned table, if present.
local chunks, subclass, pf_arg1 = parse_template_name(name, args and pairs(args)(args) ~= nil, true)
if chunks == nil then
return name
end
local chunks_len = #chunks
-- Additionally, generate the corresponding table `rawchunks`, which
-- is a list of colon-separated chunks in the raw input. This is used
-- to retrieve the display forms for each chunk.
local rawchunks = split(name, ":")
for i = 1, chunks_len - 1 do
chunks = format(
"]",
encode_uri(magic_words, 1, -2)].transclusion_modifier, "WIKI"),
rawchunks
)
end
local chunk = chunks
-- If it's a template, return a link to it with link_page, concatenating
-- the remaining chunks in `rawchunks` to form the display text.
-- Use new_title with the default namespace 10 (Template:) to generate
-- a target title, which is the same setting used for retrieving
-- templates (including those in other namespaces, as prefixes override
-- the default).
if subclass == "template" then
chunks = link_page(
new_title(chunk, 10),
concat(rawchunks, ":", chunks_len) -- :
)
return concat(chunks, ":") -- :
elseif subclass == "parser variable" then
chunks = format(
"]",
encode_uri(magic_words.parser_variable, "WIKI"),
rawchunks
)
return concat(chunks, ":") -- :
end
-- Otherwise, it must be a parser function.
local mgw_data = magic_words
local link = mgw_data.parser_function or mgw_data.transclusion_modifier
local pf_arg2 = args and args or nil
-- Some magic words have different links, depending on whether argument
-- 2 is specified (e.g. "baz" in {{foo:bar|baz}}).
if type(link) == "table" then
link = pf_arg2 and link or link
end
chunks = format("]", encode_uri(link, "WIKI"), rawchunks)
-- #TAG: has special handling, because documentation links for parser
-- extension tags come from ].
if chunk == "#TAG:" then
-- Tags are only case-insensitive with ASCII characters.
local tag = parser_extension_tags
if tag then
pf_arg1 = format("]", encode_uri(tag, "WIKI"), pf_arg1)
end
-- Otherwise, finalize pf_arg1 and add it to `chunks`.
else
pf_arg1 = finalize_arg(pf_arg1, link_parameter_1)
end
chunks = pf_arg1
-- Finalize pf_arg2 (if applicable), then return.
if pf_arg2 then
args = finalize_arg(pf_arg2, link_parameter_2)
end
return concat(chunks, ":") -- :
end
-- Returns a list made up of `title` followed by one string per argument in
-- `args` (if given), ready to be joined with pipes.
function export.buildTemplate(title, args)
	local output = {title}
	if not args then
		return output
	end
	-- Iterate over all numbered parameters in order, followed by any
	-- remaining parameters in codepoint order. Implicit parameters are
	-- used wherever possible, even if explicit numbers are interpolated
	-- between them (e.g. 0 would go before any implicit parameters, and
	-- 2.5 between 2 and 3).
	-- TODO: handle "=" and "|" in params/values.
	local iter, implicit = sorted_pairs(args), table_len(args)
	local k, v = iter()
	while k ~= nil do
		local piece = v
		-- Only integer keys from 1 up to the table's length can be implicit; all
		-- other keys have to be written out explicitly.
		if not (type(k) == "number" and k >= 1 and k <= implicit and k % 1 == 0) then
			piece = k .. "=" .. v
		end
		insert(output, piece)
		k, v = iter()
	end
	return output
end
build_template = export.buildTemplate
-- Returns the HTML for a template call displayed as code. The title is
-- rendered with render_title unless `no_link` is set, and `args` (optional) is
-- the table of template arguments to display.
function export.templateLink(title, args, no_link)
	local output = build_template(no_link and title or render_title(title, args), args)
	-- Encode each part separately, so that `output` is still a list when it is
	-- concatenated below.
	for i = 1, #output do
		output[i] = encode_entities(output[i], "={}", true, true)
	end
	return tostring(html_create("code")
		:css("white-space", "pre-wrap")
		:wikitext("{{" .. concat(output, "|") .. "}}") -- {{ | }}
	)
end
end
do
-- Iterator step: calls `iter` until it yields a parameter node, and returns
-- that node, or nil once `iter` is exhausted.
local function next_parameter(iter)
	local node
	repeat
		node = iter()
	until node == nil or class_else_type(node) == "parameter"
	return node
end
-- Returns an iterator over the parameter nodes found in `text`, for use in a
-- generic for loop. The text is parsed as transcluded unless `not_transcluded`
-- is set.
function export.find_parameters(text, not_transcluded)
return next_parameter, parse(text, not not_transcluded):__pairs("next_node")
end
-- Returns the HTML for a parameter displayed as code. If `default` is given,
-- it is shown after the name, separated by a pipe.
function export.displayParameter(name, default)
	local code = html_create("code")
	code:css("white-space", "pre-wrap")
	local inner = concat({name, default}, "|")
	code:wikitext("{{{" .. inner .. "}}}") -- {{{ | }}}
	return tostring(code)
end
end
do
-- Validates a heading level and returns it. Raises if `level` is not a number,
-- or is not an integer from 1 to 6.
local function check_level(level)
	if type(level) ~= "number" then
		error("Heading levels must be numbers.")
	end
	local in_range = level >= 1 and level <= 6
	if not (in_range and level % 1 == 0) then
		error("Heading levels must be integers between 1 and 6.")
	end
	return level
end
-- Iterator step: calls `iter` until it yields a heading node whose level lies
-- between `iter.i` and `iter.j` (inclusive), and returns that node, or nil
-- once `iter` is exhausted.
local function next_heading(iter)
	local node = iter()
	while node ~= nil do
		if class_else_type(node) == "heading" then
			local level = node.level
			if level >= iter.i and level <= iter.j then
				return node
			end
		end
		node = iter()
	end
	return nil
end
-- FIXME: should headings which contain "\n" be returned? This may depend
-- on variable factors, like template expansion. They iterate the heading
-- count number, but fail on rendering. However, in some cases a different
-- heading might still be rendered due to intermediate equals signs; it
-- may even be of a different heading level: e.g., this is parsed as an
-- L2 heading with a newline (due to the wikilink block), but renders as the
-- L1 heading "=foo[[". Section edit links are sometimes (but not always)
-- present in such cases.
-- ==[[=
-- ]]==
-- TODO: section numbers for edit links seem to also include headings
-- nested inside templates and parameters (but apparently not those in
-- parser extension tags - need to test this more). If we ever want to add
-- section edit links manually, this will need to be accounted for.
-- Returns an iterator over the heading nodes found in `text`, for use in a
-- generic for loop. `i` and `j` are the lowest and highest heading levels to
-- return; they default to 1 and 6, and are validated if given.
function export.find_headings(text, i, j)
	local iter = parse(text):__pairs("next_node")
	local lowest = i and check_level(i) or 1
	local highest = j and check_level(j) or 6
	iter.i, iter.j = lowest, highest
	return next_heading, iter
end
end
do
-- Returns the HTML for `tag` displayed as code, wrapped in "<" and ">".
local function make_tag(tag)
	local code = html_create("code")
	code:css("white-space", "pre-wrap")
	code:wikitext("<" .. tag .. ">")
	return tostring(code)
end
-- Note: invalid tags are returned without links.
-- Returns the HTML for `tag` (given without the surrounding "<" and ">")
-- displayed as code.
-- NOTE(review): in this copy the pattern "^(/?)(+)(.*)$" has lost the character
-- class for the tag name, `parser_extension_tags` is not indexed by the tag
-- name, and the gsub replacement is reduced to "]". Restore these from the
-- upstream module.
function export.wikitagLink(tag)
-- ">" can't appear in tags (including attributes) since the parser
-- unconditionally treats ">" as the end of a tag.
if find(tag, ">", 1, true) then
return make_tag(tag)
end
-- Tags must start "<tagname..." or "</tagname...", with no whitespace
-- after "<" or "</".
local slash, tagname, remainder = match(tag, "^(/?)(+)(.*)$")
if not tagname then
return make_tag(tag)
end
-- Tags are only case-insensitive with ASCII characters.
local link = lower(tagname)
if (
-- onlyinclude tags must be lowercase and are whitespace intolerant.
link == "onlyinclude" and (link ~= tagname or remainder ~= "") or
-- Closing wikitags (except onlyinclude) can only have whitespace
-- after the tag name.
slash == "/" and not match(remainder, "^%s*()$") or
-- Tagnames cannot be followed immediately by "/", unless it comes
-- at the end (e.g. "<nowiki/>", but not "<nowiki/ >").
remainder ~= "/" and sub(remainder, 1, 1) == "/"
) then
-- Output with no link.
return make_tag(tag)
end
-- Partial transclusion tags aren't in the table of parser extension
-- tags.
if link == "noinclude" or link == "includeonly" or link == "onlyinclude" then
link = "mw:Transclusion#Partial transclusion"
else
link = parser_extension_tags
end
-- Only the first occurrence of the tag name is replaced.
if link then
tag = gsub(tag, pattern_escape(tagname), "]", 1)
end
return make_tag(tag)
end
end
-- For convenience.
export.class_else_type = class_else_type
return export