local export = {}
local rsplit = mw.text.split
local u = mw.ustring.char
local m_templateparser = require("Module:templateparser")
local pattern_utilities_module = "Module:pattern utilities"
-- Opening and closing HTML for a parenthesized gloss: a bracket span followed by the start of the content span, and
-- the matching closing tags. Used further down when a gloss is appended after a formatted link.
-- NOTE(review): the attribution comment that stood here ("From ...") lost its link target in this copy.
local gloss_left = "<span class=\"gloss-brac\">(</span><span class=\"gloss-content\">"
local gloss_right = "</span><span class=\"gloss-brac\">)</span>"
-- Table of "extra info" parameters. It is iterated with pairs() elsewhere in this file, and each boolean value is
-- consumed there as `is_list` (true = the parameter is handled as a list).
-- NOTE(review): every key of this table appears to be missing in this copy (each entry starts with `=`), so the
-- constructor does not parse as written. Restore the keys from the canonical module.
local place_extra_info = {
= true,
= true,
= true,
= true,
= false,
= true,
= true,
}
-- Escape `text` so that it can be embedded in a Lua pattern and match itself literally.
-- Every pattern magic character (^ $ ( ) % . [ ] * + - ?) is prefixed with `%`; all other characters are left alone.
-- Returns exactly one value, the escaped string.
-- NOTE(review): the attribution comment that stood here ("From ...") lost its link target in this copy.
local function escape_pattern(text)
	-- The outer parentheses discard gsub's second result (the substitution count) so that it cannot leak into a
	-- caller's argument list.
	return (text:gsub("[%^%$%(%)%%%.%[%]%*%+%-%?]", "%%%0"))
end
-- Make Wikicode (template calls, bracketed links, HTML, bold/italics, etc.) display literally in error messages by
-- inserting a Unicode word joiner (U+2060) after each character that can open a Wikicode construct: `[`, `{`, `<`
-- and `'`. Replacing them with the equivalent HTML escapes doesn't work because the escapes are displayed literally.
-- Using <nowiki>...</nowiki> (the tags display literally), {{#tag:nowiki|...}} (same thing) or
-- mw.getCurrentFrame():extensionTag("nowiki", ...) (everything gets converted to a strip marker such as
-- `UNIQ--nowiki-00000000-QINU`) could not be made to work either.
-- FIXME: This is a massive hack; there must be a better way.
-- NOTE(review): the set of trigger characters is inferred from the description above -- confirm against the
-- canonical module.
-- Returns the escaped string only (gsub's substitution count is dropped by the assignment).
local function escape_wikicode(text)
	text = text:gsub("[%[{<']", "%0" .. u(0x2060))
	return text
end
-- Run `text` through frame:preprocess() only when it contains an opening brace; text without any `{` is returned
-- unchanged without calling into the frame.
local function preprocess(frame, text)
	local has_brace = text:find("{", 1, true)
	if not has_brace then
		return text
	end
	return frame:preprocess(text)
end
-- Shift the control value of an iterator triple: given the (iterator, state, index) results of e.g. ipairs(),
-- return the same iterator and state with `offset` added to the index, so a generic `for` skips the first
-- `offset` positions.
local function discard(offset, iter, obj, index)
	local shifted_index = index + offset
	return iter, obj, shifted_index
end
-- Return `haystack` with every template removed for which `predicate` returns a true value.
-- `predicate` is called as predicate(name, args, is_at_the_start) for each template yielded by
-- m_templateparser.findTemplates(); `is_at_the_start` is true while no text chunk has been kept yet.
-- Text between removed templates is kept only if it contains a non-whitespace character; the tail after the last
-- removed template is always kept. If nothing was removed, `haystack` itself is returned.
-- NOTE(review): `is_at_the_start` is evaluated before the text preceding the current template is recorded, so text
-- in front of the first removed template does not make it false -- confirm that this is intended.
local function remove_templates_if(haystack, predicate)
	local kept = {}
	local cursor = 1
	for name, args, text, index in m_templateparser.findTemplates(haystack) do
		local nothing_kept_yet = next(kept) == nil
		if predicate(name, args, nothing_kept_yet) then
			local before = cursor < index and haystack:sub(cursor, index - 1) or nil
			if before and before:find("%S") then
				kept[#kept + 1] = before
			end
			-- Resume right after the removed template.
			cursor = index + #text
		end
	end
	if cursor == 1 then
		return haystack
	end
	kept[#kept + 1] = haystack:sub(cursor)
	return table.concat(kept)
end
-- Append the positional values of `from`, starting at position `first_argument` (default 2, i.e. skipping the
-- first positional argument), to the end of `to`. When `deny_list` is given, values for which
-- require("Module:table").contains(deny_list, value) is true are skipped.
local function copy_unnamed_args_maybe_except_code(to, from, deny_list, first_argument)
	local skip_count = (first_argument or 2) - 1
	for _, value in discard(skip_count, ipairs(from)) do
		-- The table module is only required when there is a deny list to consult.
		local denied = deny_list and require("Module:table").contains(deny_list, value)
		if not denied then
			table.insert(to, value)
		end
	end
end
-- Return a handler for templates that produce a complete definition on their own, or nil for any other template.
-- Parameters:
--   name            - template name as found on the definition line
--   args            - that template's arguments
--   transclude_args - arguments of the transclusion call itself (the caller passes its processed `args`)
-- The returned table has `should_remove` and `must_be_first` flags and a `generate(data)` function that returns the
-- formatted definition. `data` is built by the caller; the fields read here are lang, source, source_lang, id, sort,
-- no_gloss, gloss and formatted_to. The {{place}} branch may also write data.nolb.
-- NOTE(review): in this copy many table subscripts (bare `place_args = ...`, `args`, `transclude_args`,
-- `place_extra_info ~= nil`) and pattern character classes (`(*)`, `"^"`) appear to have been stripped. Restore
-- them from the canonical module before relying on or changing this code.
local function handle_definition_template(name, args, transclude_args)
if name == "place" or name == "User:Benwing2/place" then
return {
should_remove = true,
must_be_first = true,
generate = function(data)
-- A "to " prefix (used for verbs by the caller) makes no sense in front of a place description.
if data.formatted_to and data.formatted_to ~= "" then
error("{{place}} cannot be used in conjunction with 'to'.")
end
local place_args = {}
local langcode = data.lang:getCode()
local include_place_extra_info = transclude_args.include_place_extra_info
-- Extra info is dropped when explicitly disabled, or by default when the target language is not "en".
-- (`and` binds tighter than `or` here.)
local drop_extra = include_place_extra_info == false or include_place_extra_info == nil and langcode ~= "en"
local saw_tcl_t
-- Copy the arguments but drop translations and maybe the "extra info"
for key, val in pairs(args) do
-- NOTE(review): the second capture has lost its character class; presumably it matched a trailing index.
local base, num = tostring(key):match("^(.-)(*)$")
if base == "tcl_t" or base == "tcl_tid" then
saw_tcl_t = true -- otherwise ignore
elseif base == "tcl_nolb" then
data.nolb = val -- otherwise ignore
elseif base == "t" or base == "tid" or drop_extra and place_extra_info ~= nil then
-- ignore it
else
place_args = val
end
end
-- Replace "+" in a translation argument with the source term, escaped for use as a gsub replacement.
local function sub_plus(t)
if t:find("+") then
t = t:gsub("+", require(pattern_utilities_module).replacement_escape(data.source))
end
return t
end
place_args = langcode
-- Translations come, in order of preference, from the transclusion call's `t` list, from tcl_t/tcl_tid arguments
-- on the source template, or (for non-"en" target languages) from the source term and id themselves.
if #transclude_args.t > 0 then
local argno = 1
for _, t in ipairs(transclude_args.t) do
if t ~= "-" then
place_args = sub_plus(t)
argno = argno + 1
end
end
elseif saw_tcl_t then
for key, val in pairs(args) do
local base, num = tostring(key):match("^(.-)(*)$")
if base == "tcl_t" then
place_args = sub_plus(val)
elseif base == "tcl_tid" then
place_args = val
end
end
elseif langcode ~= "en" then
place_args = data.source
place_args = data.id
end
place_args = data.sort
if data.no_gloss then
place_args = "-"
else
local gloss = data.gloss
-- Copy over any extra-info parameters supplied on the transclusion call.
for extra_info_arg, is_list in pairs(place_extra_info) do
if is_list then
for i, v in ipairs(transclude_args) do
place_args = v
end
elseif transclude_args then
place_args = transclude_args
end
end
if gloss ~= "" then
local first_free = 2
-- NOTE(review): as written this loop condition never changes; presumably it indexed place_args by first_free.
while place_args ~= nil do first_free = first_free + 1 end
if place_args:find("<<") then
-- new-style argument; concatenate to end of argument
if not gloss:find("^") then
gloss = " " .. gloss
end
place_args = args .. gloss
else
-- old-style argument; add as separate argument
gloss = gloss:gsub("^ *", "")
place_args = gloss
end
end
end
return require("Module:User:Benwing2/place").format(place_args)
end,
}
elseif name == "abbreviation of" or name == "abbr of" or name == "abbrev of"
or name == "acronym of"
or name == "contraction of" or name == "contr of"
or name == "initialism of" or name == "init of"
or name == "short for" then
return {
should_remove = true,
must_be_first = true,
-- Produces: formatted_to, a link to the source term and, unless data.no_gloss is set, a parenthesized gloss made
-- of a link built from the source template's arguments followed by the remaining gloss text.
generate = function(data)
local formatted_gloss = ""
if not data.no_gloss then
local formatted_link = require("Module:links").full_link {
term = args, alt = args, lang = data.source_lang, id = args
}
local after_link = ""
if data.gloss ~= "" then
local separator = (args and "") or ((args or ";") .. " ")
after_link = separator .. data.gloss
end
formatted_gloss = " " .. gloss_left .. formatted_link .. after_link .. gloss_right
end
return data.formatted_to .. require("Module:links").full_link {
term = data.source, lang = data.source_lang, id = data.id
} .. formatted_gloss
end,
}
end
-- Not a definition template.
return nil
end
-- Module entry point. Processes the parent frame's arguments, fetches the wikitext of the source page and, for each
-- requested sense id, extracts one definition line, interprets and strips its templates, and formats a senseid
-- anchor, categories, labels and a link (plus gloss) to the source term.
-- Returns the formatted lines joined by a newline followed by `args.indent` (default "#") and a space.
-- Raises an error when the source page, the requested sense id, the source-language header or a definition line
-- cannot be found, and for the constructs rejected by the template handling below.
-- NOTE(review): in this copy the keys of `params` and many table subscripts (bare `args`, `tempargs`, `sortkeys`,
-- `params = ...`, `v`) as well as several pattern character classes and wiki links inside strings appear to have
-- been stripped, so the function does not parse as written. Restore them from the canonical module before making
-- code changes.
function export.show(frame)
local params = {
= { required = true }, -- langcode of target language (the current entry's language)
= { required = true }, -- source English term to transclude from
= {}, -- can have multiple comma-separated ID's
= {},
= { default = false, type = "boolean" },
= { type = "boolean" },
-- Normally, we ignore extra info (capital, largest city, modern name, etc.) when transcluding {{place}}
-- because the given terms are in English and will likely differ from language to language.
= { type = "boolean" },
= {}, -- can have multiple semicolon-separated labels
= {}, -- can have multiple semicolon-separated labels
= { type = "boolean" },
= { list = true },
= {},
}
-- Register one parameter per "extra info" name, list-valued where the table says so.
for k, is_list in pairs(place_extra_info) do
params = { list = is_list }
end
local args = require("Module:parameters").process(frame:getParent().args, params)
local language_code = args
local language = require("Module:languages").getByCode(language_code)
local source = args
-- The source language is fixed to English.
local source_langcode = "en"
local source_lang = require("Module:languages").getByCode(source_langcode)
local source_langname = source_lang:getFullName()
-- Without an explicit id, a single placeholder "-" selects the "only definition line" lookup below.
local ids = args.id and rsplit(args.id, ",") or {"-"}
local sort = args
-- NOTE(review): this compares the value returned by mw.title.getCurrentTitle() with the string `source`; if that
-- value is a title object rather than a string, the comparison can never be true -- confirm.
local copy_sortkey = (sort == nil) and (mw.title.getCurrentTitle() == source)
local no_gloss = args
local labels = args and rsplit(args, ";") or {}
local nolb
local found_labels = {}
local to = args
local content = mw.title.new(source):getContent()
if content == nil then
error("Couldn't find the entry ].")
end
-- Remove HTML comments.
content = content:gsub("<!%-%-.-%-%->", "")
-- Remove <ref>...</ref>.
content = content:gsub("< ***>.-< */ * *>", "")
-- Remove <ref/>.
content = content:gsub("< ***/ *>", "")
-- TODO: Handle <nowiki> (it's more complex than just cutting it out too).
local retlines = {}
for _, id in ipairs(ids) do
local line_start, line
if id ~= "-" then
-- Locate the sense id marker, trying {{senseid}} first and its {{sid}} alias second.
local senseid_start, senseid_end = content:find("{{ *senseid *| *" .. escape_pattern(source_langcode) .. " *| *" .. escape_pattern(id) .. " *}}")
if senseid_start == nil then
senseid_start, senseid_end = content:find("{{ *sid *| *" .. escape_pattern(source_langcode) .. " *| *" .. escape_pattern(id) .. " *}}")
end
if senseid_start == nil then
-- Collect the ids that do exist on the page so that the error message can suggest them.
local alternatives = nil
for id in content:gmatch("{{ *senseid *| *" .. escape_pattern(source_langcode) .. " *| *(*)}}") do
alternatives = alternatives and alternatives .. ", " .. id or id
end
for id in content:gmatch("{{ *sid *| *" .. escape_pattern(source_langcode) .. " *| *(*)}}") do
alternatives = alternatives and alternatives .. ", " .. id or id
end
if alternatives then
alternatives = " Alternatives for |id= are: " .. alternatives
else
alternatives = ""
end
error("Couldn't find the template {{]|" .. source_langcode .. "|" .. id .. "}} within entry ]." .. alternatives)
end
-- Do the following manually instead of using regex or iterators in hopes of saving memory.
local newline = string.byte("\n")
local pound = string.byte("#")
line_start = senseid_start
-- Walk back to the start of the line that contains the sense id.
-- NOTE(review): the `> 0` bound lets line_start reach 0 when the sense id sits on the very first line.
while line_start > 0 and content:byte(line_start - 1) ~= newline do line_start = line_start - 1 end
local def_start = line_start
-- Skip the leading "#" markers of the definition line.
while content:byte(def_start) == pound do def_start = def_start + 1 end
local line_end = senseid_end
while line_end < content:len() and content:byte(line_end + 1) ~= newline do line_end = line_end + 1 end
-- The definition line with the sense id template itself cut out.
line = content:sub(def_start, senseid_start - 1) .. content:sub(senseid_end + 1, line_end)
else
-- No id: restrict `content` to the source-language section and require exactly one definition line in it.
local _, start_source = string.find(content, "==*" .. require("Module:pattern utilities").
pattern_escape(source_langname) .. "*==")
if not start_source then
error(("Couldn't find L2 header for source language '%s' on page ]"):format(source_langname,
source))
end
-- Find index of start of next language; may be nil if no language follows.
local _, start_next_lang = string.find(content, "\n==+==", start_source, false)
content = content:sub(start_source, start_next_lang)
while true do
local next_line_start
_, next_line_start = string.find(content, "\n#+", line_start, false)
if not next_line_start then
break
end
-- A second match means the section has more than one definition line, which is ambiguous without an id.
if line_start then
local first_line = string.match(content, "(.-)%f", line_start)
local next_line = string.match(content, "(.-)%f", next_line_start + 1)
error(("No id specified and saw two definition lines '%s' and '%s' for source language '%s' on page ]"):format(
escape_wikicode(first_line), escape_wikicode(next_line), source_langname, source))
end
line_start = next_line_start + 1
end
if not line_start then
error(("Couldn't find any definition lines for source language '%s' on page ]"):format(
source_langname, source))
end
line = string.match(content, "(.-)%f", line_start)
end
-- When `to` was not given, scan backwards line by line for the nearest header and set `to` according to
-- whether that header contains "erb".
-- NOTE(review): `newline` is declared local inside the `if id ~= "-"` branch above, so it is out of scope here
-- and this reference resolves to a global -- confirm and hoist the declaration if so.
if to == nil then
local i = line_start
while i > 1 do
i = i - 1 -- i is now the index of the newline
while i > 1 and content:byte(i - 1) ~= newline do i = i - 1 end
local header = content:match("^===+(+)===+ *\n", i)
if header then
to = (header:match("erb") ~= nil)
break
end
end
end
-- TODO: Remove this error once <nowiki> is handled correctly (see above TODO).
if line:find("< *nowiki%W") or line:find("< */ *nowiki%W") then
error("Cannot handle <nowiki>.")
end
-- Quick'n'dirty templatization of manual cats so that the below code also works for them.
for _, v in ipairs({{source_langcode .. ":", "c"}, {source_lang:getCanonicalName() .. " ", "cln"}, {"", "cat"}}) do
line = line:gsub("% .. "(|]*)%]%]", "{{" .. v .. "|" .. source_langcode .. "|%1}}")
line = line:gsub("% .. "(|]*)%|(|]*)%]%]", "{{" .. v .. "|" .. source_langcode .. "|%1|sort=%2}}")
end
-- Extract template information.
local cats = {}
local cats_cln = {}
local cats_top = {}
local encountered_label = false
local generator = nil
local sortkeys = {}
local sortkey_most_frequent = nil
local sortkey_most_frequent_n = 0
-- Predicate handed to remove_templates_if(): records what each template on the line contributes (categories,
-- labels, sort keys, a definition generator) and returns whether the template should be cut from the line.
-- Raises an error for combinations it cannot handle.
local function process_template(name, tempargs, is_at_the_start)
-- Expand any nested templates in template arguments.
for k, v in pairs(tempargs) do
tempargs = preprocess(frame, v)
end
local supports_sortkey = false
local should_remove = true -- If set, removes the template from the line after processing.
local must_be_first = false -- If set, ensures that nothing (except for removed templates) precedes this template.
local definition_template_handler = handle_definition_template(name, tempargs, args)
if definition_template_handler ~= nil then
-- Only one full-definition template is allowed per line.
if generator ~= nil then
error("Encountered {{]}} even though a full definition template has already been processed.")
end
should_remove = definition_template_handler.should_remove
must_be_first = definition_template_handler.must_be_first
generator = definition_template_handler.generate
elseif name == "categorize" or name == "cat" then
copy_unnamed_args_maybe_except_code(cats, tempargs)
supports_sortkey = true
elseif name == "catlangname" or name == "cln" then
-- NOTE(review): this appends to `cats`, not `cats_cln`; nothing in this function ever adds to `cats_cln`, so
-- the "cln" expansion further down is never reached -- confirm intent.
copy_unnamed_args_maybe_except_code(cats, tempargs)
supports_sortkey = true
elseif name == "catlangcode" or name == "topics" or name == "top" or name == "C" or name == "c" then
copy_unnamed_args_maybe_except_code(cats_top, tempargs)
supports_sortkey = true
elseif name == "label" or name == "lbl" or name == "lb" then
if encountered_label then
error("Encountered multiple {{]}} templates in the definition line.")
end
encountered_label = true
copy_unnamed_args_maybe_except_code(found_labels, tempargs)
supports_sortkey = true
must_be_first = true
elseif name == "defdate" or name == "defdt" or name == "ref" or name == "refn" or name == "senseid" or name == "sid" then
-- Remove and do nothing.
else
-- We are dealing with a template other than the above hard-coded ones.
-- If it contains the language code, we cannot handle it.
if tempargs == source_langcode then
error("Cannot handle template {{]}}.")
end
supports_sortkey = tempargs or tempargs -- TODO: This doesn't handle the case where there is only sortn but not sort1/sort.
should_remove = false -- Leave the template in and just copy it, e.g. ], ], ], ] etc.
end
if supports_sortkey then
if tempargs ~= nil then
error("Cannot handle multiple sort keys.")
end
-- Tally how often each sort key occurs and remember the most frequent one.
local sortkey = tempargs
if sortkey ~= nil then
if sortkeys == nil then
sortkeys = 1
else
sortkeys = sortkeys + 1
end
if sortkeys > sortkey_most_frequent_n then
sortkey_most_frequent = sortkey
sortkey_most_frequent_n = sortkeys
end
end
end
if must_be_first and not is_at_the_start then
error("The template {{]}} should occur to the front of the definition line.")
end
return should_remove
end
line = remove_templates_if(line, process_template)
line = line:gsub("^%s+", ""):gsub("%s+$", "") -- Prune ends.
-- Tidy up the remaining definition (to be used as a gloss).
local gloss = line
if not args.no_truncate_gloss then
-- Truncate full sentences after a period, as they won't be formatted well as a gloss. Require a space after
-- the period as a possible way of reducing false positives with abbreviations.
gloss = gloss:gsub("%s*%. .*$", "")
end
-- Lowercase a leading uppercase letter and drop a trailing period.
gloss = gloss:gsub("^%u", string.lower):gsub("%.$", "")
gloss = gloss:gsub("^{{1|(*)}}", "%1") -- Remove ]
local _, link_end, link_dest_head, link_dest_tail, link_face_head, link_face_tail = gloss:find("^%]*)|(.)(]*)%]%]") -- Remove ]
-- If a leading piped link differs from its target only in the case of the first letter, collapse it to a plain
-- link to the target.
if link_end ~= nil and link_dest_tail == link_face_tail and link_face_head:lower() == link_dest_head then
gloss = "[[" .. link_dest_head .. link_dest_tail .. gloss:sub(link_end - 1)
end
gloss = preprocess(frame, gloss)
if copy_sortkey then
sort = sortkey_most_frequent
end
local formatted_senseid = require("Module:senseid").senseid(language, id, "span")
-- Each category group is expanded through its template, with the target language code as first argument.
local formatted_categories =
((next(cats ) == nil) and "" or frame:expandTemplate({ title = "cat", args = { language_code, unpack(cats ) } })) ..
((next(cats_cln) == nil) and "" or frame:expandTemplate({ title = "cln", args = { language_code, unpack(cats_cln) } })) ..
((next(cats_top) == nil) and "" or frame:expandTemplate({ title = "top", args = { language_code, unpack(cats_top) } }))
local formatted_to = to and "to " or ""
local formatted_definition
if generator ~= nil then
-- A full-definition template was found on the line; let its handler build the definition.
local data = {
frame = frame, lang = language, source = source, source_lang = source_lang, id = id,
sort = sort, no_gloss = no_gloss, gloss = gloss, formatted_to = formatted_to,
}
formatted_definition = generator(data)
nolb = data.nolb or nolb
else
-- Default form: optional "to ", a link to the source term, and the gloss in parentheses.
local formatted_link = require("Module:links").full_link { term = source, lang = source_lang, id = id }
local formatted_gloss = no_gloss and "" or (" " .. gloss_left .. gloss .. gloss_right)
formatted_definition = formatted_to .. formatted_link .. formatted_gloss
end
nolb = args or nolb
local labels_to_ignore = nil
local ignore_all_labels = false
-- "+", "1" or "*" suppress every label found on the source line; any other value is a semicolon-separated list
-- of labels to leave out.
if nolb then
if nolb == "+" or nolb == "1" or nolb == "*" then
ignore_all_labels = true
else
labels_to_ignore = rsplit(nolb, ";")
end
end
if not ignore_all_labels then
copy_unnamed_args_maybe_except_code(labels, found_labels, labels_to_ignore, 1)
end
local formatted_labels = (next(labels) == nil) and "" or (require("Module:labels").show_labels { labels = labels, lang = language, sort = sort} .. " ")
table.insert(retlines, formatted_senseid .. formatted_categories .. formatted_labels .. formatted_definition)
end
return table.concat(retlines, "\n" .. (args.indent or "#") .. " ")
end
return export