This module is intended to be used by bots or other automation tools which need to access Wiktionary data.
Bots may access the data by using mw:API:Expandtemplates or requesting the raw source of a page invoking this module with a &templates=expand
query parameter.
Make sure you only load this module once (or twice, if you need both the languages table and another table). Generating the JSON data takes a few seconds and puts a relatively high strain on the servers.
Available functions are: export_languages, export_scripts and export_families, which generate the JSON equivalents of Module:languages, Module:scripts and Module:families respectively. The structure of the data corresponds exactly to the one used in Wiktionary modules, with a caveat below.
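When export_languages is given positional arguments, the first argument filters which languages are exported and any further arguments select which keys of each language's data are exported. As the filter, TWO_LETTER, TWO_THREE_LETTER and TWO_THREE_LETTER_REGULAR can be passed. For example, invoking the function with TWO_LETTER, 1 and 4 as arguments will export the canonical names and script codes for languages with two-letter codes.
Examples: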
{{#invoke:JSON data|export_languages||ancestors|3}}
This will return the ancestors and language family for every language. (See Template:language data documentation for an explanation of numbers 1 to 4.)
{{#invoke:JSON data|export_languages|TWO_LETTER}}
This will return all available info for two-letter language codes. (Note, this is currently broken and returns a Lua error.)
-- Table collecting the functions this module exposes.
local export = {}

-- Optimisation: keep frequently used globals in locals, since a local
-- variable lookup is slightly faster than a global lookup.
local tab_concat = table.concat
local type = type
local tostring = tostring
local pairs = pairs
local ipairs = ipairs
-- Serialise a value as a JSON string literal.
-- The value is converted with tostring() first. Escaping is deliberately
-- rudimentary (to save time): only the double quote and the backslash are
-- escaped; any other character is passed through unchanged.
local function export_str(s)
	-- Prefix every '"' and '\' with a backslash. An empty pattern here would
	-- match at every position and put a backslash before every character.
	local escaped = tostring(s):gsub('["\\]', '\\%0')
	return '"' .. escaped .. '"'
end
-- Serialise the sequence part of a table as a JSON array.
-- Strings are quoted with export_str; booleans and numbers are emitted via
-- tostring(). Any other element type raises an error.
local function export_array(tab)
	local items = {}

	for _, value in ipairs(tab) do
		local vtype = type(value)
		if vtype == 'string' then
			-- append (do not overwrite the accumulator table)
			items[#items + 1] = export_str(value)
		elseif vtype == 'boolean' or vtype == 'number' then
			items[#items + 1] = tostring(value)
		else
			error("serialisation failed: unsupported array element type '" .. vtype .. "'")
		end
	end

	return "[" .. tab_concat(items, ",") .. "]"
end
-- Serialise a table as a JSON object; returns the string "null" for nil.
--
-- The second argument is a rudimentary "schema" which specifies whether a
-- table value at a given key should be serialised as an array or as an
-- object (Lua uses the same table type for both):
--   * schema[key] == false  -> the value is serialised with export_array
--   * anything else         -> the value is serialised recursively as an
--                              object, using schema[key] as its own schema
-- If `schema` is truthy but not a table (e.g. `true`), nested tables are
-- serialised as objects with an empty schema.
-- Raises an error when a table value is met and no schema was given, or when
-- a value has an unsupported type.
local function export_object(tab, schema)
	if tab == nil then
		return "null"
	end

	local items = {}

	for key, value in pairs(tab) do
		local vtype = type(value)
		if vtype == 'string' then
			items[#items + 1] = export_str(key) .. ':' .. export_str(value)
		elseif vtype == 'boolean' or vtype == 'number' then
			items[#items + 1] = export_str(key) .. ':' .. tostring(value)
		elseif vtype == 'table' then
			if not schema then
				error("no schema given for array with table values, key '" .. key .. "'")
			end

			-- Look up the per-key schema entry; without the [key] lookup the
			-- array branch below could never be selected.
			local ktype = {}
			if type(schema) == 'table' then
				ktype = schema[key]
			end

			-- false indicates array, true indicates un-schematised object
			if ktype == false then
				items[#items + 1] = export_str(key) .. ':' .. export_array(value)
			else
				items[#items + 1] = export_str(key) .. ':' .. export_object(value, ktype)
			end
		else
			error("serialisation failed: unsupported object value type '" .. vtype .. "'")
		end
	end

	return "{" .. tab_concat(items, ",") .. "}"
end
-- Export the data loaded from "Module:languages/data/all" as a JSON object
-- keyed by the keys of that table.
--
-- item_filter: selects which entries are exported. May be
--   * a table carrying an `args` field (as when the function is invoked from
--     wikitext): args[1] is then used as the item filter, args[2], args[3], ...
--     become the key filter (numeric strings are converted to numbers) and
--     args.nulls is passed through Module:yesno to obtain skip_nulls;
--   * nil or "" : every entry is exported;
--   * "TWO_LETTER", "TWO_THREE_LETTER", "TWO_THREE_LETTER_REGULAR":
--     filter on the length of the key (and on value.type == 'regular');
--   * "=a,b,c" : only the listed keys are exported;
--   * any other string : entries whose `type` field equals that string;
--   * a function(key, value) returning a truthy value for entries to keep.
-- key_filter: optional list of field keys. With exactly one key, each entry
--   maps directly to that field's value; with several, each entry maps to an
--   object holding only those fields. nil exports whole entries.
-- skip_nulls: with a single-key filter, omit entries lacking that field
--   instead of emitting null for them.
-- Returns the JSON text as a string.
function export.export_languages(item_filter, key_filter, skip_nulls)
	if type(item_filter) == "table" then
		local args = item_filter.args
		key_filter = {}

		-- Collect positional arguments 2..n as keys; the loop must index
		-- args[i], otherwise it never terminates.
		local i = 2
		while args[i] do
			key_filter[#key_filter + 1] = tonumber(args[i]) or args[i]
			i = i + 1
		end

		if #key_filter == 0 then
			key_filter = nil
		end

		skip_nulls = require('Module:yesno')(args.nulls)
		item_filter = args[1]
	end

	item_filter = (item_filter ~= "") and item_filter or function() return true end

	if type(item_filter) == 'string' then
		if item_filter == "TWO_LETTER" then
			item_filter = function(key, value)
				return #key == 2
			end
		elseif item_filter == "TWO_THREE_LETTER" then
			item_filter = function(key, value)
				return #key <= 3
			end
		elseif item_filter == "TWO_THREE_LETTER_REGULAR" then
			item_filter = function(key, value)
				return (#key <= 3) and value.type == 'regular'
			end
		elseif item_filter:sub(1, 1) == '=' then
			-- "=a,b,c": build a set of the requested keys
			local list = {}

			for item in mw.text.gsplit(item_filter:sub(2), ',') do
				list[item] = true
			end

			item_filter = function(key, value)
				return list[key]
			end
		else
			local t = item_filter
			item_filter = function(key, value)
				return value.type == t
			end
		end
	end

	local data = mw.loadData("Module:languages/data/all")
	local items = {}

	-- false indicates array, true indicates un-schematised object (just dump raw)
	local schema = {
		canonicalName = false,
		type = false,
		scripts = false,
		family = false,
		otherNames = false,
		ancestors = false,
		wikimedia_codes = false,
		aliases = false,
		varieties = false,
		sort_key = true,
		entry_name = true
	}

	for key, value in pairs(data) do
		if item_filter(key, value) then
			if key_filter then
				if #key_filter == 1 then
					-- single key: map the entry straight to that field's value
					local item = value[key_filter[1]]
					local itsc = schema[key_filter[1]]
					if item == nil then
						if not skip_nulls then
							items[#items + 1] = export_str(key) .. ':null'
						end
					else
						items[#items + 1] = export_str(key) .. ':' ..
							((type(item) == "string" and export_str(item))
							or (itsc == false and export_array(item))
							or export_object(item, true))
					end
				else
					-- several keys: copy only the requested fields
					local langobj = {}
					for _, fkey in pairs(key_filter) do
						langobj[fkey] = value[fkey]
					end
					items[#items + 1] = export_str(key) .. ':' .. export_object(langobj, schema)
				end
			else
				items[#items + 1] = export_str(key) .. ':' .. export_object(value, schema)
			end
		end
	end

	return "{" .. tab_concat(items, ",") .. "}"
end
-- Export the data loaded from "Module:scripts/data" as a JSON object keyed
-- by the keys of that table. Fields listed in the schema are serialised as
-- JSON arrays when their value is a table.
-- Returns the JSON text as a string.
function export.export_scripts()
	local data = mw.loadData("Module:scripts/data")
	local items = {}

	-- false indicates array; built once rather than on every iteration
	local schema = {
		canonicalName = false,
		characters = false,
		systems = false,
		otherNames = false,
		aliases = false,
		varieties = false
	}

	for key, value in pairs(data) do
		-- append (do not overwrite the accumulator table)
		items[#items + 1] = export_str(key) .. ':' .. export_object(value, schema)
	end

	return "{" .. tab_concat(items, ",") .. "}"
end
-- Export the data loaded from "Module:etymology languages/data" as a JSON
-- object keyed by the keys of that table. Fields listed in the schema are
-- serialised as JSON arrays when their value is a table.
-- Returns the JSON text as a string.
function export.export_etymology_languages()
	local data = mw.loadData("Module:etymology languages/data")
	local items = {}

	-- false indicates array; built once rather than on every iteration
	local schema = {
		canonicalName = false,
		parent = false,
		wikipedia_article = false,
		otherNames = false,
		ancestors = false,
		aliases = false,
		entry_name = false
	}

	for key, value in pairs(data) do
		-- append (do not overwrite the accumulator table)
		items[#items + 1] = export_str(key) .. ':' .. export_object(value, schema)
	end

	return "{" .. tab_concat(items, ",") .. "}"
end
-- Export the data loaded from "Module:families/data" as a JSON object keyed
-- by the keys of that table. Fields listed in the schema are serialised as
-- JSON arrays when their value is a table.
-- Returns the JSON text as a string.
function export.export_families()
	local data = mw.loadData("Module:families/data")
	local items = {}

	-- false indicates array; built once rather than on every iteration
	local schema = {
		canonicalName = false,
		otherNames = false,
		family = false,
		aliases = false,
		varieties = false
	}

	for key, value in pairs(data) do
		-- append (do not overwrite the accumulator table)
		items[#items + 1] = export_str(key) .. ':' .. export_object(value, schema)
	end

	return "{" .. tab_concat(items, ",") .. "}"
end
-- Export the `labels` table of the data loaded from "Module:labels/data" as
-- a JSON object. String entries are emitted as JSON strings; all other
-- entries are emitted as objects whose *_categories fields are serialised as
-- arrays when their value is a table.
-- Returns the JSON text as a string.
function export.export_labels()
	local data = mw.loadData("Module:labels/data")
	local labels = {}

	-- false indicates array; built once rather than on every iteration
	local schema = {
		plain_categories = false,
		topical_categories = false,
		pos_categories = false,
		regional_categories = false
	}

	for key, value in pairs(data.labels) do
		-- append (do not overwrite the accumulator table)
		if type(value) == "string" then
			labels[#labels + 1] = export_str(key) .. ':' .. export_str(value)
		else
			labels[#labels + 1] = export_str(key) .. ':' .. export_object(value, schema)
		end
	end

	return "{" .. tab_concat(labels, ',') .. "}"
end
-- Export the data loaded from 'Module:workgroup ping/data' as a JSON object.
-- String entries are emitted as JSON strings. Every other entry becomes an
-- object with its `desc` and `category` fields plus a `members` array built
-- from the entry's sequence part.
-- Returns the JSON text as a string.
function export.export_wgs()
	local m_wgdata = mw.loadData('Module:workgroup ping/data')
	local items = {}

	-- false indicates array
	local schema = { members = false }

	for key, value in pairs(m_wgdata) do
		if type(value) == 'string' then
			items[#items + 1] = export_str(key) .. ':' .. export_str(value)
		else
			local members = {}
			for _, user in ipairs(value) do
				-- append each positional element (do not overwrite the list)
				members[#members + 1] = user
			end

			local item = { desc = value.desc, category = value.category, members = members }
			items[#items + 1] = export_str(key) .. ':' .. export_object(item, schema)
		end
	end

	return "{" .. tab_concat(items, ",") .. "}"
end
-- Prefix completion for language names: returns a JSON object mapping every
-- name (taken from the `names` list of each entry in
-- "Module:languages/data/all") that starts with the first positional
-- argument to the key of the entry it belongs to.
-- NOTE(review): the original comment described this as a replacement for an
-- API-based lookup, but its link targets were lost; confirm the intended
-- wording against the module history.
-- TODO: limits?
function export.complete_langname(frame)
	local m_langs = mw.loadData("Module:languages/data/all")
	-- The prefix is the first positional argument; comparing names against
	-- the whole args table would never match.
	local target = frame.args[1]
	local items = {}

	for code, data in pairs(m_langs) do
		for _, name in ipairs(data.names) do
			if name:sub(1, #target) == target then
				-- append (do not overwrite the accumulator table)
				items[#items + 1] = export_str(name) .. ":" .. export_str(code)
			end
		end
	end

	return "{" .. tab_concat(items, ",") .. "}"
end
-- Return the table holding the export_* and complete_langname functions.
return export