-- What I don't like about {{zh-l}} automatically picking up Mandarin is that pinyin may be
-- misinterpreted as POJ or Cantonese Yale (or the other way around), or something similar.
-- Module table; public functions are attached to it below.
local export = {}
-- Modules this file depends on.
local M = require("Module:zh")
local m_links = require("Module:links")
local m_languages = require("Module:languages")
local m_script_utilities = require("Module:script utilities")
-- User-space helper; export.link calls its `extract_roman` function.
local m_test1 = require("Module:User:Suzukaze-c/zh-extract")
-- Language object for code "zh", passed to the link and script-tagging helpers.
local lang = m_languages.getByCode("zh")
-- The `data` field of a user-space data module, loaded read-only through mw.loadData.
local varinfo = mw.loadData("Module:User:Suzukaze-c/zh/data/info").data
-- Short local aliases for the library functions used throughout the file.
local match = mw.ustring.match
local gsub = mw.ustring.gsub
local split = mw.text.split
-- NOTE(review): this is an empty pattern as written, and an empty pattern matches any string.
-- The name suggests a Han-character class was intended and lost from this copy — confirm
-- against the canonical source.
local match_Han = ''
-- Helper whose result is prepended to a romanisation by export.link.
-- `abbr` is passed to mw.ustring.upper, so it is expected to be a string.
-- NOTE(review): as written, `page` and `tooltip` are both bound to the whole `varinfo`
-- table, `upper` is computed but never used, and the function always returns the literal
-- string ']'. Presumably the original indexed `varinfo` by `abbr` and built a wikilink
-- with a tooltip, and that text was lost from this copy — restore from the canonical
-- source rather than relying on this body.
local function abbr_gen(abbr)
local page, tooltip, upper = varinfo, varinfo, mw.ustring.upper(abbr)
return ']'
end
-- Entry point: builds a formatted link for a Chinese term from the arguments of the
-- calling template.
--
-- Reads `frame:getParent().args`, derives the variety list, the word, the gloss and
-- further annotations, optionally gathers romanisations through
-- `m_test1.extract_roman`, and returns either the result of `m_links.full_link` or,
-- when linking is suppressed, script-tagged plain text followed by the annotations.
--
-- NOTE(review): throughout this function `args` is used without an index
-- (`match(args, ...)`, `word = args or false`, ...). The comment below mentions "$1",
-- so positional/named lookups were presumably lost from this copy, along with other
-- bracketed text flagged in the notes below. Restore from the canonical source before
-- relying on any of this.
function export.link(frame)
-- Arguments of the template that invoked this module (the parent frame).
local args = frame:getParent().args
local varieties, word, gloss = '', '', ''
-- NOTE(review): `args` is a table here, while `match` expects a string subject.
if match(args, match_Han) then
-- variety specification has been left out; $1 is definitely a word here as it is in the Han script
varieties = 'm'
word = args or false
gloss = args or false
-- NOTE(review): `args` is a table and therefore always truthy, so `not args` is never
-- true as written.
elseif not args then
-- we have been given only a word, POSSIBLY in the Latin script, and nothing else
varieties = 'm'
word = args or false
gloss = false
else
varieties = args or false
word = args or false
gloss = args or false
end
-- Optional annotations and switches; `false` stands for "not supplied".
local pos = args or false
local lit = args or false
local manual_roman = args or false
local force_simp = args or false
-- From here on `varieties` is a table of codes (plain-text split on ",").
varieties = split(varieties, ",", true)
-- link repression
-- An "@" in the word is stripped and switches linking off.
-- NOTE(review): `no_link` is assigned without `local`, so it becomes a global.
if match(word, "@") then
word = gsub(word, "@", "")
no_link = true
end
if match(word, "%*") then
-- the usual linguistic *
no_link = true
end
-- cleanup
-- NOTE(review): as written this replaces "/" with "/" and changes nothing; presumably
-- the pattern originally targeted a different slash-like character — confirm.
word = gsub(word, "%/", "/")
-- `lookup_targets` starts as a copy of the word and ends up as a list of page names.
local lookup_targets = word
-- NOTE(review): the pattern '/-]' looks truncated (a character class appears to have
-- lost its opening bracket and contents).
lookup_targets = gsub(lookup_targets, '/-]', '') -- filter out things like punctuation
if match(word, "/") then
-- allow roman to be picked up even with explicit alternate forms
lookup_targets = split(lookup_targets, "/", true)
-- NOTE(review): `{ = lookup_targets }` is not valid Lua; an index was presumably lost
-- on both sides of the `=`.
lookup_targets = { = lookup_targets } -- save first table item into table
elseif match(word, "%[%[") then
-- we have been given multiple terms
-- NOTE(review): the next two patterns also look truncated relative to what their
-- trailing comments describe.
lookup_targets = gsub(lookup_targets, "|]+", "") -- remove link titles if present
lookup_targets = gsub(lookup_targets, "]", " ") -- replace all square brackets with spaces
lookup_targets = gsub(lookup_targets, " +", " ") -- reduce consecutive spaces
lookup_targets = mw.text.trim(lookup_targets) -- remove excess spaces
lookup_targets = split(lookup_targets, " ", true) -- now we have a table of each linked item (theoretically)
else
-- NOTE(review): same invalid table constructor as above.
lookup_targets = { = lookup_targets } -- change to table
end
-- check if all pages exist
-- One 'n' is appended per missing page; the string is then collapsed into a boolean.
local pages_exist = ''
-- The loop variable `word` shadows the outer `word` inside this loop.
for i, word in ipairs(lookup_targets) do
-- NOTE(review): mw.title.new may return nil for an invalid title, in which case
-- `.exists` would raise — confirm whether inputs can be invalid titles.
if not mw.title.new(word).exists then
pages_exist = pages_exist .. 'n'
end
end
pages_exist = not match(pages_exist, 'n')
-- extract every pronunciation for every word
local roman_for_each_word = {}
-- NOTE(review): `varieties` was turned into a table by `split` above, so
-- `varieties ~= ''` is always true at this point.
if not manual_roman and pages_exist and varieties ~= '' then
for i, word in ipairs(lookup_targets) do
-- NOTE(review): as written the table is reset on every iteration and then overwritten
-- below; keyed assignments (per word / per variety) were presumably lost.
roman_for_each_word = {}
local roman_all = m_test1.extract_roman(word, 1)
for j, variety in ipairs(varieties) do
-- Raises an error when `roman_all` is falsy.
roman_for_each_word = (roman_all and roman_all or error('"'..variety..'" pronunciation not found for ]!'))
end
end
end
-- if true then return '\n'..require('module:debug').dump(roman_for_each_word) end
-- Assemble the transliteration text `tr`.
local tr = {}
-- NOTE(review): `roman_for_each_word` is initialised to a table above and is only ever
-- reassigned to a table or another truthy value, so this test always succeeds as
-- written and the `manual_roman` and `else` branches below are never reached.
if roman_for_each_word then
for j, variety in ipairs(varieties) do
-- NOTE(review): `tr` is reset to a table, filled, then replaced by a string; the
-- following `ipairs(tr)` loop would therefore receive a string. Per-variety indexes
-- were presumably lost.
tr = {}
for i, word in ipairs(lookup_targets) do
table.insert(tr, roman_for_each_word)
end
tr = table.concat(tr, ' ')
end
-- if true then return '\n'..require('module:debug').dump(tr) end
for i, roman in ipairs(tr) do
tr = abbr_gen(varieties) .. ' ' .. tr
end
-- if true then return '\n'..require('module:debug').dump(tr) end
tr = table.concat(tr, '; ')
elseif manual_roman then
-- Manual romanisation: "/"-separated sets, each of the form "variety:romanisation".
manual_roman = split(manual_roman, "/", true)
for i, set in ipairs(manual_roman) do
-- NOTE(review): if a set has no ":" both captures are nil and the concatenation
-- below would raise.
local variety, roman = match(set, "(.+):(.+)")
table.insert(tr, abbr_gen(variety) .. ' ' .. roman)
end
tr = table.concat(tr, '; ')
else
tr = false
end
-- finalize link
if match(word, "%[%[") then
-- "]]"→"]]/]]"
-- Already-linked input: append the result of M.ts applied to the whole string.
word = word .. "/" .. M.ts(word)
elseif match(word, "/") then
-- "臺灣話/台灣話/台湾话"→"]/]/]"
-- NOTE(review): the replacement is a fixed literal here; the link-building expression
-- was presumably lost.
word = ']/]'
elseif M.ts_determ(word) == "trad" or force_simp then
-- "附著"→"]/]"
-- NOTE(review): fixed literal again, see above.
word = "]/]"
end
-- build the link
local terminfo = {lang = lang, term = word, tr = tr, gloss = gloss, pos = pos, lit = lit}
-- NOTE(review): `text` is assigned without `local` in both branches, so it becomes a global.
if no_link then
word = m_links.remove_links(word) -- "easier to destroy than create"
text = m_script_utilities.tag_text(word, lang) .. m_links.format_link_annotations(terminfo)
else
text = m_links.full_link(terminfo)
end
return text
end
-- Hand the module table (with `link` attached) back to whoever loads this module.
return export