--[==[
mostly unrelated things, in one single module bc i can.
show
format simple word lists
rpre
convert to {{ja-r}} using data given
extract_ja_readings
extract readings from a japanese entry
r
convert to {{ja-r}} using data in the linked entry
hzreport
generate hanzi entry report
newja
new japanese entry generator
newhz
new hanzi entry generator
newhzmul
new hanzi translingual entry generator, from a copy-paste of the unihan page. how horrifying
test_2
create a special:search link
test_3
newhz() wu
test_9
ltc/och attention
note to self: consider retracting {{zh-attn-split}}
test_10
determine ?action=edit&section=x
test_11
HSK list words missing appropriate category → https://en.wiktionary.org/?oldid=46157868
test_12
jouyou kanji that need {{ja-readings}} to be updated
]
test_13
process/update {{ja-readings}}
test_14
generate list of {{attention}}s from a copy-paste of the Category: page
make_map
map_test_data
kartographer test
test_16
kun readings that may be a verb with a '-' in the wrong place
https://en.wiktionary.org/w/index.php?title=%E6%89%93&diff=prev&oldid=50292857
test_17
look for missing japanese entries, based on jouyou kanji data
https://en.wiktionary.org/?oldid=52318414
test_18
extract entry names from a copy-paste of the Category: page
test_19
do things from a copy-paste of ]
=p.newja{={='プログレッシブロック', 'プログレッシブ ロック', 'n'}}
]==]
-- Table that every public function defined below is attached to.
local export = {}
-- Short local aliases for the Scribunto string helpers used throughout this file.
-- The mw.ustring functions are the Unicode-aware counterparts of Lua's string library.
local replace = mw.ustring.gsub
local find = mw.ustring.find
local match = mw.ustring.match
local itermatch = mw.ustring.gmatch
local split = mw.text.split
local itersplit = mw.text.gsplit
local trim = mw.text.trim
local lower = mw.ustring.lower
local sub = mw.ustring.sub
local len = mw.ustring.len
-- Formats a simple word list (module header: "format simple word lists").
-- The input text is normalised, cut into pieces, the pieces are concatenated and
-- passed to full_link of module:links, and the result is returned prefixed with
-- `«lang» `.
-- NOTE(review): as it appears here, several expressions in this function look
-- truncated: `frame.args` is used without an index, and some pattern/string
-- literals are empty or partial. Presumably bracketed text was lost upstream;
-- confirm against the original module before relying on this copy.
function export.show(frame)
local text = trim(frame.args)
local lang = frame.args
local out = {}
-- Pattern-escaped punctuation set. `non` is not referenced again in the visible
-- code (presumably it was spliced into the patterns below -- TODO confirm).
local non = require('Module:string utilities').pattern_escape('*!?.,。、:;…《》「」【】()!?.,:;()"—·☆ ')
-- Set to true once any word has been linked through module:ja-link.
local rubied = false
-- Runs of newlines become a single '*'.
text = replace(text, '\n+', '*')
text = replace(text, ' ', ' ')
-- Runs of whitespace collapse to one space.
text = replace(text, '%s+', ' ')
if not find(text, '') then
-- One output item per character of the text.
for char in itermatch(text, '(.)') do
table.insert(out, '] ')
end
else
-- Each match yields leading punctuation, a word, and trailing punctuation.
for punca, word, puncb in itermatch(text, '(*)(+)(*)') do
if (lang == 'ja') and match(word, '') then
rubied = true
-- '¥' stands in for a space inside a word.
word = replace(word, '¥', ' ')
-- `lemma・kana` form: the part after '・' is used as the kana.
local lemma, kana = match(word, '(.+)・(.+)')
if not kana then
-- No explicit kana given: derive it from the word itself.
lemma = word
kana = replace(word, '()', '!')
end
word = require('module:ja-link').link({lemma = lemma, kana = kana})
table.insert(out, punca..word..puncb)
else
table.insert(out, punca..']'..puncb)
end
end
end
out = table.concat(out)
-- Both branches build the same link; the script is forced to Jpan only when
-- something was linked through module:ja-link.
if rubied then
out = require('module:links').full_link({term = out .. '//', tr = '-', lang = require('module:languages').getByCode(lang), sc = require('module:scripts').getByCode('Jpan')})
else
out = require('module:links').full_link({term = out .. '//', tr = '-', lang = require('module:languages').getByCode(lang)})
end
return '«' .. lang .. '» ' .. out
end
-- Builds the wikitext of a `{{ja-r|...}}` call from values given directly in the
-- frame arguments (module header: "convert to {{ja-r}} using data given").
--
-- The result has the shape `{{ja-r|TERM[|READING]}}[: GLOSS]`. When a link title
-- is supplied the term part becomes `linkto=TERM|TITLE`.
--
-- NOTE(review): `frame.args` is read without an index on every line below, so
-- all of these locals receive the same table. Presumably the individual
-- argument keys were lost upstream; confirm against the original module.
--
-- @param frame  Scribunto frame object; only `frame.args` is read.
-- @return string  the generated wikitext.
function export.rpre(frame)
	-- Accepted argument layouts (the language code may or may not come first):
	-- {{m|1=ja|2=WORD|3=TITLE|4=GLOSS}}
	-- {{m|0=ja|1=WORD|2=TITLE|3=GLOSS}}
	local one = frame.args or ''
	local two = frame.args or ''
	local three = frame.args or ''
	local four = frame.args or ''
	local jp = ''
	local tr = frame.args or ''
	local gloss = frame.args or ''
	-- Declared local here: it was previously assigned without `local` and so
	-- leaked into the global environment.
	local linktitle = ''
	if one == 'ja' then
		-- Leading language code: everything shifts one position to the right.
		jp = two
		linktitle = three
		gloss = (gloss ~= '' and gloss or four)
	else
		jp = one
		linktitle = two
		gloss = three
	end
	tr = replace(tr, '+', '')
	-- '¥' stands in for a space inside the reading.
	tr = replace(tr, '¥', ' ')
	if gloss ~= '' then
		gloss = ': ' .. gloss
	end
	if tr ~= '' then
		tr = '|' .. tr
	end
	-- No reading given: fall back to the term itself when it matches the pattern.
	if tr == '' and find(jp, '') then
		tr = '|' .. jp
	end
	if linktitle ~= '' then
		jp = 'linkto=' .. jp .. '|' .. linktitle
	end
	return '{{ja-r|' .. jp .. tr .. '}}' .. gloss
end
-- Extracts readings from a Japanese entry (module header: "extract readings
-- from a japanese entry").
--
-- The page's wikitext is scanned for the headword templates ja-adj, ja-noun,
-- ja-verb, ja-verb-suru, ja-phrase, ja-pos and ja-altread; their parameters are
-- cleaned up and collected. Every `{{ja-see|...}}` target found on the page is
-- scanned the same way; when a target yields nothing new, the target title
-- itself is added as a reading.
--
-- NOTE(review): several pattern literals below ('(+)', '()', '') look truncated,
-- presumably because bracketed character classes were lost upstream. They are
-- kept exactly as found; confirm against the original module.
--
-- @param pagename  string: title of the entry to inspect.
-- @return table  sequence of reading strings with duplicates removed; empty when
--                the title contains `[[`, is invalid, or the page has no content.
function export.extract_ja_readings(pagename)
	if match(pagename, "%[%[") then
		--error("Cannot process Japanese text with embedded wikilinks.")
		return {}
	end

	local readings = {}
	local content

	-- Returns the wikitext of `name`, or nil when the title is invalid or the
	-- page has no content (mw.title.new returns nil for invalid titles).
	local function fetch(name)
		local title = mw.title.new(name)
		return title and title:getContent()
	end

	-- Cleans one template's parameter string and appends the parameters that
	-- match the final pattern to `readings`.
	local function process(text)
		-- Drop named parameters that are not readings.
		text = replace(text, 'hhira=+', '')
		text = replace(text, 'decl=+', '')
		text = replace(text, 'infl=+', '')
		text = replace(text, 'kyu=+', '')
		text = replace(text, 'head=+', '')
		text = replace(text, 'hira=', '')
		if text == 'proper' or text == 'proper noun' then
			-- Only a part of speech was given: use the capitalised page name.
			table.insert(readings, '^' .. pagename)
		end
		if find(text, 'proper') and not find(text, '%^') then
			-- '^' marks capitalisation in the generated reading.
			text = '^' .. replace(text, '()', '%1^')
		end
		if find(content, 'infl=い') then
			text = replace(text, 'しい', 'し.い')
		end
		if find(content, 'ja%-verb') then
			text = replace(text, 'おう', 'お.う')
		end
		for parameter in itersplit(text, '|') do
			if find(parameter, '') then
				table.insert(readings, parameter)
			end
		end
	end

	-- Template name fragments, already escaped for use inside a pattern and
	-- listed in the order they are scanned.
	local template_names = { 'adj', 'noun', 'verb', 'verb%-suru', 'phrase', 'pos', 'altread' }

	-- Scans the current `content` for every supported template.
	local function go()
		for _, name in ipairs(template_names) do
			for parameters in itermatch(content, '{{ja%-' .. name .. '|(+)}}') do
				process(parameters)
			end
		end
	end

	content = fetch(pagename)
	if not content then
		return readings
	end
	go()

	-- The iterator keeps walking the original page text even though `content`
	-- is re-pointed at each target inside the loop.
	for see_target in itermatch(content, '{{ja%-see|(+)') do
		-- Record the count, not the table: `readings` is mutated in place, so a
		-- second reference to it could never differ in length.
		local count_before = #readings
		content = fetch(see_target)
		if content then
			go()
		end
		if #readings == count_before then
			-- Scanning the target found nothing (presumably because the target
			-- is itself a kana spelling), so use its title as the reading.
			table.insert(readings, see_target)
		end
	end

	readings = require("Module:table").removeDuplicates(readings)
	return readings
end
-- Builds the wikitext of a `{{ja-r|...}}` call, looking the reading up in the
-- linked entry via export.extract_ja_readings (module header: "convert to
-- {{ja-r}} using data in the linked entry").
--
-- When the entry yields more than one reading and no choice was supplied, the
-- function instead returns a placeholder `{{ja-r|TERM|ーーーーー}}` followed by a
-- dump of the candidate readings so that the editor can pick one.
--
-- Editor workflow note kept from the original author (translated from
-- Japanese): to re-convert already converted words, select them and press the
-- space key, or select them and press Win+C.
--
-- NOTE(review): `frame.args` and `readings` are used without an index below, and
-- two pattern literals are empty. Presumably bracketed text was lost upstream;
-- confirm against the original module.
--
-- @param frame  Scribunto frame object; only `frame.args` is read.
-- @return string  the generated wikitext.
function export.r(frame)
	local one = frame.args or ''
	local two = frame.args or ''
	local three = frame.args or ''
	local four = frame.args or ''
	local jp = ''
	local tr = ''
	local gloss = frame.args or ''
	local choice = ''
	-- Declared local here: it was previously assigned without `local` and so
	-- leaked into the global environment.
	local linktitle = ''
	if find(one, '') then
		-- First argument selects which reading to use.
		choice = one
		jp = two
		linktitle = three
		gloss = (gloss ~= '' and gloss or four)
	elseif one == 'ja' then
		-- Leading language code: skip it.
		choice = ''
		jp = two
		linktitle = three
		gloss = (gloss ~= '' and gloss or four)
	else
		choice = ''
		jp = one
		linktitle = two
		gloss = (gloss ~= '' and gloss or three)
	end
	local readings = export.extract_ja_readings(jp)
	if #readings > 1 then
		if choice ~= '' then
			tr = readings
		else
			-- Ambiguous: show the candidates instead of guessing.
			return '{{ja-r|' .. jp .. '|ーーーーー}}\n' .. require("Module:debug").highlight_dump(readings)
		end
	else
		tr = (readings and readings or jp)
	end
	-- if term is pure kana and kana is identical
	if replace(jp, '', '') == '' and tr == jp then
		tr = ''
	end
	if gloss ~= '' then
		gloss = ': ' .. gloss
	end
	if tr ~= '' then
		tr = '|' .. tr
	end
	if linktitle ~= '' then
		jp = 'linkto=' .. jp .. '|' .. linktitle
	end
	return '{{ja-r|' .. jp .. tr .. '}}' .. gloss
end
-- Generates a sortable wikitable reporting, for each candidate character, which
-- parts of its entry are present (module header: "generate hanzi entry report").
-- Candidates come from the `hz` field of Module:User:Suzukaze-c/02/hz; each
-- character's page text is fetched and probed with plain pattern searches.
-- NOTE(review): the `for` line below (`candidates], ''`) and several pattern
-- literals look truncated, presumably because bracketed text was lost upstream.
-- The code is kept exactly as found; confirm against the original module.
function export.hzreport(frame)
local text = {}
local candidates = mw.loadData('Module:User:Suzukaze-c/02/hz').hz
local rows = {}
local y, n = '✔️', '❌️' -- taking advantage of colored emoji. with vs16
for hz in itersplit(candidates], '') do
-- A missing page is treated as empty text, so every check below reports `n` or ''.
local content = mw.title.new(hz):getContent() or ''
-- Presence checks: each yields the yes/no mark.
local zh = find(content, '==Chinese==') and y or n
local def = find(content, '{{zh%-+}}\n\n#') and y or n
local der = find(content, '===Compounds===+\n{{zh%-der') and y or n
-- Value captures: each yields the captured text or ''.
local uns = match(content, '|sim=(.)') or ''
local unt = match(content, '|tra=(.)') or ''
local ufs = match(content, '{{zh%-forms|s=(.)') or ''
local uft = match(content, '{{zh%-see|(+)}}') or ''
local goh = find(content, '===Glyph origin===') and y or n
local histf = find(content, '{{anety}}') and y or n
local ids = find(content, '|ids=') and y or n
local yue = match(content, '|c=(+)') or ''
local prc_tw = find(content, '|m=') and y or n
uft = replace(uft, "", '.')
-- Values longer than six characters are cut to five plus a '◆' marker.
if len(uft) > 6 then uft = sub(uft, 1, 5) .. '◆' end
if len(yue) > 6 then yue = sub(yue, 1, 5) .. '◆' end
hz = ''
-- Cell order must match the header row written below.
local cells = { hz, zh, def, der, uns, unt, ufs, uft, goh, histf, ids, yue, prc_tw }
table.insert(rows, '| ' .. table.concat(cells, ' || '))
end
table.insert(text, ']')
table.insert(text, '{| class="wikitable sortable Hani"')
table.insert(text, '! hz || zh? || def || der || unS || unT || ufS || ufT || goh || histF || ids || yue || prc/tw')
table.insert(text, '|-')
table.insert(text, table.concat(rows, '\n|-\n'))
table.insert(text, '\n|}')
return table.concat(text, '\n')
end
-- Generates the wikitext of a new Japanese entry (module header: "new japanese
-- entry generator").
-- Arguments are taken from the frame (or its parent), validated through
-- Module:parameters, and turned into a list of output lines (`output`) that is
-- joined with newlines and trimmed before being returned.
-- NOTE(review): throughout this function the argument table `a` is read and
-- written without a key, table constructors have entries with no key before
-- the `=`, and many pattern literals are empty or partial. Presumably all
-- bracketed text was lost upstream, which leaves parts of this block invalid
-- Lua as shown. The code is kept exactly as found; confirm against the original
-- module before editing the logic.
function export.newja(frame)
local a = frame.args and frame.args or frame:getParent().args
local m_languages = require('Module:languages')
local m_scripts = require('Module:scripts')
local lang_ja = m_languages.getByCode('ja')
local sc_Jpan = m_scripts.getByCode('Jpan')
local sc_Hrkt = m_scripts.getByCode('Hrkt')
local sc_Hani = m_scripts.getByCode('Hani')
local pagename = a or mw.title.getCurrentTitle().text
local header_level = 2 -- header level
local this_content = mw.title.new(mw.title.getCurrentTitle().text):getContent() or ''
-- Part-of-speech shortcuts. Each value lists one or more 'Header:template'
-- strings; an entry with two strings produces two PoS sections.
local pos_datas = {
= {'Adjective:adj'},
= {'Noun:noun'},
= {'Phrase:phrase'},
= {'Verb:verb'},
= {'Verb:verb-suru'},
= {'Verb:verb form'},
= {'Adverb:pos|adverb'},
= {'Adverb:pos|adverb'},
= {'Interjection:pos|interjection'},
= {'Proper noun:pos|proper noun'},
= {'Adjective:pos|adjective form'},
= {'Conjunction:pos|conjunction'},
= {'Noun:noun', 'Verb:verb-suru'},
= {'Adjective:adj', 'Noun:noun'},
}
local pos_aliases = {
= 'n',
}
-- Split every 'Header:template' string on ':' into its two parts.
for pos_codes, array in pairs(pos_datas) do
for i, name_and_template in ipairs(array) do
name_and_template = split(name_and_template, ':')
pos_datas = {
= name_and_template,
= name_and_template,
}
end
end
local verb_types_corresp = {
= '2', -- ichidan is type=2
= '1', -- godan is type=1
}
local etymology_magic_words = {
+)'] = function(a)
return '{{der|ja|' .. replace(a, '^(+) ', '%1|') .. '}}'
end,
+)'] = function(a)
return '{{bor|ja|' .. replace(a, '^(+) ', '%1|') .. '}}'
end,
+)'] = function(a)
return '{{bor|ja|' .. replace(a, '^(+) ', '%1|') .. '}}'
end,
+)'] = function(a)
return '{{obor|ja|' .. replace(a, '^(+) ', '%1|') .. '}}'
end,
= function(a)
return '{{internationalism|ja}}; see {{cog|en|}}'
end,
+) (+)'] = function(a, b)
return '{{rendaku2|' .. replace(a, '_', '̄') .. '|' .. replace(b, '_', '̄') .. '}}'
end,
= function(a)
return '{{rfe|ja' .. (a and '|' .. a or '') .. '}}'
end,
= function(a)
return '{{der|ja|ltc|-}} {{ltc-l|' .. pagename .. '}}'
end,
}
local usage_notes_magic_words = {
= '{{U:ja:biology}}'
}
local output = {}
-- parameters sorted by appearance
local params = {
= {list = true, allow_holes = true}, -- pos and def
= {type = 'number'}, -- etymology number
= {}, -- etymology text
= {}, -- ]
= {}, -- alt in header
= {allow_empty = true}, -- ateji
= {allow_empty = true}, -- yomi
= {allow_empty = true}, -- rendaku
= {}, -- alt in ]
= {allow_empty = true}, -- wikipedia
= {allow_empty = true}, -- en.wikipedia
= {}, -- file
= {}, -- file caption
= {allow_empty = true}, -- no ]
= {}, -- accent
= {}, -- devoicing
= {}, -- head
= {allow_empty = true, default = ''}, -- kana
= {}, -- transivity
= {}, -- alternate kana
= {}, -- measure word/counter
= {}, -- kyuujitai
= {}, -- historical hiragana
= {}, -- usage notes
= {}, -- synonyms
= {}, -- antonyms
= {}, -- hypernyms
= {}, -- hyponyms. super- and sub- bc i'm big dumb
= {}, -- coordinate terms
= {}, -- derived terms
= {}, -- related terms
= {}, -- descendants
= {}, -- see also
= {allow_empty = true}, -- references header
= {}, -- ]
= {}, -- ]
= {},
-- `allow_empty = true`
-- instead of `type = 'boolean'`
-- `|r=` is sufficient for saying 'rendaku yes'
-- instead of `|r=y`
-- "typo" "correction" also takes up time
= {alias_of = 'ate'},
= {alias_of = 'y'},
= {alias_of = 'hh'},
= {alias_of = 'sy'},
= {alias_of = 'sy'},
= {alias_of = 'an'},
= {alias_of = 'de'},
= {alias_of = 'al'},
= {alias_of = 'c'},
}
a = require('Module:parameters').process(a, params)
-- Normalises typed romaji/kana input. The typing-aids conversion call is
-- commented out below, so as written this only turns '_' into '-' (hyphens
-- are protected and restored) and applies the space replacement.
local function waapuro_to_kana(text)
if text == '' then return text end -- with just one parameter, ] will fallback to language 'all' and treat parameter 1 as the text to convert. for us that will return 'ja'
text = replace(text, '-', '@@@') -- preserve hyphen
text = replace(text, '_', '-') -- instead of hyphen, use underscore for chouonpu
--text = require('Module:typing-aids').replace({'ja', text})
text = replace(text, '@@@', '-') -- restore hyphen
text = replace(text, ' ', ' ') -- replace fullwidth space
return text
end
-- Appends one line to the output buffer.
local function wr(text)
table.insert(output, text)
end
-- Writes a section header at the current header level, preceded by a newline.
local function wrh(text)
wr('\n' .. mw.ustring.rep('=', header_level) .. text .. mw.ustring.rep('=', header_level))
end
-- Shifts the current header level by n (negative values move back up).
local function hl(n)
header_level = header_level + n
end
-- True when no kana was supplied and the page name does not match the pattern.
local function needs_reading(pagename, kana)
return not (kana or find(pagename, '^%-]+$'))
end
-- Builds the parameter list for the kanjitab template from the typed reading.
-- The examples below show: entry title, typed input, generated parameters.
local function generate_tab_params(kana, a)
-- 銀行
-- gin,kou
-- gin|kou|yomi=o
-- 文字
-- mon;mo,ji
-- mon|k1=mo|ji|yomi=o
-- 送り仮名
-- ri
-- oku|ri|ka|k3=ga|na|yomi=k
-- 送仮名
--
-- oku|o1=ri|ka|k3=ga|na|yomi=k
-- 満漢全席
-- man-kan zen,seki
-- man|kan|zen|seki|yomi=o
-- 犬
-- inu
-- inu
-- because pressing shift is effort
local yomi
local params = {}
if find(kana, '%[') then
yomi = 'k'
for yomigana in itermatch(kana, '%') do
table.insert(params, yomigana)
end
elseif sc_Hani:countCharacters(pagename) > 0 then
if find(kana, ',') then
yomi = 'o'
end
kana = replace(kana, '%^', '')
kana = replace(kana, '', ',')
if kana ~= '' then
params = split(kana, ',')
end
end
for i, yomigana in ipairs(params) do
yomigana = replace(yomigana, ';', '|k' .. i .. '=')
yomigana = replace(yomigana, ':', '|o' .. i .. '=')
params = yomigana
end
for i, _ in ipairs(params) do
params = require('Module:ja').kata_to_hira(params)
end
if a then
table.insert(params, 'r=y')
end
if a then
table.insert(params, 'ateji=' .. (a == '' and 'y' or a))
end
yomi = a or yomi
if yomi then
table.insert(params, 'yomi=' .. yomi)
end
if a then
table.insert(params, 'alt=' .. replace(a, '、', ','))
end
return params
end
-- Turns a typed list of terms into `* {{ja-r|...}}` / `* {{ja-l|...}}` bullet
-- lines. The examples below show typed input followed by generated output.
local function generate_links_list(text)
-- 3密
-- * {{ja-l|3密}}
-- 3密 sanmitu
-- * {{ja-r|3密|さんみつ}}
-- 3密 sanmitu,sense=Sense\q=Qualifier\三つの密 mittu no mitu\gloss=Gloss
-- * {{ja-r|3密|さんみつ}}
-- * {{sense|Sense}} {{q|Qualifier}} {{ja-r|三つの密|みっつ の みつ|gloss=Gloss}}
-- いい_ね
-- * {{ja-r|いい ね}}
local params = {}
for i, item in ipairs(split(text, '')) do
item = split(item, '')
local q
local sense
local output_link = {}
local main_found = false
local pagename, kana
local r_or_l
while #item > 0 do
if find(item, '^q=') then
q = replace(item, '^q=', '')
elseif find(item, '^sense=') then
sense = replace(item, '^sense=', '')
elseif not main_found then
pagename, kana = match(item, '^(-)(.+)$')
pagename = pagename or item -- if match() returns nil
pagename = (pagename and replace(pagename, '_', ' ') or pagename)
kana = (kana and replace(kana, ' ', ' ') or kana)
kana = (kana and waapuro_to_kana(kana) or kana)
if not kana then
kana = export.extract_ja_readings(pagename)
if #kana == 1 then
kana = kana
else
kana = nil
end
end
r_or_l = needs_reading(pagename, kana) and 'l' or 'r'
table.insert(output_link, 'ja-' .. r_or_l)
table.insert(output_link, pagename)
table.insert(output_link, kana)
main_found = true
else
table.insert(output_link, item)
end
table.remove(item, 1)
end
table.insert(
params,
'* ' .. (sense and '{{sense|' .. sense .. '}} ' or '') .. (q and '{{q|' .. q .. '}} ' or '') .. '{{' .. table.concat(output_link, '|') .. '}}'
)
end
return params
end
local kana = a
local kana_no_hyphens
-- convert kana from romaji to kana
kana = waapuro_to_kana(kana)
kana = mw.ustring.toNFC(kana) -- wtf?
-- convert fullwidth CJK symbols to halfwidth
kana = replace(kana, '(.)', {
= '[',
= ';',
= ':',
= ']',
= ',',
= '.',
= ' ',
})
-- generate ]
local tab
if a or sc_Hani:countCharacters(pagename) > 0 then
local tab_params = generate_tab_params(kana, a)
tab_params = table.concat(tab_params, '|')
tab = '{{ja-kanjitab' .. (tab_params and '|' .. tab_params) .. '}}'
end
-- remove markup for generating ] from kana
kana = replace(kana, '', '')
-- kun
kana = replace(kana, '(%]-):(]-)(%])', '%1%2%4%3')
kana = replace(kana, '(%]-);(]-)(%])', '%3')
kana = replace(kana, '(%]-)(%])', '%2')
-- on
kana = replace(kana, '()(-;)', '%1')
kana = replace(kana, '^(-;)', '')
kana = replace(kana, ',', '')
-- for ]
kana_no_hyphens = replace(kana, '', '')
-- blank if it's the same as the pagename. avoid unnecessary template input
if kana == pagename then kana = '' end
if kana_no_hyphens == pagename then kana_no_hyphens = '' end
-- automatic |head= using |1= (kana)
if replace(kana, '+', '') == pagename and not a then
a = '+)', ']]%1]'
a = replace(a, '%)%]%]', '%1')
end
-- process etymology
-- process usage notes
if a then
-- expand shortcuts
for magic_word, f in pairs(etymology_magic_words) do
a = replace(a, '\\' .. magic_word, f)
a = replace(a, ' }}{{', '}} {{')
end
-- automatically fill empty {{bor}}/{{der}}/{{cog}}
if a and match(a, '%%]') then
a = replace(a, '({{+?|ja|+)(}})', function(template_head, template_tail) return template_head .. '|' .. match(a, '%%]') .. template_tail end)
a = replace(a, '(see {{cog|en|)(}})', function(template_head, template_tail) return template_head .. match(a, '%%]') .. template_tail end)
end
-- add final period
if not find(a, '%.$') and not find(a, 'UNIQ.-QINU') and not find(a, '{{rendaku2+}}$') and not find(a, '{{pedia+}}$') and not find(a, '%-%->$') and not find(a, '{{rfe+$') then
a = a .. '.'
end
-- add leading 'From'
if find(a, '^{{compound|') then
a = 'From ' .. a
end
-- change leading ], ] to ], ]
if find(a, '^{{bor|') or find(a, '^{{der|') then
a = replace(a, '^{{bor|', '{{bor+|')
a = replace(a, '^{{der|', '{{der+|')
end
end
if a then
-- expand shortcuts
a = replace(a, '\\(+)', usage_notes_magic_words)
end
-- automatically make an {{etymid}}
if a and not a then
if match(pagename, '^+$') then
a = kana_no_hyphens
elseif a then
a = match(a, '^(+)')
end
end
-- write Japanese header
-- write etymology section
-- write etymology
-- write ]
-- write ]
-- write alternative forms
if match(this_content, 'ja%-readings') and not a then
a = 0
end
if a then
-- en = 0
-- for adding new sections under a single character entry like ], where you want an Etymology section for sanity, but just one, and no Japanese header
if a == 1 and not match(this_content, 'ja%-readings') then wrh('Japanese') end
hl(1)
wrh('Etymology' .. (a == 0 and '' or ' ' .. a))
wr(a and ('{{etymid|ja|' .. a .. '}}') or nil)
hl(1)
wr(tab and tab or nil)
wr(a and ('{{wikipedia|lang=ja' .. (a == '' and '' or '|' .. a) .. '}}') or nil)
wr(a and ('{{wikipedia|lang=en|' .. a .. '}}') or nil)
wr(a == 'rfi' and '{{rfi|ja}}' or nil)
if a == 'rfi' then a = nil end
wr(a and (' .. '|thumb|right' .. (a and '|' .. a or '').. ']]') or nil)
wr(a)
if a then
wrh('Alternative forms')
wr(table.concat(generate_links_list(a), '\n'))
end
else
wrh('Japanese')
hl(1)
wr(tab and tab or nil)
wr(a and ('{{wikipedia|lang=ja' .. (a == '' and '' or '|' .. a) .. '}}') or nil)
wr(a and ('{{wikipedia|lang=en|' .. a .. '}}') or nil)
wr(a == 'rfi' and '{{rfi|ja}}' or nil)
if a == 'rfi' then a = nil end
wr(a and (' .. '|thumb|right' .. (a and '|' .. a or '').. ']]') or nil)
if a then
wrh('Alternative forms')
wr(table.concat(generate_links_list(a), '\n'))
end
if a then
wrh('Etymology')
wr(a)
end
end
if sc_Hani:countCharacters(pagename) > 0 and not a and not a then
-- if title has kanji and not given pos or def
-- write ] by combining title and reading
if a then
wrh('Definitions')
end
-- NOTE(review): no `local` declaration for `pagename_to_kana` is visible in
-- this function, so this assignment appears to create a global.
pagename_to_kana = pagename
for char in itermatch(replace(replace(a, '', ''), '+', ''), '+') do
pagename_to_kana = replace(pagename_to_kana, '', char, 1)
end
wr('{{ja-see|' .. pagename_to_kana .. '}}')
elseif a and a ~= '' and not find(a, '') then
-- if pos is non-latin
-- write ] using that text
if a then
wrh('Definitions')
end
wr('{{ja-see|' .. table.concat(a, '|') .. '}}')
else
-- write ]
if not a then
-- 1DJR,2,3-
-- 0DJR NHK
local pron_params = {}
if kana_no_hyphens ~= '' or needs_reading(pagename, kana) then
table.insert(pron_params, kana_no_hyphens)
end
if a then
for i, acc_item in ipairs(split(a, '')) do
local acc, acc_ref = match(acc_item, '^(%d+)(.*)$')
acc_ref = replace(acc_ref, ' ', ',')
acc_ref = string.upper(acc_ref)
if acc_ref == '' then
acc_ref = 'DJR'
elseif acc_ref == '-' then
acc_ref = nil
end
if acc_ref and not a then
a = true
end
table.insert(pron_params, 'acc' .. (i > 1 and i or '') .. '=' .. acc)
table.insert(pron_params, acc_ref and 'acc' .. (i > 1 and i or '') .. '_ref=' .. acc_ref or nil)
table.insert(pron_params, a and 'dev=' .. a or nil)
end
end
wrh('Pronunciation')
wr('{{ja-pron' .. (#pron_params > 0 and '|' .. table.concat(pron_params, '|') or '') .. '}}')
end
-- if no pos or def parameters
-- then generate a default Noun and ]
if a == 0 then
a = {''}
a = 1
end
-- NOTE(review): no `local` declaration for `i` is visible at this level, so
-- the loop counter appears to be a global.
i = 1
while i <= a do
-- 犬 <empty string>
-- 犬 n
-- 赤い a
-- 赤い a,i
-- 明らか a,na
-- 画然 a,tari
-- 異常 an
-- 食べる v,2
local pos_code
local defs
-- `type` shadows Lua's built-in type() for the rest of this loop body.
local type, infl
pos_code = a or ''
defs = {
a or '{{rfdef|ja}}',
a or '{{rfdef|ja}}',
}
-- shortcut for {{lb}}
--
defs = replace(defs, '^%]+)%]', function(labels) return '{{lb|ja|' .. replace(labels, ',', '|') .. '}}' end)
defs = replace(defs, '# %]+)%]', function(labels) return '# {{lb|ja|' .. replace(labels, ',', '|') .. '}}' end)
defs = replace(defs, '^%]+)%]', function(labels) return '{{lb|ja|' .. replace(labels, ',', '|') .. '}}' end)
defs = replace(defs, '# %]+)%]', function(labels) return '# {{lb|ja|' .. replace(labels, ',', '|') .. '}}' end)
-- `code,type` form: the text after the comma is the conjugation/inflection type.
local match_a, match_b = match(pos_code, '^(.+),(.+)$')
if match_a then pos_code, type = match_a, match_b end
pos_code = pos_aliases or pos_code
if pos_code == 'v' and verb_types_corresp then
type = verb_types_corresp
end
-- default type
if not type then
if pos_code == 'an' then
type = 'na'
elseif pos_code == 'v' then
type = '1'
elseif pos_code == 'a' then
type = 'i'
end
end
-- adjectives use infl
if pos_code == 'an' or pos_code == 'a' then
infl = type
type = nil
end
-- suffixes are uhhh. uh. fuck it it's all 活用 anyway
if pos_code == 'suffix' then
if type == 'i' then
infl = type
type = nil
end
end
-- get data
local pos_data = pos_datas
-- create fallback data
pos_data = pos_data or {
{
= replace(pos_code, '^.', mw.ustring.upper),
= 'pos|' .. pos_code,
},
}
-- write header, etc
for ii, name_and_template in ipairs(pos_data) do
local is_a_or_v = false
-- NOTE(review): no `local` declaration for `name` or `template` is visible,
-- so both appear to be globals.
name = name_and_template
template = name_and_template
is_a_or_v = match(template, '^adj') or match(template, '^verb') or match(template, '^pos|suffix') -- prevent inappropriate addition of parameters (`noun|infl=na`)
if (sc_Hani:countCharacters(pagename) > 0) then
template = template .. (a and '|head=' .. a or '')
template = template .. (kana ~= '' and '|' .. kana or '')
else
template = template .. (a and '|' .. a or (kana ~= '' and '|' .. kana or ''))
end
template = template .. (a and '|' .. replace(waapuro_to_kana(a), ',', '|') or '')
if is_a_or_v then
template = template .. (a and '|tr=' .. a or '')
template = template .. (type and '|type=' .. type or '')
template = template .. (infl and '|infl=' .. infl or '')
end
template = template .. (a and '|count=' .. a or '')
template = template .. (a and '|kyu=' .. a or '')
template = template .. (a and '|hhira=' .. waapuro_to_kana(a) or '')
wrh(name)
wr('{{ja-' .. template .. '}}')
wr('')
wr('# ' .. defs)
if is_a_or_v then
local kana_stem = sub(kana ~= '' and kana or pagename, 0, -2)
kana_stem = (kana == '' and kana or kana_stem) -- the templates will be smart if you do not give it a reading
local kana_last = sub(kana ~= '' and kana or pagename, -1)
kana_last = lang_ja:transliterate(kana_last, sc_Hrkt)
if type or pos_code == 's' or pos_code == 'suru' then
hl(1)
wrh('Conjugation')
if type == '2' then
wr('{{ja-ichi' .. (kana_stem ~= '' and '|' .. kana_stem or '') .. '}}')
elseif pos_code == 's'or pos_code == 'suru' then
wr('{{ja-suru' .. (kana ~= '' and '|' .. kana or '') .. '}}')
elseif type == '1' then
wr('{{ja-go-' .. kana_last .. (kana_stem ~= '' and '|' .. kana_stem or '') .. '}}')
else
wr('{{rfinfl|ja}}')
end
hl(-1)
end
if infl then
hl(1)
wrh('Inflection')
if infl == 'na' then
wr('{{ja-' .. infl .. (kana ~= '' and '|' .. kana or '') .. '}}')
elseif infl == 'shiku' or infl == 'ku' then
-- ja-conj-bungo will not be smart
wr('{{ja-conj-bungo|' .. sub(kana ~= '' and kana or pagename, 0, -2) .. '|adj-' .. infl .. '}}')
elseif infl == 'i' then
wr('{{ja-' .. infl .. (kana_stem ~= '' and '|' .. kana_stem or '') .. '}}')
else
wr('{{rfinfl|ja}}')
end
hl(-1)
end
end
-- The optional subsections below are written once only, under the first PoS
-- header of the first pos/def group.
if i == 1 and ii == 1 then
hl(1)
if a then
wrh('Usage notes')
wr('* ' .. a)
end
if a then
wrh('Synonyms')
wr(table.concat(generate_links_list(a), '\n'))
end
if a then
wrh('Antonyms')
wr(table.concat(generate_links_list(a), '\n'))
end
if a then
wrh('Hypernyms')
wr(table.concat(generate_links_list(a), '\n'))
end
if a then
wrh('Hyponyms')
wr(table.concat(generate_links_list(a), '\n'))
end
if a then
wrh('Coordinate terms')
wr('*' .. replace(table.concat(generate_links_list(a), ','), '%*', ''))
end
if a then
wrh('Derived terms')
wr(table.concat(generate_links_list(a), '\n'))
end
if a then
wrh('Related terms')
wr(table.concat(generate_links_list(a), '\n'))
end
if a then
wrh('Descendants')
wr('* ' .. a)
end
if a then
wrh('See also')
wr(table.concat(generate_links_list(a), '\n'))
end
hl(-1)
end
end
-- advance i by the number of PoS headers produced
i = i + 1 + (#pos_data)
end
end
if (a or (a and find(a, 'UNIQ.-QINU'))) and (a ~= 'n') and (not a) then
if a then
hl(-1)
end
wrh('References')
wr('<references />')
end
if a or a then
wr('')
if a then
a = replace(a, '^.', mw.ustring.upper)
a = replace(a, '(,)(.)', function(a, b) return a .. mw.ustring.upper(b) end)
wr('{{C|ja|' .. replace(a, '', '|') .. '}}')
end
if a then
wr('{{cln|ja|' .. replace(a, '', '|') .. '}}')
end
end
output = table.concat(output, '\n')
-- html comments
-- real html comments are discarded before the module ever knows
output = replace(output, '<%-%-', '<!--')
-- trim: otherwise `Etymology n` headers create a leading newline
return trim(output)
end
-- Generates the wikitext of a new Chinese (hanzi) entry for the current page
-- (module header: "new hanzi entry generator").
-- The current page's existing text is consulted for known forms, pronunciation
-- data is looked up through Module:zh and related data modules, and the
-- resulting sections are collected in `output`, joined with newlines and
-- trimmed before being returned.
-- NOTE(review): the argument table `a` is read without a key throughout, and
-- several pattern literals are empty or partial. Presumably all bracketed text
-- was lost upstream. The code is kept exactly as found; confirm against the
-- original module before editing the logic.
function export.newhz(frame)
local m_zh = require("Module:zh")
local m_zh_new = require("Module:zh-new")
local a = frame.args
local character = mw.title.getCurrentTitle().text
local en = tonumber(a) or 0 -- etym_number
local hl = 3 -- header_level
-- Builds a section header at the current header level, preceded by a newline.
local function head(text) return '\n' .. mw.ustring.rep('=', hl) .. text .. mw.ustring.rep('=', hl) end
-- Returns the header level one deeper (positive argument) or one shallower.
local function hcr(a) return (a > 0 and hl + 1 or hl - 1) end -- header_{in|de}crement
local content = mw.title.new(character):getContent() or ''
local simp_form = a or match(content, '|sim=(.)') or match(content, '|s=(.)') or false
--local trad_form = a or match(content, '|tra=(.)') or match(content, '|t=(.)') or false
local alt_form = a or a or match(content, '|alt=(+)') or false
local zhwp_data = require('Module:User:Suzukaze-c/02/zhwp')
-- ----
local add_han_etym = false
local candidates = mw.loadData('Module:User:Suzukaze-c/02/hz').hz
if en <= 1 then
if find(candidates, character) or require("module:zh-glyph/phonetic/list") then
add_han_etym = true
end
end
local zh_see
if not a then
-- NOTE(review): the `local trad_form` declaration above is commented out, so
-- `trad_form` resolves to a global here (nil unless it is defined elsewhere).
if trad_form or m_zh.ts_determ(character) == 'simp' then
zh_see = (trad_form or m_zh.st(character))
end
end
if a and match(a, '^$') then
zh_see = a .. (a and '|' .. a or '')
end
local zh_forms = ''
if simp_form or m_zh.ts_determ(character) == 'trad' then
zh_forms = zh_forms .. '|s=' .. (simp_form or m_zh.ts(character))
end
if alt_form then
zh_forms = zh_forms .. '|alt=' .. alt_form
end
-- zh_wp: nil/false means no {{zh-wp}} line; '' means {{zh-wp}} with no
-- arguments; otherwise it holds the '|'-prefixed argument list.
local zh_wp
if a then
if a == 'y' then
zh_wp = ''
elseif a == '' then
zh_wp = false
else
zh_wp = '|' .. replace(a, ',', '|')
end
end
if zhwp_data.single_char_title or zhwp_data.contains_astral or zhwp_data.single_char_title then
zh_wp = ''
end
-- ----
local output = {}
-- Appends one line to the output buffer.
local function write(text) table.insert(output, text) end
-- ----
if en <= 1 then
write('==Chinese==')
end
if en == 0 then
if not zh_see then write('{{zh-forms' .. zh_forms .. '}}') end
if zh_wp then
write('{{zh-wp' .. zh_wp .. '}}')
end
end
if a or add_han_etym then
write(head('Glyph origin'))
if add_han_etym then write('{{Han etym}}') end
if a then write(a) end
if zh_see and en == 0 then
write(head('Definitions'))
end
end
if en > 0 then
write(head('Etymology ' .. tostring(en)))
if not zh_see then write('{{zh-forms' .. zh_forms .. '}}') end
if zh_wp then
write('{{zh-wp' .. zh_wp .. '}}')
end
end
-- Numbered etymology sections push everything below them one level deeper.
if en > 0 then
hl = hcr(1)
end
if zh_see then
-- Soft-redirect entry: only a {{zh-see}} line is written.
write('{{zh-see|' .. zh_see .. '}}')
else
write(head('Pronunciation'))
local m, c, h, mn, w = a or false, a or false, a or false, a or false, a or false
local mc, oc = a or false, a or false
if m and find(m, '') then
m = replace(m, '', '@')
m = replace(m, '@+', '@')
m = replace(m, '^@+', '')
m = replace(m, '@+$', '')
m = replace(m, '@', ',')
m = require("module:cmn-pron").zhuyin_py(m)
end
-- Tone digits 1-4 become combining tone marks; 'v' becomes 'ü'.
if m and find(m, '') then
m = replace(m, '1', '̄')
m = replace(m, '2', '́')
m = replace(m, '3', '̌')
m = replace(m, '4', '̀')
m = replace(m, 'v', 'ü')
end
if not m then
if require("module:zh/data/cmn-tag").MT then -- if there is cmn-tag data
m = character
else
m = mw.ustring.gsub(m_zh_new.pytemp(character,'','',''), ',', ', ') or false -- based on line from zh-new
if m == character or m == simp_form or m == m_zh.ts(character) then
-- pinyin conversion failed
m = false
end
end
end
if not c then
c = m_zh.check_pron(character, 'yue', 1) or false
if c and find(c, ',') then a = 'yes' end
end
-- Tone digits in the typed Hakka reading become combining marks (or nothing).
if h and find(h, '') then
h = replace(h, 'w', 'ṳ')
h = replace(h, '24', '̂')
h = replace(h, '11', '̀')
h = replace(h, '31', '́')
h = replace(h, '55', '')
h = replace(h, '2', '')
h = replace(h, '5', '̍')
end
if not h then
h = m_zh.check_pron(character, 'hak', 1) or false
end
if not mn then
mn = m_zh.check_pron(character, 'nan-hbl', 1) or false
end
if w and find(w, '') then
w = export.test_3(w)
end
-- Middle/Old Chinese are enabled automatically when a data page exists.
if (not mc) and (mw.title.new('Module:zh/data/ltc-pron/' .. character).exists) then
mc = 'y'
end
if (not oc) and (mw.title.new('Module:zh/data/och-pron-BS/' .. character).exists or mw.title.new('Module:zh/data/och-pron-ZS/' .. character).exists) then
oc = 'y'
end
-- With no Mandarin reading the whole {{zh-pron}} call is wrapped in an HTML
-- comment and a pronunciation request is added after it.
if not m then write('<!--') end
write('{{zh-pron')
if m then write('|m=' .. m .. (a and ',er=y' or '')) else write('|m=') end
if (a or a) then write('|m-s=' .. (a or a)) end
if c then write('|c=' .. c) end
if (a or a) then write('|c-t=' .. (a or a)) end
if a then write('|g=' .. a) end
if h then write('|h=pfs=' .. h) end
if a then write('|j=' .. a) end
if a then write('|md=' .. a) end
if mn then write('|mn=' .. mn) end
if (a or a) then write('|mn_note=' .. (a or a)) end
if (a or a) then write('|mn-t=' .. (a or a)) end
if (a or a) then write('|mn-t_note=' .. (a or a)) end
if w then write('|w=' .. w) end
if a then write('|x=' .. a) end
if mc or oc then
write('|mc=' .. (mc or ''))
write('|oc=' .. (oc or ''))
end
if a then write('|ma=' .. a) end
write('|cat=' .. (a or ''))
write('}}')
if not m then write('-->') end
if not m then write('{{rfp|cmn|Mandarin}}') end
write(head('Definitions'))
write('{{head|zh|hanzi}}')
write('')
if a then
write('# {{lb|zh|Taiwan}} {{n-g|Only used in personal names.}}')
else
write('# ' .. (a or '{{rfdef|zh}}'))
end
if not a then -- Lua error: not enough memory
local der = ''
local der_add = ''
if a then
der_add = a
der_add = replace(der_add, '+', '|') -- any non-hanzi text becomes separator
der_add = replace(der_add, '|+', '|')
der_add = replace(der_add, '^|', '')
der_add = replace(der_add, '|$', '')
der_add = '|' .. der_add
end
if match(character, '') then
for title, _ in pairs(zhwp_data.contains_astral) do
if len(title) > 1 and match(title, character) then
der_add = der_add .. '|' .. title
end
end
end
-- Expand the derived-terms template now; the result is inspected below.
der = frame:preprocess('{{subst:zh-der/new' .. (der_add or '') .. ((m and not match(m, ',')) and '|p=' .. mw.ustring.toNFC(m) or '') .. '}}')
if match(der, 'memory') then
write(head('Compounds'))
write('{{su#bst:zh-der/new' .. (der_add or '') .. (m and '|p=' .. replace(m, ',.+', '') or '') .. '}}') -- let you add zh-der in a separate edit in case Lua returns "out of memory"
elseif match(der, '') then
write(head('Compounds'))
write(der)
end
end
if a then
write(head('See also'))
write('* {{zh-l|' .. a .. '}}')
end
end
-- Undo the header-level shift applied for numbered etymology sections.
if en > 0 then
hl = hcr(-1)
end
if a or a or a or a or a or a then
write(head('References'))
if a then write('* {{R:twedu|' .. a .. '}}') end
if a then write('* {{R:yue:mfccd}}') end
if a then write('* {{R:yue:jyut.net}}') end
if a then write('* {{R:yue:Jyutping Database}}') end
if a then write('* {{R:zh:CNS|' .. replace(a, '%-', '|') .. '}}') end
if a then
if find(a, '%d') then
write('* {{R:nan:thcwd|' .. a .. '}}')
else
write('* {{R:nan:thcwdq}}')
end
end
end
return trim(table.concat(output, '\n'))
end
-- Generates a Translingual Han-character entry skeleton by filling the
-- $placeholders of a wikitext model with values pulled out of `text`
-- (per the module header: a copy-paste of the Unihan page) and out of the
-- existing content of the current page.
-- Returns the filled-in wikitext as a string.
-- NOTE(review): several index expressions and pattern character classes in
-- this function look as if bracketed text was stripped (e.g. `frame.args`
-- used where a string is required, `(+)` captures, `corr_s` without keys).
-- The block does not parse as written; confirm against the source page.
function export.newhzmul(frame)
-- NOTE(review): presumably a single positional argument, not the whole args table - confirm.
local text = frame.args
local char = mw.title.getCurrentTitle().text
-- Existing wikitext of the current page; empty string when getContent() returns nil.
local x = mw.title.new(char):getContent() or ''
local model = trim([==[
{{character info}}
==Translingual==
===Han character===
{{Han char|rn=$rs1|rad=$rad|as=$rs2|sn=$TotalStrokes|four=$FourCornerCode$four|canj=$Cangjie$canj|ids=$ids}}
# $Definition
====References====
{{Han ref|kx=$IRGKangXi|dkj=$IRGDaiKanwaZiten|dj=$IRGDaeJaweon|hdz=$IRGHanyuDaZidian|uh=$hex}}
]==])
-- Lookup list used to fill |rad= below; appears to be the radicals in radical-number order - TODO confirm.
local corr = {'一','丨','丶','丿','乙','亅','二','亠','人','儿','入','八','冂','冖','冫','几','凵','刀','力','勹','匕','匚','匸','十','卜','卩','厂','厶','又','口','囗','土','士','夂','夊','夕','大','女','子','宀','寸','小','尢','尸','屮','山','巛','工','己','巾','干','幺','广','廴','廾','弋','弓','彐','彡','彳','心','戈','戶','手','支','攴','文','斗','斤','方','无','日','曰','月','木','欠','止','歹','殳','毋','比','毛','氏','气','水','火','爪','父','爻','爿','片','牙','牛','犬','玄','玉','瓜','瓦','甘','生','用','田','疋','疒','癶','白','皮','皿','目','矛','矢','石','示','禸','禾','穴','立','竹','米','糸','缶','网','羊','羽','老','而','耒','耳','聿','肉','臣','自','至','臼','舌','舛','舟','艮','色','艸','虍','虫','血','行','衣','襾','見','角','言','谷','豆','豕','豸','貝','赤','走','足','身','車','辛','辰','辵','邑','酉','釆','里','金','長','門','阜','隶','隹','雨','靑','非','面','革','韋','韭','音','頁','風','飛','食','首','香','馬','骨','高','髟','鬥','鬯','鬲','鬼','魚','鳥','鹵','鹿','麥','麻','黃','黍','黑','黹','黽','鼎','鼓','鼠','鼻','齊','齒','龍','龜','龠'}
-- Alternative forms used when the radical number carries a trailing apostrophe (see the |rad= step).
-- NOTE(review): every entry has lost its key (`=` with nothing before it), which is a syntax error;
-- the keys are presumably radical numbers - confirm and restore.
local corr_s = {='讠',='门',='饣',='飞',='马',='见',='贝',='纟',='车',='长',='韦',='风',='钅',='鸟',='龙',='页',='齐',='麦',='龟',='鱼',='黾',='齿',='卤'}
-- from text
local targets = {'RSUnicode','TotalStrokes','FourCornerCode','Cangjie','IRGKangXi','IRGDaiKanwaZiten','IRGDaeJaweon','IRGHanyuDaZidian','Definition'}
for _, property in ipairs(targets) do
-- Looks for "k<property>" followed by whitespace and takes what follows as the value.
-- NOTE(review): the capture `(+)` appears to have lost its character class - confirm.
local value = trim(match(text, 'k'..property..'%s+(+)') or '')
mw.log(property .. '|' .. value)
if property == 'RSUnicode' then
-- Keep only the first space-separated value, then split it on '.'.
value = replace(value, ' .+', '') -- 龽
value = split(value, '%.')
-- NOTE(review): `value` is a table here; the two replacements presumably used its first
-- and second elements respectively - confirm.
model = replace(model, '$rs1', value)
model = replace(model, '$rs2', value)
elseif property == 'Definition' then
-- NOTE(review): `find` is given `frame.args` itself; presumably a specific argument - confirm.
if value == '' or find(frame.args, 'x') then
-- Drop the definition line entirely.
model = replace(model, '# $Definition\n\n', '')
else
-- Each ';' in the definition starts a new numbered line.
value = replace(value, ';', '\n#')
model = replace(model, '$Definition', value)
end
else
model = replace(model, '$'..property, value)
end
-- When the text supplied a value, clear the fallback placeholder that sits next to it in the model.
if property == 'FourCornerCode' and value ~= '' then
model = replace(model, '$four', '')
end
if property == 'Cangjie' and value ~= '' then
model = replace(model, '$canj', '')
end
end
-- read from existing page or manually provided
local ex = {'ids','four','canj'}
for _, property in ipairs(ex) do
-- NOTE(review): the capture `(+)` and the `frame.args` fallback both look truncated
-- (presumably a character class and a per-property argument lookup) - confirm.
model = replace(model, '$'..property, match(x, '|'..property..'=(+)') or frame.args or '')
end
-- |rad=
-- Rewrites "|rn=<digits><optional '>|rad=$rad|": the apostrophe is dropped from |rn= and
-- $rad is replaced by a value taken from `corr`, or from `corr_s` when the apostrophe was present.
model = replace(model, '(|rn=)(%d+)(\'?)(|rad=)($rad)(|)', function(a,b,c,d,e,f)
-- NOTE(review): `corr` / `corr_s` are used as whole tables here; presumably they were
-- indexed by the radical number captured in `b` - confirm.
local z = corr
return a .. b .. d .. (c == '\'' and corr_s or z) .. f
end
)
-- remove empty dict fields
local template_ref_fields = {'kx','dkj','dj','hdz'}
for _, property in ipairs(template_ref_fields) do
model = replace(model, '|'..property..'=|', '|')
end
-- Fill $hex with the hex digits that follow "Unihan data for U" plus one character in the text.
-- NOTE(review): if the text has no such line, `match` returns nil and is passed as the replacement - confirm this is acceptable.
model = replace(model, '$hex', match(text, 'Unihan data for U.(%x+)'))
return model
end
-- Builds an external link to Special:Search (per the module header:
-- "create a special:search link") from the invocation arguments.
function export.test_2(frame)
-- NOTE(review): the scheme and host appear twice at the start of this literal; confirm
-- whether the intended prefix is a single "https://en.wiktionary.org/w/index.php?...".
local search_base = 'https://en.wiktionary.orghttps://en.wiktionary.org/w/index.php?title=Special%3ASearch&profile=default&fulltext=Search&search='
-- NOTE(review): this statement does not parse as written. The text between the opening
-- quote and `)), 'PATH')` is missing (presumably an opening '[', `search_base`, and nested
-- calls that encode an argument with the 'PATH' style), and `frame.args or frame.args`
-- has presumably lost two different argument indexes - restore from the source page.
return ')), 'PATH') .. ' ' .. trim(frame.args or frame.args) .. ']'
end
-- Converts a romanised syllable that ends in a tone letter (P/S/Q/R) into a
-- string of the form <tone digit><syllable>. The module header describes this
-- as the Wu helper for newhz().
-- `text` may be a string or a frame-like table.
-- NOTE(review): several patterns and the replacement table below look as if
-- their bracketed parts were stripped; the block does not parse as written.
function export.test_3(text)
-- NOTE(review): when a table is passed, `text.args` is itself a table; presumably one specific argument was meant - confirm.
if type(text) == 'table' then text = text.args end
-- Split into the syllable and its trailing tone marker.
-- NOTE(review): the second capture `()` is a position capture as written; presumably it was a character class of tone letters - confirm.
local syllable, tone = match(text, '(.+)()')
local voiced = false
-- NOTE(review): the first pattern "^" has presumably lost a character class of voiced initials - confirm.
if match(syllable, "^") or match(syllable, "^m") or match(syllable, "jj") or match(syllable, "xx") or match(syllable, "hh") then
voiced = true
end
-- Rewrite the initial when it is followed by 'i'.
-- NOTE(review): the character classes in both patterns and every key of the replacement table are missing (syntax error) - restore from the source page.
if match(syllable, '^+i') then
syllable = replace(syllable, '^+', { ='j', ='j', ='q', ='q', ='jj', ='x', ='x', ='xx', ='xx' } )
end
-- A final 'h' becomes 'q'.
syllable = replace(syllable, 'h$', 'q')
-- too lazy for vowels
-- Tone letter plus voicing decides the output tone digit:
-- P -> 3 (voiced) / 1, S -> 3 / 2, Q -> 3 / 2, R -> 5 / 4.
if tone == 'P' and voiced then
tone = '3'
elseif tone == 'P' and not voiced then
tone = '1'
elseif tone == 'S' and voiced then
tone = '3'
elseif tone == 'S' and not voiced then
tone = '2'
elseif tone == 'Q' and voiced then
tone = '3'
elseif tone == 'Q' and not voiced then
tone = '2'
elseif tone == 'R' and voiced then
tone = '5'
elseif tone == 'R' and not voiced then
tone = '4'
end
return tone .. syllable
end
--- Return the fixed pair of attention templates for Middle/Old Chinese.
-- Takes no arguments; the wikitext is always the same.
function export.test_9()
	local attention_wikitext = '{{attn|ltc}}{{attn|och|Middle+Old Chinese needs to be distributed}}'
	return attention_wikitext
end
--- Build the "edit this section" URL for a heading on a page.
-- Headings are counted in the order they appear in `content`; the count at
-- the first heading whose text equals `target_header` becomes the `section`
-- query parameter.
-- @param content        wikitext of the page (string)
-- @param target_header  heading text, exactly as written between the '=' runs
-- @param pagename       page title handed to mw.uri.canonicalUrl
-- @return URL string. When no heading matches, the section number is the total
--         number of headings seen (0 for a page without headings).
function export.test_10(content, target_header, pagename)
	local section = 0
	-- The capture must be "anything that is not '='"; a bare `(+)` only ever
	-- matches a literal plus sign, so no real heading could be found.
	for header in itermatch(content, '==+([^=\n]+)==+\n') do
		section = section + 1
		if header == target_header then
			break
		end
	end
	-- Query parameters are separated by '&'; "§ion" is a mangled "&section".
	return tostring(mw.uri.canonicalUrl(pagename, 'action=edit&section=' .. section))
end
-- Scans the HSK word-list appendices and collects the words whose entry text
-- does not mention the appendix level (per the module header: "HSK list words
-- missing appropriate category"). Multi-character words and single characters
-- are collected in separate lists; both lists are returned concatenated.
-- NOTE(review): several patterns and the emitted list-item string look as if
-- bracketed text was stripped from them; confirm against the source page.
function export.test_11()
local ciout = {}
local ziout = {}
-- The first assignment is immediately overwritten; only the two levels below are processed.
local levels = {'Beginning','Elementary','Intermediate','Advanced'}
levels = {'Elementary','Intermediate'}
for _, level in ipairs(levels) do
table.insert(ciout, '*' .. level .. '\n**')
table.insert(ziout, '*' .. level .. '\n**')
-- NOTE(review): getContent() may return nil for a missing page, which would make the next `replace` fail - confirm the appendices always exist.
local apcontent = mw.title.new('Appendix:HSK list of Mandarin words/' .. level .. ' Mandarin'):getContent()
-- Normalise the different link templates used in the appendix down to the bare words.
-- NOTE(review): every `(+)` below has presumably lost a character class - confirm.
apcontent = replace(apcontent, '{{l|cmn|(+)|tr={{l|cmn|(+)}}', '%2')
apcontent = replace(apcontent, '{{zh.l|(+)/(+)|', '%1')
apcontent = replace(apcontent, '{{zh.l|(+)/(+)/(+)|', '%1+%2')
-- NOTE(review): this pattern is unbalanced as written (closing parentheses without openers) - confirm.
apcontent = replace(apcontent, '%]+)%]%] %(%]+)%]%],', '%2')
apcontent = replace(apcontent, 'is called a +', '')
-- NOTE(review): the iteration pattern '+' has presumably lost its character class (the set of characters that make up a word) - confirm.
for ci in itermatch(apcontent, '+') do
local cicontent = mw.title.new(ci):getContent() or ''
-- An entry counts as "ok" when it contains a zh-pron template.
local ok = match(cicontent, 'zh%-pron')
-- Edit link for the entry's Chinese section.
local url = export.test_10(cicontent, 'Chinese', ci)
if not find(cicontent, level) then
-- Single characters go to `ziout`, longer words to `ciout`; entries without zh-pron are wrapped in <mark>.
-- NOTE(review): neither `ci` nor `url` appears in the inserted string, so the link text was presumably lost - confirm.
table.insert(len(ci)==1 and ziout or ciout,
(ok and '' or '<mark>')
..
', '
..
(ok and '' or '</mark>')
)
end
end
table.insert(ciout, '\n')
table.insert(ziout, '\n')
end
return table.concat(ciout, '')..table.concat(ziout, '')
end
-- For each kanji in a batch, compares the {{ja-readings}} call found on the
-- kanji's page with what test_13 generates from the same arguments, and
-- collects the kanji for which the two differ (per the module header:
-- "jouyou kanji that need {{ja-readings}} to be updated").
-- Returns the collected items wrapped in a <span lang="ja" class="plainlinks">.
-- NOTE(review): the keys of `kj`, the index expression that selects a batch,
-- one pattern and the emitted item string all look stripped; the block does
-- not parse as written. Confirm against the source page.
function export.test_12(frame)
local out = {}
-- Kanji split into batches.
-- NOTE(review): each batch has lost its key (`=` with nothing before it), which is a syntax error.
local kj = {
= '亜哀挨愛曖悪握圧扱宛嵐安案暗以衣位囲医依委威為畏胃尉異移萎偉椅彙意違維慰遺緯域育一壱逸茨芋引印因咽姻員院淫陰飲隠韻右宇羽雨唄鬱畝浦運雲永泳英映栄営詠影鋭衛易疫益液駅悦越謁閲円延沿炎怨宴媛援園煙猿遠鉛塩演縁艶汚王凹央応往押旺欧殴桜翁奥横岡屋億憶臆虞乙俺卸音恩温穏下化火加可仮何花佳価果河苛科架夏家荷華菓貨渦過嫁暇禍靴寡歌箇稼課蚊牙瓦我画芽賀雅餓介回灰会快戒改怪拐悔海界皆械絵開階塊楷解潰壊懐諧貝外劾害崖涯街慨蓋該概骸垣柿各角拡革格核殻郭覚較隔閣確獲嚇穫学岳楽額顎掛潟括活喝渇割葛滑褐轄且株釜鎌刈干刊甘汗缶完肝官冠巻看陥乾勘患貫寒喚堪換敢棺款間閑勧寛幹感漢慣管関歓監緩憾還館環簡観韓艦鑑丸含岸岩玩眼頑顔願企伎危机気岐希忌汽奇祈季紀軌既記起飢鬼帰基寄規亀喜幾揮期棋貴棄毀旗器畿輝機騎技宜偽欺義疑儀戯擬犠議菊吉喫詰却客脚逆虐九久及弓丘旧休吸朽臼求究泣急級糾宮救球給嗅窮牛去',
= '巨居拒拠挙虚許距魚御漁凶共叫狂京享供協況峡挟狭恐恭胸脅強教郷境橋矯鏡競響驚仰暁業凝曲局極玉巾斤均近金菌勤琴筋僅禁緊錦謹襟吟銀区句苦駆具惧愚空偶遇隅串屈掘窟熊繰君訓勲薫軍郡群兄刑形系径茎係型契計恵啓掲渓経蛍敬景軽傾携継詣慶憬稽憩警鶏芸迎鯨隙劇撃激桁欠穴血決結傑潔月犬件見券肩建研県倹兼剣拳軒健険圏堅検嫌献絹遣権憲賢謙鍵繭顕験懸元幻玄言弦限原現舷減源厳己戸古呼固股虎孤弧故枯個庫湖雇誇鼓錮顧五互午呉後娯悟碁語誤護口工公勾孔功巧広甲交光向后好江考行坑孝抗攻更効幸拘肯侯厚恒洪皇紅荒郊香候校耕航貢降高康控梗黄喉慌港硬絞項溝鉱構綱酵稿興衡鋼講購乞号合拷剛傲豪克告谷刻国黒穀酷獄骨駒込頃今困昆恨根婚混痕紺魂墾懇左佐沙査砂唆差詐鎖座挫才再災妻采砕宰栽彩採済祭斎細菜最裁債催',
= '塞歳載際埼在材剤財罪崎作削昨柵索策酢搾錯咲冊札刷刹拶殺察撮擦雑皿三山参桟蚕惨産傘散算酸賛残斬暫士子支止氏仕史司四市矢旨死糸至伺志私使刺始姉枝祉肢姿思指施師恣紙脂視紫詞歯嗣試詩資飼誌雌摯賜諮示字寺次耳自似児事侍治持時滋慈辞磁餌璽鹿式識軸七叱失室疾執湿嫉漆質実芝写社車舎者射捨赦斜煮遮謝邪蛇勺尺借酌釈爵若弱寂手主守朱取狩首殊珠酒腫種趣寿受呪授需儒樹収囚州舟秀周宗拾秋臭修袖終羞習週就衆集愁酬醜蹴襲十汁充住柔重従渋銃獣縦叔祝宿淑粛縮塾熟出述術俊春瞬旬巡盾准殉純循順準潤遵処初所書庶暑署緒諸女如助序叙徐除小升少召匠床抄肖尚招承昇松沼昭宵将消症祥称笑唱商渉章紹訟勝掌晶焼焦硝粧詔証象傷奨照詳彰障憧衝賞償礁鐘上丈冗条状乗城浄剰常情場畳蒸縄壌嬢錠譲醸色拭食植殖飾触嘱織職辱',
= '尻心申伸臣芯身辛侵信津神唇娠振浸真針深紳進森診寝慎新審震薪親人刃仁尽迅甚陣尋腎須図水吹垂炊帥粋衰推酔遂睡穂錘随髄枢崇数据杉裾寸瀬是井世正生成西声制姓征性青斉政星牲省凄逝清盛婿晴勢聖誠精製誓静請整醒税夕斥石赤昔析席脊隻惜戚責跡積績籍切折拙窃接設雪摂節説舌絶千川仙占先宣専泉浅洗染扇栓旋船戦煎羨腺詮践箋銭銑潜線遷選薦繊鮮全前善然禅漸膳繕狙阻祖租素措粗組疎訴塑遡礎双壮早争走奏相荘草送倉捜挿桑巣掃曹曽爽窓創喪痩葬装僧想層総遭槽踪操燥霜騒藻造像増憎蔵贈臓即束足促則息捉速側測俗族属賊続卒率存村孫尊損遜他多汰打妥唾堕惰駄太対体耐待怠胎退帯泰堆袋逮替貸隊滞態戴大代台第題滝宅択沢卓拓託濯諾濁但達脱奪棚誰丹旦担単炭胆探淡短嘆端綻誕鍛団男段断弾暖談壇地池知値恥致遅痴稚置緻竹畜逐蓄築秩窒茶着嫡中仲虫沖宙忠抽注昼柱衷酎鋳駐著貯丁弔庁兆町長挑帳張彫眺釣頂鳥朝脹貼超腸跳徴嘲潮澄調聴懲直',
= '勅捗沈珍朕陳賃鎮追椎墜通痛塚漬坪爪鶴低呈廷弟定底抵邸亭貞帝訂庭逓停偵堤提程艇締諦泥的笛摘滴適敵溺迭哲鉄徹撤天典店点展添転塡田伝殿電斗吐妬徒途都渡塗賭土奴努度怒刀冬灯当投豆東到逃倒凍唐島桃討透党悼盗陶塔搭棟湯痘登答等筒統稲踏糖頭謄藤闘騰同洞胴動堂童道働銅導瞳峠匿特得督徳篤毒独読栃凸突届屯豚頓貪鈍曇丼那奈内梨謎鍋南軟難二尼弐匂肉虹日入乳尿任妊忍認寧熱年念捻粘燃悩納能脳農濃把波派破覇馬婆罵拝杯背肺俳配排敗廃輩売倍梅培陪媒買賠白伯拍泊迫剝舶博薄麦漠縛爆箱箸畑肌八鉢発髪伐抜罰閥反半氾犯帆汎伴判坂阪板版班畔般販斑飯搬煩頒範繁藩晩番蛮盤比皮妃否批彼披肥非卑飛疲秘被悲扉費碑罷避尾眉美備微鼻膝肘匹必泌筆姫百氷表俵票評漂標苗秒病描猫品浜貧賓頻敏瓶不夫父付布扶府怖阜附訃負',
= '赴浮婦符富普腐敷膚賦譜侮武部舞封風伏服副幅復福腹複覆払沸仏物粉紛雰噴墳憤奮分文聞丙平兵併並柄陛閉塀幣弊蔽餅米壁璧癖別蔑片辺返変偏遍編弁辛便勉歩保哺捕補舗母募墓慕暮簿方包芳邦奉宝抱放法泡胞俸倣峰砲崩訪報蜂豊飽褒縫亡乏忙坊妨忘防房肪某冒剖紡望傍帽棒貿貌暴膨謀頰北木朴牧睦僕墨撲没勃堀本奔翻凡盆麻摩磨魔毎妹枚昧埋幕膜枕又末抹万満慢漫未味魅岬密蜜脈妙民眠矛務無夢霧娘名命明迷冥盟銘鳴滅免面綿麺茂模毛妄盲耗猛網目黙門紋問匁冶夜野弥厄役約訳薬躍闇由油喩愉諭輸',
= '癒唯友有勇幽悠郵湧猶裕遊雄誘憂融優与予余誉預幼用羊妖洋要容庸揚揺葉陽溶腰様瘍踊窯養擁謡曜抑沃浴欲翌翼拉裸羅来雷頼絡落酪辣乱卵覧濫藍欄吏利里理痢裏履璃離陸立律慄略柳流留竜粒隆硫侶旅虜慮了両良料涼猟陵量僚領寮療瞭糧力緑林厘倫輪隣臨瑠涙累塁類令礼冷励戻例鈴零霊隷齢麗暦歴列劣烈裂恋連廉練錬呂炉賂路露老労弄郎朗浪廊楼漏籠六録麓論和話賄脇惑枠湾腕',
}
-- Split the selected text into single characters (empty separator).
-- NOTE(review): this expression is unbalanced as written; presumably it chose between an
-- argument-supplied string and one batch of `kj` - restore from the source page.
local x = split((frame.args and frame.args or kj)]), '')
for i, hz in ipairs(x) do
local content = mw.title.new(hz):getContent() or ''
-- Argument part of the page's {{ja-readings}} call.
-- NOTE(review): the capture `(+)` has presumably lost a character class; `jar_ins` is nil
-- when nothing matches, which would make the concatenations below fail - confirm.
local jar_ins = match(content, '{{ja%-readings(+)}}')
-- Regenerate the call by invoking test_13 with the same arguments.
local jar_new = mw.getCurrentFrame():preprocess('{{#invoke:User:Suzukaze-c/02|test_13' .. jar_ins .. '}}')
local jar_old = '{{ja-readings' .. jar_ins .. '}}'
local ok = (jar_old == jar_new)
mw.log('__' .. jar_old .. '__\n__' .. jar_new .. '__\n\n\n')
-- Edit link for the page's Readings section.
local url = export.test_10(content, 'Readings', hz)
if not ok then
-- NOTE(review): an empty string is inserted and `url` is never used; the link text was presumably lost - confirm.
table.insert(out, '')
end
end
return '<span lang="ja" class="plainlinks">' .. table.concat(out, '') .. '</span>'
end
-- Normalises the arguments of a {{ja-readings}} call and re-emits the call
-- with its parameters in a fixed order, followed by a comma-separated list of
-- readings from the jouyou data that were not found in the generated text
-- (per the module header: "process/update {{ja-readings}}").
-- NOTE(review): many patterns and table indexes below look as if bracketed
-- text was stripped from them; confirm against the source page before
-- relying on any individual step.
function export.test_13(frame)
local a = frame.args
local hz = mw.title.getCurrentTitle().text
-- NOTE(review): test_17 iterates this same `.yomi` table as kanji -> {reading -> type};
-- here it is iterated as reading -> type, so presumably it was indexed by `hz` - confirm.
local yomi = mw.loadData('Module:ja/data/jouyou-yomi').yomi
local out = {}
-- Order in which the parameters are written out.
local order = {'goon', 'kanon', 'toon', 'kanyoon', 'soon', 'on', 'kun', 'nanori'}
local processed = {}
local missing = {}
-- Note: the loop variable `type` shadows the built-in `type` function inside the loop.
for type, text in pairs(a) do
-- Strip annotations and markup, reducing the value to a plain list of readings.
-- NOTE(review): several of the patterns below are unbalanced or have empty classes as written.
text = replace(text, '%s*%(%)', '')
text = replace(text, '{{non%-joyo%-reading}}%s*', '')
text = replace(text, '%s*{{q*|non%-%%] reading}}', '')
-- A historical-spelling qualifier is turned into the '<' marker.
text = replace(text, ',%s*{{q*|historical}}', '<')
text = replace(text, '%+)%]%]', '')
text = replace(text, '{{+(' .. hz .. '+)}}', '')
-- For piped links, keep the second capture and turn '.' into '-'.
text = replace(text, '%|]+)|(|]+)%]%]', function(a,b)
return replace(b, '%.', '-')
end
)
-- '@' is used as a temporary separator between readings.
text = replace(text, '()%.()', '@')
text = replace(text, '+', '@')
text = replace(text, '(@*)(@*)', '<') -- or something. also, the > is intentional (some entries indeed have backwards arrows)
-- Trim separators at both ends, then turn the remaining ones into ', '.
text = replace(text, '^@', '')
text = replace(text, '@$', '')
text = replace(text, '@', ', ')
-- Hyphens are kept only for kun readings.
if type ~= 'kun' then
text = replace(text, '%-', '')
end
-- NOTE(review): this replaces the whole `processed` table with a string; presumably the
-- value was stored under the key `type` - confirm.
processed = text
end
table.insert(out, '{{ja-readings')
for _, type in ipairs(order) do
-- NOTE(review): presumably a per-key lookup in `processed` here and on the next line - confirm.
if processed then
table.insert(out, '|'..type..'='..processed)
end
end
table.insert(out, '}}')
out = table.concat(out, '\n')
if yomi then
for reading, type in pairs(yomi) do
-- Escape the reading so it can be embedded in a pattern.
reading = require('Module:string utilities').pattern_escape(reading)
-- Readings of type 1 or 3 are passed through kata_to_hira before the search.
if type == 1 or type == 3 then
reading = require('Module:ja').kata_to_hira(reading)
end
-- NOTE(review): the two empty strings around `reading` presumably held delimiter classes - confirm.
if not (
find(out, '' .. reading .. '')
--[[
or
find(out, '=' .. reading .. '')
or
find(out, '' .. reading .. '\n')
or
find(out, '=' .. reading .. '\n')
]]
) then
table.insert(missing, reading)
end
end
end
missing = table.concat(missing, ', ') -- hastemplate:ja-readings insource:/\{\{ja-readings+\}\}/
-- '%' characters are removed from the list, presumably to undo the pattern escaping above.
return out .. replace(missing, '%%', '')
end
--- Build a sortable wikitable that lists, for every entry named in a pasted
-- category listing, the argument text of its first {{attention}} / {{attn}}
-- template.
-- @param frame  Scribunto frame; the first positional argument is the pasted
--               listing, which is handed to export.test_18 to extract names.
-- @return wikitext of the table (string).
--
-- Reviewer notes on what changed relative to the damaged original:
--  * `entries` is the plain table returned by export.test_18, so it has to be
--    walked with ipairs(); a generic `for entry in entries` tries to call the
--    table and raises an error.
--  * `frame.args` is a table, not the pasted text; the first positional
--    argument is used.
--  * the capture in the two template patterns and the link in the row header
--    had lost their bracketed parts and are restored here.
--    NOTE(review): `[^}]+` and `[[entry]]` are reconstructions - confirm
--    against the source page.
function export.test_14(frame)
	local text = frame.args[1]
	local entries = export.test_18(text)
	local rows = {}
	table.insert(rows, '{| class="wikitable sortable"')
	table.insert(rows, '! - || -')
	for _, entry in ipairs(entries) do
		-- mw.title.new returns nil for an invalid title; treat that like a missing page.
		local title = mw.title.new(entry)
		local content = title and title:getContent() or ''
		local attention = match(content, '{{attention|([^}]+)}}')
			or match(content, '{{attn|([^}]+)}}')
			or '<mark>?</mark>'
		-- Pipes inside the template arguments would break the table cell, so swap them for '¦'.
		local line = '![[' .. entry .. ']]\n|' .. replace(attention, '|', '¦')
		table.insert(rows, line)
	end
	table.insert(rows, '|}')
	return table.concat(rows, '\n|-\n')
end
-- ]
--- Wrap `data` in a <mapframe> extension tag (Kartographer test helper).
-- `data` is serialised with Module:Json and used as the tag content; the map
-- is rendered with a fixed size, zoom level and centre.
-- @param data  value to serialise as the tag content
-- @return whatever frame:extensionTag returns for the tag
function export.make_map(data)
	local frame = mw.getCurrentFrame()
	local encode = require("Module:Json").jsonValueFromValue
	-- Fixed view settings for the frame.
	local tag_args = {
		width = 1000,
		height = 800,
		zoom = 4,
		latitude = 30,
		longitude = 105,
		frameless = true,
		align = "center",
	}
	return frame:extensionTag({
		name = "mapframe",
		content = encode(data),
		args = tag_args,
	})
end
--- Sample input for export.make_map: a list holding a single point feature.
-- @return a new table on every call
function export.map_test_data()
	local point = {
		type = "Feature",
		geometry = {
			type = "Point",
			coordinates = { 114, 23 },
		},
		properties = {
			title = "Kwangtung",
			fill = "#07c63e",
		},
	}
	return { point }
end
-- For each kanji in a hard-coded list, reads the |kun= parameter from the
-- kanji's page and, when any of its readings ends in '-', renders a
-- {{ja-readings}} call containing only those readings (per the module header:
-- "kun readings that may be a verb with a '-' in the wrong place").
-- Returns the rendered sections joined by spaces.
-- NOTE(review): the |kun= capture and the section-heading literal look as if
-- bracketed text was stripped from them; confirm against the source page.
function export.test_16(frame)
-- First batch of kanji, split into single characters (empty separator).
local a1 = split('豆斧魚鋸晶狼箆泌達目劣属主履絛症狭獺香沿用草除鴻章湿膚新神宙浮欽抵参六販酪猿酢西狐艦鷴蜃鶺老緑兼星鹿楔腸鼠匹殉嬢法十拳娘奴卵脱円塾私烏鳥僚漢絆嘘豚刈俺親編抗貴二酔海体以抒友傘合相大總总訣鬪鮫女岩牛刀泉水竜鞭雷羊茸獣心妃霧敵王亀青蠍蛇蝮花鏡嘴口茨冠鎌鎖觜喙鰐森石屍沌翼蕉芭懽歓蝗嵐腕紙狗猪莨烟蜱曳苣萵祥欝蝨虱飯障坂蝶鋲癬鸛燕糞鬆癇癲鼈託鯰徳德商料煙接嚔府戯叔蚣蛺椒胡颪易貿鵆鴟鵂鶹鵩炬鞋藁沓誕曼鰰鰤鳶虫兜澄黽飴哥蟇雛斑樺那支藍奇蹄駒楼嫁徨彷喰郭囃炎蘇蒻重菎蒟酒麦弾陸光組犀過僕氈蕈単婦婚複貝鵡鸚樹台燭辞指臨', '')
-- Second batch; it is built but not used anywhere below.
local a2 = split('再村烧燒誓尸変縄鵜国公後午爵氷弟家篇餅蝿帳几弁鵲鰌泥哈土艮蛍勝君長集現貘魄刃葉子姫隼妻手敬莓苺肩爺致鯢束約工夢獏打奕語粤朏湾之詰湖瓶死隕蛭産揚寺楽雅素痛頭前度知風邪売卸箱清魂準標鍵無軸枢団教宗術美聖人成鶚身化牝雌酸硫亜亞帽高雉梟客刺義超鋼羽羆狒猴剣廻輪彦車灯提赤明名南北東田陽林川悪空時鯨血地年天闇黒白熊蛙狸鶏小猫犬金木月日男九八七五三一四秋冬徴次魔髄割先氏彼流寓箸夫丈母婆父姉兄農乳然万千百廿仮分至鯱巨沖洞唇鶩半点濁諺薬缶原雨畳誌雑瞳溶狹陿囘回徊黥史樋秩鵐潤由考族傷聡左右彙猬蝟悁捐涓貫慣溝鼷妹強味棒閏鶉鰯似税審会幻喃哪劫永野干瓜蕃舘古醤声柔粥蝋折栓蛾想精椰燃謎密塔饗歌状堅塁蚤旧鴫鷸戌納紐祕躄叭錐顏繪拉費琰未島山沼癸壬辛庚己甲丙丁乙戊卯亥酉申寅辰丑巳虎兎上虜良遍磯秒育黍雁擂鈴杏醋已蛼靨燧梨刻彫炉艫鶴馭幼蛬蠅鮭蓄碇錨釈粒肝恐凰鯧俎社魁雀称染聯隊菱斗灸可眠宅梢柘嚢異椅鎮都裟袈覧拇鵥痘鵄瀝歴紫昇欱規楓顕掃冷尊診淫卓蜂穿館駆文也眉髟豬貍第鯛講蝌訓鮮雹驎騏洋疑吃符嘆茘函鷽弗鏨鑢資捕睦勒隷堕黴痢痳淋争菫疱甚瞬鰹鱓魳魣克簀葭葦瀬吉好渡谷橘篠貞立迎浦桂諫崎臘真鶸鵟鸇鶻鷲鷹挑燈條鳧鴇鴨康鴦鴛昏郎責力張旦面睛螢翆翠翡爆錦鉛沃銀砒銅鉄珪硼窒枘坊裴齣湯銭形罔魎魍莫厨雎龍侍観孫殺聚又鈷槃涅戈戟鋒鉾矛刹祠叢試模肛惚繭摸韆鞦計鍛盗姥瘴儡傀珀琥珊桔穴師薩菩乎愚舵梶牽織袍褞歇浴倶夏春摩獾房俗腱帯派鵝鵞選豹哲橽芃隈余積差和被憲拡卿朕黙薇炭紀識僧方腹駝駱改導綱消羚得鱇鮟瑁瑇玳澹憺啗亶膽耽鄲丹擔戴貸帶詒逮隶瑳磋忖焠淬倅毳萃衰竄趣徂粗從聰朿賜此辭蜯倍堋啜脣淳純捶吹牀撞窻喘椽傳俶觸黜畜杵處芻廚出臭讎愁疇綢稠抽寵崇蟲衝憃舂熾斥翅齒侈哆恥弛馳踟池遲坻螭魑逞騁乘丞承盛呈懲稱瞠晨忱瞋唱悵鬯倡敞裳嘗諂單纏躔蝉廛豺儕察槎插曾測廁册曹操藏倉粲蠶殘慙驂菜蔡彩采簿卜補哺播簸跛薄帛撥番枋並秉稟禀冰表變諞卞辯釆貶邊畢必蹕庇璧蔽臂閉嬖斃幣敝婢辟鄙逼崩奔悖孛杯卑碑陂報瀑抱暴保寶褒胞傍蚌邦辦阪頒拜稗敗霸罷茇奧澳媼熬岸案按鞍艾愛隘埃榜卬端橋轤轆置引普靫犯詞済角震免態毒剖科博振本俄棍柴倫鯆甸勃剌伯賓律匈威抹班瑞坡墺印爾阿陀蘭転濯洗錬副領統軟背反頂両放側擦裂拗便宝磨蝎秤処獅牡蜍蟾駃夬是騠騾驢縞袋蒼鷺鵠紋聞瓦運線幹螟鴿鳩恋量探連宇針荒鵯栗蟀蟋螽斯蝣蜉津蛉蜻鵇禿鳴啼麋双娑訳叉夜羅弥冉耶瓊呂芸薙迦智軻弉伊佐翁利追了最始開欲韓制造創速薔続喚融通式備果能召謀暗壁鬼鉱罠行飛蛛蜘蝓蛞殿民貂兔者忍麟猟馬門漠蟹築射儀秘雪説策類爬昆使座耗落洒駄書中怪繁妖樂市加寿闍毘比餡巴奈独注縦横紅獱帥', '')
local x = {}
for i, hz in ipairs(a1) do
local content = mw.title.new(hz):getContent() or ''
-- Value of the |kun= parameter on the page.
-- NOTE(review): the capture `(+)` has presumably lost its character class - confirm.
local r = match(content, '|kun=(+)')
if r then
local rsplit = split(r, ', *')
local rnew = {}
-- Keep only the readings that end in '-'.
for _, y in ipairs(rsplit) do
if match(y, '%-$') then
table.insert(rnew, y)
end
end
if #rnew > 0 then
r = '{{ja-readings|kun=' .. table.concat(rnew, ', ') .. '|pagename=' .. hz .. '}}'
-- NOTE(review): the heading literal contains a stray ']' and no kanji; presumably it linked to `hz` - confirm.
table.insert(x, '\n==]==\n' .. frame:preprocess(r))
end
else
-- Pages without a |kun= parameter are only logged.
mw.log(hz)
end
end
return table.concat(x, ' ')
end
-- Walks the jouyou reading data and, for every reading of type 2, builds a
-- link for the kanji+okurigana spelling and its kana reading, decorated
-- according to what is found on the two pages (per the module header: "look
-- for missing japanese entries, based on jouyou kanji data").
-- The first five elements of `ret` act as a legend for the decorations.
-- Returns all items concatenated, followed by a TemplateStyles tag.
-- NOTE(review): the link format string and several patterns below look as if
-- bracketed text was stripped from them; confirm against the source page.
function export.test_17()
local data = mw.loadData('Module:ja/data/jouyou-yomi').yomi
local ret = {
'<mark>red link</mark>',
'<del>]</del>',
'<u>missing ja-kanjitab alt or ja-see</u>',
'<i>reading not found on kanji-spelling entry</i>',
'<b>ja-see to kanji on on kanji-spelling entry</b>',
}
-- `z` is the key of the outer table and `ys` maps each reading `y` to a numeric type.
-- Note: the loop variable `type` shadows the built-in `type` function inside the inner loop.
for z, ys in pairs(data) do
local t = ''
for y, type in pairs(ys) do
if type == 1 or type == 3 then
-- TODO
elseif type == 4 then
-- rare kun'yomi. do not do anything for now.
elseif type == 2 then
-- Split the reading at its last '-' into the part before it and the part after it.
-- NOTE(review): both captures are nil when `y` contains no '-', which would make the
-- string.format calls below fail - confirm that type-2 readings always contain one.
local yomiG, okuriG = string.match(y, '^(.+)%-(.*)$')
local o = string.format('%s%s', z, okuriG)
local oRubied = string.format('<ruby>%s<rt>%s</rt></ruby>%s', z, yomiG, okuriG)
local yStripped = string.gsub(y, '%-', '')
-- NOTE(review): three values are passed but the format string contains no %s; the link
-- markup was presumably stripped - confirm.
local link = string.format(']<sub>]</sub>', o, oRubied, yStripped)
local oC = mw.title.new(o):getContent()
local yC = mw.title.new(yStripped):getContent()
if not oC or not yC then
-- At least one of the two pages has no content.
link = '<mark>' .. link .. '</mark>'
else
-- Ignore the {{ja-readings}} call when searching the kanji-spelling page.
-- NOTE(review): this and the next three patterns appear to have lost a character class each - confirm.
oC = string.gsub(oC, '%{%{ja%-readings+%}%}', '')
local oFound = string.match(oC, '%{%{ja%-see%|' .. yStripped)
local oFoundSee = string.match(oC, '%{%{ja%-see%|(*)')
local yFound = string.match(yC, '%{%{ja%-kanjitab%|*' .. o)
-- NOTE(review): the inner gsub has an empty pattern as written - confirm what it was meant to remove.
local oFoundReading = string.match(string.gsub(oC, '', ''), yStripped)
if oFound and yFound then
link = '<del>' .. link .. '</del>'
elseif oFound or yFound then
link = '<u>' .. link .. '</u>'
end
if not oFoundReading then
link = '<i>' .. link .. '</i>'
end
-- NOTE(review): the pattern here is empty as written (presumably a kanji character class),
-- and the emitted markup opens <sup> twice instead of closing it - confirm both.
if oFoundSee and match(oFoundSee, '') then
link = link .. '<b><sup>' .. oFoundSee .. '<sup></b>'
end
end
t = t .. link
end
end
table.insert(ret, t)
end
return table.concat(ret) .. require("Module:TemplateStyles")("Template:sandbox/styles.css")
end
--- Collect the captures of a fixed pattern from pasted text.
-- The module header describes the input as a copy-paste of a Category: page.
-- @param text  string to scan
-- @return array of captured strings, in order of appearance
function export.test_18(text)
	local found = {}
	-- Earlier extraction approach, kept disabled:
	--[[
	text = replace(text, ".+are in this category, out of %d+ total%.%s+", "")
	text = replace(text, "\nCategory:.+", "")
	for entry in itermatch(text, " +(+)") do
	table.insert(entries, entry)
	end
	]]
	local next_capture = itermatch(text, ' (+)')
	for capture in next_capture do
		found[#found + 1] = capture
	end
	return found
end
-- For every entry named in the pasted text, expands each {{zh-see}} and
-- {{zh-forms}} call found on the entry's page and returns the expansions
-- grouped per entry: all zh-see output first, then all zh-forms output.
-- NOTE(review): the argument lookup, both template patterns and the two
-- heading literals look as if bracketed text was stripped from them; confirm
-- against the source page.
function export.test_19(frame)
-- NOTE(review): presumably a single positional argument, not the whole args table - confirm.
local text = frame.args
local outZhSee = {}
local outZhForms = {}
local entries = export.test_18(text)
for _, entry in ipairs(entries) do
local content = mw.title.new(entry):getContent() or ''
local zhSees = {}
local zhFormss = {}
-- NOTE(review): in this and the zh-forms pattern below, the `*` presumably followed a character class that is now missing - confirm.
for template in itermatch(content, "(%{%{zh%-see*%}%})") do
template = mw.getCurrentFrame():preprocess(template)
table.insert(zhSees, template)
end
for template in itermatch(content, "(%{%{zh%-forms*%}%})") do
-- Append |t=<entry> to the call before expanding it.
template = replace(template, '%}%}', '|t=' .. entry .. '}}')
template = mw.getCurrentFrame():preprocess(template)
table.insert(zhFormss, template)
end
if #zhSees ~= 0 then
-- NOTE(review): this heading contains no entry name, and neither does the zh-forms one below; presumably a link to `entry` was lost - confirm.
table.insert(outZhSee, "= =\n")
for _, template in ipairs(zhSees) do
table.insert(outZhSee, template .. "\n")
end
end
if #zhFormss ~= 0 then
table.insert(outZhForms, "= =\n")
for _, template in ipairs(zhFormss) do
table.insert(outZhForms, template .. "\n")
end
-- Clear floats after each entry's zh-forms output.
table.insert(outZhForms, "<div style='clear:both'></div>\n")
end
end
return table.concat(outZhSee) .. "\n" .. table.concat(outZhForms)
end
return export