-- Documentation for this module can be created at Modul:ja/dok /test
local export = {}

-- note that arrays loaded by mw.loadData cannot be directly used by mw.ustring.gsub
local data = mw.loadData("Module:ja/data")

-- Re-export a fixed set of fields of the loaded data table under the same names.
export.data = {}
for _, field in ipairs({
	"joyo_kanji",
	"jinmeiyo_kanji",
	"grade1",
	"grade2",
	"grade3",
	"grade4",
	"grade5",
	"grade6",
}) do
	export.data[field] = data[field]
end
--- Replaces every character of `text` that has an entry in `data.rd` with that entry.
-- Characters without an entry are left unchanged.
-- (Presumably `data.rd` maps romaji letters with diacritics to plain letters -- confirm
-- against Module:ja/data.)
-- @param text a string, or a frame-like table whose first positional argument is the string
-- @return the converted string (gsub's replacement count is dropped by the parentheses)
function export.romaji_dediacritics(text)
	if type(text) == "table" then
		-- called with a frame: the text is the first positional argument, not the args table
		text = text.args[1]
	end
	-- look up each character individually; returning the whole table makes gsub raise an error
	return (mw.ustring.gsub(text, '.', function (char) return data.rd[char] or char end))
end
--- Replaces every character of `text` that has an entry in `data.hk` with that entry.
-- Characters without an entry are left unchanged.
-- (Presumably `data.hk` maps hiragana to katakana -- confirm against Module:ja/data.)
-- @param text a string, or a frame-like table whose first positional argument is the string
-- @return the converted string (gsub's replacement count is dropped by the parentheses)
function export.hira_to_kata(text)
	if type(text) == "table" then
		-- called with a frame: the text is the first positional argument, not the args table
		text = text.args[1]
	end
	-- look up each character individually; returning the whole table makes gsub raise an error
	return (mw.ustring.gsub(text, '.', function (char) return data.hk[char] or char end))
end
--- Replaces every character of `text` that has an entry in `data.kh` with that entry.
-- Characters without an entry are left unchanged.
-- (Presumably `data.kh` maps katakana to hiragana -- confirm against Module:ja/data.)
-- @param text a string, or a frame-like table whose first positional argument is the string
-- @return the converted string (gsub's replacement count is dropped by the parentheses)
function export.kata_to_hira(text)
	if type(text) == "table" then
		-- called with a frame: the text is the first positional argument, not the args table
		text = text.args[1]
	end
	-- look up each character individually; returning the whole table makes gsub raise an error
	return (mw.ustring.gsub(text, '.', function (char) return data.kh[char] or char end))
end
-- Romanizes kana text by running a fixed sequence of mw.ustring.gsub passes over it.
-- Parameters:
--   text     a string, or a table (treated as a frame) from which `args` is read
--   nodiacr  when truthy, the doubled-vowel -> macron replacements are skipped
--   keepdot  when truthy, ASCII periods are kept and "。" is not converted to ". "
-- Returns the romanized string.
-- NOTE(review): several patterns and lookups in this function look truncated ("()" captures
-- with nothing inside, `data.hk` / `data.kr` used without an index, `text.args` used without
-- a key). Bracketed parts appear to have been lost -- confirm against the upstream module
-- before relying on this code.
function export.kana_to_romaji(text, nodiacr, keepdot)
if type(text) == "table" then -- assume a frame.
-- NOTE(review): both variables receive the whole args table; presumably two different
-- positional arguments were intended -- confirm.
text, nodiacr = text.args, text.args
end
-- local kr_minus_period = data.kr
-- kr_minus_period = "。"
-- convert Japanese spaces to western spaces
-- NOTE(review): as written both string literals look like the same ASCII space, which
-- makes this a no-op; the first was presumably a full-width space -- confirm.
text = mw.ustring.gsub(text, ' ', ' ')
text = mw.ustring.gsub(text, 'っ%%', 'っ')
text = mw.ustring.gsub(text, 'ッ%%', 'ッ')
-- if there is a は separated by halfwidth spaces, romanize it as " wa "
text = mw.ustring.gsub(text, ' は ', ' wa ')
-- also if it follows a space and is the last character, e.g. それでは
text = mw.ustring.gsub(text, ' は$', ' wa')
-- or " は、"
text = mw.ustring.gsub(text, ' は、', ' wa,')
-- or " は。"
text = mw.ustring.gsub(text, ' は。', ' wa. ')
-- NOTE(review): an ASCII "?" is a pattern quantifier, so this matches a space optionally
-- followed by は; an unescaped ")" without "(" is not a valid Lua pattern. These were
-- presumably full-width punctuation originally -- confirm. The same applies to the
-- では and へ variants below.
text = mw.ustring.gsub(text, ' は?', ' wa? ')
text = mw.ustring.gsub(text, ' は)', ' wa)')
-- or " '''は''' "
text = mw.ustring.gsub(text, " '''は''' ", " '''wa''' ")
-- romanize では as "dewa"
text = mw.ustring.gsub(text, ' では ', ' dewa ')
text = mw.ustring.gsub(text, ' では$', ' dewa')
text = mw.ustring.gsub(text, ' では、', ' dewa,')
text = mw.ustring.gsub(text, ' では。', ' dewa. ')
text = mw.ustring.gsub(text, ' では?', ' dewa? ')
text = mw.ustring.gsub(text, ' では)', ' dewa)')
text = mw.ustring.gsub(text, "'''では'''", "'''dewa'''")
-- romanize で は (with space) as "de wa"
text = mw.ustring.gsub(text, "'''で は'''", "'''de wa'''")
-- same sort of thing for へ
text = mw.ustring.gsub(text, ' へ ', ' e ')
text = mw.ustring.gsub(text, " '''へ''' ", " '''e''' ")
text = mw.ustring.gsub(text, ' へ$', ' e')
text = mw.ustring.gsub(text, ' へ、', ' e,')
text = mw.ustring.gsub(text, ' へ。', ' e. ')
text = mw.ustring.gsub(text, ' へ?', ' e?')
text = mw.ustring.gsub(text, ' へ)', ' e)')
-- dangling small tsu is romanized as nothing
text = mw.ustring.gsub(text, 'ッ。', '。')
text = mw.ustring.gsub(text, 'ッ!', '!')
text = mw.ustring.gsub(text, 'ッ」', '」')
text = mw.ustring.gsub(text, 'ッ、', '、')
-- ゝ means "repeat the previous character" and is used with hiragana, like 々 is for kanji
-- TODO: do same sort of thing for ゞ
text = mw.ustring.gsub(text, '(.)ゝ', '%1%1')
-- romanising ヶ
text = mw.ustring.gsub(text, 'ヶげつ', 'kagetsu')
text = mw.ustring.gsub(text, 'ヶ', 'ga')
-- convert hiragana to katakana
-- NOTE(review): the callback returns the table `data.hk` itself (the `or char` part is
-- unreachable); a per-character lookup was presumably intended -- confirm.
text = mw.ustring.gsub(text, '.', function (char) return data.hk or char end)
-- replace katakana with romaji (?? not sure what the pattern below does ??)
-- this is hackish, but we're using the period to indicate morpheme boundaries to prevent macrons
-- from forming across them, so we'll remove the ASCII periods used for markup but not the Japanese periods
-- convert the Japanese periods at the end
--table.remove(kr_minus_period, "。")
-- NOTE(review): as written the pattern is "optional any character, then optional ェ", and the
-- callback returns the table `data.kr` itself; both look truncated -- confirm.
text = mw.ustring.gsub(text, '.?ェ?',
function (char)
if char == "。" then
return char
else
return data.kr or char
end
end)
-- replace long vowel mark with the vowel that comes before
-- NOTE(review): "()" is a position capture, so %1 is a number here, not the preceding
-- vowel; a character class inside the parentheses appears to be missing -- confirm.
text = mw.ustring.gsub(text, '()ー', '%1%1')
-- add vowels with diacritics
if not nodiacr then
text = mw.ustring.gsub(text, 'oo', 'ō')
text = mw.ustring.gsub(text, 'aa', 'ā')
text = mw.ustring.gsub(text, 'ee', 'ē')
text = mw.ustring.gsub(text, 'ou', 'ō')
text = mw.ustring.gsub(text, 'uu', 'ū')
text = mw.ustring.gsub(text, 'ii', 'ī')
end
if not keepdot then
-- if input had spaces, keep them
-- if the input string had periods, then remove them now
text = mw.ustring.gsub(text, '%.', '')
-- now that markup periods are gone, convert the Japanese periods to western periods
text = mw.ustring.gsub(text, "。", ". ")
end
-- romanize sokuon or geminate consonants
-- text = mw.ustring.gsub(text, '^ッ', '')
-- double the previous consonant letter if there is a small tsu
-- NOTE(review): "()" is a position capture in both patterns below; a consonant class
-- appears to be missing -- confirm.
text = mw.ustring.gsub(text, 'ッ()', '%1%1')
text = mw.ustring.gsub(text, 'ッ\'\'\'()', '%1\'\'\'%1')
-- replace ッc with tc
text = mw.ustring.gsub(text, 'ッc', 'tc')
text = mw.ustring.gsub(text, 'ッ\'\'\'c', 't\'\'\'c')
-- if small tsu comes at the end, just throw it away
text = mw.ustring.gsub(text, 'ッ$', '')
-- the @ is used to determine when to insert an apostrophe after ん or ン
-- (all is kata at that point)
-- insert apostrophe when ン is followed by a vowel or
-- y, which corresponds to the cases んや (n'ya) んゆ (n'yu) and んよ (n'yo)
-- NOTE(review): "()" is a position capture; the vowel/y class appears to be missing -- confirm.
text = mw.ustring.gsub(text, "@()", "'%1")
-- remove @
text = mw.ustring.gsub(text, "@", "")
-- capitalize any letter following a ^ symbol
-- (the whole match, including the "^", is passed to mw.ustring.upper; the "^" is removed next)
text = mw.ustring.gsub(text, "%^%l", mw.ustring.upper)
-- remove ^
text = mw.ustring.gsub(text, "%^", "")
-- remove %
text = mw.ustring.gsub(text, '%%', '')
return text
end
-- removes spaces, hyphens, ASCII periods and apostrophes from the input
-- intended to be used when checking manual romaji to allow the
-- insertion of spaces or hyphens in manual romaji without appearing "wrong"
-- @param f frame-like table; the text is read from its first positional argument
-- @return the text with those characters removed
function export.rm_spaces_hyphens(f)
	-- the args table itself is not a string; take the first positional argument
	local text = f.args[1]
	text = mw.ustring.gsub(text, ' ', '')
	-- "-" is a pattern quantifier, so escape it to make the literal match explicit
	text = mw.ustring.gsub(text, '%-', '')
	text = mw.ustring.gsub(text, '%.', '')
	-- NOTE(review): this repeats the plain-space removal above; it presumably targeted a
	-- different space character (e.g. full-width) originally -- confirm.
	text = mw.ustring.gsub(text, ' ', '')
	text = mw.ustring.gsub(text, '\'', '')
	return text
end
-- Converts romaji text (read from `f.args`) to katakana through a sequence of
-- mw.ustring.gsub passes: a `data.rd` pass, doubled consonants -> ッ + consonant,
-- digraph syllables, a `data.rk` pass, and finally the bare vowels.
-- NOTE(review): this block is not valid Lua as shown. The table constructors on the
-- "ts" / "sh" / "ch" / "n?" lines have "=" entries with no keys, the pattern '?' has no
-- item to quantify, `f.args` and `data.rd` / `data.rk` are used without an index, and the
-- "ts" / "sh" / "ch" patterns probably lost a trailing vowel class. Restore from upstream.
function export.romaji_to_kata(f)
local text = f.args
text = mw.ustring.gsub(text, '.', function (char) return data.rd or char end)
-- doubled consonant -> small tsu followed by the single consonant
text = mw.ustring.gsub(text, 'kk', 'ッk')
text = mw.ustring.gsub(text, 'ss', 'ッs')
text = mw.ustring.gsub(text, 'tt', 'ッt')
text = mw.ustring.gsub(text, 'pp', 'ッp')
text = mw.ustring.gsub(text, 'bb', 'ッb')
text = mw.ustring.gsub(text, 'dd', 'ッd')
text = mw.ustring.gsub(text, 'gg', 'ッg')
text = mw.ustring.gsub(text, 'jj', 'ッj')
text = mw.ustring.gsub(text, 'tc', 'ッc')
text = mw.ustring.gsub(text, 'tsyu', 'ツュ')
text = mw.ustring.gsub(text, 'ts', {='ツ',='ツォ',='ツィ',='ツェ',='ツァ'})
text = mw.ustring.gsub(text, 'sh', {='シュ',='ショ',='シ',='シェ',='シャ'})
text = mw.ustring.gsub(text, 'ch', {='チュ',='チョ',='チ',='チェ',='チャ'})
text = mw.ustring.gsub(text, "n?", {='ヌ',='ノ',='ニ',='ネ',='ナ',='ン',='ン'})
text = mw.ustring.gsub(text, '?', function (char) return data.rk or char end)
-- any vowels still left as Latin letters become standalone katakana vowels
text = mw.ustring.gsub(text, 'u', 'ウ')
text = mw.ustring.gsub(text, 'o', 'オ')
text = mw.ustring.gsub(text, 'i', 'イ')
text = mw.ustring.gsub(text, 'e', 'エ')
text = mw.ustring.gsub(text, 'a', 'ア')
return text
end
-- expects: any mix of kanji and kana
-- determines the script types used
-- e.g. given イギリス人, it returns Kana+Hani
-- Accepts a plain string, or a frame-like table whose first positional argument is the
-- string. Returns the names of all script tests that matched, joined with "+".
-- NOTE(review): every pattern below is an empty string, which matches any input, so all
-- six names are currently always reported. The character classes appear to have been lost
-- and must be restored from the upstream module.
function export.script(f)
	-- these used to be assigned as globals; keep them local so calls cannot leak state
	local text = type(f) == 'table' and f.args[1] or f
	local script = {}
	if mw.ustring.match(text, '') then
		table.insert(script, 'Hira')
	end
	-- TODO: there are two kanas. This should insert Kata.
	if mw.ustring.match(text, '') then
		table.insert(script, 'Kana')
	end
	-- 一 is unicode 4e00, previously used 丁 is 4e01
	if mw.ustring.match(text, '') then
		table.insert(script, 'Hani')
	end
	-- matching %a should have worked but matched the end of every string
	if mw.ustring.match(text, '') then
		table.insert(script, 'Romaji')
	end
	if mw.ustring.match(text, '') then
		table.insert(script, 'Number')
	end
	if mw.ustring.match(text, '') then
		table.insert(script, 'Abbreviation')
	end
	return table.concat(script, '+')
end
-- accepts the entry name (first positional argument of the frame), maps every character
-- through data.ky and then data.hy, removes spaces, and returns '{{ja-kanjitab}}' when
-- anything is left, or '' otherwise
-- (presumably data.ky / data.hy map katakana / hiragana to '' so that only kanji remain --
-- confirm against Module:ja/data)
-- the per-kanji parameters are currently not emitted; that loop is commented out below
function export.extract_kanji(f)
	-- the args table itself is not a string; take the first positional argument
	local text = f.args[1]
	local result = ''
	-- look up each character individually; returning the whole table makes gsub raise an error
	text = mw.ustring.gsub(text, ".", function (char) return data.ky[char] or char end)
	text = mw.ustring.gsub(text, ".", function (char) return data.hy[char] or char end)
	text = mw.ustring.gsub(text,' ','')
	if text ~= '' then
		result = '{{ja-kanjitab'
		-- for i = 1, mw.ustring.len(text), 1 do
		-- char = mw.ustring.sub(text,i,i)
		-- result = (result .. '|' .. char)
		-- end
		result = (result .. '}}')
	end
	return result
end
-- returns the number of kanji in this term
-- @param f frame-like table; the term is read from its first positional argument
-- @return the count as a number
function export.count_kanji(f)
	-- the args table itself is not a string; take the first positional argument
	local text = f.args[1]
	local char = mw.ustring.char
	-- See w:Template:CJK_ideographs_in_Unicode (U+3400 - U+4DB5, U+4E00 - U+9FCC,
	-- U+F900 - U+FAD9, U+20000 - U+2FA1F). The ranges are built from code points so that
	-- Unicode normalisation of this source cannot alter them.
	local kanji_ranges =
		char(0x3400) .. '-' .. char(0x4DB5) ..
		char(0x4E00) .. '-' .. char(0x9FCC) ..
		char(0xF900) .. '-' .. char(0xFAD9) ..
		char(0x20000) .. '-' .. char(0x2FA1F)
	-- replace 時々 with 時時 (an empty "()" would capture a position, not the kanji)
	text = mw.ustring.gsub(text, '([' .. kanji_ranges .. '])々', '%1%1')
	-- drop everything that is not a kanji, then count what is left
	text = mw.ustring.gsub(text, '[^' .. kanji_ranges .. ']', '')
	return mw.ustring.len(text)
end
-- used within other functions but >> no longer necessary <<
-- returns a hidx-style hiragana sort key attached to |hidx=,
-- e.g. |hidx=はつぐん' when given ばつぐん
-- @param f frame-like table; the term is read from its first positional argument
-- @return '|hidx=' followed by the key, or '' when the first character matches neither
--         data.dakuten nor data.handakuten and the data.ky pass does not empty the term
-- (presumably data.ky maps katakana to '', data.kh katakana to hiragana, data.dakuten /
-- data.handakuten voiced / semi-voiced kana to '', and data.tenconv those kana to their
-- plain form -- confirm against Module:ja/data)
function export.hidx(f)
	-- the args table itself is not a string; take the first positional argument
	local text = f.args[1]
	local prefix = '|' .. 'hidx' .. '='

	-- builds a gsub callback that looks one character up in `tbl`, keeping it when absent
	-- (returning the table itself from the callback would make gsub raise an error)
	local function lookup(tbl)
		return function (char) return tbl[char] or char end
	end

	-- true when the data.ky pass leaves nothing of the original text
	local emptied_by_ky = mw.ustring.gsub(text, '.', lookup(data.ky)) == ''

	text = mw.ustring.gsub(text, '.', lookup(data.kh))
	local first = mw.ustring.sub(text, 1, 1)
	local rest = mw.ustring.sub(text, 2, mw.ustring.len(text))

	if mw.ustring.gsub(first, '.', lookup(data.dakuten)) == '' then
		-- first character is in data.dakuten: convert it and mark the key with one apostrophe
		local converted = mw.ustring.gsub(first, '.', lookup(data.tenconv))
		return prefix .. converted .. rest .. "'"
	end
	if mw.ustring.gsub(first, '.', lookup(data.handakuten)) == '' then
		-- first character is in data.handakuten: convert it and mark with two apostrophes
		local converted = mw.ustring.gsub(first, '.', lookup(data.tenconv))
		return prefix .. converted .. rest .. "''"
	end
	if emptied_by_ky then
		return prefix .. text
	end
	return ''
end
-- when counting morae, most small hiragana belong to the previous mora,
-- so for purposes of counting them, they can be removed and the characters
-- can be counted to get the number of morae. The exception is small tsu,
-- so data.nonmora_to_empty maps all small hiragana except small tsu.
-- @param text a string, or a frame-like table whose first positional argument is the string
-- @return the number of characters left after the removals
function export.count_morae(text)
	if type(text) == "table" then
		-- called with a frame: the text is the first positional argument, not the args table
		text = text.args[1]
	end
	-- convert kata to hira (hira is untouched)
	-- look up each character individually; returning the whole table makes gsub raise an error
	text = mw.ustring.gsub(text, '.', function (char) return data.kh[char] or char end)
	-- remove all of the small hiragana such as ょ except small tsu
	text = mw.ustring.gsub(text, '.', function (char) return data.nonmora_to_empty[char] or char end)
	-- remove zero-width spaces (U+200B); written as a code point because the character is
	-- invisible in source and easily lost
	text = mw.ustring.gsub(text, mw.ustring.char(0x200B), '')
	-- return number of characters, which should be the number of morae
	return mw.ustring.len(text)
end
-- accepts: any mix of kana (a string, or a frame-like table whose first positional
-- argument is the string)
-- returns: a hiragana sort key designed for WMF software (hidx of old)
-- this is like hidx above but doesn't return |hidx=sortkey,
-- just the sort key itself, but unlike hidx above, this
-- replaces the long vowel mark with its vowel
function export.jsort(text)
	if type(text) == "table" then
		-- called with a frame: the text is the first positional argument, not the args table
		text = text.args[1]
	end

	-- builds a gsub callback that looks one character up in `tbl`, keeping it when absent
	-- (returning the table itself from the callback would make gsub raise an error)
	local function lookup(tbl)
		return function (char) return tbl[char] or char end
	end

	-- remove western spaces, hyphens, and periods
	text = mw.ustring.gsub(text, '[ %-%.]', '')
	text = mw.ustring.gsub(text, '.', lookup(data.kh))

	local first = mw.ustring.sub(text, 1, 1)
	local rest = mw.ustring.sub(text, 2, mw.ustring.len(text))
	-- if the first character has dakuten, replace it with the corresponding
	-- character without dakuten and add an apostrophe to the end, e.g.
	-- がす > かす'
	if mw.ustring.gsub(first, '.', lookup(data.dakuten)) == '' then
		local converted = mw.ustring.gsub(first, '.', lookup(data.tenconv))
		text = converted .. rest .. "'"
	-- similar thing but with handakuten and two apostrophes, e.g. ぱす -> はす''
	elseif mw.ustring.gsub(first, '.', lookup(data.handakuten)) == '' then
		local converted = mw.ustring.gsub(first, '.', lookup(data.tenconv))
		text = converted .. rest .. "''"
	end

	-- replace the long vowel mark with the vowel that it stands for
	-- (each key of data.longvowels is used as a pattern, each value as its replacement)
	for key, value in pairs(data.longvowels) do
		text = mw.ustring.gsub(text, key, value)
	end
	return text
end
-- returns 'yes' if the string contains kana (not exactly is kana)
-- returns 'no' otherwise, including if string is empty
-- NOTE(review): both patterns below are empty strings, which match any input, so the first
-- test always succeeds and 'no' is unreachable. The kana character classes appear to have
-- been lost, and `f.args` is used without a key -- restore from the upstream module.
function export.is_kana(f)
local text = f.args
if mw.ustring.match(text, '') then
return 'yes'
end
if mw.ustring.match(text, '') then
return 'yes'
end
return 'no'
end
-- returns a sort key with |sort= in front, e.g.
-- |sort=はつぐん' if given ばつぐん
-- @param f a plain string, or a frame-like table whose first positional argument is the term
-- @return '|sort=' followed by the key, or '' when the first character matches neither
--         data.dakuten nor data.handakuten and the data.ky pass does not empty the term
-- (presumably data.ky maps katakana to '', data.kh katakana to hiragana, data.dakuten /
-- data.handakuten voiced / semi-voiced kana to '', and data.tenconv those kana to their
-- plain form -- confirm against Module:ja/data)
function export.sort(f)
	-- test the parameter itself: the previous check inspected `text`, which is not yet
	-- defined at that point, so frames were never unpacked
	local text = type(f) == 'table' and f.args[1] or f
	local prefix = '|' .. 'sort' .. '='

	-- builds a gsub callback that looks one character up in `tbl`, keeping it when absent
	-- (returning the table itself from the callback would make gsub raise an error)
	local function lookup(tbl)
		return function (char) return tbl[char] or char end
	end

	-- true when the data.ky pass leaves nothing of the original text
	local emptied_by_ky = mw.ustring.gsub(text, '.', lookup(data.ky)) == ''

	text = mw.ustring.gsub(text, '.', lookup(data.kh))
	local first = mw.ustring.sub(text, 1, 1)
	local rest = mw.ustring.sub(text, 2, mw.ustring.len(text))

	if mw.ustring.gsub(first, '.', lookup(data.dakuten)) == '' then
		-- first character is in data.dakuten: convert it and mark the key with one apostrophe
		local converted = mw.ustring.gsub(first, '.', lookup(data.tenconv))
		return prefix .. converted .. rest .. "'"
	end
	if mw.ustring.gsub(first, '.', lookup(data.handakuten)) == '' then
		-- first character is in data.handakuten: convert it and mark with two apostrophes
		local converted = mw.ustring.gsub(first, '.', lookup(data.tenconv))
		return prefix .. converted .. rest .. "''"
	end
	if emptied_by_ky then
		return prefix .. text
	end
	return ''
end
-- returns the "stem" of a verb or -i adjective, that is the term minus the final character
-- @param f frame-like table; the term is read from its first positional argument
-- @return the term without its last character ('' for an empty or one-character term)
function export.definal(f)
	-- the args table itself is not a string; take the first positional argument
	local term = f.args[1]
	return mw.ustring.sub(term, 1, mw.ustring.len(term) - 1)
end
-- this generates links to categories of the form
-- Category:Japanese terms spelled with (kanji)
-- which was previously done in Template:ja-kanjitab
-- but depended on the editor entering the right kanji
-- Takes no arguments: works on the current page title. Returns the category links for each
-- distinct character kept from the title, concatenated without separators.
function export.spelled_with_kanji()
local PAGENAME = mw.title.getCurrentTitle().text
--PAGENAME = f.args
local cats = {}
-- NOTE(review): this outer `c` is never read; the loop below declares its own local `c`.
local c = ''
-- remove non-kanji characters
-- technically 々 is not a kanji, but we want a category for it, so leave it in
-- NOTE(review): the pattern is an empty string, so nothing is removed as written; the
-- negated kanji class appears to have been lost -- restore from the upstream module.
PAGENAME = mw.ustring.gsub(PAGENAME, '', '')
-- collect each character once, in order of first appearance
local uniquekanji = ""
for k in mw.ustring.gmatch(PAGENAME,".") do
-- note: `k` is interpreted as a pattern by find, not as plain text
if not mw.ustring.find(uniquekanji,k) then uniquekanji = (uniquekanji .. k) end
end
for i = 1, mw.ustring.len(uniquekanji) do
local c = mw.ustring.sub(uniquekanji,i,i)
table.insert(cats, '[[Category:Japanese terms spelled with ')
table.insert(cats, c)
--table.insert(cats, '|')
--table.insert(cats, sortkey)
table.insert(cats, ']]')
--table.insert(cats, "\n")
--table.insert(cats, '</nowiki>')
end
return table.concat(cats, '')
end
-- see also Template:JAruby
-- meant to be called from another module
-- Builds <ruby> markup for `term`, taking the readings from `kana`, and returns the result
-- wrapped in a <span> with font-size 1.2em.
-- `from_ja_link` is accepted but not referenced anywhere in this function.
-- NOTE(review): this block is heavily damaged as shown. `kanji_pattern` is an empty
-- string, many patterns have unbalanced "(" / ")" / "]" or empty classes, and `replaced`,
-- `kanji_segments`, `ruby` and `ruby_markup` are used as whole tables where a single
-- element was presumably indexed. Restore from the upstream module before use.
function export.add_ruby_backend(term, kana, from_ja_link)
local pattern = ""
-- holds the whole segments of markup enclosed in <ruby>...</ruby>
local ruby_markup = {}
-- range of kana: ''
-- nonkana:
local kanji_pattern = ""
-- links without pipes will fail
term = mw.ustring.gsub(term, '%]+)%]%]', ']')
-- remove links from kana
kana = mw.ustring.gsub(kana, '%]+)%]%]', '%1')
kana = mw.ustring.gsub(kana, '%]+|(]+)%]%]', '%1')
-- build up pattern
-- escape the magic characters in the term
pattern = mw.ustring.gsub(term, '%]+|(]+)%]%]', '%1')
pattern = require("Module:utilities").pattern_escape(pattern)
pattern = mw.ustring.gsub(pattern, "]+", " *")
kana = mw.ustring.gsub(kana, "]+", '')
pattern = mw.ustring.gsub(pattern, " *('+) *", "%1")
kana = mw.ustring.gsub(kana, " *('+) *", "%1")
pattern = mw.ustring.gsub(pattern, " +", " ")
kana = mw.ustring.gsub(kana, " +", " ")
-- remove periods and caret signs and hyphens
pattern = mw.ustring.gsub(pattern, '%%', '')
kana = mw.ustring.gsub(kana, '', '')
-- in order to make a pattern that will find the ruby,
-- replace every unbroken string of kanji with a sub-pattern
pattern = mw.ustring.gsub(pattern, kanji_pattern .. '+', '(.+)')
-- get a pattern like
-- (.+)ばか(.+)ばか(.+)ばかばかばああか(.+) when given 超ばか猿超ばか猿超ばかばかばああか猿
-- it turns out we need to keep the spaces sometimes
-- so that kana don't "leak" in ambiguous cases like 捨すてて撤退 where it's not clear if it's
-- す, てったい or すて, ったい. only solution now is to put spaces in the "term" param
-- if they fall between kana
-- build up term (e.g. ])
local replaced = {}
local count = 0
term = mw.ustring.gsub(term, '%]', '%]') -- escape the "]" character so that it cannot appear, example becomes %]
-- each run matched here is stored in `replaced` (in order) and removed from `term`
term = mw.ustring.gsub(term, kanji_pattern .. '+', function(text)
count = count + 1
table.insert(replaced, text)
return ''
end) -- example becomes う|った%]%]
-- remove spaces
-- NOTE(review): this assigns to `replaced` itself, replacing the table with a string on
-- the first iteration; an element assignment was presumably intended -- confirm.
for i,val in ipairs(replaced) do replaced = mw.ustring.gsub(val, ' ', '') end
while mw.ustring.match(term, '%*%+%]*|') do
term = mw.ustring.gsub(term, '(%*)%+)%](*|)', function(a,b,c)
return a .. replaced .. c
end)
end -- example becomes った%]%]
-- apply that pattern to the kana to collect the rubies
-- if this fails, try it without spaces
if mw.ustring.match(kana, pattern) == nil then kana = mw.ustring.gsub(kana, ' ', '') end
-- all captures of the match, one per "(.+)" group in `pattern`
local ruby = { mw.ustring.match(kana, pattern) }
-- local ruby = {}
-- for c in mw.ustring.gmatch(kana, pattern) do table.insert(ruby, c) end
-- find the kanji strings again and combine them with their ruby to make the <ruby> markup
local kanji_segments = {}
for c in mw.ustring.gmatch(term, '%+)%]') do table.insert(kanji_segments, replaced) end
for i = 1, #kanji_segments do
table.insert(ruby_markup, "<ruby>" .. kanji_segments .. "<rp> (</rp><rt>" .. ruby .. "</rt><rp>) </rp></ruby>")
end
count = 0
term = mw.ustring.gsub(term, '%+%]', function()
count = count + 1
return ruby_markup
end)
-- undo the "]" escaping, then drop remaining "%" and spaces
term = mw.ustring.gsub(term, '%%%]', ']')
term = mw.ustring.gsub(term, '%%', '')
term = mw.ustring.gsub(term, ' ', '')
--done
return '<span style="font-size: 1.2em">' .. term .. '</span>'
end
-- replaces the code in Template:ja-readings which accepted kanji readings
-- and displayed them in a consistent format
-- Reads the arguments of the parent frame and returns the generated lines joined with
-- newlines. Returns nothing when the PAGENAME test fails.
-- NOTE(review): this block is damaged as shown. Every reading is assigned the same `args`
-- table (the argument keys appear to have been lost), the link and category strings are
-- reduced to "]" fragments, and the PAGENAME / kanji / hiragana patterns are empty
-- strings, which match anything. Restore from the upstream module before use.
function export.readings(frame)
-- only do this if this entry is a kanji page and not some user's page
local PAGENAME = mw.title.getCurrentTitle().text
if mw.ustring.match(PAGENAME, "") then
local args = frame:getParent().args
local goon = args or ""
local kanon = args or ""
local toon = args or ""
local soon = args or ""
local kanyoon = args or ""
local on = args or ""
local kun = args or ""
local nanori = args or ""
local nazuke = args or ""
local nadzuke = args or ""
-- my new field for the actual reading of the _kanji_, not the entire word (which may not even
-- be written with that kanji) which is what "kun" presently has
local corekun = args or ""
-- this holds the finished product composed of wikilinks to be displayed
-- in the Readings section under the Kanji section
local links = {}
if goon ~= "" then table.insert(links, "* ''']''': " .. goon) end
if kanon ~= "" then table.insert(links, "* ''']''': " .. kanon) end
if toon ~= "" then table.insert(links, "* ''']''': " .. toon) end
if soon ~= "" then table.insert(links, "* ''']''': " .. soon) end
if kanyoon ~= "" then table.insert(links, "* ''']''': " .. kanyoon) end
-- `on` is labelled "(unclassified)" only when at least one of the five readings above is given
if on ~= "" then
if goon == "" and kanon == "" and toon == "" and soon == "" and kanyoon == "" then
table.insert(links, "* ''']''': " .. on)
else
table.insert(links, "* ''']''' (unclassified): " .. on)
end
end
if kun ~= "" then table.insert(links, "* ''']''': " .. kun) end
-- three names for the same thing
-- (only the first non-empty one of the three is used)
if nanori ~= "" then
table.insert(links, "* ''']''': " .. nanori)
elseif nazuke ~= "" then
table.insert(links, "* ''']''': " .. nazuke)
elseif nadzuke ~= "" then
table.insert(links, "* ''']''': " .. nadzuke)
end
-- add kanji readings categories
-- range of hiragana:
-- determine if this is joyo kanji (常用) or jinmeiyo kanji (人名用) or neither (表外)
local joyo_kanji_pattern = ('')
local jinmeiyo_kanji_pattern = ('')
local sortkey = ""
if mw.ustring.match(PAGENAME, joyo_kanji_pattern) then sortkey = "Common"
elseif mw.ustring.match(PAGENAME, jinmeiyo_kanji_pattern) then sortkey = "Names"
else
sortkey = "Uncommon"
end
-- NOTE(review): `sortkey` and the loop variable `r` are not used in the visible strings
-- below; they were presumably part of the lost category links -- confirm.
local all_usable_readings = (goon .. kanon .. toon .. soon .. kanyoon .. on .. corekun)
for r in mw.ustring.gmatch(all_usable_readings, "(.-)]") do
table.insert(links, "]")
end
-- readings should only be in hiragana
if mw.ustring.match(all_usable_readings, '') then
table.insert(links, "]")
end
return table.concat(links, "\n")
end
end
-- do the work of Template:ja-kanji
-- Reads the arguments of the parent frame and returns the display wikitext followed by the
-- category entries (categories joined with newlines). Returns nothing when the PAGENAME
-- test fails.
-- NOTE(review): this block is damaged as shown. grade / rs / style / shin / kyu are all
-- assigned the same `args` table (the argument keys appear to have been lost), the link and
-- category strings are reduced to "]" fragments, the patterns are empty strings, and one
-- string literal in the `style == "ky"` branch opens with ' but closes with ", which is
-- not valid Lua. Restore from the upstream module before use.
function export.kanji(frame)
local PAGENAME = mw.title.getCurrentTitle().text
-- only do this if this entry is a kanji page and not some user's page
if mw.ustring.match(PAGENAME, "") then
local args = frame:getParent().args
local grade = args or ""
local rs = args or ""
local style = args or ""
local shin = args or ""
local kyu = args or ""
local wikitext = {}
local categories = {}
-- NOTE(review): `catsort` is not used in the visible code below; presumably it was part
-- of the lost category links -- confirm.
local catsort = (rs ~= "") and rs or PAGENAME
-- display the kanji itself at the top at 275% size
table.insert(wikitext, '<div><span class="Jpan" style="font-size:275%; line-height: 100%;">' .. PAGENAME .. '</span></div>')
-- display information for the grade
-- if grade was not specified, determine it now
if grade == "" then
local joyo_kanji_pattern = ('')
local jinmeiyo_kanji_pattern = ('')
if mw.ustring.match(PAGENAME, joyo_kanji_pattern) then grade = "c"
elseif mw.ustring.match(PAGENAME, jinmeiyo_kanji_pattern) then grade = "n"
else
grade = "uc"
end
end
table.insert(wikitext, "(''")
-- grades accept a digit or a letter code: 7/c, 8/n, 9/uc, 0/r; anything else adds a
-- category entry instead of display text
if grade == "1" then table.insert(wikitext, "]")
elseif grade == "2" then table.insert(wikitext, "]")
elseif grade == "3" then table.insert(wikitext, "]")
elseif grade == "4" then table.insert(wikitext, "]")
elseif grade == "5" then table.insert(wikitext, "]")
elseif grade == "6" then table.insert(wikitext, "]")
elseif grade == "7" or grade == "c" then table.insert(wikitext, "]")
elseif grade == "8" or grade == "n" then table.insert(wikitext, "]")
elseif grade == "9" or grade == "uc" then table.insert(wikitext, "]")
elseif grade == "0" or grade == "r" then table.insert(wikitext, "]")
else
table.insert(categories, "]")
end
-- if style was indicated, mention that and provide link to corresponding kanji
-- (link to shinjitai if this is kyujitai, link to kyujitai if this is shinjitai)
if style == "s" then
table.insert(wikitext, ", ")
if kyu == "" then
table.insert(wikitext, "] kanji")
else
table.insert(wikitext, '] kanji, ] form <span lang="ja" class="Jpan">]</span>')
end
elseif style == "ky" then
table.insert(wikitext, ", ")
if shin == "" then
table.insert(wikitext, "] kanji")
else
table.insert(wikitext, '] kanji, ] form <span lang="ja" class="Jpan">]</span>")
end
end
table.insert(wikitext, "'')")
-- add categories
table.insert(categories, "]")
if grade == "1" then table.insert(categories, "]")
elseif grade == "2" then table.insert(categories, "]")
elseif grade == "3" then table.insert(categories, "]")
elseif grade == "4" then table.insert(categories, "]")
elseif grade == "5" then table.insert(categories, "]")
elseif grade == "6" then table.insert(categories, "]")
elseif grade == "7" or grade == "c" then table.insert(categories, "]")
elseif grade == "8" or grade == "n" then table.insert(categories, "]")
elseif grade == "9" or grade == "uc" then table.insert(categories, "]")
elseif grade == "0" or grade == "r" then table.insert(categories, "]")
end
-- error category
if rs == "" then table.insert(categories, "]") end
return table.concat(wikitext, "") .. table.concat(categories, "\n")
end
end
-- Patterns consulted by export.kanji_grade, one per grade bucket.
-- NOTE(review): every pattern is an empty string as shown, which matches any input; they
-- were presumably character classes built from the kanji lists in `data` -- restore from
-- the upstream module.
local grade1_pattern = ('')
local grade2_pattern = ('')
local grade3_pattern = ('')
local grade4_pattern = ('')
local grade5_pattern = ('')
local grade6_pattern = ('')
local secondary_pattern = ('')
local jinmeiyo_kanji_pattern = ('')
local hyogaiji_pattern = ('')
--- Returns the grade number of the first grade pattern that matches `kanji`.
-- The patterns are tried from 9 (hyogaiji) down to 1 (grade 1), in that order.
-- @param kanji a string, or a frame-like table whose first positional argument is the string
-- @return a number from 9 down to 1, or false when no pattern matches
function export.kanji_grade(kanji)
	if type(kanji) == "table" then
		-- called with a frame: the text is the first positional argument, not the args table
		kanji = kanji.args[1]
	end
	-- checked in this order; the first matching pattern decides the grade
	local checks = {
		{ hyogaiji_pattern, 9 },
		{ jinmeiyo_kanji_pattern, 8 },
		{ secondary_pattern, 7 },
		{ grade6_pattern, 6 },
		{ grade5_pattern, 5 },
		{ grade4_pattern, 4 },
		{ grade3_pattern, 3 },
		{ grade2_pattern, 2 },
		{ grade1_pattern, 1 },
	}
	for _, check in ipairs(checks) do
		if mw.ustring.match(kanji, check[1]) then
			return check[2]
		end
	end
	return false
end
-- Generates the wikitext skeleton of a new Japanese entry from the parent frame's arguments
-- and the current page title, and returns it as one string. Sections are appended to
-- `result` in order: header, kanjitab, alternative forms, etymology, pronunciation, part of
-- speech and definition, related-term lists, inflection / conjugation, an optional second
-- part of speech, and an optional Korean section.
-- NOTE(review): this block is damaged as shown and is not valid Lua. `args` and `pos_table`
-- are used everywhere without a key, the entries of `pos_table` have no keys, several
-- patterns have empty classes or captures, and the Korean section contains stray "]".
-- Restore from the upstream module before use.
-- NOTE(review): `pagename`, `text`, `output_text`, `to_add`, `yomi` (outside make_tab),
-- `sortkey`, `penul` and `cons` are assigned without `local` and therefore become globals;
-- `final` is read in the conjugation branch but never assigned in this function.
function export.new(frame)
local args = frame:getParent().args
local result = "==Japanese=="
if args then
result = result .. "\n{{wikipedia|lang=ja}}"
end
pagename = mw.title.getCurrentTitle().text
text = args ~= "" and args or pagename
text = mw.ustring.gsub(text, "%-", "|")
-- Builds the {{ja-kanjitab}} line from the "|"-separated text.
-- Returns two values: the template line and the yomi code that was used.
local function make_tab(original, yomi)
output_text = ""
original = mw.ustring.gsub(original, " ", "|")
if mw.ustring.match(original, "<") then
for word in mw.ustring.gmatch(original, "<(+)>") do
output_text = output_text .. "|" .. word
end
yomi = "k"
else
output_text = mw.ustring.gsub(original, ">()", "|k%1=")
output_text = mw.ustring.match(output_text, "|") and "|" .. output_text or false
end
-- yomi falls back to "o" when none was passed in or set above
yomi = yomi or "o"
return "\n{{ja-kanjitab" .. (output_text or "") .. "|yomi=" .. yomi .. "}}", yomi
end
if mw.ustring.match(pagename, "") then
to_add, yomi = make_tab(text, args)
result = result .. to_add
end
if mw.ustring.match(text, "<") then
text = mw.ustring.gsub(text, "", "")
else
text = mw.ustring.gsub(text, "^+>+(+)", "%1")
text = mw.ustring.gsub(text, "|+>+(+)", "%1")
text = mw.ustring.gsub(text, "()|(あ)", "%1.%2")
text = mw.ustring.gsub(text, "()|(い)", "%1.%2")
text = mw.ustring.gsub(text, "()|(う)", "%1.%2")
text = mw.ustring.gsub(text, "()|()", "%1.%2")
text = mw.ustring.gsub(text, "()|()", "%1.%2")
text = mw.ustring.gsub(text, "|", "")
end
-- Builds a "===title===" section containing up to four {{l/ja|...}} list items.
-- The `class` parameter is not referenced in the visible body.
local function other(class, title, args)
local code = ""
if args then
code = code .. "\n\n===" .. title .. "===\n* {{l/ja|" .. args .. "}}"
if args then
code = code .. "\n* {{l/ja|" .. args .. "}}"
if args then
code = code .. "\n* {{l/ja|" .. args .. "}}"
if args then
code = code .. "\n* {{l/ja|" .. args .. "}}"
end
end
end
end
return code
end
result = result .. other("alt", "Alternative forms", args)
-- a sort key is only generated for pure-kana text, and dropped when it equals the text itself
sortkey = export.script(text) == "Kana" and export.sort(text) or false
if sortkey and sortkey == "|sort=" .. text then
sortkey = false
end
if args or args or args then
result = result .. "\n\n===Etymology===\n"
if args then
result = result .. "{{wasei eigo|" .. args .. (args and "|" .. args or "") .. (sortkey or "") .. "}}"
else
result = result .. (args or
("From {{etyl|" .. (args or "en") .. "|ja" .. (sortkey or "") .. "}} {{m|" ..
(args or "en") .. "|" .. args .. "}}."))
end
end
result = result .. "\n\n===Pronunciation===\n{{ja-pron" .. (args ~= "" and "|" .. text or "") ..
(yomi and "|y=" .. yomi or "") .. (args and "|acc=" .. args or "") .. "}}"
-- part of speech code, defaulting to "n"
local pos = args ~= "" and args or "n"
-- each entry holds: section title, template suffix, a boolean flag, and optionally a
-- second section title and template suffix
local pos_table = {
= { "Noun", "noun", true },
= { "Noun", "noun", true, "Verb", "verb-suru" },
= { "Noun", "noun", true },
= { "Noun", "noun", true, "Verb", "verb-suru" },
= { "Adjective", "adj", true, "Noun", "noun" },
= { "Adjective", "adj", true, "Noun", "noun" },
= { "Verb", "verb", true },
= { "Verb", "verb", true },
= { "Verb", "verb form", true },
= { "Verb", "verb form", true },
= { "Adjective", "adj", true },
= { "Adjective", "adj", true },
= { "Adjective", "adj", true },
= { "Adverb", "adverb", false },
= { "Adverb", "adverb", false },
= { "Proper noun", "proper", false },
= { "Proper noun", "proper", false },
= { "Proper noun", "proper", false },
= { "Phrase", "phrase", true },
= { "Phrase", "phrase", true },
= { "Interjection", "interjection", false },
= { "Conjunction", "conjunction", false },
= { "Particle", "particle", false },
= { "Preposition", "preposition", false },
}
result = result .. "\n\n===" .. pos_table .. "===\n{{ja-" .. (not pos_table and "pos|" or "") .. pos_table ..
(args ~= "" and "|" .. text or "")
if pos_table == "Adjective" then
result = result .. "|infl=" .. (args and args or "na")
end
result = result .. (args and "|type=" .. args or "") .. (args and "|tr=" .. args or "") .. "}}"
result = result .. "\n\n# " .. args
result = result .. other("syn", "=Synonyms=", args)
result = result .. other("ant", "=Antonyms=", args)
result = result .. other("der", "=Derived terms=", args)
result = result .. other("also", "=See also=", args)
if pos_table == "Adjective" then
result = result .. "\n\n====Inflection====\n"
if args == "i" or args == "い" then
result = result .. "{{ja-i" .. (args ~= "" and "|" .. mw.ustring.sub(text, 1, -2) or "") .. "}}"
else
result = result .. "{{ja-na" .. (args ~= "" and "|" .. text or "") .. "}}"
end
end
if pos_table == "verb" then
result = result .. "\n\n====Conjugation====\n{{ja-"
-- penul: last character of what the data.hk and data.kr passes yield for the
-- second-to-last character of text; cons: first character of the same passes applied to
-- the last character of text
penul = mw.ustring.sub(mw.ustring.gsub(mw.ustring.gsub(mw.ustring.sub(text, -2, -2), ".", function (char) return data.hk or char end), ".", function (char) return data.kr or char end), -1, -1)
cons = mw.ustring.sub(mw.ustring.gsub(mw.ustring.gsub(mw.ustring.sub(text, -1, -1), ".", function (char) return data.hk or char end), ".", function (char) return data.kr or char end), 1, 1)
if cons == "u" then
cons = ""
elseif cons == "t" then
cons = "ts"
end
if final == "る" and (penul == "i" or penul == "e") and args == 2 then
result = result .. "ichi"
else
result = result .. "go-" .. cons .. "u"
end
result = result .. (args ~= "" and "|" .. mw.ustring.sub(text, 1, -2) or "") .. "}}"
end
if pos_table and args ~= "" then
result = result .. "\n\n===" .. pos_table .. "===\n{{ja-" .. pos_table .. (args ~= "" and "|" .. text or "") ..
(args and "|type=" .. args or "") .. (args and "|tr=" .. args or "") .. "}}\n\n#" .. args
if pos_table == "Verb" then
result = result .. "\n\n====Conjugation====\n{{ja-suru" .. (args ~= "" and "|" .. text or "") .. "}}"
end
end
if args then
result = result .."\n\n----\n\n==Korean==\n{{ko-hanjatab}}\n\n===" .. pos_table or "n"] ..
"===\n{{ko-" .. pos_table or "n"] .. "|hj" .. "|hangeul=" .. args .. "}}" ..
"\n\n# {{hanja form of|" .. args .. "|" .. (args or args) .. "}}"
end
return result
end
-- hand the table of exported functions (and export.data) back to the caller of this module
return export