local export = {}
local find = mw.ustring.find
local sub = mw.ustring.sub
local gsub = mw.ustring.gsub
local match = mw.ustring.match
local m_ja = require("Module:ja")
local m_ja_pron = require("Module:ja-pron")
local data1 = mw.loadData("Module:ja/data/ojad")
local data2 = mw.loadData("Module:ja/data/ojad/る")
local data3 = mw.loadData("Module:ja/data/ojad/い")
-- Apply `func` to every element of the sequence `list` and collect the results.
--   list: array-like table, traversed with ipairs (stops at the first nil)
--   func: function called with one element; only its first return value is kept
-- Returns a new array; `list` itself is not modified.
local function map(list, func)
	local result = {}
	for _, item in ipairs(list) do
		-- Truncate to one value: passing func(item) straight to table.insert
		-- would forward every return value, so a two-value function such as a
		-- gsub wrapper would be read as table.insert(result, pos, value).
		local value = func(item)
		result[#result + 1] = value
	end
	return result
end
-- Report whether `item` occurs in the array part of `list`.
--   list: array-like table
--   item: value compared against each element with ==
-- Returns true on the first match, false otherwise (including for an empty list).
local function contains(list, item)
	for i = 1, #list do
		-- Compare the i-th element, not the table itself.
		if list[i] == item then return true end
	end
	return false
end
-- Wrap `text` in a span tagged as Japanese (lang="ja", class "Jpan").
-- Returns the resulting markup string.
local function ja(text)
	local opening = '<span lang="ja" class="Jpan">'
	local closing = '</span>'
	return opening .. text .. closing
end
-- Rewrite a kana form using the kanji spelling of its lemma.
-- interpolate_kanji('開く', 'あく', 'あきます') returns '開きます'
--
--   kanji: lemma written with kanji, optionally followed by okurigana ('開く')
--   kana1: kana reading of the lemma ('あく'); must end with the okurigana
--   kana2: kana form to convert ('あきます'); must begin with the kana stem
-- Returns kana2 with its leading kana stem replaced by the kanji stem.
-- Raises an error when kana1 does not end with the okurigana or kana2 does not
-- begin with the kana stem.
local function interpolate_kanji(kanji, kana1, kana2)
	-- Split the lemma into its stem and trailing hiragana (the okurigana).
	-- The okurigana capture needs a character class: a bare '(*)' only matches a
	-- literal asterisk, so the documented example could never match.
	-- NOTE(review): the class was reconstructed from the example above as the
	-- hiragana range; confirm it against the module's history.
	local kanji_stem, okurigana = match(kanji, '^(.-)([ぁ-ゖ]*)$')
	if not find(kana1, okurigana .. '$') then error(kana1 .. ' does not end with ' .. okurigana) end
	-- The reading of the lemma minus the okurigana is the part the kanji stands for.
	local kana_stem = gsub(kana1, okurigana .. '$', '')
	-- Special case: when the kanji stem ends in 来 and the kana stem ends in く,
	-- the trailing く is dropped from the kana stem (presumably because the
	-- reading of 来 differs between inflected forms; confirm).
	if find(kanji_stem, '来$') and find(kana_stem, 'く$') then kana_stem = gsub(kana_stem, 'く$', '') end
	if not find(kana2, '^' .. kana_stem) then error(kana2 .. ' does not begin with ' .. kana_stem) end
	-- Only the first return value of gsub (the string) is kept.
	local kanji2 = gsub(kana2, '^' .. kana_stem, kanji_stem)
	return kanji2
end
-- Formats one accented pronunciation.
-- A tick (') in pron denotes a fall in pitch; no tick means heibangata.
-- format_accent("かえりま'す") hands かえります, together with the morae count
-- of the part before the tick (かえりま), to m_ja_pron.accent.
-- Returns two values:
--   1. the Jpan span captured from m_ja_pron.accent's output (nil when that
--      output does not match the expected markup)
--   2. the tick-less pron with the accent number appended
local function format_accent(pron)
-- count total morae
-- NOTE(review): the pattern below is empty, so this gsub appears to leave the
-- string unchanged; a character class was probably lost. total_morae is only
-- referenced by the commented-out return near the end of this function.
local PRON = pron
PRON = gsub(PRON, "", '')
local total_morae = m_ja.count_morae(PRON)
-- count the morae that precede the first tick; when pron has no tick the
-- empty string is counted instead
-- NOTE(review): empty pattern again; confirm what was meant to be stripped
-- here (it cannot be the tick, which the match on the next line relies on).
local acc = gsub(pron, "", '')
acc = m_ja.count_morae(match(acc, "^(.-)'") or '')
-- call ja_pron to format the accent; the tick is removed first so that only
-- the plain kana and the accent number are passed
pron = gsub(pron, "'", '')
local ja_pron = m_ja_pron.accent(pron, acc)
-- First capture: the Japanese-script span. Second capture: the Latin-script
-- span, which is captured into romaji but not used afterwards.
local kana, romaji = match(ja_pron, '(<span lang="ja" class="Jpan">.-) (<span class="Latn"><samp>.-</samp></span>)')
--return kana, (acc==0 and acc or (acc-total_morae-1))..pron
-- The second value is the plain kana followed by the accent number (acc is
-- coerced to a string by the concatenation), not the captured romaji.
return kana, pron..acc
end
-- Build the cell contents for one '/'-separated list of accented pronunciations.
--   lemma_kanji, lemma_kana: lemma spelling and reading, passed to interpolate_kanji
--   prons: '' (no accent provided) or one or more pron separated by '/'
-- Returns three strings: kanji, kana, romaji. Each is '-' when prons is '',
-- otherwise the per-pron values joined with ' ~ ' (kanji de-duplicated and
-- wrapped by ja()).
-- Errors raised by interpolate_kanji propagate to the caller.
local function format_cells(lemma_kanji, lemma_kana, prons)
	if prons == '' then
		return '-', '-', '-'
	end
	local kanji, kana, romaji = {}, {}, {}
	for pron in mw.text.gsplit(prons, '/') do
		-- interpolate_kanji compares plain kana, so the pitch-fall tick must be
		-- removed first; an empty pattern here would leave the tick in place.
		-- NOTE(review): only the tick is stripped; confirm whether the data
		-- carries other markers that should be removed as well.
		local plain = gsub(pron, "'", '')
		local new_kanji = interpolate_kanji(lemma_kanji, lemma_kana, plain)
		if not contains(kanji, new_kanji) then table.insert(kanji, new_kanji) end
		local new_kana, new_romaji = format_accent(pron)
		table.insert(kana, new_kana)
		table.insert(romaji, new_romaji)
	end
	return ja(table.concat(kanji, ' ~ ')), table.concat(kana, ' ~ '), table.concat(romaji, ' ~ ')
end

-- Render one table row (wikitext) for a word.
--   label: text of the header cell
--   lemma_kanji, lemma_kana: lemma spelling and reading
--   prons1, prons2: each is '' (rendered as '-') or one or more pron separated by '/'
-- Returns the wikitext for the header cell followed by two groups of cells
-- (an empty cell, the kana cell and the romaji cell) and a closing row separator.
-- The kanji value is computed, which also validates each pron against the
-- lemma, but the cell that precedes the kana is emitted empty.
local function format_row(label, lemma_kanji, lemma_kana, prons1, prons2)
	local _, kana1, romaji1 = format_cells(lemma_kanji, lemma_kana, prons1)
	local first = '! ' .. label .. '\n| ' .. '\n| ' .. kana1 .. '\n| ' .. romaji1 .. '\n'
	local _, kana2, romaji2 = format_cells(lemma_kanji, lemma_kana, prons2)
	local second = '\n| ' .. '\n| ' .. kana2 .. '\n| ' .. romaji2 .. '\n|-\n'
	return first .. second
end
-- Template entry point: renders a wikitable with one row per requested word.
-- Arguments are read from the parent frame:
--   words: space-separated keys looked up in the three OJAD data modules
--   form:  number selecting which comma-separated field of a data entry is
--          shown next to the first field
-- Words with no data entry get a row containing only the key.
-- Returns the wikitext of the table.
function export.show(frame)
	local args = frame:getParent().args
	if not args.words then
		error('Parameter "words" is required')
	end
	local words = mw.text.split(args.words, ' ')
	local form = tonumber(args.form)
	-- Keep the accumulator local; a bare assignment would create a global.
	local rows = { '{|\n' }
	for _, key in ipairs(words) do
		-- Look the word up in each data table in turn.
		local entry = data1[key] or data2[key] or data3[key]
		if entry then
			local fields = mw.text.split(entry, ',')
			-- The key may carry a ':'-suffix; the kanji lemma is the part before it.
			local lemma_kanji = gsub(key, ':.*', '')
			-- NOTE(review): assumes the first field holds the lemma's accented
			-- reading; its first '/'-alternative with the tick removed is used
			-- as the plain kana reading. Confirm against the data modules.
			local lemma_kana = gsub(gsub(fields[1], '/.*', ''), "'", '')
			-- NOTE(review): assumes the row shows the first field and the field
			-- selected by `form`; a missing field is shown as "no accent
			-- provided". Confirm the intended indices.
			table.insert(rows, format_row(key, lemma_kanji, lemma_kana, fields[1], fields[form] or ''))
		else
			table.insert(rows, '|-\n|'..key..'\n|-\n')
		end
	end
	table.insert(rows, '|}')
	return table.concat(rows, '')
end
return export