local export = {}
--- Convert every hiragana character in `s` to the corresponding katakana.
-- Each matched character is shifted up by 96 code points, which is the
-- distance between the hiragana block (ぁ U+3041 .. ゖ U+3096) and the
-- katakana block (ァ U+30A1 .. ヶ U+30F6).
-- NOTE(review): the pattern used to be the empty string, which matches
-- between characters and makes the callback fail; the character class was
-- restored from the +96 offset — confirm against the upstream module.
-- @param s string to convert
-- @return the converted string (the gsub replacement count is dropped)
local function str_hira_to_kata(s)
    return (mw.ustring.gsub(s, '[ぁ-ゖ]', function(m1)
        return mw.ustring.char(mw.ustring.codepoint(m1) + 96)
    end))
end
--- Convert every katakana character in `s` to the corresponding hiragana.
-- Each matched character is shifted down by 96 code points, which is the
-- distance between the katakana block (ァ U+30A1 .. ヶ U+30F6) and the
-- hiragana block (ぁ U+3041 .. ゖ U+3096).
-- NOTE(review): the pattern used to be the empty string, which matches
-- between characters and makes the callback fail; the character class was
-- restored from the -96 offset — confirm against the upstream module.
-- @param s string to convert
-- @return the converted string (the gsub replacement count is dropped)
local function str_kata_to_hira(s)
    return (mw.ustring.gsub(s, '[ァ-ヶ]', function(m1)
        return mw.ustring.char(mw.ustring.codepoint(m1) - 96)
    end))
end
--- Align two strings by brute force, splitting both into the fragments they
-- share and the "holes" where they differ.
--
-- The shorter string is used as a template. Some of its character positions
-- are declared "holes"; the remaining runs are turned into literal captures
-- and the holes into wildcard captures, and both strings are matched against
-- the resulting pattern. Hole sets are tried with one hole first, then two,
-- and so on, until a hole count that produced at least one alignment has
-- been exhausted.
--
-- NOTE(review): the `holes[...]` indexes, the `[id1]`/`[id2]` keys and the
-- escape character class were missing from this function (it did not
-- compile); they were reconstructed from the enumeration order documented in
-- `next_holes` — confirm against the upstream module.
--
-- @param s1 first string
-- @param s2 second string
-- @param limit optional work budget; `len2` is subtracted after every
--        candidate and the search stops once the budget is <= 0
-- @return a list of alignments; each alignment is `{captures_of_s1,
--         captures_of_s2}`. When the strings are equal or nothing matched,
--         the single alignment `{{s1}, {s2}}` is returned.
local function str_ucompare(s1, s2, limit)
    if s1 == s2 then return {{{s1}, {s2}}} end
    local result = {}
    local string1, string2
    local len1, len2
    local id1, id2
    if mw.ustring.len(s1) <= mw.ustring.len(s2) then -- make string1 the shorter string
        string1, string2 = s1, s2
        len1, len2 = mw.ustring.len(s1), mw.ustring.len(s2)
        id1, id2 = 1, 2
    else
        string1, string2 = s2, s1
        len1, len2 = mw.ustring.len(s2), mw.ustring.len(s1)
        id1, id2 = 2, 1
    end

    -- Wrap a literal fragment of string1 in a capture, escaping every Lua
    -- pattern magic character so that it only matches itself.
    local function literal_capture(fragment)
        local escaped = fragment:gsub('([%(%)%.%%%+%-%*%?%[%]%^%$])', '%%%1')
        return '(' .. escaped .. ')'
    end

    -- "holes": a different part of the two strings
    -- try to find where are the "holes"
    -- `holes` is an ascending list of character positions in string1.
    local holes = {1}
    local holes_number = 1

    -- Advance `holes` to the next combination of positions.
    -- Returns true when the number of holes had to be increased.
    local function next_holes()
        -- e.g. 'abc'
        -- -> '?bc'
        -- -> 'a?c'
        -- -> 'ab?'
        -- -> '??c'
        -- -> '?b?'
        -- -> 'a??'
        -- -> '???'
        local hnm = holes_number
        local hnmv
        while hnm > 0 do
            hnmv = holes[hnm]
            -- Can hole number `hnm` still move right while leaving room for
            -- the holes that come after it?
            if hnmv + holes_number < len1 + hnm then
                -- Move it one step and pack the following holes behind it.
                for i = 1, holes_number + 1 - hnm do
                    holes[hnm - 1 + i] = hnmv + i
                end
                return false
            else
                hnm = hnm - 1
            end
        end
        -- Every combination of this size was tried: use one more hole.
        holes_number = holes_number + 1
        for i = 1, holes_number do
            holes[i] = i
        end
        return true
    end

    local string1_fragments
    local string1_pattern, string2_pattern
    local matched1, matched2
    local match_success = 0 -- 0 means no result yet; >0 means found result with (holes_number+1-match_success) "holes"
    local n_holes_connected
    while holes_number <= len1 do
        string1_fragments = {''}
        n_holes_connected = 1
        for i = 2, holes_number do
            if holes[i - 1] + 1 < holes[i] then
                table.insert(string1_fragments,
                    literal_capture(mw.ustring.sub(string1, holes[i - 1] + 1, holes[i] - 1)))
            else
                n_holes_connected = n_holes_connected + 1 -- if the two "holes" are connected reduce the number of "holes" by 1
            end
        end
        -- Only candidates at least as good as the best one so far are tried.
        if match_success <= n_holes_connected then
            if 1 < holes[1] then
                string1_fragments[1] = literal_capture(mw.ustring.sub(string1, 1, holes[1] - 1))
            end
            if holes[holes_number] < len1 then
                table.insert(string1_fragments,
                    literal_capture(mw.ustring.sub(string1, holes[holes_number] + 1)))
            else
                table.insert(string1_fragments, '')
            end
            -- A hole covers at least one character of string1 but may be
            -- empty in string2.
            string1_pattern = '^' .. table.concat(string1_fragments, '(..-)') .. '$'
            string2_pattern = '^' .. table.concat(string1_fragments, '(.-)') .. '$'
            matched2 = {mw.ustring.match(string2, string2_pattern)}
            if #matched2 > 0 then
                matched1 = {mw.ustring.match(string1, string1_pattern)}
                if #matched1 > 0 then
                    -- A strictly better candidate discards earlier results.
                    if match_success < n_holes_connected then result = {} end
                    match_success = n_holes_connected
                    -- Index 1 always holds the captures of s1, index 2 those of s2.
                    table.insert(result, {[id1] = matched1, [id2] = matched2})
                end
            end
        end
        if next_holes() and match_success > 0 then break end
        if limit then
            limit = limit - len2
            if limit <= 0 then break end
        end
    end
    if #result == 0 then
        return {{{s1}, {s2}}}
    else
        return result
    end
end
--- Split wikitext into plain-text and link segments.
--
-- `[[target|label]]` becomes `{text = 'label', linkto = 'target'}`,
-- `[[page]]` becomes `{text = 'page', linkto = 'page'}`, and everything
-- outside links becomes `{text = ...}` without `linkto`. When `[[` occurs
-- again before the closing `]]`, only the innermost `[[` opens the link and
-- the earlier brackets stay in the preceding plain text. A link with an
-- empty label (`[[a|]]`) or empty content (`[[]]`) yields no segment.
--
-- NOTE(review): the line that locates the next `[[` and the closing `]]`
-- had collapsed into a search for the literal text `%]`, so no link was ever
-- found; the two searches were restored from how `i1` and `i2` are used
-- below — confirm against the upstream module.
--
-- @param s wikitext string
-- @return list of `{text = string, linkto = string|nil}` segments
local function str_parse_link(s)
    local t = {}
    local lt
    local i1, i2
    local i_o = 1 -- first byte not yet emitted
    local i_n = s:find('%[%[', i_o) -- start of the current '[['
    while i_n do
        -- i1: start of the next '[[' after the opener, i2: start of the next ']]'.
        i1 = s:find('%[%[', i_n + 2)
        i2 = s:find('%]%]', i_n + 2)
        if not i2 then break end
        -- Move the opener to the innermost '[[' that precedes the ']]'.
        while i1 and i1 < i2 do
            i_n = i1
            i1 = s:find('%[%[', i_n + 1)
        end
        if i_o < i_n then table.insert(t, {
            text = s:sub(i_o, i_n - 1),
        }) end
        if i_n + 2 < i2 then
            lt = s:sub(i_n + 2, i2 - 1)
            i1 = lt:find('|')
            if i1 then
                if i1 + 1 <= #lt then table.insert(t, {
                    text = lt:sub(i1 + 1),
                    linkto = i1 > 1 and lt:sub(1, i1 - 1) or nil,
                }) end
            else
                table.insert(t, {
                    text = lt,
                    linkto = lt,
                })
            end
        end
        i_o = i2 + 2
        i_n = s:find('%[%[', i_o)
    end
    if i_o <= #s then table.insert(t, {
        text = s:sub(i_o),
    }) end
    return t
end
--- Merge a link segmentation and a ruby segmentation of the same text into
-- one nested table.
--
-- Both inputs are walked in parallel (`id_l` over `link_table`, `id_r` over
-- `ruby_table`), comparing the byte lengths of the current segments:
--   * a ruby segment that itself contains a link, or that is longer than the
--     current link segment, becomes `{text = {link pieces...}, ruby = parsed
--     ruby}`;
--   * otherwise a link segment becomes `{text = {ruby pieces...}, linkto =
--     target}`, and an unlinked segment contributes its ruby pieces directly.
-- Segments that straddle a boundary are cut, and the remainder is written
-- back into the `.text` field of the INPUT entry, so both inputs are mutated.
--
-- Assumes both tables spell out the same text; if one runs out early inside
-- an inner loop, indexing past its end raises an error.
--
-- NOTE(review): every `[id_l]` / `[id_r]` index and the link-detection
-- pattern were missing from this function; they were reconstructed from the
-- field names and counters — confirm against the upstream module,
-- especially the exact quantifier used in `has_link`.
--
-- @param link_table list of `{text = string, linkto = string|nil}`
-- @param ruby_table list of `{text = string, ruby = string|nil}`
-- @return merged list of segments
local function table_merge(link_table, ruby_table)
    local r = {}
    local r_sub, r_insert
    local len_cut
    local id_l, id_r = 1, 1

    -- True when a ruby string contains a wiki link.
    local function has_link(ruby)
        return ruby:find('%[%[.-%]%]')
    end

    while id_l <= #link_table and id_r <= #ruby_table do
        -- > 0: link segment is longer; < 0: ruby segment is longer.
        len_cut = link_table[id_l].text:len() - ruby_table[id_r].text:len()
        if ruby_table[id_r].ruby and (has_link(ruby_table[id_r].ruby) or len_cut < 0) then
            -- The ruby segment is the outer container.
            r_sub = {
                text = {},
                ruby = str_parse_link(ruby_table[id_r].ruby),
            }
            r_insert = r_sub.text
            table.insert(r, r_sub)
            -- Swallow whole link segments while the ruby segment is longer.
            while len_cut < 0 do
                table.insert(r_insert, {
                    text = link_table[id_l].text,
                    linkto = link_table[id_l].linkto
                })
                id_l = id_l + 1
                len_cut = len_cut + link_table[id_l].text:len()
            end
            -- The part of the current link segment that still belongs here.
            table.insert(r_insert, {
                text = link_table[id_l].text:sub(1, -1 - len_cut),
                linkto = link_table[id_l].linkto
            })
            if len_cut == 0 then
                id_l = id_l + 1
                id_r = id_r + 1
            else
                -- Keep the unused tail of the link segment for the next round.
                link_table[id_l].text = link_table[id_l].text:sub(-len_cut)
                id_r = id_r + 1
            end
        else
            -- The link segment (or top level, when unlinked) is the container.
            if link_table[id_l].linkto then
                r_sub = {
                    text = {},
                    linkto = link_table[id_l].linkto,
                }
                r_insert = r_sub.text
                table.insert(r, r_sub)
            else
                r_insert = r
            end
            -- Swallow whole ruby segments while the link segment is longer,
            -- stopping in front of a ruby that contains a link.
            while len_cut > 0 and not (ruby_table[id_r].ruby and has_link(ruby_table[id_r].ruby)) do
                table.insert(r_insert, {
                    text = ruby_table[id_r].text,
                    ruby = ruby_table[id_r].ruby,
                })
                id_r = id_r + 1
                len_cut = len_cut - ruby_table[id_r].text:len()
            end
            if len_cut == 0 then
                table.insert(r_insert, {
                    text = ruby_table[id_r].text,
                    ruby = ruby_table[id_r].ruby,
                })
                id_l = id_l + 1
                id_r = id_r + 1
            else
                if ruby_table[id_r].ruby then
                    -- Leave the ruby segment for the other branch; keep only
                    -- the unconsumed tail of the link segment.
                    link_table[id_l].text = link_table[id_l].text:sub(-(len_cut + ruby_table[id_r].text:len()))
                else
                    -- Split the plain ruby-table segment at the link boundary.
                    table.insert(r_insert, {
                        text = ruby_table[id_r].text:sub(1, -1 + len_cut),
                    })
                    ruby_table[id_r].text = ruby_table[id_r].text:sub(len_cut)
                    id_l = id_l + 1
                end
            end
        end
    end
    return r
end
--- Count the characters of the base text in a ruby table.
-- String `text` fields are measured with `mw.ustring.len`; nested tables
-- are measured recursively.
-- @param ruby_table list of segments with a `text` field
-- @return total length
function export.len_text(ruby_table)
    local total = 0
    for _, segment in ipairs(ruby_table) do
        local text = segment.text
        local size = type(text) == 'string' and mw.ustring.len(text) or export.len_text(text)
        total = total + size
    end
    return total
end
--- Count the characters of the reading in a ruby table.
-- Each segment contributes its `ruby` field, or its `text` field when it
-- has no ruby. Strings are measured with `mw.ustring.len`; nested tables
-- are measured recursively.
-- @param ruby_table list of segments with `text` and optional `ruby` fields
-- @return total length
function export.len_ruby(ruby_table)
    local total = 0
    for _, segment in ipairs(ruby_table) do
        local reading = segment.ruby or segment.text
        local size = type(reading) == 'string' and mw.ustring.len(reading) or export.len_ruby(reading)
        total = total + size
    end
    return total
end
--- Flatten a ruby table to its base text, dropping links and rubies.
-- Nested `text` tables are flattened recursively.
-- @param ruby_table list of segments with a `text` field
-- @return the concatenated base text
function export.to_text(ruby_table)
    local parts = {}
    for _, segment in ipairs(ruby_table) do
        local text = segment.text
        if type(text) ~= 'string' then
            text = export.to_text(text)
        end
        parts[#parts + 1] = text
    end
    return table.concat(parts)
end
--- Flatten a ruby table to its reading.
-- Each segment contributes its `ruby` field, or its `text` field when it
-- has no ruby; nested tables are flattened recursively.
-- @param ruby_table list of segments with `text` and optional `ruby` fields
-- @return the concatenated reading
function export.to_ruby(ruby_table)
    local parts = {}
    for _, segment in ipairs(ruby_table) do
        local reading = segment.ruby or segment.text
        if type(reading) ~= 'string' then
            reading = export.to_ruby(reading)
        end
        parts[#parts + 1] = reading
    end
    return table.concat(parts)
end
--- Serialise a ruby table using caller-supplied delimiters.
--
-- A segment with `linkto` is written as `lb .. target .. lm .. text .. lf`
-- (an empty text is replaced by '_'; an empty target emits the text alone).
-- Otherwise a segment with `ruby` is written as
-- `rb .. text .. rm .. ruby .. rf` (an empty ruby emits the text alone).
-- `text` and `ruby` may themselves be tables and are rendered recursively.
-- A segment carrying both `linkto` and `ruby` is rendered as a link only.
--
-- @param ruby_table list of segments
-- @param break_link when truthy, a linked segment whose text is a table is
--        rewritten so that the link is repeated inside each child and the
--        child's ruby wraps the link instead of the link wrapping the rubies
-- @param lb,lm,lf left / middle / right link delimiters
-- @param rb,rm,rf left / middle / right ruby delimiters
-- @return the rendered string
local function table_to_markup(ruby_table, break_link, lb, lm, lf, rb, rm, rf)
    -- Render a nested table with the same settings.
    local function render(sub_table)
        return table_to_markup(sub_table, break_link, lb, lm, lf, rb, rm, rf)
    end

    local pieces = {}
    for _, item in ipairs(ruby_table) do
        local link, reading = item.linkto, item.ruby
        local body
        if type(item.text) == 'string' then
            body = item.text
        elseif break_link and item.linkto then
            -- Push the link down into every child of this segment.
            local pushed = {}
            for _, child in ipairs(item.text) do
                pushed[#pushed + 1] = {
                    text = {{
                        text = child.text,
                        linkto = link,
                    }},
                    ruby = child.ruby,
                }
            end
            -- The link now lives in the children; emit the body as is.
            link, reading = nil, nil
            body = render(pushed)
        else
            body = render(item.text)
        end

        local out
        if link then
            if link ~= '' then
                out = lb .. link .. lm .. (body ~= '' and body or '_') .. lf
            else
                out = body
            end
        elseif reading then
            if type(reading) ~= 'string' then
                reading = render(reading)
            end
            if reading ~= '' then
                out = rb .. body .. rm .. reading .. rf
            else
                out = body
            end
        else
            out = body
        end
        pieces[#pieces + 1] = out
    end
    return table.concat(pieces)
end
--- Render a ruby table in the lightweight bracket markup.
-- With the default delimiters a link is written `[[target|text]]` and a ruby
-- is written as the text in square brackets followed by the reading in
-- parentheses.
--
-- "options.markup": table of custom delimiters. Recognised keys:
--   link_border_left, link_border_middle, link_border_right,
--   ruby_border_left, ruby_border_middle, ruby_border_right.
--   Missing keys fall back to the defaults below.
-- "options.break_link = true": for a link that wraps several rubies, emit
--   the link inside each ruby instead of one link around all of them.
--
-- @param ruby_table table as produced by the parse functions of this module
-- @param options optional options table (see above)
-- @return the rendered string
function export.to_markup(ruby_table, options)
    options = options or {}
    -- Declared local: it was previously assigned as a global, leaking the
    -- table into the shared environment.
    local omarkup = options.markup or {}
    return table_to_markup(
        ruby_table,
        options.break_link,
        omarkup.link_border_left or '[[',
        omarkup.link_border_middle or '|',
        omarkup.link_border_right or ']]',
        omarkup.ruby_border_left or '[',
        omarkup.ruby_border_middle or '](',
        omarkup.ruby_border_right or ')')
end
--- Render a ruby table as wikitext with HTML ruby elements.
-- With the default delimiters a link is written `[[target|text]]` and a ruby
-- is written `<ruby>text<rp>(</rp><rt>reading</rt><rp>)</rp></ruby>`.
--
-- "options.markup": table of custom delimiters. Recognised keys:
--   link_border_left, link_border_middle, link_border_right,
--   ruby_border_left, ruby_border_middle, ruby_border_right.
--   Missing keys fall back to the defaults below.
-- "options.break_link = true": for a link that wraps several rubies, emit
--   the link inside each ruby instead of one link around all of them.
--
-- @param ruby_table table as produced by the parse functions of this module
-- @param options optional options table (see above)
-- @return the rendered string
function export.to_wiki(ruby_table, options)
    options = options or {}
    -- Declared local: it was previously assigned as a global, leaking the
    -- table into the shared environment.
    local omarkup = options.markup or {}
    return table_to_markup(
        ruby_table,
        options.break_link,
        omarkup.link_border_left or '[[',
        omarkup.link_border_middle or '|',
        omarkup.link_border_right or ']]',
        omarkup.ruby_border_left or '<ruby>',
        omarkup.ruby_border_middle or '<rp>(</rp><rt>',
        omarkup.ruby_border_right or '</rt><rp>)</rp></ruby>')
end
--- Parse the bracket markup (base text in square brackets immediately
-- followed by the reading in parentheses, optionally mixed with wiki links)
-- into a merged ruby/link table.
--
-- Two passes are made over the input:
--   1. every bracket/parenthesis pair is replaced by its base text while the
--      readings are collected in order; the result is parsed for links;
--   2. links are stripped from the original markup and the pairs are scanned
--      again to cut the plain text into ruby segments, taking the n-th
--      reading collected in pass 1 (so a link inside a reading survives).
-- A pair with an empty base text produces no segment but still consumes its
-- reading; an empty reading yields a segment without `ruby`.
--
-- NOTE(review): the `%b[]` delimiters and the `ruby[ruby_n]` index were
-- missing from this function; they were restored from the surrounding
-- logic — confirm against the upstream module.
--
-- @param markup string in bracket markup
-- @return merged table (see `table_merge`)
function export.parse_markup(markup)
    local ruby = {}
    -- Pass 1: drop the readings, keep the base text and the links.
    local text_with_links = markup:gsub('(%b[])(%b())', function(m1, m2)
        table.insert(ruby, m2:sub(2, -2))
        return m1:sub(2, -2)
    end)
    local link_table = str_parse_link(text_with_links)

    -- Pass 2: drop the links, keep the ruby markup.
    local plain_text = export.to_text(str_parse_link(markup))
    local ruby_table = {}
    local p0 = 1 -- first byte of plain_text not yet emitted
    local ruby_n = 1 -- index of the reading that belongs to the next pair
    local s_text, s_ruby
    plain_text:gsub('()(%b[])(%b())()', function(p1, m1, m2, p2)
        if p0 < p1 then
            s_text = plain_text:sub(p0, p1 - 1)
            table.insert(ruby_table, {text = s_text})
        end
        if #m1 > 2 then -- more than just the two brackets
            s_text = m1:sub(2, -2)
            s_ruby = ruby[ruby_n]
            table.insert(ruby_table, {
                text = s_text,
                ruby = s_ruby ~= '' and s_ruby or nil,
            })
        end
        p0 = p2
        ruby_n = ruby_n + 1
    end)
    if p0 <= #plain_text then
        s_text = plain_text:sub(p0)
        table.insert(ruby_table, {text = s_text})
    end
    return table_merge(link_table, ruby_table)
end
-- Parse a term and its kana reading into a merged ruby/link table.
--
-- Outline of what the code below does:
--   1. choose a space-handling function from "options.space";
--   2. parse the links of "term" (with every '%' removed) into a link table;
--   3. split the link-free term and the kana at '%' into segments and raise
--      an error when the segment counts differ;
--   4. for every segment build a pattern from the term, match it against the
--      kana and append the captures to the ruby table;
--   5. merge the link table and the ruby table and apply the space handling.
--
-- "options.try == nil": Raise an error when the initial match failed.
-- "options.try == 'return'": Return "nil, (error information)" when the initial match failed.
-- "options.try == 'force'": Try every possible pattern when the initial match failed.
-- "options.try_force_limit": Limit the time used by "options.try == 'force'".
-- "options.space == nil": Remove spaces between kana or kanji but preserve elsewhere.
-- "options.space == 'all'": Preserve all spaces.
-- "options.space == 'none'": Remove all spaces.
-- "options.allow_ruby_link == true": Try to match the links in the rubies.
--
-- NOTE(review): this function looks damaged. Many patterns are the empty
-- string or '^$', and many table accesses have no index (for example
-- "_r.text" where "_r" is a list, or "plain_kana:gsub" where "plain_kana" is
-- the result of mw.text.split). This is consistent with every bracketed span
-- having been stripped from the file. The missing pieces cannot all be
-- recovered from this file alone, so the code is left untouched here;
-- restore it from the upstream module before relying on it.
function export.parse_text(term, kana, options)
options = options or {}
local pat_kana = 'ぁ-ゖァ-ヶー' -- signs subject to hira-kata matching
-- NOTE(review): the range that starts at '𠀀-' below appears to have lost its upper bound — confirm.
local pat_kanji_probable = '々㐀-䶵一-鿌' .. mw.ustring.char(0xF900) .. "-" .. mw.ustring.char(0xFAD9) .. '𠀀-0-9A-Za-z〆〇' -- signs that can have ruby, but not spaces
local pat_rubiable_probable = '0-9a-zA-Zα-ωΑ-Ω' -- signs that can have both ruby and spaces
local pat_mute_probable = '%^%-%.゠・' -- signs that may appear in term, but not kana
-- _remove_space(table): post-processes the merged table according to "options.space".
local _remove_space
if options.space == 'none' then
-- Remove every space from the texts, link targets and rubies.
_remove_space = function(_r)
-- Step to the next (outer index, inner index) position of the walk.
local function _next(p1, p2)
if p2 and p2 < #_r.text then
return p1, p2 + 1
end
p1 = p1 + 1
if p1 > #_r then
p2 = nil
else
p2 = type(_r.text) ~= 'string' and 1 or nil
end
return p1, p2
end
local pos1, pos2 = _next(0, nil)
while pos1 <= #_r do
-- NOTE(review): "_t" is not declared local in this closure, so this assignment writes a global.
_t = pos2 and _r.text or _r
_t.text = _t.text:gsub(' ', '')
if _t.linkto then _t.linkto = _remove_space({{text = _t.linkto}}).text end
if _t.ruby then _t.ruby = _remove_space({{text = _t.ruby}}).text end
if pos2 then
if _r.linkto then _r.linkto = _remove_space({{text = _r.linkto}}).text end
if _r.ruby then _r.ruby = _remove_space({{text = _r.ruby}}).text end
end
pos1, pos2 = _next(pos1, pos2)
end
return _r
end
elseif options.space == 'all' then
-- Keep the table unchanged.
_remove_space = function(_r)
return _r
end
else
-- Default: remove spaces depending on the neighbouring characters.
-- "context_ak" / "context_bk" seed the "after_k" / "before_k" flags for the
-- recursive calls on link targets and rubies.
_remove_space = function(_r, context_ak, context_bk)
-- Step to the next (outer index, inner index) position of the walk.
local function _next(p1, p2)
if p2 and p2 < #_r.text then
return p1, p2 + 1
end
p1 = p1 + 1
if p1 > #_r then
p2 = nil
else
p2 = type(_r.text) ~= 'string' and 1 or nil
end
return p1, p2
end
local pos1, pos2 = _next(0, nil)
local pos3, pos4 = pos1, pos2
local after_k = context_ak
local before_k
local _t, char
while pos1 <= #_r do
-- Look ahead from the current position to refresh "before_k".
if pos3 == pos1 and (pos4 == pos2 or pos4 < pos2) or pos3 < pos1 then
before_k = context_bk
pos3, pos4 = _next(pos1, pos2)
while pos3 <= #_r do
_t = pos4 and _r.text or _r
char = mw.ustring.find(_t.text, '')
if char then
char = mw.ustring.sub(_t.text, char, char)
before_k = mw.ustring.find(char, '^$')
break
end
pos3, pos4 = _next(pos3, pos4)
end
end
_t = pos2 and _r.text or _r
if _t.linkto then _t.linkto = _remove_space({{text = _t.linkto}}, after_k, before_k).text end
if _t.ruby then _t.ruby = _remove_space({{text = _t.ruby}}, after_k, before_k).text end
if pos2 then
if _r.linkto then _r.linkto = _remove_space({{text = _r.linkto}}, after_k, before_k).text end
if _r.ruby then _r.ruby = _remove_space({{text = _r.ruby}}, after_k, before_k).text end
end
-- Rebuild the text from the matched runs and the gaps between them;
-- spaces are removed from a gap only when the condition below holds.
local seg = {}
local i0 = 1
mw.ustring.gsub(_t.text, '()(+)()', function(i1, m1, i2)
if after_k and not mw.ustring.sub(_t.text, i0, i1 - 1):find'' then
table.insert(seg, (mw.ustring.sub(_t.text, i0, i1 - 1):gsub(' ', '')))
else
table.insert(seg, mw.ustring.sub(_t.text, i0, i1 - 1))
end
table.insert(seg, m1)
after_k = true
i0 = i2
end)
after_k = after_k and not mw.ustring.sub(_t.text, i0):find''
if after_k and before_k then
table.insert(seg, (mw.ustring.sub(_t.text, i0):gsub(' ', '')))
else
table.insert(seg, mw.ustring.sub(_t.text, i0))
end
_t.text = table.concat(seg)
pos1, pos2 = _next(pos1, pos2)
end
return _r
end
end
-- Create the link table
-- e.g. "[[エドガー・アラン・ポー|アラン・ポー]]の[[推理 小説]]"
local link_table = str_parse_link(term:gsub('%%', '')) -- remove '%'
--[[link_table = {
{text = 'アラン・ポー', linkto = 'エドガー・アラン・ポー'},
{text = 'の'},
{text = '推理 小説', linkto = '推理 小説'},
}]]
-- Remove romaji markup
-- NOTE(review): the pattern below is empty; the trailing comment describes the intended character set.
kana = kana:gsub('', '') -- remove '^', '-', '.', preserve '%', ' '
-- Create the ruby table
-- e.g. 'アラン・ポーの推理 小説', 'あらん ぽー の すいり しょうせつ'
-- ("ぽお" is not allowed)
local ruby_table = {}
local plain_term_raw = export.to_text(str_parse_link(term)) -- Remove links: [[A|B]] -> B, [[C]] -> C
local plain_kana_raw = options.allow_ruby_link and kana or export.to_text(str_parse_link(kana))
-- '%' is the explicit segment separator in both strings.
local plain_term = mw.text.split(plain_term_raw, '%%')
local plain_kana = mw.text.split(plain_kana_raw, '%%')
-- The term has no separator but the kana does: try to insert separators
-- into the term automatically.
if #plain_term ~= #plain_kana and #plain_term == 1 then -- Try default separator
local n_seg = 1
plain_kana = plain_kana:gsub('%d*$', '')
plain_term = mw.text.split(mw.ustring.gsub(plain_term_raw, '()()', function(pos1, m1)
if mw.ustring.find(mw.ustring.sub(plain_term_raw, pos1 - 1, pos1 - 1), '^$') then
m1 = '%' .. m1
n_seg = n_seg + 1
end
if pos1 < mw.ustring.len(plain_term_raw) then
if n_seg < #plain_kana and plain_kana == '' then
table.remove(plain_kana, n_seg + 1)
else
n_seg = n_seg + 1
m1 = m1 .. '%'
end
end
return m1
end), '%%')
--require('Modül:debug').track('ja-ruby/default separator used')
end
if #plain_term ~= #plain_kana then
mw.logObject(plain_term)
mw.logObject(plain_kana)
error('Separator "%" in the kanji and kana strings do not match.')
end
-- Match every term segment against the kana.
for i, plain_term_i in ipairs(plain_term) do
-- pattern_ruby: capture groups of the pattern, in order.
-- pattern_ruby_is_ruby: presumably flags which groups capture a reading — TODO confirm.
local pattern_ruby, pattern_ruby_is_ruby = {}, {}
-- Append one capture group for the substring "s_sub": every character may
-- be preceded by an optional space, and an XML tag collapses to ' ?<.->'.
local function _func_pat(s_sub)
local in_xml_tag = false
table.insert(pattern_ruby, '(' .. mw.ustring.gsub(s_sub, '.', function(m0)
if in_xml_tag then
if m0 == '>' then in_xml_tag = false end
return ''
else
if m0 == '<' then
in_xml_tag = true
return ' ?<.->'
else
local m0_m = m0
if m0:find'^%^%$]$' then m0_m = '%' .. m0_m end
if mw.ustring.find(m0, '^$') then m0_m = '?'
elseif mw.ustring.find(m0, '^$') then
m0_m = ""
elseif mw.ustring.find(m0, '^$') then
m0_m = ""
end
return ' ?' .. m0_m
end
end
end) .. ' ?)')
end
-- Collapse every XML tag to '<>' before scanning.
local plain_term_noxml = plain_term_i:gsub('%b<>', '<>')
local pos0 = 1
mw.ustring.gsub(plain_term_noxml, '()+()', function(pos1, pos2)
if pos0 < pos1 then _func_pat(mw.ustring.sub(plain_term_noxml, pos0, pos1 - 1)) end
if not pattern_ruby_is_ruby then
table.insert(pattern_ruby, '(..-)')
pattern_ruby_is_ruby = true
end
pos0 = pos2
end)
plain_term_noxml = mw.ustring.sub(plain_term_noxml, pos0)
if #pattern_ruby == 0 and not mw.ustring.find(plain_term_noxml, '') then
-- if a "non-rubiable" and "non-kana" string is isolated by %, it matches anything.
table.insert(pattern_ruby, '(.-)')
pattern_ruby_is_ruby = true
else
if #plain_term_noxml > 0 then _func_pat(plain_term_noxml) end
end
local pat_ruby_s = table.concat(pattern_ruby)
-- 'アラン・ポーの推理 小説' to '( ? ? ? ?? ? ?ー ? ?)(..-)( )(..-)'
-- Execute matching
local ruby_table_i_ruby = {mw.ustring.match(plain_kana, '^'..pat_ruby_s..'$')}
if #ruby_table_i_ruby > 0 then
-- The same pattern is applied to the term so that both capture lists line up.
local ruby_table_i_text = {mw.ustring.match(plain_term_i, '^'..pat_ruby_s..'$')}
for n_match = 1, #pattern_ruby do
if pattern_ruby_is_ruby and ruby_table_i_text ~= ruby_table_i_ruby then
table.insert(ruby_table, {
text = ruby_table_i_text,
ruby = ruby_table_i_ruby,
})
else
if #ruby_table > 0 and ruby_table.ruby == nil then
ruby_table.text = ruby_table.text .. ruby_table_i_text
else
table.insert(ruby_table, {text = ruby_table_i_text})
end
end
end
elseif options.try == 'force' then
-- Fall back to the brute-force alignment of str_ucompare.
--require('Modül:debug').track('ja-ruby/forced match')
local forced_result = str_ucompare(plain_term_i, plain_kana, options.try_force_limit)
for ii, vv in ipairs(forced_result) do
table.insert(ruby_table, {
text = vv,
ruby = forced_result ~= vv and forced_result or nil,
})
end
elseif options.try == 'return' then
return nil, 'Can not match "' .. plain_term_i .. '" and "' .. plain_kana .. '".'
else
mw.log(pat_ruby_s)
error('Can not match "' .. plain_term_i .. '" and "' .. plain_kana .. '"')
end
end
--[[ruby_table = {
{text = 'アラン・ポーの'},
{text = '推理', ruby = 'すいり'},
{text = ' '}
{text = '小説', ruby = 'しょうせつ'},
}]]
return _remove_space(table_merge(link_table, ruby_table))
-- Merge the ruby and link table
--[[return {
{text = 'アラン・ポー', linkto = 'エドガー・アラン・ポー'},
{text = 'の'},
{text = {
{text = '推理', ruby = 'すいり'},
{text = ''}
{text = '小説', ruby = 'しょうせつ'},
}, linkto = '推理小説'},
}]]
end
--- Shortcut: parse the input and render it in one call.
-- `args.target` selects the output: 'text', 'ruby' or 'markup'; any other
-- value (including nil) renders wikitext.
-- The input is `args.term` together with `args.kana`, or else `args.markup`.
-- `args.options` is forwarded to both the parser and the renderer.
-- Raises an error when neither input form is present.
-- @param args table with the fields described above
-- @return the rendered string
function export.ruby_auto(args)
    local renderers = {
        text = export.to_text,
        ruby = export.to_ruby,
        markup = export.to_markup,
    }
    local to_target = renderers[args.target] or export.to_wiki

    local parsed
    if args.term and args.kana then
        parsed = export.parse_text(args.term, args.kana, args.options)
    elseif args.markup then
        parsed = export.parse_markup(args.markup, args.options)
    else
        error('Cannot find "term" and "kana" or "markup"')
    end
    return to_target(parsed, args.options)
end
return export