-- This implements Japanese headword-line templates and all of the associated templates that they call to do categorization and error checking.
local m_ja = require("Module:ja")
local m_ja_ruby = require("Module:ja-ruby")
local m_str_utils = require("Module:string utilities")
local byteoffset = mw.ustring.byteoffset
local concat = table.concat
local insert = table.insert
local kana_to_romaji = require("Module:Hrkt-translit").tr
local maxn = table.maxn or require("Module:table").maxIndex -- maxn is deprecated; maxIndex is not strictly equivalent, but equivalent enough here
local moraify = m_ja.moraify
local remove = table.remove
local ugmatch = mw.ustring.gmatch
local ugsub = m_str_utils.gsub
local ulen = m_str_utils.len
local ulower = m_str_utils.lower
local umatch = mw.ustring.match
local usub = m_str_utils.sub
local gsplit = m_str_utils.gsplit
-- Table of functions exposed by this module; it is returned at the end of the file.
local export = {}
-- Part-of-speech hook; export.show calls it with (args, data) when it is set.
local pos_functions = {}
-- Read-only data loaded from Module:ja/data/range.
local range = mw.loadData('Module:ja/data/range')
-- Script object for the code "Jpan"; format_headword passes it to lang:link_tr().
local Jpan = require("Module:scripts").getByCode("Jpan")
--- Strip wikilink markup from `text`, keeping only the displayed text.
-- `[[target|label]]` becomes `label`; `[[target]]` becomes `target`.
-- @param text string that may contain wikilinks
-- @return string the text with link markup removed (exactly one value)
local function remove_links(text)
	-- Remove "[[target|" as a unit so the target of a piped link does not leak
	-- into the result. The class excludes brackets and pipes so a match can
	-- never run across a link boundary.
	text = text:gsub("%[%[[^%[%]|]-|", "")
	-- Any remaining "[[" / "]]" belong to unpiped links or are the closing half
	-- of a piped link.
	text = text:gsub("%[%[", "")
	text = text:gsub("%]%]", "")
	return text
end
-- Try to split `head` and `kana` into parallel '%'-separated segments, using the
-- readings found in the `template_name` template calls in the wikitext of `pagename`.
-- Returns two strings. When the page has no content, or when no template call
-- produces a match, the inputs `head` and `kana` are returned unchanged.
-- Raises an error when a template call cannot be bracket-matched, or when the
-- decomposed head and kana end up with different numbers of segments.
-- NOTE(review): a number of index expressions and pattern character classes in
-- this function look truncated in this copy (e.g. `{ = { nil, 0}}`, `'()()'`,
-- `kanjitab.args or ''`, `readings = ...`); confirm against the upstream module
-- before changing any logic here.
local function assign_kana_to_kanji(head, kana, pagename, template_name)
local m_tu = require'Module:template utilities'
-- {first_byte, last_byte} ranges into `head`, appended by insert_kanji_pos below.
local kanji_pos = { = { nil, 0}}
-- Pieces of `head` outside link markup; joined into a single string further down.
local head_nolink = {}
-- Byte offset into `head` that is added to positions found inside a substring.
local link_border = 0
-- Record `substr` as link-free text and note the byte range of every match in it.
local function insert_kanji_pos(substr)
insert(head_nolink, substr)
for p1, w1 in ugmatch(substr, '()()') do
-- ugmatch yields a character position; convert it to a byte offset within `head`.
p1 = byteoffset(substr, p1) + link_border
insert(kanji_pos, { p1, p1 + w1:len() - 1 })
end
end
-- Walk the bracketed spans of `head`: handle the text before each span, then the
-- part of the span after its pipe (or after its first two characters when there
-- is no pipe, hence the default of 2), dropping the span's last two characters.
for p1, p2, w1 in m_tu.gfind_bracket(head, { = ']]'}) do
insert_kanji_pos(head:sub(link_border + 1, p1 - 1))
local p_pipe = w1:find'|' or 2
link_border = p1 + p_pipe - 1
insert_kanji_pos(w1:sub(p_pipe + 1, -3))
link_border = p2
end
-- Trailing text after the last bracketed span.
insert_kanji_pos(head:sub(link_border + 1))
head_nolink = concat(head_nolink)
local pagetext = mw.title.new(pagename):getContent()
-- No page text to read readings from: give the inputs back untouched.
if not pagetext then return head, kana end
-- Substrings of `head_nolink` lying between successive matches.
local non_kanji = {}
local last_kanji = 1
for p1 in ugmatch(head_nolink, '()') do
insert(non_kanji, usub(head_nolink, last_kanji, p1 - 2))
last_kanji = p1
end
insert(non_kanji, usub(head_nolink, last_kanji))
-- Try every call of the template on the page; the first that matches wins.
for kanjitab in pagetext:gmatch('(){{%s*' .. template_name) do
-- Third return value of find_bracket is used as the template text.
kanjitab = select(3, m_tu.find_bracket(pagetext, m_tu.brackets_temp, kanjitab))
if not kanjitab then error('ill-formed ] syntax') end
kanjitab = m_tu.parse_temp(kanjitab)
local readings = {}
local readings_len = {}
for i = 1, maxn(kanjitab.args) do
local r_i = kanjitab.args or ''
local r_o = kanjitab.args or ''
if kanjitab.args then
readings = kanjitab.args .. r_o
-- Trailing digits of r_i give a length; default is 1.
readings_len = tonumber(r_i:match'^%s*%D*(%d*)%s*$') or 1
else
-- Split r_i into a non-digit part and an optional trailing number.
local r_kana, r_len = r_i:match'^%s*(%D*)(%d*)%s*$'
readings = r_kana .. r_o
readings_len = tonumber(r_len) or 1
end
end
-- Build the expected kana by interleaving non-kanji text with readings;
-- reading_len counts down so that one reading can cover several positions.
local kana_decom = {}
local reading_id = 1
local reading_len = 1
for i = 1, #non_kanji - 1 do
if reading_len <= 1 then
reading_len = readings_len or 1
insert(kana_decom, non_kanji)
insert(kana_decom, readings)
reading_id = reading_id + 1
else
reading_len = reading_len - 1
end
end
insert(kana_decom, non_kanji)
local function strip_nonkana(str, repl)
return ugsub(str, '+', repl) or nil
end
-- Match the stripped `kana` against a pattern built from kana_decom in which the
-- stripped spans become lazy captures; the captures are collected in a table.
local xeno_reading = {strip_nonkana(kana, ''):match('^' .. strip_nonkana(concat(kana_decom), '(.-)') .. '$')}
if #xeno_reading > 0 then
-- Decompose `head` with the same counting scheme used for kana_decom above.
local head_decom = {}
reading_id = 1
reading_len = 1
for i = 1, #non_kanji - 1 do
if reading_len <= 1 then
reading_len = readings_len or 1
insert(head_decom, head:sub(kanji_pos + 1, kanji_pos - 1))
insert(head_decom, head:sub(kanji_pos, kanji_pos))
reading_id = reading_id + 1
else
reading_len = reading_len - 1
end
end
insert(head_decom, head:sub(kanji_pos + 1))
if #head_decom ~= #kana_decom then error('number of parameters in ] is incorrect') end
-- Substitute the captured readings back in, counting captures as they are used.
local n_xeno_reading = 0
for i = 1, #kana_decom, 2 do
kana_decom = ugsub(kana_decom, '+', function()
n_xeno_reading = n_xeno_reading + 1
if xeno_reading == '' then return nil
else return xeno_reading end
end)
end
return concat(head_decom, '%'), concat(kana_decom, '%')
end
end
-- No template call produced a match.
return head, kana
end
-- English grade labels; add_categories formats one of them into the
-- "<language> terms spelled with <grade> kanji" category name.
local en_grades = {
	"first grade",
	"second grade",
	"third grade",
	"fourth grade",
	"fifth grade",
	"sixth grade",
	"secondary school",
	"jinmeiyō",
	"hyōgai",
}
-- Lookup table consulted by add_transitivity and add_inflections; its values are
-- the codes those functions branch on ('tr', 'in', '1', '2', 'irr').
-- NOTE(review): the keys of every entry are missing in this copy (`='tr'`);
-- presumably they are alternative spellings of the parameter values — restore
-- them from the upstream module.
local aliases = {
='tr', ='tr',
='in', ='in', ='in',
='1', ='2', ='irr'
}
-- Prefix that the adverb hook puts in front of its option value; add_inflections
-- recognises the prefix and strips it again before splitting the options on ':'.
local adverbs_optional_tag = 'optionally '
-- Normalises an adverb option to the particle it stands for (と or に).
-- NOTE(review): the keys are missing in this copy — restore from upstream.
local adverbs_optional_aliases = {
='と', ='と', ='と',
='に', ='に', ='に',
}
-- Link text used for a normalised adverb option.
-- NOTE(review): keys are missing and the values look truncated (only `]` is left).
local adverbs_optional_links = {
=']',
=']',
}
-- Adjust the romanisation `rom` according to the part-of-speech category.
--   rom          romanised string
--   kana         kana it was derived from (only checked for a '^' marker)
--   pos_category part-of-speech category name
-- Returns the adjusted romanisation (each gsub's second result is dropped by
-- the single-variable assignment).
local function formatting_adjustments(rom, kana, pos_category)
-- hyphens for prefixes, suffixes, and counters (classifiers)
if pos_category == "prefixes" then
-- ensure exactly one trailing hyphen
rom = rom:gsub('%-?$', '-')
elseif pos_category == "suffixes" or pos_category == "suffix forms" or pos_category == "counters" or pos_category == "classifiers" then
-- ensure exactly one leading hyphen
rom = rom:gsub('^%-?', '-')
elseif pos_category == "proper nouns" and not kana:match'%^' then -- automatic caps for proper nouns, if not already specified
-- NOTE(review): `%f` is normally followed by a [set]; the pattern '%f%l' looks
-- truncated. `string.uupper` is not part of the standard Lua string library —
-- presumably supplied by the host environment; confirm.
rom = ugsub(ugsub(rom, '%f%l', string.uupper), "%w'%u", ulower) -- no caps after medial apostrophes
end
return rom
end
-- Romanise `kana` for the headword line, applying part-of-speech formatting.
-- Returns "-" for combining forms, punctuation marks and iteration marks;
-- otherwise the transliteration with link markup stripped, passed through
-- formatting_adjustments.
-- NOTE(review): `args == '1'` and the like compare the `args` value itself with
-- a string; format_headword passes its argument table here, so a field index
-- appears to be missing — confirm against upstream.
local function kana_to_romaji_with_pos_format(kana, data, args)
if data.headword.pos_category == "combining forms" or data.headword.pos_category == "punctuation marks" or data.headword.pos_category == "iteration marks" then
return "-"
end
local rom = remove_links(kana_to_romaji(kana, data.lang_code))
-- make adjustments for -u verbs and -i adjectives
-- (a word-final macron vowel is spelled out as two letters)
if args == '1' or args == '1s' or args == 'godan' then
rom = rom:gsub('ō$', 'ou'):gsub('ū$', 'uu')
elseif args == 'i' or args == 'is' or args == 'い' then
rom = rom:gsub('ī$', 'ii')
end
return formatting_adjustments(rom, kana, data.headword.pos_category)
end
-- Return a stateful iterator over `text` for use in a generic `for`.
-- Each call resumes matching at the position captured by the previous call and
-- returns the next capture; it returns nil when nothing more matches.
-- NOTE(review): both patterns ("(*)()" and "^$") look truncated in this copy;
-- the character classes that define "rare" are not visible here.
local function iterate_rare_chars(text)
-- ch: last capture; i: position to resume from (nil on the first call).
local ch, i
return function()
-- Skip captures that match the second pattern; stop on the first one that
-- does not, or when there is no further match.
repeat
ch, i = umatch(text, "(*)()", i)
until not (ch and umatch(ch, "^$"))
return ch
end
end
-- Add a historical-kana link to data.info_hist and add "terms historically
-- spelled with ..." categories to data.headword.categories.
--   data        headword state table (reads data.headword, data.lang_name, data.lang_code)
--   hist_kana   historical kana spelling
--   modern_kana modern spelling to compare against (may be nil)
-- Raises an error when the part of speech is "syllables", "kana" or "morae".
-- NOTE(review): the patterns passed to gsub/ugsub/match ("+", "^.?*$") and the
-- `rare_chars = true` assignments look truncated (a `rare_chars[...]` index
-- seems to be missing) — confirm against upstream.
local function historical_kana(data, hist_kana, modern_kana)
-- Disallow historical kana for kana and morae, as there's no one-to-one correspondence.
local pos = data.headword.pos_category
if pos == "syllables" or pos == "kana" or pos == "morae" then
error(("Cannot specify historical kana for %s."):format(pos))
end
local hist_kana_no_formatting = hist_kana:gsub("+", "")
local rare_chars, lang_name, hc = {}, data.lang_name, data.headword.categories
-- Characters yielded by iterate_rare_chars that do not occur in the modern spelling.
for ch in iterate_rare_chars(hist_kana_no_formatting) do
if not (modern_kana and modern_kana:find(ch)) then
rare_chars = true
end
end
-- Same check per mora of the historical spelling.
for _, mora in ipairs(moraify((ugsub(hist_kana_no_formatting, "+", " ")))) do
if not (mora:gsub(" +", ""):match("^.?*$") or (modern_kana and modern_kana:find(mora))) then
rare_chars = true
end
end
-- One category per collected key.
for ch in pairs(rare_chars) do
insert(hc, lang_name .. " terms historically spelled with " .. ch)
end
-- Linked historical spelling, transliterated with the {hist = true} option and
-- formatted like the main romanisation.
insert(data.info_hist, require("Module:ja-link").link({
lang = data.headword.lang,
lemma = hist_kana,
tr = formatting_adjustments(
remove_links(kana_to_romaji(hist_kana, data.lang_code, nil, {hist = true})),
hist_kana,
pos
)
}, {
face = "head",
disableSelfLink = true,
}))
end
-- Classify data.pagename by script.
-- Returns 'hira', 'kata' or 'both' for the first test that leaves an empty
-- string after substitution, and nil when none does.
-- `digraphs` is accepted but not referenced in the visible body.
-- NOTE(review): the three ugsub patterns and the pattern inside remove_kana are
-- empty strings in this copy, so the character classes that distinguish the
-- branches are not visible — confirm against upstream.
local function detect_pagename_kana(data, digraphs)
local pagename = data.pagename
-- Exclude "&" and "@", which are part of %p (e.g. リズム&ブルース).
local function remove_kana(m)
return m:match("") or ""
end
if ugsub(pagename, '', remove_kana) == "" then
return 'hira'
elseif ugsub(pagename, '', remove_kana) == "" then
return 'kata'
elseif ugsub(pagename, '', remove_kana) == "" then
return 'both'
end
end
-- Go through args and build the headword forms from whatever kana were given.
--
-- Fills in, on `data`:
--   * data.pagename_kana            result of detect_pagename_kana
--   * data.headword.heads           one entry per head (term, tr, optional label)
--   * data.inflection_base          .form / .romaji, set from the first reading
--   * data.kanas                    bare kana readings (non-kana-title branch)
--   * data.info_hist                via historical_kana when args.hist is set
--   * data.headword.categories      "terms with redundant head parameter",
--                                   "terms with multiple readings"
--
--   args  processed template arguments
--   data  headword state table built by export.show
-- Returns nothing. Raises "Kana form is required." for a non-kana title without
-- readings, unless the part of speech is punctuation marks, iteration marks or symbols.
--
-- NOTE(review): many index expressions, wikilink strings and pattern character
-- classes in this function look truncated in this copy (e.g. `args:match''`,
-- `args.rom or args.rom.default`, `"]" .. rom_suru_ending`). Those tokens are
-- left exactly as found; confirm them against the upstream module.
local function format_headword(args, data)
	local pagename, kanas, lang_name = data.pagename, data.kanas, data.lang_name
	-- Declared up front: the reading comparison near the end needs it both
	-- before and inside the page scan. Previously the first two uses ran before
	-- the declaration and so read an undeclared (nil) global instead.
	local lang_code = data.lang_code
	data.pagename_kana = detect_pagename_kana(data)
	if args and not args:match'' then
		-- filter out POS designations
		remove(args, 1)
	end
	local linked_translit = data.headword.lang:link_tr(Jpan)
	-- Suffixes appended to the term and to the romanisation for suru verbs.
	local suru_ending, rom_suru_ending
	if data.headword.pos_category == "suru verbs" then
		suru_ending = "]"
		rom_suru_ending = linked_translit and " ]" or " suru"
	else
		suru_ending, rom_suru_ending = "", ""
	end
	if data.pagename_kana then -- pure-kana-title entry
		if #args.head > 0 or args.head.default then
			insert(data.headword.categories, lang_name .. " terms with redundant head parameter")
		end
		-- {{ja-xxx}} vs {{ja-xxx|こ.うし}} vs {{ja-xxx|コウシ}} in ]
		if not args then
			args = pagename
		elseif remove_links(args:gsub("+", "")) ~= pagename then
			insert(args, 1, pagename)
		end
		for i, k in ipairs(args) do
			insert(data.headword.heads, {
				term = k:gsub("+", "") .. suru_ending,
				tr = '-',
				l = args.label and {args.label} or nil,
			})
		end
		-- Attach a romanisation to each head; at least one pass is always made.
		for i = 1, math.max(args.rom.maxindex, 1) do
			local rom = args.rom or args.rom.default or kana_to_romaji_with_pos_format(args, data, args)
			if not data.headword.heads then
				data.headword.heads = {term = data.headword.heads.term}
			end
			if rom == "-" then
				data.headword.heads.tr = "-"
			elseif linked_translit then
				data.headword.heads.tr = "]" .. rom_suru_ending
			else
				data.headword.heads.tr = rom .. rom_suru_ending
			end
			-- Only the first reading becomes the base for inflected forms.
			if not data.inflection_base.form then
				data.inflection_base.form = remove_links(args:gsub("+", "")) .. suru_ending
				data.inflection_base.romaji = rom .. rom_suru_ending
			end
		end
		kanas = pagename
		if args.hist then
			historical_kana(data, args.hist, args)
		end
	else -- non-pure-kana-title entry
		if #args == 0 and not (data.headword.pos_category == "punctuation marks" or data.headword.pos_category == "iteration marks" or data.headword.pos_category == "symbols") then
			error("Kana form is required.")
		end
		if args.head.default == pagename then
			insert(data.headword.categories, lang_name .. " terms with redundant head parameter")
		end
		local rom_repetition_final = {}
		for i, k in ipairs(args) do
			local rom_auto = kana_to_romaji_with_pos_format(k, data, args)
			local head = args.head or args.head.default or pagename
			if args.head == pagename then
				insert(data.headword.categories, lang_name .. " terms with redundant head parameter")
			end
			-- Unless the user already placed '%' separators, try to align the
			-- kana with the kanji using the page's kanjitab template.
			local head_for_ruby, kana_for_ruby
			if ulen(head) > 1 and head:match'%%' == nil and k:match'%%' == nil then
				head_for_ruby, kana_for_ruby = assign_kana_to_kanji(head, k, pagename, data.lang_code .. '%-kanjitab')
			else
				head_for_ruby, kana_for_ruby = head, k
			end
			local format_table = m_ja_ruby.parse_text(head_for_ruby, kana_for_ruby, {
				try = 'force',
				try_force_limit = 10000
			})
			local kana_bare = remove_links(k:gsub("+", ""))
			local rom = args.rom or args.rom.default or rom_auto
			head = {
				term = m_ja_ruby.to_wiki(format_table, {
					break_link = true,
				}):gsub('<rt>(..-)</rt>', "<rt>]</rt>") .. suru_ending,
				l = args.label and {args.label} or nil,
			}
			if rom == "-" or rom_repetition_final then
				head.tr = "-"
			elseif linked_translit then
				head.tr = "]" .. rom_suru_ending
			else
				head.tr = rom .. rom_suru_ending
			end
			insert(data.headword.heads, head)
			rom_repetition_final = true
			insert(kanas, kana_bare)
			if args.hist then
				historical_kana(data, args.hist, k)
			end
			-- Only the first reading becomes the base for inflected forms.
			if not data.inflection_base.form then
				data.inflection_base.form = remove_links(m_ja_ruby.to_markup(format_table)) .. suru_ending
				data.inflection_base.romaji = rom .. rom_suru_ending
			end
		end
		-- Decide whether the term has more than one distinct reading, comparing
		-- lower-cased romanisations with '%' separators removed.
		local first_reading, multiple = kanas
		if not first_reading then
			return
		end
		first_reading = ulower(kana_to_romaji(first_reading, lang_code)):gsub("%%", "")
		for i = 2, #kanas do
			if ulower(kana_to_romaji(kanas, lang_code)):gsub("%%", "") ~= first_reading then
				multiple = true
				break
			end
		end
		if not multiple then
			-- Nothing differed within this call: scan this language's section of
			-- the current page for other headword templates with another reading.
			local content = mw.title.getCurrentTitle():getContent()
			local loc1, loc2 = content:find("%f==%s*" .. lang_name:gsub("%-", "%%%-") .. "%s*==()")
			loc2 = content:find("%f==+==", loc2)
			if loc1 then
				content = content:sub(loc1, loc2)
				for template in require("Module:template parser").find_templates(content) do
					local name, reading = template:get_name()
					if (
						name == lang_code .. "-head" or
						name == lang_code .. "-pos"
					) then
						reading = template:get_arguments()
						if reading ~= nil then
							reading = remove_links(reading):gsub("%%", "")
						end
					elseif (
						name == lang_code .. "-noun" or
						name == lang_code .. "-verb" or
						name == lang_code .. "-adj" or
						name == lang_code .. "-phrase" or
						name == lang_code .. "-verb form" or
						name == lang_code .. "-verb-suru"
					) then
						reading = template:get_arguments()
						if reading ~= nil then
							reading = remove_links(reading):gsub("%%", "")
						end
					elseif name == lang_code .. "-see" then
						reading = template:get_arguments()
						if reading ~= nil then
							reading = remove_links(reading):gsub("%%", "")
						end
						-- if umatch(reading, "") then
						-- TODO: check linked page
						-- end
					end
					if reading and ulower(kana_to_romaji(reading, lang_code)):gsub("%%", "") ~= first_reading then
						multiple = true
					end
				end
			end
		end
		if multiple then
			insert(data.headword.categories, lang_name .. " terms with multiple readings")
		end
	end
end
-- Record the transitivity of a verb.
--   data  headword state table; appends a label to data.info_mid and categories
--         to data.headword.categories (names are prefixed with data.lang_name)
--   tr    transitivity value; spellings listed in `aliases` are normalised
--         first. Recognised codes are "tr", "in" and "both"; anything else
--         (including nil) is categorised as "verbs without transitivity".
-- Returns nothing.
local function add_transitivity(data, tr)
	-- Look the value up in the alias table and fall back to the value itself.
	-- (`aliases or tr` would always yield the table, so no branch below could
	-- ever match.) Reading aliases[nil] is safe and yields nil.
	tr = aliases[tr] or tr
	local categories = data.headword.categories
	local lang_name = data.lang_name
	if tr == "tr" then
		insert(data.info_mid, 'transitive')
		insert(categories, lang_name .. " transitive verbs")
	elseif tr == "in" then
		insert(data.info_mid, 'intransitive')
		insert(categories, lang_name .. " intransitive verbs")
	elseif tr == "both" then
		insert(data.info_mid, 'transitive or intransitive')
		insert(categories, lang_name .. " transitive verbs")
		insert(categories, lang_name .. " intransitive verbs")
	else
		insert(categories, lang_name .. " verbs without transitivity")
	end
end
-- Romanise the last mora of `lemma`.
-- The lemma markup is parsed and converted with Module:ja-ruby, split with
-- moraify, and the final element of that list is transliterated using
-- data.lang_code.
local function get_final(lemma, data)
	-- remove() with no index pops the last element of the mora list.
	local last_mora = remove(moraify(m_ja_ruby.to_ruby(m_ja_ruby.parse_markup(lemma))))
	return kana_to_romaji(last_mora, data.lang_code)
end
-- Add the inflection-type label, the principal inflected forms and the related
-- categories for a headword.
--   data            headword state table; reads data.inflection_base.form/.romaji,
--                   data.lang_code, data.lang_name; appends to data.info_mid,
--                   data.headword.inflections and data.headword.categories
--   inflection_type inflection code (e.g. '1', '2', 'suru', 'i', 'na', ...), an
--                   alias listed in `aliases`, or adverbs_optional_tag followed
--                   by ':'-separated adverb options; may be nil
--   cat_suffix      plural part-of-speech name used in category names
--                   ('verbs', 'adjectives', 'adverbs'); nil suppresses categories
-- Returns nothing. Only data.lang_code == 'ja' is handled.
-- Raises an error for an unrecognised adverb option.
--
-- Changes from the previous revision:
--   * the alias tables are indexed by the value being normalised (they were
--     previously used as whole tables, so no branch could match);
--   * the adverb-option temporaries are local instead of leaking as globals;
--   * the redundant re-declarations of `romaji` are gone;
--   * the repeated failure-tracking call is factored into track_failure().
--
-- NOTE(review): several other tokens look truncated in this copy and are left
-- exactly as found: the pattern fragments in replace_suffix ('(*)', "%%]$",
-- `" .. "]]`), the "ru$" tests that are identical in both branches, the `']'`
-- link strings, and the `inflected_forms` arguments to insert_form (which pass
-- the whole list where single elements appear to be intended). Confirm against
-- the upstream module.
local function add_inflections(data, inflection_type, cat_suffix)
	local lemma = data.inflection_base.form
	local romaji = data.inflection_base.romaji
	inflection_type = aliases[inflection_type] or inflection_type

	-- Record that no suffix rule matched the lemma.
	local function track_failure()
		require'Module:debug'.track'Jpan-headword/inflection failed/ja'
	end

	-- Build inflected lemma/romaji pairs by replacing the ending of the lemma.
	-- Returns a list with one {lemma=..., romaji=...} entry per element of
	-- lemma_to, or nil as soon as one replacement does not apply.
	local function replace_suffix(lemma_from, lemma_to, romaji_from, romaji_to)
		-- e.g. 持って来る, lemma = "(も)って来(く)る"
		-- lemma_from = "くる", lemma_to = {"き","きた"}
		local result = {}
		local pattern_from, n_from = lemma_from:gsub('.*', function(c)
			return '(*)'
		end)
		pattern_from = pattern_from .. '$'
		-- "(*)(*)$"
		for i_lemma_to, s_lemma_to in ipairs(lemma_to) do
			local n_to = 0
			local pattern_to = s_lemma_to:gsub('.*', function(c)
				if n_to < n_from then
					n_to = n_to + 1
					return c .. "%" .. n_to
				else
					return c
				end
			end)
			-- Keep any captures the replacement did not consume.
			for i = n_to + 1, n_from do
				pattern_to = pattern_to .. "%" .. i
			end
			-- "き%1%2", "き%1た%2"
			local lemma_inflected, success = ugsub(lemma, pattern_from, pattern_to)
			if success == 0 then
				return
			end
			local romaji_inflected
			romaji_inflected, success = romaji:gsub(romaji_from .. "$", romaji_to)
			if success == 0 then
				romaji_inflected, success = romaji:gsub("%%]$", " .. "]]")
				if success == 0 then
					return
				end
			end
			insert(result, {lemma = lemma_inflected, romaji = romaji_inflected})
		end
		return result -- {{lemma="(も)って来(き)",romaji="motteki"},{lemma="(も)って来(き)た",romaji="mottekita"}}
	end

	-- Append one labelled group of forms to data.headword.inflections.
	local function insert_form(label, ...)
		-- label = "stem" or "past" etc.
		-- ... = {lemma=...,romaji=...},{lemma=...,romaji=...}
		local labeled_forms = {label = label}
		for _, v in ipairs{...} do
			local table_form = m_ja_ruby.parse_markup(v.lemma)
			local form_term = m_ja_ruby.to_wiki(table_form)
			if not form_term:find'%%]' then
				form_term = ']'
			end
			insert(labeled_forms, {
				term = form_term,
				translit = v.romaji,
			})
		end
		insert(data.headword.inflections, labeled_forms)
	end

	local inflected_forms
	if data.lang_code == 'ja' then
		if inflection_type == '1' or inflection_type == '1s' then
			insert(data.info_mid, '<abbr title="godan (group 1) conjugation">godan</abbr>')
			if cat_suffix then
				insert(data.headword.categories, data.lang_name .. " godan " .. cat_suffix)
				if cat_suffix == "verbs" then
					local final = get_final(lemma, data)
					insert(data.headword.categories, data.lang_name .. " godan " .. cat_suffix .. " ending with -" .. final)
					if final == "ru" then
						if umatch(romaji, "ru$") then
							insert(data.headword.categories, data.lang_name .. " godan " .. cat_suffix .. " ending with -iru")
						elseif umatch(romaji, "ru$") then
							insert(data.headword.categories, data.lang_name .. " godan " .. cat_suffix .. " ending with -eru")
						end
					end
				end
			end
			if inflection_type == '1' then
				-- Regular godan: the first ending that matches the lemma wins.
				inflected_forms =
					replace_suffix('く', {'き', 'いた'}, 'ku', {'ki', 'ita'}) or
					replace_suffix('ぐ', {'ぎ', 'いだ'}, 'gu', {'gi', 'ida'}) or
					replace_suffix('す', {'し', 'した'}, 'su', {'shi', 'shita'}) or
					replace_suffix('つ', {'ち', 'った'}, 'tsu', {'chi', 'tta'}) or
					replace_suffix('ぬ', {'に', 'んだ'}, 'nu', {'ni', 'nda'}) or
					replace_suffix('ぶ', {'び', 'んだ'}, 'bu', {'bi', 'nda'}) or
					replace_suffix('む', {'み', 'んだ'}, 'mu', {'mi', 'nda'}) or
					replace_suffix('る', {'り', 'った'}, 'ru', {'ri', 'tta'}) or
					replace_suffix('う', {'い', 'った'}, 'u', {'i', 'tta'})
				if inflected_forms then
					insert_form('stem', inflected_forms)
					insert_form('past', inflected_forms)
				else
					track_failure()
				end
			else
				-- '1s': godan verbs with special forms.
				inflected_forms =
					replace_suffix('る', {'り', 'った', 'い'}, 'ru', {'ri', 'tta', 'i'}) or --くださる
					replace_suffix('いく', {'いき', 'いった'}, 'iku', {'iki', 'itta'}) or --行く
					replace_suffix('う', {'い', 'うた'}, 'ou', {'oi', 'ōta'}) --問う
				if inflected_forms then
					insert_form('stem', inflected_forms, inflected_forms)
					insert_form('past', inflected_forms)
				else
					track_failure()
				end
			end
		elseif inflection_type == '2' then
			insert(data.info_mid, '<abbr title="ichidan (group 2) conjugation">ichidan</abbr>')
			if cat_suffix then
				insert(data.headword.categories, data.lang_name .. " ichidan " .. cat_suffix)
				if umatch(romaji, "ru$") then
					insert(data.headword.categories, data.lang_name .. " kami ichidan " .. cat_suffix)
				elseif umatch(romaji, "ru$") then
					insert(data.headword.categories, data.lang_name .. " shimo ichidan " .. cat_suffix)
				else
					insert(data.headword.categories, data.lang_name .. " irregular " .. cat_suffix)
				end
			end
			inflected_forms = replace_suffix('る', {'', 'た'}, 'ru', {'', 'ta'})
			if inflected_forms then
				insert_form('stem', inflected_forms)
				insert_form('past', inflected_forms)
			else
				track_failure()
			end
		elseif inflection_type == 'suru' then
			insert(data.info_mid, '<abbr title="suru (group 3) conjugation">suru</abbr>')
			if cat_suffix then
				insert(data.headword.categories, data.lang_name .. " suru " .. cat_suffix)
			end
			inflected_forms =
				replace_suffix('する', {'し', 'した'}, 'suru', {'shi', 'shita'}) or
				replace_suffix('ずる', {'じ', 'じた'}, 'zuru', {'ji', 'jita'})
			if inflected_forms then
				insert_form('stem', inflected_forms)
				insert_form('past', inflected_forms)
			else
				track_failure()
			end
		elseif inflection_type == 'kuru' then
			insert(data.info_mid, '<abbr title="kuru (group 3) conjugation">kuru</abbr>')
			if cat_suffix then
				insert(data.headword.categories, data.lang_name .. " kuru " .. cat_suffix)
			end
			inflected_forms = replace_suffix('くる', {'き', 'きた'}, 'kuru', {'ki', 'kita'})
			if inflected_forms then
				insert_form('stem', inflected_forms)
				insert_form('past', inflected_forms)
			else
				track_failure()
			end
		elseif inflection_type == 'i' or inflection_type == 'い' then
			insert(data.info_mid, '<abbr title="-i (type I) inflection">-i</abbr>')
			if cat_suffix then
				insert(data.headword.categories, data.lang_name .. " い-i " .. cat_suffix)
			end
			inflected_forms = replace_suffix('い', {'く'}, 'i', {'ku'})
			if inflected_forms then
				insert_form('adverbial', inflected_forms)
			else
				track_failure()
			end
		elseif inflection_type == 'is' then
			insert(data.info_mid, '<abbr title="-i (type I) inflection">-i</abbr>')
			if cat_suffix then
				insert(data.headword.categories, data.lang_name .. " い-i " .. cat_suffix)
			end
			inflected_forms = replace_suffix('いい', {'よく'}, 'ii', {'yoku'})
			if inflected_forms then
				insert_form('adverbial', inflected_forms)
			else
				track_failure()
			end
		elseif inflection_type == 'na' or inflection_type == 'な' then
			insert(data.info_mid, '<abbr title="-na (type II) inflection">-na</abbr>')
			if cat_suffix then
				insert(data.headword.categories, data.lang_name .. " な-na " .. cat_suffix)
			end
			inflected_forms = replace_suffix('', {']', ']'}, '', {' ]', ' ]'})
			insert_form('adnominal', inflected_forms)
			insert_form('adverbial', inflected_forms)
		elseif inflection_type == "yo" then
			insert(data.info_mid, '<abbr title="yodan conjugation (classical)"><sup><small>†</small></sup>yodan</abbr>')
			if cat_suffix then
				insert(data.headword.categories, data.lang_name .. " yodan " .. cat_suffix)
				insert(data.headword.categories, data.lang_name .. " yodan " .. cat_suffix .. " ending with -" .. get_final(lemma, data))
			end
		elseif inflection_type == "kami ni" then
			insert(data.info_mid, '<abbr title="kami nidan conjugation (classical)"><sup><small>†</small></sup>nidan</abbr>')
			if cat_suffix then
				insert(data.headword.categories, data.lang_name .. " nidan " .. cat_suffix)
				insert(data.headword.categories, data.lang_name .. " kami nidan " .. cat_suffix)
			end
		elseif inflection_type == "shimo ni" then
			insert(data.info_mid, '<abbr title="shimo nidan conjugation (classical)"><sup><small>†</small></sup>nidan</abbr>')
			if cat_suffix then
				insert(data.headword.categories, data.lang_name .. " nidan " .. cat_suffix)
				insert(data.headword.categories, data.lang_name .. " shimo nidan " .. cat_suffix)
			end
		elseif inflection_type == "rahen" then
			insert(data.info_mid, '<abbr title="r-special conjugation (classical)"><sup><small>†</small></sup>-ri</abbr>')
		elseif inflection_type == "sahen" then
			insert(data.info_mid, '<abbr title="s-special conjugation (classical)"><sup><small>†</small></sup>-se</abbr>')
		elseif inflection_type == "kahen" then
			insert(data.info_mid, '<abbr title="k-special conjugation (classical)"><sup><small>†</small></sup>-ko</abbr>')
		elseif inflection_type == "nahen" then
			insert(data.info_mid, '<abbr title="n-special conjugation (classical)"><sup><small>†</small></sup>-n</abbr>')
		elseif inflection_type == "nari" or inflection_type == "なり" then
			insert(data.info_mid, '<abbr title="-nari inflection (classical)"><sup><small>†</small></sup>-nari</abbr>')
			if cat_suffix then
				insert(data.headword.categories, data.lang_name .. " なり-nari " .. cat_suffix)
			end
		elseif inflection_type == 'tari' or inflection_type == 'たり' then
			insert(data.info_mid, '<abbr title="-tari inflection (classical)"><sup><small>†</small></sup>-tari</abbr>')
			if cat_suffix then
				insert(data.headword.categories, data.lang_name .. " たり-tari " .. cat_suffix)
			end
			inflected_forms = replace_suffix('', {']', ']', ']', ']'}, '', {' ]', ' ]', ' ]', ' ]'})
			insert_form('adnominal', inflected_forms, inflected_forms)
			insert_form('adverbial', inflected_forms, inflected_forms)
		elseif inflection_type == "ku" or inflection_type == "く" then
			insert(data.info_mid, '<abbr title="-ku inflection (classical)"><sup><small>†</small></sup>-ku</abbr>')
			if cat_suffix then
				insert(data.headword.categories, data.lang_name .. " く-ku " .. cat_suffix)
			end
		elseif inflection_type == "shiku" or inflection_type == "しく" then
			insert(data.info_mid, '<abbr title="-shiku inflection (classical)"><sup><small>†</small></sup>-shiku</abbr>')
			if cat_suffix then
				insert(data.headword.categories, data.lang_name .. " しく-shiku " .. cat_suffix)
			end
		elseif inflection_type == "ka" or inflection_type == "か" then
			insert(data.info_mid, '<abbr title="-ka inflection (dialectal)"><sup><small>†</small></sup>-ka</abbr>')
			if cat_suffix then
				insert(data.headword.categories, data.lang_name .. " か-ka " .. cat_suffix)
			end
		elseif inflection_type and inflection_type:len() > adverbs_optional_tag:len() and inflection_type:sub(1, adverbs_optional_tag:len()) == adverbs_optional_tag then
			-- Adverb options: strip the tag, then handle each ':'-separated option.
			local adverbs_optional_list = inflection_type:sub(adverbs_optional_tag:len() + 1)
			for option in gsplit(adverbs_optional_list, ':') do
				local normalized_option = adverbs_optional_aliases[option]
				if not normalized_option then
					error('unrecognized adverb opt= argument: "' .. option .. '"')
				end
				local normalized_option_romaji = kana_to_romaji(normalized_option, data.lang_code)
				local normalized_option_link = adverbs_optional_links[normalized_option]
				inflected_forms = replace_suffix('', {normalized_option_link}, '', {' ]'})
				insert_form('optionally as', inflected_forms)
				if cat_suffix then
					insert(data.headword.categories, data.lang_name .. " " .. cat_suffix .. " optionally taking " .. normalized_option .. "-" .. normalized_option_romaji)
				end
			end
		elseif inflection_type == 'irr' then
			insert(data.info_mid, 'irregular')
			if cat_suffix then
				insert(data.headword.categories, data.lang_name .. " irregular " .. cat_suffix)
			end
		elseif inflection_type == '-' or inflection_type == 'un' then
			insert(data.info_mid, 'uninflectable')
		end
	--elseif data.lang_code == 'ryu' then ...
	end
end
-- Add categories derived from the spelling of data.pagename.
--   data  reads data.lang_name, data.pagename, data.headword.pos_category;
--         appends to data.headword.categories and data.katakana_category
-- Returns nothing.
-- NOTE(review): the pattern arguments ("", '+', "^.?*$"), the `en_grades`
-- argument to format and the `rare_chars = true` assignments look truncated in
-- this copy (character classes and table indices seem to be missing) — confirm
-- against the upstream module before changing logic here.
local function add_categories(data)
local lang_name = data.lang_name
local pagename = data.pagename
-- Shorthand for the category list being filled.
local tc = data.headword.categories
-- adds category terms spelled with jōyō kanji or terms spelled with non-jōyō kanji
-- (if it contains any kanji)
local number_of_kanji = 0
for c in ugmatch(pagename, "") do
number_of_kanji = number_of_kanji + 1
if c ~= "々" and c ~= "〻" then -- Not a kanji for the purposes of categorisation.
insert(tc, (lang_name .. " terms spelled with %s kanji"):format(en_grades))
end
end
-- categorize by number of kanji
if number_of_kanji ~= 0 then
insert(tc, (lang_name .. " terms with %s kanji"):format(number_of_kanji))
-- single-kanji terms
if ulen(pagename) == 1 then
insert(tc, lang_name .. " terms spelled with " .. pagename)
insert(tc, lang_name .. " single-kanji terms")
end
end
-- categorize by the script of the pagename or specific characters contained in it
-- if pagename is hiragana or katakana
-- (katakana goes to a separate list, which export.show formats on its own)
if detect_pagename_kana(data, true) == 'hira' then insert(tc, lang_name .. " hiragana") end
if detect_pagename_kana(data, true) == 'kata' then insert(data.katakana_category, lang_name .. " katakana") end
-- p: pagename after substitution; n: number of substitutions made.
local p, n = ugsub(pagename, '+', '')
if p ~= '' and n > 0 then insert(tc, lang_name .. " terms written in multiple scripts") end
local pos = data.headword.pos_category
local rare_chars = {}
for ch in iterate_rare_chars(pagename) do
rare_chars = true
end
-- Categorise yōon, but exclude kana and mora entries, since they can't be spelled with themselves.
-- FIXME: allow kana categories for morae.
if not (pos == "syllables" or pos == "kana" or pos == "morae") then
for _, mora in ipairs(moraify((ugsub(pagename, "+", " ")))) do
if not mora:gsub(" +", ""):match("^.?*$") then
rare_chars = true
end
end
end
-- One category per collected key.
for ch in pairs(rare_chars) do
insert(tc, lang_name .. " terms spelled with " .. ch)
end
-- Mixed-kana check, skipped for proverbs and phrases.
if (
pos ~= "proverbs" and
pos ~= "phrases" and
umatch(ugsub(pagename, "+", ""), "") and
umatch(ugsub(pagename, "+", ""), "")
) then
insert(tc, lang_name .. " terms spelled with mixed kana")
end
end
-- Part-of-speech-specific hooks. Each takes (args, data) and adds inflection or
-- category information for one kind of headword.
-- NOTE(review): every assignment below targets the bare `pos_functions`
-- variable, so as written each one replaces the previous one; the per-POS table
-- keys appear to be missing in this copy. Likewise `args` is passed whole where
-- a single parameter value seems to be intended. Confirm against upstream.

-- Transitivity plus inflections, categorised under 'verbs'.
pos_functions = function(args, data)
add_transitivity(data, args)
add_inflections(data, args, 'verbs')
end
-- Inflections only, with no category suffix.
pos_functions = function(args, data)
add_inflections(data, args)
end
-- Auxiliary verbs: extra category, then filed under "verbs".
pos_functions = function(args, data)
insert(data.headword.categories, data.lang_name .. " auxiliary verbs")
add_inflections(data, args)
data.headword.pos_category = "verbs"
end
-- Fixed 'suru' inflection type; filed under "verbs" afterwards.
pos_functions = function(args, data)
add_transitivity(data, args)
add_inflections(data, 'suru', 'verbs')
data.headword.pos_category = "verbs"
end
-- Inflections categorised under 'adjectives'.
pos_functions = function(args, data)
add_inflections(data, args, 'adjectives')
end
-- Counter handling: "-" marks the term uncountable, any other non-empty value
-- is shown as its counter.
pos_functions = function(args, data)
-- the counter (classifier) parameter, only relevant for nouns
local counter = args or ""
if counter == "-" then
insert(data.headword.inflections, {label = "uncountable"})
elseif counter ~= "" then
insert(data.headword.inflections, {label = "counter", counter})
end
end
-- Adverbs: prefix the option value with adverbs_optional_tag so that
-- add_inflections takes its adverb-option branch.
pos_functions = function(args, data)
local opt = args
if opt then
opt = adverbs_optional_tag .. opt
end
add_inflections(data, opt, 'adverbs')
end
--[==[
Generate the categories that follow from the page name and, when a part of
speech is supplied, from the part of speech as well.
Also intended for use on soft redirect pages.
No sortkey is provided.

Fields of `data`:
	pagename = ..., -- (required)
	lang = ..., -- (required) language object
	categories = {}, -- (required) list that receives the categories
	katakana_category = {}, -- (required) list that receives katakana-sorted categories
	pos = ..., -- "noun", "verb", etc.; no POS categories are added when absent

Side effects on `data`: sets `lang_name`, `pagename_kana` and `headword`
(a table whose `categories` field is the same list as `data.categories`).
]==]
function export.cat(data)
	local lang_name = data.lang:getCanonicalName()
	data.lang_name = lang_name
	data.pagename_kana = detect_pagename_kana(data)

	local singular_pos = data.pos
	if singular_pos then
		-- Pluralise the POS name: a final "x" takes "es", everything else "s".
		local pos = singular_pos:gsub('x$', 'xe') .. 's'
		local lemma_kind = require'Module:headword'.pos_lemma_or_nonlemma(pos, true)
		insert(data.categories, lang_name .. ' ' .. pos)
		insert(data.categories, lang_name .. ' ' .. lemma_kind .. 's')
	end

	-- add_categories writes to data.headword.categories, so alias the caller's list.
	data.headword = { categories = data.categories }
	add_categories(data)
end
--[==[
The main entry point.
This is the only function that can be invoked from a template.

Takes the Scribunto `frame`, processes the parent frame's arguments, builds the
headword data, and returns the wikitext/HTML of the headword line (prefixed with
any katakana-sorted categories).
Raises an error when the part of speech is not passed to the module invocation.

NOTE(review): the keys of the `params` table and the indices on `frame.args`
(for the part of speech and the language code) are missing in this copy;
confirm them against the upstream module.
]==]
function export.show(frame)
local poscat = frame.args or frame.args or error("Part of speech has not been specified. Please pass parameter 1 to the module invocation.")
-- Parameter specification handed to Module:parameters.
local params = {
= {list = true},
= {list = true, allow_holes = true, separate_no_index = true},
= {list = true, allow_holes = true, separate_no_index = true},
= {list = true, allow_holes = true},
= {list = true}, = {alias_of = 'hist'}, = {alias_of = 'hist'},
= {},
= {}, = {alias_of = 'infl'}, = {alias_of = 'infl'},
= {},
= {},
= {},
= {},
}
-- For backwards compatibility with uses of {{ja-syllable}} with the script parameter.
if poscat == "syllables" then
params = {}
end
local args = require('Module:parameters').process(frame:getParent().args, params)
-- `headword` is what gets passed to Module:headword; the remaining fields are
-- working state shared by the helper functions in this module.
local data = {
headword = {
pos_category = poscat,
categories = {},
heads = {},
no_redundant_head_cat = true,
inflections = {},
-- The gender span produced for this placeholder is replaced in the final
-- output (see the first gsub on the return value below).
genders = {'m'}, -- placeholder
nogendercat = true
},
--custom info
pagename = args.pagename or mw.loadData("Module:headword/data").pagename,
pagename_kana = nil, -- "hira" "kata" "both", nil
lang_code = frame.args,
lang_name = nil, -- "Japanese", "Okinawan" ...
katakana_category = {},
info_mid = {}, -- "godan", "intransitive" ...
info_hist = {}, -- historical kana
inflection_base = {}, -- base of inflections
kanas = {}, -- kana id
}
data.headword.lang = require("Module:languages").getByCode(data.lang_code)
data.lang_name = data.headword.lang:getCanonicalName()
-- sort out all the kanas and do the romanization business
format_headword(args, data)
-- add certain inflections and categories for adjectives, verbs, nouns, or adverbs
if pos_functions then
pos_functions(args, data)
end
-- categories
add_categories(data)
-- Sort base: explicit sort argument, else the kana readings, else the page name.
local sort_base = args.sort or data.kanas or data.pagename
data.headword.sort_key = data.headword.lang:makeSortKey(sort_base)
-- Katakana categories are formatted separately, passing the "Kana" script object.
local katakana_category = #data.katakana_category > 0 and
require("Module:utilities").format_categories(
data.katakana_category,
data.headword.lang,
nil,
sort_base,
nil,
require("Module:scripts").getByCode("Kana")
) or ""
-- output
local i_kanas = 0
-- Post-process the generated headword line:
--   1. replace the gender span with the historical-kana note (if any) and the
--      italicised info_mid labels;
--   2. visit each <strong> element, counting them in i_kanas; the callback
--      returns the match unchanged or nil, and both leave the text as it was;
--   3. rewrite the transliteration wrapper into a single span with lang="ja-Latn".
return katakana_category .. require('Module:headword').full_headword(data.headword):gsub('<span class="gender">.-</span>', function()
return (#data.info_hist > 0 and '<sup>←' .. concat(data.info_hist, ' or ') .. '<sup>]</sup></sup>' or '') .. ('<i>' .. concat(data.info_mid, ' ') .. '</i>')
end):gsub('<strong .->.-</strong>', function(m0)
i_kanas = i_kanas + 1
if data.kanas then
return m0
end
end):gsub('<span class="headword%-tr tr" dir="ltr"><span class="Latn" lang="ja">', '<span lang="ja-Latn" class="headword-tr tr Latn" dir="ltr">'):gsub('</span></span>', '</span>')
end
return export