-- Japanese pronunciation template.
-- See Template:ja-pron for usage and examples.
local m_str_utils = require("Module:string utilities")
local concat = table.concat
local gsplit = m_str_utils.gsplit
local gsub = m_str_utils.gsub
local insert = table.insert
local len = m_str_utils.len
local match = m_str_utils.match
local split = m_str_utils.split
local sub = m_str_utils.sub
local lang = require("Module:languages").getByCode("ja")
local kana_to_romaji = require("Module:Hrkt-translit").tr
local m_accent = require("Module:accent qualifier")
-- also ]
local PAGENAME = mw.loadData("Module:headword/data").pagename
local export = {}
-- Reference template names ('R:…') that add_acc_refs wraps in '{{' … '}}' to build
-- the content of a <ref> tag.
-- NOTE(review): every entry below begins with `=`; the key expressions are missing in
-- this copy, so the constructor does not parse as Lua. The keys are presumably the
-- short reference codes accepted in the accent reference parameter — restore them
-- from the canonical module before using this file.
local ref_template_name_data = {
= 'R:Daijirin',
= 'R:Daijirin4',
= 'R:Daijisen',
= 'R:Kokugo Dai Jiten',
= 'R:NHK Hatsuon',
= 'R:Nihon Kokugo Daijiten 2 Online',
= 'R:Shinmeikai2',
= 'R:Shinmeikai5',
= 'R:Shinmeikai7',
= 'R:Zenkoku Akusento Jiten',
= 'R:Kenkyusha JEL Pocket',
= 'R:ja:JAccent',
}
--- Build the reference markup for one accent entry.
-- `text` is split on commas; each piece is handled independently:
--   * a name found in `ref_template_name_data` becomes a named <ref> tag whose
--     content is a call to the mapped reference template;
--   * otherwise, a piece containing the substring 'ref' is expanded as wikitext
--     via frame:preprocess;
--   * anything else produces no output and is only recorded through the
--     "ja-pron/unrecognized ref" tracking call.
-- @param text string  comma-separated reference names and/or raw ref wikitext
-- @return string  concatenation of all generated markup ("" when nothing matched)
local function add_acc_refs(text)
	local output = {}
	for ref_name in gsplit(text, ',') do
		mw.log(ref_name)
		-- Look up the template for THIS name. Assigning the whole table (as the
		-- previous code did) is always truthy and makes the concatenation below
		-- fail with "attempt to concatenate a table value".
		local ref_template_name = ref_template_name_data[ref_name]
		if ref_template_name then
			insert(output, mw.getCurrentFrame():extensionTag{
				name = 'ref',
				args = { name = ref_name },
				content = '{{' .. ref_template_name .. '}}',
			})
		elseif match(ref_name, 'ref') then
			insert(output, mw.getCurrentFrame():preprocess(ref_name))
		else
			-- Unknown reference name: emit nothing, but track the page.
			require("Module:debug").track("ja-pron/unrecognized ref")
		end
	end
	return concat(output)
end
-- Template entry point: assembles the pronunciation list for an entry.
-- Reads the arguments of the calling template (frame:getParent().args), runs them
-- through Module:parameters, and appends to a <ul>: one <li> per accent, one <li>
-- per item of the IPA loop, and one <li> for the audio file when one is given.
-- @param frame  frame object; only frame:getParent().args is read here.
-- @return string  '\n' followed by the serialised <ul>.
function export.show(frame)
-- NOTE(review): the key of every entry in this table is missing in this copy (each
-- line starts with `=`), so the constructor does not parse. From the alias_of values
-- and the reads below, keys for "accent", "accent\1_loc", "accent\1_ref",
-- "accent\1_note", "audio", "dev", "dev2" and "devm" are expected — TODO restore.
local params = {
= {default = PAGENAME, list = true},
= {list = true},
= {list = true},
= {list = true, allow_holes = true},
= {list = true, allow_holes = true},
= {alias_of = "accent", list = true},
= {alias_of = "accent\1_loc", list = true},
= {alias_of = "accent\1_ref", list = true},
= {alias_of = "accent\1_note", list = true},
= {},
= {},
= {},
= {alias_of = "audio"},
= {}
}
local args = require("Module:parameters").process(frame:getParent().args, params)
local au = args.audio
-- `devm` is accepted as a fallback spelling of `dev`.
local dev = args.dev or args.devm
local dev2 = args.dev2
-- NOTE(review): `maxindex` is never read again in this function, and table.getn is
-- a deprecated Lua 5.1 function — candidate for removal.
local maxindex = table.getn(args)
local html_list_main = mw.html.create('ul')
-- Accent section: one list item per entry of args.accent.
local a, al, ar, an = args.accent, args.accent_loc, args.accent_ref, args.accent_note
local no_acc = true
-- NOTE(review): the loop index `i` is not used in the body as written, which
-- suggests per-accent subscripts on args/al/ar/an were lost — TODO confirm.
for i, position in ipairs(a) do
local result
no_acc = false
local text = args
-- Fall back to a default location label when none was supplied.
-- NOTE(review): the "]" literal looks like the tail of a truncated wiki link.
if not al then
al = "]"
end
result = m_accent.format_qualifiers(lang, {al}) .. " "
result = result .. export.accent(text, position, dev, dev2)
if ar then
result = result .. add_acc_refs(ar)
else
-- No reference supplied for this accent: track the page.
require("Module:debug").track("ja-pron/unsourced accent")
end
-- Optional free-text note, appended after a space.
result = result .. (an and (" " .. an) or "")
html_list_main:tag('li'):wikitext(
result
)
end
if no_acc then
-- track when entries have no pitch information
require("Module:debug").track("ja-pron/no accent")
end
-- IPA section: one list item per element visited by ipairs(args).
local m_IPA = require("Module:IPA")
for _, text in ipairs(args) do
local sortkey = (lang:makeSortKey(text))
html_list_main:tag('li'):wikitext(
m_IPA.format_IPA_full {
lang = lang,
-- NOTE(review): `pron` is an empty string as written; the expression that
-- built the transcription (presumably a call to export.ipa) appears lost.
items = {{ pron = "" }},
sort_key = sortkey,
}
)
end
-- Audio section: a single list item, only when an audio file was given.
if au then
html_list_main:tag('li'):wikitext(
require("Module:audio").format_audio {
lang = lang,
file = au,
sort = (lang:makeSortKey(args))
}
)
end
return '\n' .. tostring(html_list_main)
end
-- Convert kana text to an IPA transcription string.
-- The text is romanised with kana_to_romaji and then rewritten by a long, ordered
-- sequence of gsub passes; the order of the passes matters because later patterns
-- match the output of earlier ones.
-- @param text  kana string, or a table with an `args` field (frame-style call)
-- @param dev   optional comma-separated list of numeric positions to devoice
-- @param dev2  obsolete; any non-empty value raises an error asking to merge it into dev
-- @return string  the transcription
-- NOTE(review): in this copy many pattern strings are empty ("") and every
-- replacement table has lost its keys (entries start with `=`); the bracketed
-- character classes and subscripts appear to have been stripped. The function does
-- not parse as written — restore from the canonical module.
function export.ipa(text, dev, dev2)
-- Frame-style call: pull the values out of text.args.
-- NOTE(review): all three values read the same `text.args`; subscripts look lost.
if type(text) == "table" then
text, dev, dev2 = text.args, text.args, text.args end
dev = dev or ""
dev2 = dev2 or ""
if dev2 ~= "" then error('Please remove parameter dev2 and change parameter dev to \"dev=' .. dev .. ',' .. dev2 .. '"') end
-- position_mora collects character indices into `text`, one per character that
-- fails the (now empty) pattern test; when the following character matches the
-- second (also empty) pattern, the index of that following character is stored.
local position_mora = {}
for i=1,len(text) do
if not match(sub(text,i,i), "") then
if sub(text,i+1,i+1) and match(sub(text,i+1,i+1), "") then
insert(position_mora, i+1)
else
insert(position_mora, i)
end
end
end
-- insert @ to stand for devoicing
if dev ~= "" then
for position in gsplit(dev,",") do
position = tonumber(position)
-- A position equal to the number of recorded entries: append the marker at the end.
if #position_mora == position then
text = text .. "@"
else
-- NOTE(review): `position_devspace` is the whole table as written; a
-- subscript (presumably the current position) appears lost.
local position_devspace = position_mora
text = sub(text, 1, position_devspace) .. "@" .. sub(text, position_devspace+1, -1)
end
-- The inserted "@" lengthens the text by one, so later entries are shifted.
-- NOTE(review): as written this adds 1 to the table itself; a subscript by
-- `i` appears lost.
for i=position+1,#position_mora do
position_mora = position_mora + 1
end
end
end
text = kana_to_romaji(text, "ja", nil, {keep_period = true})
text = gsub(text, "", {
= "r", = "b", = "ʔ",
= "̥", = "ː"
})
-- Hyphens which have been geminated over are removed; otherwise converted to dots.
text = gsub(text, "()%-%1", "%1%1")
:gsub("-", ".")
text = gsub(text, "?", {
= "p̚p", = "t̚ch", = "k̚k", = "b̚b̥", = "d̚j",
= "d̚d̥", = "g̚g̊", = "d̚z", = "t̚t", = "t̚ts",
= "r̚r", = "ɕː" })
text = gsub(text, "ei", "ē")
text = gsub(text, "", {
= "aː", = "eː", = "iː", = "oː", = "uː",
= "ɸ", = "d͡ʑ", = "ɾ", = "j", = "d͡z" })
text = gsub(text, "", {
= "ɕ",
= "t͡ɕ",
= "t͡s" })
text = gsub(text, "()d͡()", "%1%2")
text = gsub(text, "(?)i", "%1ʲi")
text = gsub(text, "(?)j", "%1ʲ")
text = gsub(text, "nʲ", "ɲ̟")
text = gsub(text, "()(ː?)n()", "%1̃%2n%3")
-- Rewrites of "n" depending on what follows it; each entry is a
-- {pattern, replacement} pair.
-- NOTE(review): pairs() does not guarantee visiting order, and the call below
-- passes the whole pair table for both arguments as written — subscripts appear lost.
for _, args in pairs{
{ "(ː?)n$", "̃%1ɴ" },
{ "n( ?)()", "m%1%2" },
{ "n( ?)(.͡)", "ɲ̟%1%2" },
{ "n( ?)ɲ̟", "ɲ̟%1ɲ̟" },
{ "n( ?)()(ʲ?)", "ŋ%1%3%2%3" },
{ "n( ?)()", "ɰ̃%1%2" },
{ "nʔ", "ɰ̃" },
{ "n ()", "ɰ̃ %1" },
} do
text = gsub(text, args, args)
end
text = gsub(text, "h", {
= "çi", = "ç",
= "ɸu" })
text = gsub(text, "h()", "%1%1")
-- A doubled capture is collapsed to the capture followed by the length mark.
text = gsub(text, "()%1", "%1ː")
text = gsub(text, "ːʲ", "ʲː")
text = gsub(text, "̚(.?)ʲ", "̚ʲ%1ʲ")
text = gsub(text, "", {
= "a̠",
= "e̞",
= "o̞",
= "ɯ̟ᵝ",
= "ɰᵝ"})
text = gsub(text, "()ɯ̟", "%1ɨ")
-- Reorder diacritics so the voiceless ring / tilde precedes the superscript ᵝ.
text = gsub(text, "ᵝ̥", "̥ᵝ")
text = gsub(text, "ᵝ̃", "̃ᵝ")
text = gsub(text, "̠", "̥̃˗")
text = gsub(text, "̞", "̥̃˕")
text = gsub(text, "̟", "̥̃˖")
text = gsub(text, "()̥", "%1̊")
-- Final cleanup: drop literal dots, turn apostrophes into dots, and replace the
-- ASCII "g" with the IPA letter "ɡ".
text = gsub(text, "%.", "")
text = gsub(text, "'", ".")
text = gsub(text, "g", "ɡ")
return text
end
-- Romanise `word` and mark its pitch with accents on the romaji.
-- With rftype "rise" the replacements use acute forms (á, é, …, ń); with "fall"
-- they use grave forms (à, è, …, ǹ). Any other rftype raises "Type not recognised.".
-- @param word    kana string to romanise with kana_to_romaji
-- @param rftype  "rise" or "fall"
-- @return string  the accented romaji
-- NOTE(review): the capture patterns below are empty ("()()", "n()") and the
-- replacement tables have lost their keys (entries start with `=`); bracketed
-- character classes and subscripts appear stripped from this copy — restore before use.
function export.rise_and_fall(word, rftype)
-- Two passes that insert a "." between a pair of captured pieces before
-- romanisation (the captured classes are missing here).
word = gsub(word, "()()", "%1.%2")
word = gsub(word, "()()", "%1.%2")
word = kana_to_romaji(word, "ja")
if rftype == "rise" then
word = gsub(word, ".", {
= "á", = "é", = "í", = "ó", = "ú",
= "áá", = "éé", = "íí", = "óó", = "úú" })
-- "n" before the captured class, and a word-final "n", also take the acute.
word = gsub(gsub(word, "n()", "ń%1"), "n$", "ń")
elseif rftype == "fall" then
word = gsub(word, ".", {
= "à", = "è", = "ì", = "ò", = "ù",
= "àà", = "èè", = "ìì", = "òò", = "ùù" })
-- Same treatment of "n" as above, with the grave accent.
word = gsub(gsub(word, "n()", "ǹ%1"), "n$", "ǹ")
else
return error("Type not recognised.")
end
return word
end
-- Other modules rely on the output format of this function (their names were wiki
-- links and are truncated to "]" in this copy), so the markup must stay stable.
--
-- Build the pitch-accent display for `text`: the kana wrapped in <span> elements
-- whose CSS borders draw the pitch line, followed by bracketed romaji produced by
-- export.rise_and_fall, with "ꜜ" marking the downstep.
-- @param text   kana string, or a table with an `args` field (frame-style call)
-- @param class  accent class; digits give the downstep mora, and "o" is also accepted
-- @param dev    optional comma-separated list of positions to mark as devoiced
-- @param dev2   normalised to false when empty, otherwise unused in this function
-- @return string  HTML/wikitext for the accent display
-- Raises an error when the mora count is smaller than the downstep position, or
-- when no accent type could be determined ("Accent type not recognised.").
-- NOTE(review): many pattern strings are empty (""), several table constructors
-- have lost their keys (entries start with `=`), and a number of subscripts are
-- missing (e.g. `kanas = kana_devoice(kanas)`); the bracketed parts appear to have
-- been stripped from this copy. The function does not parse as written.
function export.accent(text, class, dev, dev2)
local result
-- Frame-style call. NOTE(review): all four values read the same `text.args`.
if(type(text)) == "table" then text, class, dev, dev2 = text.args, text.args, text.args, text.args end
-- Append the long-vowel mark "ー" after a captured piece (capture classes missing),
-- then drop literal dots.
text = gsub(text, "()", "%1ー")
text = gsub(text, "()", "%1ー")
text = gsub(text, "%.", "")
if dev == "" then dev = false end
if dev2 == "" then dev2 = false end
-- Markup fragments. down_first/down_last draw a top border plus a right-hand
-- vertical stroke; high_first draws a top border only.
local down_first = "<span style=\"border-top:1px solid black;position:relative;padding:1px;\">"
local down_last = "<span style=\"position:absolute;top:0;bottom:67%;right:0%;border-right:1px solid black;\">​</span></span>"
local high_first = "<span style=\"border-top:1px solid black\">"
local start = "<span lang=\"ja\" class=\"Jpan\">"
local romaji_start = " <span class=\"Latn\"><samp>["
local romaji_last = "]</samp></span> "
local last = "</span>"
local position_kana = {} --position of each kana (ぁ counted), text without space
local position_mora = {} --position of each mora (ぁ not counted), text without space
local position_mora_space = {} --position of each mora (ぁ not counted), text with space
-- Record, for each mora start in the spaced text, the index of its last character
-- (`extra` counts the following characters that belong to the same mora).
for i=1, len(text) do
if not match(sub(text,i,i), "") then
local extra = len(match(sub(text,i+1), "^*"))
insert(position_mora_space, i+extra)
end
end
-- Same bookkeeping on the text with spaces removed.
local space_removed = gsub(text," ","")
for i=1, len(space_removed) do
insert(position_kana, i)
if not match(sub(space_removed,i,i), "") then
local extra = len(match(sub(space_removed,i+1), "^*"))
insert(position_mora, i+extra)
end
end
-- Determine the accent type from `class`:
--   "h" with acc_number 0, "a" with acc_number 1, "o" with acc_number set to a
--   length computed from the text, and "n" for a numeric class smaller than the
--   mora count.
-- NOTE(review): presumably heiban / atamadaka / odaka / nakadaka — TODO confirm.
local acc_type, acc_number
if match(class, "^$") then
acc_type, acc_number = "h", 0
elseif match(class, "^$") then
acc_type, acc_number = "a", 1
elseif match(class, "^o$") then
acc_type = "o"
acc_number = len(gsub(text, "", ""))
end
if match(class, "^+$") and not match(class,"^$") then
class = gsub(class, "", "")
acc_number = tonumber(class)
local morae_count = len(gsub(text, "", ""))
if morae_count == acc_number then
acc_type = "o"
elseif morae_count < acc_number then
return error(("Mora count (%d) is smaller than position of downstep mora (%d).")
:format(morae_count, acc_number))
else
acc_type = "n"
end
elseif not acc_number then
acc_number = class
end
-- Advance start_index while the character after it matches the (missing) pattern.
local start_index = 1
while match(sub(text, start_index+1, start_index+1), "") do
start_index = start_index + 1
end
-- Split the text into per-mora kana strings.
local kanas = {}
local single_mora
for i=1, len(text) do
if not match(sub(text,i,i), "") then
single_mora = gsub(sub(text, i, -1), "^(.*).*", "%1")
insert(kanas, single_mora)
end
end
-- Wraps a kana in a dotted, rounded border to show devoicing.
local function kana_devoice(text)
return '<span style="border:1px dotted gray; border-radius:50%;">' .. text .. "</span>"
end
if dev then
for position in gsplit(dev, ",") do
position = tonumber(position)
-- NOTE(review): subscripts by `position` appear lost on both sides.
kanas = kana_devoice(kanas)
end
end
-- Replace the long-vowel mark with an explicit vowel kana (お, え, う, い, あ)
-- depending on the captured preceding piece, so romanisation sees plain kana.
local romaji_text = gsub(text, "()ー", "%1お")
romaji_text = gsub(romaji_text, "()ー", "%1え")
romaji_text = gsub(romaji_text, "()ー", "%1う")
romaji_text = gsub(romaji_text, "()ー", "%1い")
romaji_text = gsub(romaji_text, "()ー", "%1あ")
local romajis = split(romaji_text, "")
-- Counts the spaces in the shortest prefix of `text` that contains `index`
-- non-space characters.
local function count_nspaces(text, index)
local i, sample, nspaces = 0, "", 0
while len(sample) < index do
i = i + 1
sample, nspaces = gsub(sub(text, 1, i), " ", "")
end
return nspaces
end
local function romaji_devoice(text)
-- use @ instead of ̥
return text .. "@"
end
if dev then
for position in gsplit(dev,",") do
-- NOTE(review): subscripts appear lost on the next two lines.
position = position_mora_space
romajis = romaji_devoice(romajis)
end
end
if acc_type == "n" then
-- Three-part layout: text before the rise, the high stretch up to the
-- downstep, and the remainder after it.
local r_start_index = start_index + count_nspaces(romaji_text, start_index)
local r_index = position_mora_space
local k_index = acc_number
local r_parts = {
= concat(romajis, "", 1, r_start_index),
= concat(romajis, "", r_start_index + 1, r_index),
= concat(romajis, "", r_index + 1, #romajis)
}
local k_parts = {
= concat(kanas, "", 1, 1),
= concat(kanas, "", 2, k_index),
= concat(kanas, "", k_index + 1, #kanas)
}
-- Carry over a leading space of a romaji part so word spacing is kept.
-- NOTE(review): both tests read `r_parts` without a subscript as written.
local space2 = ""
local space3 = ""
if sub(r_parts, 1, 1) == " " then
space2 = " "
end
if sub(r_parts, 1, 1) == " " then
space3 = " "
end
result = start ..
k_parts ..
down_first ..
k_parts ..
down_last ..
k_parts ..
last ..
romaji_start ..
export.rise_and_fall(r_parts, "fall") ..
space2 ..
export.rise_and_fall(r_parts, "rise") ..
"ꜜ" ..
space3 ..
export.rise_and_fall(r_parts, "fall") ..
romaji_last ..
"(] – )"
else
-- Two-part layout shared by the "h", "a" and "o" types.
local r_start_index = start_index + count_nspaces(romaji_text, start_index)
local r_parts = {
= concat(romajis, "", 1, r_start_index),
= concat(romajis, "", r_start_index + 1, #romajis)
}
local k_parts = {
= concat(kanas, "", 1, 1),
= concat(kanas, "", 2, #kanas)
}
local space2 = ""
if sub(r_parts, 1, 1) == " " then
space2 = " "
end
if acc_type == "h" then
-- Top border with no vertical stroke; no "ꜜ" in the romaji.
result = start ..
k_parts ..
high_first ..
k_parts ..
last ..
last ..
romaji_start ..
export.rise_and_fall(r_parts, "fall") ..
space2 ..
export.rise_and_fall(r_parts, "rise") ..
romaji_last ..
"(] – )"
elseif acc_type == "a" then
-- Downstep markup wraps the first part; "ꜜ" follows the rising romaji part.
result = start ..
down_first ..
k_parts ..
down_last ..
k_parts ..
last ..
romaji_start ..
export.rise_and_fall(r_parts, "rise") ..
"ꜜ" ..
space2 ..
export.rise_and_fall(r_parts, "fall") ..
romaji_last ..
"(] – )"
elseif acc_type == "o" then
-- Downstep markup closes at the very end; "ꜜ" is the last romaji character.
result = start ..
k_parts ..
down_first ..
k_parts ..
down_last ..
last ..
romaji_start ..
export.rise_and_fall(r_parts, "fall") ..
space2 ..
export.rise_and_fall(r_parts, "rise") ..
"ꜜ" ..
romaji_last ..
"(] – )"
else
return error("Accent type not recognised.")
end
end
-- Turn each "<char>@" devoicing marker into <del><char></del>.
result = gsub(result, "(.)@", "<del>%1</del>")
return result
end
return export