-- Module:ja-pron
--
-- The following documentation is located at Module:ja-pron/documentation. Categories were auto-generated by Module:module categorization.
-- Useful links: subpage list • links • transclusions • testcases • sandbox
-- Japanese pronunciation template.
-- See Template:ja-pron for usage and examples.
-- String helper functions are taken from Module:string utilities.
-- NOTE(review): presumably these operate on Unicode characters rather than
-- bytes — confirm against that module.
local m_str_utils = require("Module:string utilities")

-- Local aliases for the library functions used in this module.
local concat = table.concat
local gsplit = m_str_utils.gsplit
local gsub = m_str_utils.gsub
local insert = table.insert
local len = m_str_utils.len
local match = m_str_utils.match
local min = math.min
local split = m_str_utils.split
local sub = m_str_utils.sub
local toNFC = mw.ustring.toNFC

-- Language object for "ja", the kana transliteration function, and the
-- accent-qualifier formatter used when rendering accent lines.
local lang = require("Module:languages").getByCode("ja")
local kana_to_romaji = require("Module:Hrkt-translit").tr
local m_accent = require("Module:accent qualifier")
-- NOTE(review): the comment that stood here read "also ]"; it looks like a
-- wiki link whose target was lost in extraction.
-- Page name as provided by the headword data module; used as the default
-- value of a parameter in export.show.
local PAGENAME = mw.loadData("Module:headword/data").pagename

-- Kana range data loaded from Module:ja/data/range.
-- NOTE(review): presumably each vowels.* entry is a string of kana sharing
-- that vowel, meant for building character classes — the visible code no
-- longer references them, likely because bracketed pattern text was lost.
local range = mw.loadData("Module:ja/data/range")
local a_kana = range.vowels.a
local i_kana = range.vowels.i
local u_kana = range.vowels.u
local e_kana = range.vowels.e
local o_kana = range.vowels.o
local n_kana = range.vowels.n
local submoraic_kana = range.submoraic_kana

-- Forward declaration: the function is assigned further down in the file but
-- is called from export.show and export.accent.
local _accent

local export = {}

-- Names of reference templates. add_acc_refs reads this table and wraps the
-- chosen name in "{{" .. "}}" inside a <ref> extension tag.
-- NOTE(review): every entry below is missing its key (the part before "=");
-- as shown this is not a valid Lua table constructor. The keys look like
-- bracketed string keys that were stripped during extraction — restore them
-- from the upstream module before relying on this table.
local ref_template_name_data = {
	 = "R:Daijirin",
	 = "R:Daijirin4",
	 = "R:Daijisen",
	 = "R:Kokugo Dai Jiten",
	 = "R:NHK Hatsuon",
	 = "R:NHK2016",
	 = "R:ja:Nihon Kokugo Daijiten 2 Online",
	 = "R:ja:Nihon Kokugo Daijiten 2 Print",
	 = "R:Shinmeikai2",
	 = "R:Shinmeikai5",
	 = "R:Shinmeikai7",
	 = "R:Shinmeikai8",
	 = "R:Sankoku8",
	 = "R:Zenkoku Akusento Jiten",
	 = "R:Kenkyusha JEL Pocket",
	 = "R:ja:JAccent",
}

--- Build the reference markup for a comma-separated list of accent sources.
-- Each item of `text` is handled in one of three ways:
--   * an item that is a key of `ref_template_name_data` becomes a named
--     <ref> tag whose content transcludes the mapped reference template;
--   * any other item containing "ref" is expanded with frame:preprocess;
--   * anything else produces no output and is recorded via the tracking call.
-- @param frame frame object providing `extensionTag` and `preprocess`
-- @param text  comma-separated reference names
-- @return the generated reference markup, concatenated into one string
local function add_acc_refs(frame, text)
	local output = {}
	for ref_name in gsplit(text, ",") do
		mw.log(ref_name)
		-- Look up this particular name. Using the whole table here would make
		-- the condition below always true and the concatenation raise an error.
		local ref_template_name = ref_template_name_data[ref_name]
		if ref_template_name then
			insert(output, frame:extensionTag("ref", "{{" .. ref_template_name .. "}}", {name = ref_name}))
		elseif match(ref_name, "ref") then
			insert(output, frame:preprocess(ref_name))
		else
			-- Unrecognized name: record it for tracking instead of emitting output.
			require("Module:debug").track("ja-pron/unrecognized ref")
		end
	end
	return concat(output)
end

-- Entry point for the pronunciation template. Processes the parent frame's
-- arguments through Module:parameters and returns an HTML <ul> (prefixed with
-- a newline) holding one item per accent entry, one per IPA entry and an
-- optional audio item, followed by formatted categories when any were added.
-- NOTE(review): several expressions here look truncated — the keys of
-- `params`, and what are presumably per-entry subscripts on `args`, `al`,
-- `ar`, `an` and `a` inside the loops. Bracketed text seems to have been lost
-- in extraction; as shown the function is not valid Lua. Restore it from the
-- upstream module before changing any logic.
function export.show(frame)
	-- Parameter specification handed to Module:parameters (keys missing, see
	-- the note above).
	local params = {
		 = {default = PAGENAME, list = true},
		
		 = {list = true},
		 = {list = true, allow_holes = true},
		 = {list = true, allow_holes = true},
		 = {list = true, allow_holes = true},
		
		 = {alias_of = "accent", list = true},
		 = {alias_of = "accent\1_loc", list = true},
		 = {alias_of = "accent\1_ref", list = true},
		 = {alias_of = "accent\1_note", list = true},
		
		 = {},
		 = {},
		 = {},
		
		 = {alias_of = "audio"},
		 = {}
	}
	
	local args = require("Module:parameters").process(frame:getParent().args, params)
	
	local au = args.audio
	local dev = args.dev or args.devm
	local dev2 = args.dev2
	local maxindex = #args	-- replace with args.maxindex if 1 gets allow_holes=true
	local html_list_main = mw.html.create("ul")
	local categories = {}
	
	-- Deals with the accents
	-- a = accent values, al = locations, ar = references, an = notes.
	local a, al, ar, an = args.accent, args.accent_loc, args.accent_ref, args.accent_note
	for i, position in ipairs(a) do
		-- NOTE(review): presumably the text of the i-th entry — confirm.
		local text = args
		-- When no location is given, a default location label is used and the
		-- accent is categorised as a Tōkyō accent further down.
		local al_is_tokyo_default = false
		if not al then
			al_is_tokyo_default = true
			al = "]"
		end
		local result = m_accent.format_qualifiers(lang, {al}) .. " "
		
		-- acc_type is a one-letter code: "h", "a", "n" or "o".
		local acc_wikitext, acc_type = _accent(text, position, dev, dev2)
		
		result = result .. acc_wikitext
		if ar then
			result = result .. add_acc_refs(frame, ar)
		else
			-- No reference supplied: record the entry for tracking.
			require("Module:debug").track("ja-pron/unsourced accent")
		end
		result = result .. (an and (" " .. an) or "")
		
		-- Pitch-accent categories are only added for the default location.
		if al_is_tokyo_default then
			if acc_type == "h" then
				insert(categories, lang:getCanonicalName() .. " terms with Heiban pitch accent (Tōkyō)")
			elseif acc_type == "a" then
				insert(categories, lang:getCanonicalName() .. " terms with Atamadaka pitch accent (Tōkyō)")
			elseif acc_type == "n" then
				insert(categories, lang:getCanonicalName() .. " terms with Nakadaka pitch accent (Tōkyō)")
			elseif acc_type == "o" then
				insert(categories, lang:getCanonicalName() .. " terms with Odaka pitch accent (Tōkyō)")
			end
		end

		html_list_main:tag("li"):wikitext(
			result
		)
	end
	
	-- Deals with the IPA
	local m_IPA, no_acc = require("Module:IPA"), false
	for i, text in ipairs(args) do
		-- NOTE(review): `pron` is an empty string as shown; presumably the
		-- expression computing the IPA for `text` was lost together with its
		-- surrounding brackets — confirm against upstream.
		html_list_main:tag("li"):wikitext(
			m_IPA.format_IPA_full {
				lang = lang,
				items = {{ pron = "" }},
			}
		)
		-- Flags entries without accent information for the category below.
		if not a then
			no_acc = true
		end
	end
	
	if no_acc then
		insert(categories, lang:getCanonicalName() .. " terms with IPA pronunciation missing pitch accent")
	end
	
	-- Deals with the audio
	if au then
		html_list_main:tag("li"):wikitext(
			require("Module:audio").format_audio {
				lang = lang,
				file = au,
			}
		)
	end
	
	return "\n" .. tostring(html_list_main) ..
		(#categories > 0 and require("Module:utilities").format_categories(categories, lang) or "")
end

-- Converts kana `text` into an IPA transcription string.
--   text: the kana string, or a table with an `args` field from which the
--         arguments are read instead.
--   dev:  optional comma-separated list of numeric positions; an "@" marker
--         standing for devoicing is inserted into the text for each one.
--   dev2: obsolete; any non-empty value raises an error telling the editor to
--         merge it into `dev`.
-- The text is transliterated with kana_to_romaji, normalised with toNFC, and
-- then rewritten by the sequence of substitutions below; the order of those
-- substitutions matters because later ones match the output of earlier ones.
-- NOTE(review): many pattern strings are empty or contain bare "()" / "(*)"
-- groups, replacement tables have no keys, and some subscripts are missing.
-- This looks like bracketed text lost in extraction; as shown the function is
-- not valid Lua. Restore it from upstream before changing any rule.
function export.ipa(text, dev, dev2)
	if type(text) == "table" then
		text, dev, dev2 = text.args, text.args, text.args end
	dev = dev or ""
	dev2 = dev2 or ""
	
	if dev2 ~= "" then error("Please remove parameter dev2 and change parameter dev to \"dev=" .. dev .. "," .. dev2 .. "\"") end
	
	-- Convert 〜 and 〰 to a regular ー.
	text = gsub(text, "", "ー")
	
	-- Character index recorded for each unit of the text; when the following
	-- character matches the (lost) pattern, the index after it is recorded.
	local position_mora = {}
	
	for i = 1, len(text) do
		if not match(sub(text, i, i), "") then
			local nxt = sub(text, i + 1,i + 1)
			if nxt and match(nxt, "") then
				insert(position_mora, i + 1)
			else
				insert(position_mora, i)
			end
		end
	end
	
	-- insert @ to stand for devoicing
	if dev ~= "" then
		for position in gsplit(dev, ",") do
			position = tonumber(position)
			if #position_mora == position then
				-- Position is the last recorded one: append the marker.
				text = text .. "@"
			else
				-- NOTE(review): presumably indexed by `position` originally.
				local position_devspace = position_mora
				text = sub(text, 1, position_devspace) .. "@" .. sub(text, position_devspace+1, -1)
			end
			-- Later positions shift by one because a character was inserted.
			-- NOTE(review): presumably both sides were indexed by `i`.
			for i = position + 1, #position_mora do
				position_mora = position_mora + 1
			end
		end
	end
	
	text = toNFC(kana_to_romaji(text, "ja", nil, {keep_dot = true, disambig = true}))
	
	text = gsub(text, "", {
		 = "r",  = "b",  = "ʔ",  = "̥"
	})
	
	-- Hyphens which have been geminated over are removed; otherwise converted to dots.
	text = gsub(text, "()%-%1", "%1%1")
		:gsub("-", ".")
	
	-- Doubled consonants: the first of the pair gets the "̚" diacritic in most
	-- of the rules below ("s" + "sh" becomes "ɕɕ" instead).
	text = text:gsub("()(%s*)%1", "%1̚%2%1")
		:gsub("t(%s*)ch", "t̚%1ch")
		:gsub("()(%s*)%1", "%1̚%2%1̥")
		:gsub("g(%s*)g", "g̚%1g̊")
		:gsub("()(%s*)%1", "d̚%2%1")
		:gsub("s(%s*)sh", "ɕ%1ɕ")
	
	text = gsub(text, "ei", "ē")
	text = gsub(text, "", {
		 = "aː",  = "eː",  = "iː",  = "oː",  = "uː", 
		 = "ɸ",  = "d͡ʑ",  = "ɾ",  = "j",  = "d͡z" })
	
	text = gsub(text, "", {
		 = "ɕ", 
		 = "t͡ɕ", 
		 = "t͡s" })
	
	text = gsub(text, "()d͡()", "%1%2")

	-- Secondary articulation marks: ʲ before i / in place of j, ᵝ in place of w.
	text = gsub(text, "(*)i", "%1ʲi")
	text = gsub(text, "(*)j", "%1ʲ")
	text = gsub(text, "(*)w", "%1ᵝ")
	text = gsub(text, "nʲ", "ɲ̟")
	-- A run of "n" at the very end of the text becomes the same number of "ɴ".
	text = gsub(text, "n+$", function(n)
		return ("ɴ"):rep(#n)
	end)
	text = gsub(text, "()(ː*)ɴ", "%1̃%2ɴ")
	text = gsub(text, "()(ː*)n()", "%1̃%2n%3")
	-- The following rules rewrite "n" depending on what follows it.
	text = gsub(text, "n*", function(m)
		return m:gsub("n", "m")
	end)
	text = gsub(text, "n*.͡", function(m)
		return m:gsub("n", "ɲ̟")
	end)
	text = gsub(text, "n*ɲ̟", function(m)
		return m:gsub("n", "ɲ̟")
	end)
	text = gsub(text, "n*ɾ", function(m)
		return m:gsub("n", "n̺")
	end)
	text = gsub(text, "_ng", "ŋ")
	text = gsub(text, "(n*)()(*)", function(m1, m2, m3)
		return m1:gsub("n", "ŋ" .. m3) .. m2 .. m3
	end)
	text = gsub(text, "_nw", "nᵝ")
	text = gsub(text, "n*", function(m)
		return m:gsub("n", "ɰ̃")
	end)
	text = gsub(text, "(*n)", function(m) -- ʔ to be removed once Hrkt-translit `disambig` flag is implemented, as ʔ will always represent a glottal
		return m:gsub("n", "ɰ̃")
	end)
	text = gsub(text, "n* ", function(m)
		return m:gsub("n", "ɰ̃")
	end)
	
	text = gsub(text, "h", {
		 = "çi",  = "ç",
		 = "ɸu"
	})
	
	text = gsub(text, "h()", "%1%1")
	-- Collapse a repeated consonant into one symbol followed by one "ː" per
	-- repetition; the empty captures yield positions, so j - i is the length of
	-- the repeated run.
	for _, cons in ipairs{"ç", "ɕ", "ɸ", "h", "j", "m", "n", "ɴ", "ŋ", "ɾ", "s", "w", "z", "ʑ"} do
		text = gsub(text, "(" .. cons .. ")()" .. cons .. "+()", function(cons, i, j)
			return cons .. ("ː"):rep(j - i)
		end)
	end
	-- Same idea for the two-character symbols, hence the division by 2.
	for _, cons in ipairs{"n̺", "nᵝ", "ɲ̟", "ŋʲ", "ŋᵝ", "ɰ̃"} do
		local char1, char2 = cons:match("(.*)(.*)")
		text = gsub(text, "(" .. cons .. ")()" .. char1 .. "*" .. char2 .. "()", function(cons, i, j)
			return cons .. ("ː"):rep((j - i) / 2)
		end)
	end
	text = gsub(text, "(ː+)(+)", "%2%1")
	text = gsub(text, "̚(.*)(+)", "̚%2%1%2")
	text = gsub(text, "", {
		 = "a̠", 
		 = "e̞", 
		 = "o̞", 
		 = "ɯ̟", 
		 = "β̞"
	})
	
	text = gsub(text, "(*ː*)ɯ̟", "%1ɨ")
	
	text = gsub(text, "̠", "̥̃˗")
	text = gsub(text, "̞", "̥̃˕")
	text = gsub(text, "̟", "̥̃˖")
	text = gsub(text, "()̥", "%1̊")
		:gsub("", "")
		:gsub("g", "ɡ")
	
	return text
end

-- Romanizes `word` with kana_to_romaji and marks it with tone diacritics.
--   rftype == "rise": replacements use acute-accented letters (á, é, …, ń).
--   rftype == "fall": replacements use grave-accented letters (à, è, …, ǹ).
--   any other value raises "Type not recognised.".
-- Returns the marked romanization.
-- NOTE(review): the two leading gsub patterns and the keys of both
-- replacement tables are missing — presumably bracketed text lost in
-- extraction. As shown the function is not valid Lua; restore from upstream.
function export.rise_and_fall(word, rftype)
	-- Both calls insert a "." between two captured groups before
	-- transliteration.
	word = gsub(word, "(?)()", "%1.%2")
	word = gsub(word, "(?)()", "%1.%2")
	word = kana_to_romaji(word, "ja")

	if rftype == "rise" then
		word = gsub(word, ".", {
			 = "á",  = "é",  = "í",  = "ó",  = "ú", 
			 = "áá",  = "éé",  = "íí",  = "óó",  = "úú" })
	
		-- Mark "n" as well: before the captured context and at the end of the word.
		word = gsub(gsub(word, "n()", "ń%1"), "n$", "ń")
		
	elseif rftype == "fall" then
		word = gsub(word, ".", {
			 = "à",  = "è",  = "ì",  = "ò",  = "ù", 
			 = "àà",  = "èè",  = "ìì",  = "òò",  = "ùù" })
		
		-- Same treatment of "n" with the grave accent.
		word = gsub(gsub(word, "n()", "ǹ%1"), "n$", "ǹ")
		
	else
		return error("Type not recognised.")
	end
	
	return word
end

-- NOTE(review): the original comment named two pages that rely on the output
-- format of this function, but both link targets were lost in extraction
-- ("] and ] rely on the output format of this function").
--- Public wrapper around `_accent` that returns only the formatted accent
-- markup, dropping the accent-type code that `_accent` returns second.
-- All four parameters are passed through unchanged.
-- @return the first return value of `_accent`
function export.accent(text, class, dev, dev2)
	-- `select(1, ...)` returns every argument from position 1 onward, so it
	-- forwarded both results of `_accent`. Parentheses around the call
	-- truncate it to exactly the first value.
	return (_accent(text, class, dev, dev2))
end

-- Builds the accent display for `text` and returns two values: the markup
-- string and a one-letter accent-type code.
--   text:  the kana string, or a table with an `args` field from which the
--          arguments are read instead.
--   class: accent specification; it is matched against several patterns to
--          set the accent type ("h", "a", "o", or "n" for a numeric downstep
--          position smaller than the mora count).
--   dev:   optional comma-separated list of positions to mark as devoiced;
--          "" is treated as absent.
--   dev2:  "" is treated as absent; not otherwise used in the visible code.
-- The markup consists of the kana wrapped in styled <span>s (overline and
-- downstep border), a bracketed romanization produced by
-- export.rise_and_fall with "ꜜ" at the downstep, and a trailing label.
-- Raises an error when a numeric downstep position exceeds the mora count or
-- when no accent type could be determined.
-- NOTE(review): most patterns have lost their character classes, the part
-- tables (`r_parts`, `k_parts`) have lost their keys and subscripts, and
-- several other subscripts are missing — presumably bracketed text stripped
-- during extraction. As shown the function is not valid Lua; restore it from
-- upstream before changing any logic.
_accent = function(text, class, dev, dev2)
	local result

	if(type(text)) == "table" then text, class, dev, dev2 = text.args, text.args, text.args, text.args end
	-- Normalise the input: two rules append "ー" after a captured group, then
	-- all "." characters are removed.
	text = gsub(text, "(?)", "%1ー")
	text = gsub(text, "(?)", "%1ー")
	text = gsub(text, "%.", "")
	if dev == "" then dev = false end
	if dev2 == "" then dev2 = false end
	
	-- HTML fragments used to assemble the result.
	local down_first = "<span style=\"border-top:1px solid;position:relative;padding:1px;\">"
	local down_last = "<span style=\"position:absolute;top:0;bottom:67%;right:0%;border-right:1px solid;\"></span></span>"
	local high_first = "<span style=\"border-top:1px solid\">"
	local start = "<span lang=\"ja\" class=\"Jpan\">"
	local romaji_start = " <span class=\"Latn\"><samp>["
	local romaji_last = "]</samp></span> "
	local last = "</span>"
	
	local position_kana = {}       --position of each kana (ぁ counted), text without space
	local position_mora = {}       --position of each mora (ぁ not counted), text without space
	local position_mora_space = {} --position of each mora (ぁ not counted), text with space
	
	for i=1, len(text) do
		if not match(sub(text,i,i), "") then
			local extra = len(match(sub(text,i+1), "^*"))
			insert(position_mora_space, i+extra)
		end
	end
	local space_removed = gsub(text," ","")
	for i=1, len(space_removed) do
		insert(position_kana, i)
		if not match(sub(space_removed,i,i), "") then
			local extra = len(match(sub(space_removed,i+1), "^*"))
			insert(position_mora, i+extra)
		end
	end
	
	-- Determine the accent type and downstep position from `class`.
	local acc_type, acc_number
	if match(class, "^$") then
		acc_type, acc_number = "h", 0
		
	elseif match(class, "^$") then
		acc_type, acc_number = "a", 1
	
	elseif match(class, "^o$") then
		acc_type = "o"
		acc_number = len(gsub(text, "", ""))
	end
	
	if match(class, "^+$") and not match(class,"^$") then
		class = gsub(class, "", "")
		acc_number = tonumber(class)
		
		local morae_count = len(gsub(text, "", ""))
		
		-- Downstep on the last mora is type "o"; earlier ones are type "n".
		if morae_count == acc_number then
			acc_type = "o"
		elseif morae_count < acc_number then
			return error(("Mora count (%d) is smaller than position of downstep mora (%d).")
				:format(morae_count, acc_number))
		else
			acc_type = "n"
		end
	elseif not acc_number then
		acc_number = class
	end
	
	-- Extend the first unit while the following character matches the (lost)
	-- pattern.
	local start_index = 1
	while match(sub(text, start_index+1, start_index+1), "") do
		start_index = start_index + 1
	end
	
	-- Collect the kana units of the text.
	local kanas = {}
	local single_mora
	for i=1, len(text) do
		if not match(sub(text,i,i), "") then
			single_mora = gsub(sub(text, i, -1), "^(.*).*", "%1")
			insert(kanas, single_mora)
		end
	end
	
	-- Wraps a kana in a dotted, rounded border to show devoicing.
	local function kana_devoice(text)
		return "<span style=\"border:1px dotted gray; border-radius:50%;\">" .. text .. "</span>"
	end
	
	if dev then
		for position in gsplit(dev, ",") do
			position = tonumber(position)
			-- NOTE(review): presumably both sides were indexed by `position`.
			kanas = kana_devoice(kanas)
		end
	end
	
	-- Text used for romanization: "ー" after a captured group is replaced by a
	-- kana vowel, or by a copy of the capture in the last rule.
	local romaji_text = gsub(text, "(?)ー", "%1お")
	romaji_text = gsub(romaji_text, "(?)ー", "%1え")
	romaji_text = gsub(romaji_text, "(?)ー", "%1う")
	romaji_text = gsub(romaji_text, "(?)ー", "%1い")
	romaji_text = gsub(romaji_text, "(?)ー", "%1あ")
	romaji_text = gsub(romaji_text, "(?)ー", "%1%1")
	
	local romajis = split(romaji_text, "")
	
	-- Counts the spaces removed from the shortest prefix of `text` that still
	-- holds `index` non-space characters.
	local function count_nspaces(text, index)
		local i, sample, nspaces = 0, "", 0
		while len(sample) < index do
			i = i + 1
			sample, nspaces = gsub(sub(text, 1, i), " ", "")
		end
		return nspaces
	end
	
	local function romaji_devoice(text)
		-- use @ instead of  ̥
		return text .. "@"
	end
	
	if dev then
		for position in gsplit(dev,",") do
			-- NOTE(review): presumably indexed lookups were lost on both lines.
			position = position_mora_space
			romajis = romaji_devoice(romajis)
		end
	end
	
	if acc_type == "n" then
		-- Three parts: before the rise, the high stretch, after the downstep.
		local r_start_index = start_index + count_nspaces(romaji_text, start_index)
		local r_index = position_mora_space
		local k_index = acc_number
		
		local r_parts = {
			 = concat(romajis, "", 1, r_start_index),
			 = concat(romajis, "", r_start_index + 1, r_index),
			 = concat(romajis, "", r_index + 1, #romajis)
		}
		
		local k_parts = {
			 = concat(kanas, "", 1, 1),
			 = concat(kanas, "", 2, k_index),
			 = concat(kanas, "", k_index + 1, #kanas)
		}
		
		-- A leading space of a part is emitted separately between the parts.
		local space2 = ""
		local space3 = ""
		if sub(r_parts, 1, 1) == " " then
			space2 = " "
		end
		if sub(r_parts, 1, 1) == " " then
			space3 = " "
		end
		
		result = start .. 
			k_parts .. 
			down_first .. 
			k_parts .. 
			down_last .. 
			k_parts .. 
			last .. 
			romaji_start .. 
			export.rise_and_fall(r_parts, "fall") .. 
			space2 ..
			export.rise_and_fall(r_parts, "rise") .. 
			"ꜜ" .. 
			space3 ..
			export.rise_and_fall(r_parts, "fall") .. 
			romaji_last .. 
			"(] – )"
		
	else
		-- Two parts: the first unit and the rest.
		local r_start_index = start_index + count_nspaces(romaji_text, start_index)
		local r_parts = {
			 = concat(romajis, "", 1, r_start_index),
			 = concat(romajis, "", r_start_index + 1, #romajis)
		}
		
		local k_parts = {
			 = concat(kanas, "", 1, 1),
			 = concat(kanas, "", 2, #kanas)
		}
		
		local space2 = ""
		if sub(r_parts, 1, 1) == " " then
			space2 = " "
		end
			
		if acc_type == "h" then
			-- Low first part, then high with no downstep.
			result = start .. 
				k_parts .. 
				high_first .. 
				k_parts .. 
				last .. 
				last .. 
				romaji_start .. 
				export.rise_and_fall(r_parts, "fall") .. 
				space2 ..
				export.rise_and_fall(r_parts, "rise") .. 
				romaji_last .. 
				"(] – )"
				
		elseif acc_type == "a" then
			-- High first part followed immediately by the downstep.
			result = start .. 
				down_first .. 
				k_parts .. 
				down_last .. 
				k_parts .. 
				last .. 
				romaji_start .. 
				export.rise_and_fall(r_parts, "rise") .. 
				"ꜜ" .. 
				space2 ..
				export.rise_and_fall(r_parts, "fall") .. 
				romaji_last .. 
				"(] – )"
				
		elseif acc_type == "o" then
			-- Low first part, high to the end, downstep after the last part.
			result = start .. 
				k_parts .. 
				down_first .. 
				k_parts .. 
				down_last .. 
				last .. 
				romaji_start .. 
				export.rise_and_fall(r_parts, "fall") .. 
				space2 ..
				export.rise_and_fall(r_parts, "rise") .. 
				"ꜜ" .. 
				romaji_last .. 
				"(] – )"
				
		else
			return error("Accent type not recognised.")
		end
		
	end
	
	-- The character preceding each "@" devoicing marker is wrapped in <del>.
	result = gsub(result, "(.)@", "<del>%1</del>")
	
	return result, acc_type
end

return export
-- Module:ja-pron
--
-- Wikious
--
-- Boobota
--
-- Sagapedia