Modul:ja

Üdvözlöm, Ön a Modul:ja szó jelentését keresi. A DICTIOUS-ban nem csak a Modul:ja szó összes szótári jelentését megtalálod, hanem megismerheted az etimológiáját, a jellemzőit és azt is, hogyan kell a Modul:ja szót egyes és többes számban mondani. Minden, amit a Modul:ja szóról tudni kell, itt található. A Modul:ja szó meghatározása segít abban, hogy pontosabban és helyesebben fogalmazz, amikor beszélsz vagy írsz. A Modul:ja és más szavak definíciójának ismerete gazdagítja a szókincsedet, és több és jobb nyelvi forráshoz juttat.

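This module provides utility functions for Japanese text: conversion between hiragana and katakana, romanization of kana, conversion of romaji to katakana, script detection, and generation of sort keys.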

Testcases

Module:ja/testcases

Functions

  1. hira_to_kata:
    {{#invoke:ja|hira_to_kata|おはようございます}} → オハヨウゴザイマス
  2. kata_to_hira:
    {{#invoke:ja|kata_to_hira|アメリカンアパレル}} → あめりかんあぱれる
  3. kana_to_romaji:
    {{#invoke:ja|kana_to_romaji|おやすみなさい}} → oyasuminasai
    {{#invoke:ja|kana_to_romaji|バックブリーカー}} → bakkuburīkā
    {{#invoke:ja|kana_to_romaji|「^すげぇ やん!」}} → “Sugē yan!”
  4. romaji_to_kata:
    {{#invoke:ja|romaji_to_kata|bakkurasshu}} → バックラッシュ
  5. script:
    {{#invoke:ja|script|どうも有難う御座います}} → Hira+Hani
    {{#invoke:ja|script|どうぞよろしく}} → Hira
    {{#invoke:ja|script|アメリカ合衆国}} → Kana+Hani
  6. sort:
    {{#invoke:ja|sort|バックラッシュ}} → |sort=はっくらっしゅ'
    {{#invoke:ja|sort|どうぞよろしく}} → |sort=とうぞよろしく'
    {{#invoke:ja|sort|アメリカ}} → |sort=あめりか

Uses

It is used by

  1. {{ja-new}} (which uses it substitutively)
  2. {{ja-verbconj}} and its subtemplates
  3. {{ja-noun}}, {{ja-verb}}, {{ja-adj}}, {{ja-pos}}, and {{ja-verb-suru}}, which detect the script, generate romanizations, and generate sort keys
  4. {{ja-readingcat}}, {{ja-readascat}} to generate romanizations, sort keys, count morae, perform checks
  5. {{ja-readings}} (Module:ja-kanji-readings) to generate romanizations and to convert from hiragana to katakana for on readings in Module:ja/data/jouyou-yomi

-- Table of everything this module exposes.
local export = {}

-- Title of the page being rendered; its text and namespace name are cached
-- once when the module loads.
local titleObj = mw.title.getCurrentTitle()
local pagename = titleObj.text
local namespace = titleObj.nsText

-- Short local aliases. str_gsub is the plain string.gsub; the others come
-- from mw.ustring and mw.text.
local str_gsub = string.gsub
local find = mw.ustring.find
local length = mw.ustring.len
local trim = mw.text.trim
local split = mw.text.split
local sub, gsub = mw.ustring.sub, mw.ustring.gsub
local match, gmatch = mw.ustring.match, mw.ustring.gmatch
local to_cp, to_char = mw.ustring.codepoint, mw.ustring.char

-- Script and language objects looked up by their codes.
local Jpan = require("Module:scripts").getByCode("Jpan")
local lang = require("Module:languages").getByCode("ja")

-- note that arrays loaded by mw.loadData cannot be directly used by gsub
local data = mw.loadData("Module:ja/data")

-- Unicode normalization often converts these to the corresponding CJK Unified Ideographs characters
-- The value is the characters U+F900 and U+FAD9 joined by "-"; presumably
-- meant to be placed inside a pattern character class -- confirm at use sites.
local compat_ideo = mw.ustring.char(0xF900) .. "-" .. mw.ustring.char(0xFAD9)

-- Fields copied from the loaded data table and re-exposed on the module
-- under export.data.
export.data = {
	joyo_kanji = data.joyo_kanji,
	jinmeiyo_kanji = data.jinmeiyo_kanji,
	grade1 = data.grade1,
	grade2 = data.grade2,
	grade3 = data.grade3,
	grade4 = data.grade4,
	grade5 = data.grade5,
	grade6 = data.grade6
}

-- Records a tracking entry named "ja/" followed by `code` through
-- Module:debug. Raises an error when `code` is not a string.
local function track(code)
	local is_string = type(code) == "string"
	if not is_string then
		error("The track function requires a string as argument.")
	end
	local debug_module = require("Module:debug")
	debug_module.track("ja/" .. code)
end

-- Builds a replacement callback for gsub: the returned function takes one
-- character and returns the character whose code point is `added_value`
-- higher (or lower, when `added_value` is negative).
local function change_codepoint(added_value)
	local function shift(char)
		local shifted = to_cp(char) + added_value
		return to_char(shifted)
	end
	return shift
end

-- Converts hiragana in `text` to katakana; all other characters are left
-- untouched.
--
-- text: a string, or a frame table whose first positional argument is the
--       text (as in {{#invoke:ja|hira_to_kata|...}}).
-- Returns the converted string.
function export.hira_to_kata(text)
	-- A frame's `args` is a table; the text itself is its first entry.
	if type(text) == "table" then text = text.args[1] end

	-- ぁ–ゖ (U+3041–U+3096) sit exactly 96 code points below ァ–ヶ.
	text = gsub(text, '[ぁ-ゖ]', change_codepoint(96))
	-- Small hiragana wi/we/wo (U+1B150–U+1B152) sit 20 code points below
	-- their small katakana counterparts (U+1B164–U+1B166).
	text = gsub(text, '[' .. to_char(0x1B150) .. '-' .. to_char(0x1B152) .. ']', change_codepoint(20))

	return text
end

-- Converts katakana in `text` to hiragana; all other characters are left
-- untouched.
--
-- text: a string, or a frame table whose first positional argument is the
--       text (as in {{#invoke:ja|kata_to_hira|...}}).
-- Returns the converted string.
function export.kata_to_hira(text)
	-- A frame's `args` is a table; the text itself is its first entry.
	if type(text) == "table" then text = text.args[1] end

	-- ァ–ヶ (U+30A1–U+30F6) sit exactly 96 code points above ぁ–ゖ.
	text = gsub(text, '[ァ-ヶ]', change_codepoint(-96))
	-- Small katakana wi/we/wo (U+1B164–U+1B166) sit 20 code points above
	-- their small hiragana counterparts (U+1B150–U+1B152).
	text = gsub(text, '[' .. to_char(0x1B164) .. '-' .. to_char(0x1B166) .. ']', change_codepoint(-20))

	return text
end

-- Converts fullwidth ASCII variants in `text` to their ordinary ASCII
-- forms, and the ideographic space to a plain space.
--
-- text: a string, or a frame table whose first positional argument is the
--       text.
-- Returns the converted string.
function export.fullwidth_to_halfwidth(text)
	-- A frame's `args` is a table; the text itself is its first entry.
	if type(text) == "table" then text = text.args[1] end

	-- U+3000 IDEOGRAPHIC SPACE is written by code point so it cannot be
	-- confused with (or silently normalized to) an ASCII space.
	text = gsub(text, to_char(0x3000), ' ')
	-- ！–～ (U+FF01–U+FF5E) are exactly 65248 (0xFEE0) above ASCII !–~.
	return (gsub(text, '[！-～]', change_codepoint(-65248)))
end

-- Romanizes kana text.
--
-- text:    the string to convert, or a table whose `args` field is used
--          instead.
-- options: optional table of flags read below: no_diacritics, keep_period,
--          hist, phonetic. Defaults to an empty table.
-- Returns the converted string after NFC normalization.
--
-- The function rewrites `text` in place through a long series of
-- substitutions; the steps depend on the order in which they run.
--
-- NOTE(review): many pattern literals and table constructors in this function
-- appear truncated in this copy (empty '' patterns, "()" captures with nothing
-- inside, table entries with no key). Presumably bracketed text was lost when
-- the file was extracted; the body does not compile as shown. Confirm every
-- such literal against the upstream module before relying on it.
function export.kana_to_romaji(text, options)
	-- options: no_diacritics, keep_period, hist, phonetic
	local str_find = string.find

	-- NOTE(review): `args` is used as the text itself; presumably an index
	-- was lost here -- confirm.
	if type(text) == "table" then
		text = text.args
	end

	if not options then options = {} end

	-- conversions
	if not options.phonetic then
		text = gsub(text, '(%-)()$', '%1㊟㈛㊟%2') -- は as suffix (派 "-ha", etc.) and appearing at the end of string
		text = gsub(text, '(%-)() ', '%1㊟㈛㊟%2 ') -- は as suffix and appearing mid-sentence
	end

	-- Markup that must survive romanization is swapped for ㊟…㊟ placeholder
	-- sequences here and swapped back near the end of the function.
	text = str_gsub(text, '%', '㊟㌫㊟') -- at ], for example; avoid collision with % used in our ruby syntax
	text = str_gsub(text, '\'\'\'', '㊟⒝㊟')
	text = str_gsub(text, '<u>', '㊟㋑⒰㊟')
	text = str_gsub(text, '</u>', '㊟㋺⒰㊟')

	-- Pattern fragment used below to match a placeholder sitting between kana.
	local text_styling = "㊟+㊟"

	-- avoid tampering with existing latin text: store it away
	-- NOTE(review): as written, `escape` is overwritten with each match rather
	-- than indexed by `id`, and the gmatch pattern is incomplete -- presumably
	-- extraction damage; confirm.
	local escape = {}
	local id = 0
	for latin in string.gmatch(text, "+") do
		escape = latin
		text = str_gsub(text, latin, "㊟㊕㊕㊟" .. id .. "㊟㊕㊕㊟")
		id = id + 1
	end

	-- special preformatting
	text = str_gsub(text, 'ヶげつ', 'かげつ')
	text = gsub(text, 'ヶ(' .. text_styling .. ')げつ', 'か%1げつ') -- 「'''ヶ'''げつ」
	text = str_gsub(text, 'ヶ', 'が')
	text = str_gsub(text, 'ヵ', 'か')
	text = gsub(text, '(.)', '%1%1')
	text = gsub(text, '(.)', function(char) return mw.ustring.toNFC(char .. char .. '゙') end) -- unicode hax

	-- ]
	-- Historical mode: the ㊟⒳㊟ marker inserted here is consumed by the
	-- second `options.hist` block further down.
	if options.hist then
		text = gsub(text, '', '㊟⒳㊟%0')
		text = gsub(text, '.',
			{
				 = 'si',
				 = 'zi',				
				 = 'ti',
				 = 'di',
				 = 'tu',
				 = 'du',
				 = 'ye',
				 = 'yi',
				 = 'wi',
				 = 'wo'
			}
		)
	end

	-- Normalize to katakana, replace each character through the data.kr
	-- lookup table, then fold fullwidth forms to halfwidth.
	text = export.hira_to_kata(text)
	text = gsub(text, '.', data.kr)
	text = export.fullwidth_to_halfwidth(text)

	if options.hist then
		text = str_gsub(text, 'oo', 'o.o')
		text = str_gsub(text, 'ou', 'o.u')
		text = str_gsub(text, 'h', 'f')

		-- `old` is kept only to detect whether either substitution below
		-- changed anything, in which case the change is tracked.
		local old = text
		text = str_gsub(text, 'i㊟⒳㊟y', 'y') -- くゐやう kwyau
		text = str_gsub(text, '()u㊟⒳㊟w', '%1w')
		if old ~= text then
			--[=[
				There may be cases in which i or u is deleted incorrectly, and a
				period should be inserted.
				"Syncope" isn't quite accurate, as there wasn't a sound change.
				It's just an orthographic convention.
				]
			]=]
			--mw.log(str_gsub(old, '㊟⒳㊟', '')  .. ' → ' .. str_gsub(text, '㊟⒳㊟', '') )
			track('mora syncope')
		end

		text = str_gsub(text, '㊟⒳㊟', '') -- ゑつ wetsu
	end

	-- markup
	text = str_gsub(text, '%%', '.') -- ruby "percent sign" syntax
	text = gsub(text, '()%.', '%1') -- 「し を ぼっ.す」; 「るい%じん%えん」→「rui.jin¤.en¤」

	-- 「テェェェ」→「テェーー」 (avoid funky romaji effected by the "(テュ→)teユ→tyu" line below)
	local kogaki_vowels = {'ァ','ィ','ゥ','ェ','ォ'}
	for _, char in ipairs(kogaki_vowels) do
		text = gsub(text, '('..char..')('..char..'+)', function(a,b) return a .. mw.ustring.rep('ー', length(b)) end)
	end

	-- (ゲェ→)geェ→gee (note that this causes things like ウゥ→ū and ギィ→gī)
	text = gsub(text, '', {='aa',='ii',='uu',='ee',='oo',})

	-- (クヮ→)kuヮ→kwa, (ク𛅤→)ku𛅤→kwi, (ク𛅥→)ku𛅥→kwe, (ク𛅦→)ku𛅦→kwo
	text = gsub(text, '()', {='wa',='wi',='we',='wo',})

	-- (クァ→)kuァ→kwa, (トァ→)toァ→twa, (ウィ→)uィ→wi
	text = gsub(text, '()', {='wa',='wi',='we',='wo',})
	if not options.hist then
		-- (ツァ→)cwa→ca
		text = str_gsub(text, '()w', '%1')
	end

	-- (テュ→)teユ→tyu, (ギェ→)giェ→gye
	text = gsub(text, '()', {='ya',='yu',='ye',='yo',})
	-- (ジュ→)jyu→ju
	text = gsub(text, '()y', '%1')

	-- (ティ→)teィ→ti (essentially forget about the vowel in between)
	text = gsub(text, '()', {='a',='i',='u',='e',='o',})

	-- chouonpu and sokuon
	-- Repeats until no ー or geminating ッ matches remain, so that runs of
	-- several marks are resolved one per pass.
	while str_find(text, 'ー') or str_find(text, 'ッ *') or find(text, 'ッ' .. text_styling .. '') do
		text = str_gsub(text, '()ー', '%1%1')
		text = str_gsub(text, 'ッ( *)()', '%2%1%2')
		text = gsub(text, 'ッ(' .. text_styling .. ')()', '%2%1%2')
	end
	-- deal with leftover sokuon not used as geminate
	text = str_gsub(text, 'ッ', '&#39;') -- Apostrophe

	-- (ん→)n¤
	text = str_gsub(text, '¤()', "'%1")
	text = str_gsub(text, '¤', '')

	-- は, へ
	if not options.phonetic and str_find(text, "h") then
		for i, v in ipairs{
			{ "ha", "wa" },
			{ "he", "e" }
		} do
			-- NOTE(review): `v` is a two-element table here but is concatenated
			-- as a string below, and `thingy` is empty; presumably indexes and
			-- a character class were lost -- confirm.
			local thingy = '' -- not sure what this should be named
			text = gsub(text, "(" .. thingy .. ")" .. v .. "(" .. thingy .. ")", "%1" .. v .. "%2")
			text = gsub(text, "(" .. thingy .. ")" .. v .. "$", "%1" .. v)
			text = gsub(text, "^" .. v .. "(" .. thingy .. ")", v .. "%1")
			if find(text, text_styling) then
				text = gsub(text, "(" .. thingy .. ")" .. v .. "(" .. text_styling .. thingy ..")", "%1" .. v .. "%2")
				text = gsub(text, "(" .. thingy .. ")" .. v .. "(" .. text_styling .. ")$", "%1" .. v .. "%2")
				text = gsub(text, "(" .. thingy .. text_styling .. ")" .. v .. "(" .. text_styling .. thingy ..")", "%1" .. v .. "%2")
				text = gsub(text, "(" .. thingy .. text_styling .. ")" .. v .. "(" .. text_styling .. ")$", "%1" .. v .. "%2")
			end
		end
	end
	-- change only when
	--   ① not flanked by a-z or a period ("^sore wa nani$", "^hyappou no .he hitotsu$")
	--   ② at the end of the string and not preceded by a-z or a period ("^are wa$")
	--   ③ at the beginning of the string and not followed by a-z or a period ("^he ikou$") 
	-- this also means that "^ha$" becomes "ha"
	-- period can be used next to the kana (either side) to force the "dumb" romanization (i.e. "ha", "he")

	-- fix sh, ch, ts
	-- Callback for the styled-text gsub below. Returns the (possibly replaced)
	-- geminate run, the intervening placeholder, the (possibly replaced) main
	-- consonant and the following character, concatenated.
	local function handle_digraphs(geminate, intervening, main, following)
		--「めちゃ」→「mecha」
		--「めっちゃ」→「metcha」
		--「めっっちゃ」→「mettcha」
		local corresp_geminate_form = {='s',='t',='t'}
		local corresp_main = {='sh',='ch',='ts'}

		local geminate_repl, main_repl

		-- So as not to convert ch to tsh.
		if not following or main .. following ~= "ch" then
			main_repl = corresp_main
		end

		if geminate ~= "" then
			geminate_repl = string.rep(corresp_geminate_form, #geminate)
		end

		return (geminate_repl or geminate) .. (intervening or "") .. (main_repl or main) .. (following or "")
	end

	-- Same as handle_digraphs for matches that have no intervening placeholder.
	local function handle_digraphs2(geminate, main, following)
		return handle_digraphs(geminate, nil, main, following)
	end

	text = gsub(text, '(+)(' .. text_styling .. ')()', handle_digraphs)
	text = gsub(text, '(*)()(.?)', handle_digraphs2)


	-- macrons
	-- Will cause problems if combined vowel-macron characters are used below.
	if not options.no_diacritics then
		if not options.phonetic then
			text = str_gsub(text, 'ou', 'ō')
		end
		-- U+0304 is appended after the first of two identical captured
		-- characters; the final toNFC call composes it where possible.
		local macron = to_char(0x304)
		text = str_gsub(
			text,
			'()%1',
			'%1' .. macron
		)
	end

	-- remove markup and convert real periods
	if not options.keep_period then
		text = str_gsub(text, '%.', '')
		text = str_gsub(text, '。', '◆.◇')
	end

	--
	-- ◆ and ◇ are temporary markers: adjacent pairs cancel, ◆ swallows the
	-- spaces around it and ◇ collapses to a single space.
	text = str_gsub(text, '◇◆', '')
	text = str_gsub(text, '◆◇', '')
	text = str_gsub(text, ' *◆ *', '')
	text = str_gsub(text, ' *◇ *', ' ')

	-- restore latin text
	-- NOTE(review): the callback returns the whole `escape` value and ignores
	-- `id`; presumably an index expression was lost -- confirm.
	text = str_gsub(text, "㊟㊕㊕㊟(%d+)㊟㊕㊕㊟", function(id) return escape end)

	-- clean up spaces
	text = trim(text)
	text = str_gsub(text, ' +', ' ')

	-- remove double ampersands used in ruby
	text = str_gsub(text, '&&(.-)&&', '%1')

	-- uppercase markup
	text = str_gsub(text, "(%^)(㊟⒝㊟)", "%2%1") -- move ^ to an effective position if placed before bold markup
	text = str_gsub(text, "(%^)( )", "%2%1") -- same but with spaces
	text = gsub(text, '%^(.)', mw.ustring.upper) -- uppercase conversion

	-- clean up spaces again
	text = str_gsub(text, ' +', ' ')

	-- conversions
	-- Put the original markup back in place of the placeholders.
	text = str_gsub(text, '㊟⒝㊟', '\'\'\'')
	text = str_gsub(text, '㊟㋑⒰㊟', '<u>')
	text = str_gsub(text, '㊟㋺⒰㊟', '</u>')
	text = str_gsub(text, '㊟㈛㊟', '')
	text = str_gsub(text, '㊟㌫㊟', '%%')

	-- unicode NFC
	text = mw.ustring.toNFC(text)

	-- NOTE(review): with an empty pattern this check always succeeds;
	-- presumably it originally looked for leftover unconverted characters.
	if find(text, '') then
		track('k2r failure')
	end

	return text
end

-- removes spaces and hyphens from input
-- intended to be used when checking manual romaji to allow the
-- insertion of spaces or hyphens in manual romaji without appearing "wrong"
--
-- f: a string, or a table whose `args` field is used as the text.
-- Every literal "&nbsp;" is removed as well. Returns the resulting string.
--
-- NOTE(review): the four keys of the replacement table are missing in this
-- copy (presumably bracketed keys were lost), so the exact set of removed
-- characters cannot be read from here, and `f.args` is used without an
-- index -- confirm both against the upstream module.
function export.rm_spaces_hyphens(f)
	local text = type(f) == 'table' and f.args or f
	text = str_gsub(text, '.', {  = '',  = '',  = '',  = '' })
	text = str_gsub(text, '&nbsp;', '')
	return text
end

-- Converts romaji to katakana.
--
-- f: a string, or a table whose `args` field is used as the text.
-- Returns the converted string.
--
-- The input is lowercased first and then rewritten by an ordered series of
-- substitutions: a per-character data.rd lookup, doubled consonants to ッ plus
-- consonant, the ts/sh/ch digraph syllables, n-syllables, a data.rk lookup,
-- remaining n to ン, and finally bare vowels.
--
-- NOTE(review): several patterns and all keys of the ts/sh/ch/n tables are
-- missing in this copy (presumably bracketed text was lost), and `f.args` is
-- used without an index; the body does not compile as shown -- confirm
-- against the upstream module.
function export.romaji_to_kata(f)
	local text = type(f) == 'table' and f.args or f
	text = string.lower(text)
	text = gsub(text, '.', data.rd)
	-- A doubled consonant becomes the geminate mark ッ plus one consonant.
	text = str_gsub(text, '(.)%1', {
		k = 'ッk', s = 'ッs', t = 'ッt', p = 'ッp',
		b = 'ッb', d = 'ッd', g = 'ッg', j = 'ッj'
	})
	text = str_gsub(text, 'tc', 'ッc')
	text = str_gsub(text, 'tsyu', 'ツュ')
	text = str_gsub(text, 'ts', {='ツ',='ツォ',='ツィ',='ツェ',='ツァ'})
	text = str_gsub(text, 'sh', {='シュ',='ショ',='シ',='シェ',='シャ'})
	text = str_gsub(text, 'ch', {='チュ',='チョ',='チ',='チェ',='チャ'})
	text = str_gsub(text, "n?", {='ヌ',='ノ',='ニ',='ネ',='ナ'})
	text = str_gsub(text, '?', data.rk)
	-- Any n still left (optionally followed by an apostrophe) is the moraic ン.
	text = str_gsub(text, "n'?", 'ン')
	text = str_gsub(text, '', {
		u = 'ウ', o = 'オ', i = 'イ', e = 'エ', a = 'ア'
	})
	return text
end

-- expects: any mix of kanji and kana (a string, or a frame whose first
-- positional argument is the text, as in {{#invoke:ja|script|...}})
-- determines the script types used
-- e.g. given イギリス人, it returns Kana+Hani
--
-- Returns the detected script names joined with "+", always in the order
-- Hira, Kana, Hani, Romaji, Number, Abbreviation.
--
-- NOTE(review): every pattern literal below is empty in this copy (presumably
-- the bracketed character classes were lost in extraction). They are left
-- untouched because their exact contents cannot be determined from this file;
-- restore them from the upstream module.
function export.script(f)
	-- Declared local: the original assigned `text` and `script` as globals,
	-- leaking them into the shared environment.
	local text = type(f) == 'table' and f.args[1] or f
	local script = {}

	if find(text, '') or find(text, '') then
		table.insert(script, 'Hira')
	end
	-- TODO: there are two kanas.  This should insert Kata.
	if find(text, '') or find(text, '') then
		table.insert(script, 'Kana')
	end
	-- 一 is unicode 4e00, previously used 丁 is 4e01
	if find(text, '') then
		table.insert(script, 'Hani')
	end
	-- matching %a should have worked but matched the end of every string
	if find(text, '') then
		table.insert(script, 'Romaji')
	end
	if find(text, '') then
		table.insert(script, 'Number')
	end
	if find(text, '') then
		table.insert(script, 'Abbreviation')
	end

	return table.concat(script, '+')
end

-- when counting morae, most small hiragana belong to the previous mora,
-- so for purposes of counting them, they can be removed and the characters
-- can be counted to get the number of morae.  The exception is small tsu,
-- so data.nonmora_to_empty maps all small hiragana except small tsu.
--
-- text: a string, or a frame table whose first positional argument is the
--       text.
-- Returns the number of characters left after the removals.
function export.count_morae(text)
	-- A frame's `args` is a table; the text itself is its first entry.
	if type(text) == "table" then
		text = text.args[1]
	end
	-- convert kata to hira (hira is untouched)
	text = export.kata_to_hira(text)
	-- remove all of the small hiragana such as ょ except small tsu
	text = gsub(text, '.', data.nonmora_to_empty)
	-- remove zero-width spaces
	text = gsub(text, '‎', '')
	-- return number of characters, which should be the number of morae
	return length(text)
end

-- accepts: any mix of kana (a string, or a frame table whose first
-- positional argument is the text)
-- returns: a hiragana sort key designed for WMF software
-- this is like sort() but doesn't return |sort=sortkey,
-- just the sort key itself, but unlike sort(), this
-- replaces the long vowel mark with its vowel
function export.jsort(text)
	-- A frame's `args` is a table; the text itself is its first entry.
	if type(text) == "table" then
		text = text.args[1]
	end

	-- remove western spaces, hyphens, and periods
	-- diff=41967612: also remove caret
	text = gsub(text, '[ %-%.%^]', '')

	text = export.kata_to_hira(text)

	-- if the first character has dakuten, replace it with the corresponding
	-- character without dakuten and add an apostrophe to the end, e.g.
	-- がす > かす'
	-- similar thing but with handakuten and two apostrophes, e.g. ぱす -> はす''
	local first = sub(text, 1, 1)
	-- Guard against empty input: gsub on '' also yields '', which would
	-- otherwise be mistaken for a dakuten character.
	if first ~= '' then
		local marker
		if gsub(first, '.', data.dakuten) == '' then
			marker = "'"
		elseif gsub(first, '.', data.handakuten) == '' then
			marker = "''"
		end
		if marker then
			-- Parentheses drop gsub's second return value (the match count).
			text = (gsub(first, '.', data.tenconv)) .. sub(text, 2, length(text)) .. marker
		end
	end

	-- replace the long vowel mark with the vowel that it stands for
	for key, value in pairs(data.longvowels) do
		text = gsub(text, key, value)
	end
	return text
end

-- returns a sort key with |sort= in front, e.g.
-- |sort=はつぐん' if given ばつぐん
--
-- f: a string, or a table whose `args` field is used as the text.
--
-- `kyreplace` holds the text with everything matched by the first gsub
-- removed; the "|sort=" prefix and the unchanged-text fallback are emitted
-- only when that leaves an empty string. When the first character maps to ''
-- through data.dakuten (or data.handakuten), it is replaced through
-- data.tenconv and one (or two) apostrophes are appended.
--
-- NOTE(review): the pattern in the first gsub is empty in this copy and
-- `f.args` has no index (presumably bracketed text was lost in extraction).
-- The branch logic depends on that pattern, so the code is left as is --
-- confirm against the upstream module.
function export.sort(f)
	local text = type(f) == 'table' and f.args or f
	local textsub = ''
	local convertedten = ''
	local result = ''
	local len = 1
	local kyreplace = ''
	kyreplace = gsub(text,'', '')
	if kyreplace == '' then
		result = ('|' .. 'sort' .. '=')
	end
	text = export.kata_to_hira(text)
	if gsub(sub(text,1,1),'.',data.dakuten) == '' then
		-- Add the prefix here only if it was not already added above.
		if kyreplace == '' then else result = ('|' .. 'sort' .. '=') end
		len = length(text)
		textsub = sub(text,2,len)
		convertedten = gsub(sub(text,1,1),'.',data.tenconv)
		result = (result .. convertedten .. textsub .. "'")
	else
		if gsub(sub(text,1,1),'.',data.handakuten) == '' then
			-- Add the prefix here only if it was not already added above.
			if kyreplace == '' then else result = ('|' .. 'sort' .. '=') end
			len = length(text)
			textsub = sub(text,2,len)
			convertedten = gsub(sub(text,1,1),'.',data.tenconv)
			result = (result .. convertedten .. textsub .. "''")
		else
			-- No dakuten or handakuten: the key is the hiragana text itself,
			-- and nothing is returned unless kyreplace came out empty.
			if kyreplace == '' then
				result = (result .. text)
			end
		end
	end
	return result
end

-- returns the "stem" of a verb or -i adjective, that is the term minus the final character
--
-- f: a frame table; the term is its first positional argument.
function export.definal(f)
	-- `f.args` is a table, so the term has to be read from its first entry
	-- before it can be measured and sliced.
	local term = f.args[1]
	return sub(term, 1, length(term) - 1)
end

-- Returns `text` with every match of the pattern below removed; the
-- parentheses discard string.gsub's second return value (the match count).
--
-- NOTE(review): the pattern is empty in this copy, so as written the call
-- returns `text` unchanged. Presumably a character class listing the ruby
-- markup characters was lost in extraction -- confirm against upstream.
function export.remove_ruby_markup(text)
	return (string.gsub(text, "", ""))
end

-- do the work of Template:ja-kanji
--
-- frame: the invoking frame; parameters are read from the parent frame's
--        arguments and processed through Module:parameters with the keys
--        grade, rs, shin, kyu and head.
-- Returns the concatenated display wikitext followed by the category links
-- joined with newlines. Returns nothing when the cached namespace name is
-- not the empty string.
--
-- NOTE(review): this function is heavily truncated in this copy: table keys,
-- `args` indexes and most wikitext strings are missing (presumably bracketed
-- text was lost in extraction), and the body does not compile as shown.
-- Confirm every such spot against the upstream module.
function export.kanji(frame)
	-- Shadows the file-level `pagename` with a freshly fetched page title.
	local pagename = mw.title.getCurrentTitle().text
	-- only do this if this entry is a kanji page and not some user's page
	if namespace == "" then
		local params = {
			grade = {},
			rs = {},
			shin = {},
			kyu = {},
			head = {},
		}
		local args = require("Module:parameters").process(frame:getParent().args, params)

		-- Falls back to a sort key generated by Module:zh-sortkey.
		local rs = args or require("Module:zh-sortkey").makeSortKey(pagename, "ja")
		local shin = args
		local kyu = args
		local head = args

		local grade_replacements = {
			 = 7,
			 = 8,
			 = 9,
			 = 0,
		}
		local grade = tonumber(args) or args
		grade = grade_replacements or grade

		local wikitext = {}
		local categories = {}

		local catsort = rs or pagename

		-- display the kanji itself at the top at 275% size
		table.insert(wikitext, '<div><span lang="ja" class="Jpan" style="font-size:275%; line-height:1;">' .. (args or pagename) .. '</span></div>')

		-- display information for the grade

		-- if grade was not specified, determine it now
		if not grade then
			grade = export.kanji_grade(pagename)
		end

		local in_parenthesis = {}
		local grade_links = {
			 = "]",
			 = "]",
			 = "]",
			 = "]",
			 = "]",
			 = "]",
			 = "]",
			 = "]",
			 = "]",
			 = "]",
		}
		if grade_links then
			table.insert(in_parenthesis, grade_links)
		else
			table.insert(categories, "]")
		end

		-- link to shinjitai if shinjitai was specified, and link to kyujitai if kyujitai was specified

		if kyu then
			table.insert(in_parenthesis, '] kanji, ] form <span lang="ja" class="Jpan">]</span>')
		elseif shin then
			table.insert(in_parenthesis, '] kanji, ] form <span lang="ja" class="Jpan">]</span>')
		end
		table.insert(wikitext, "''(" .. table.concat(in_parenthesis, ",&nbsp;") .. "'')")

		-- add categories
		table.insert(categories, "]")
		local grade_categories = {
			 = "Grade 1 kanji",
			 = "Grade 2 kanji",
			 = "Grade 3 kanji",
			 = "Grade 4 kanji",
			 = "Grade 5 kanji",
			 = "Grade 6 kanji",
			 = "Common kanji",
			 = "Kanji used for names",
			 = "Uncommon kanji",
			 = "CJKV radicals",
		}
		table.insert(categories, " or error("The grade " .. grade .. " is invalid.")) .. "|" .. (grade == "0" and " " or catsort) .. "]]")

		-- error category
		if not rs then
			table.insert(categories, "]")
		end

		return table.concat(wikitext, "") .. table.concat(categories, "\n")
	end
end

-- Patterns tested by export.kanji_grade, one per grade it can return.
-- NOTE(review): every pattern is the empty string in this copy, which matches
-- any input. Presumably each was a character class built from the lists in
-- the loaded data table and the bracketed text was lost in extraction --
-- confirm against the upstream module.
local grade1_pattern = ('')
local grade2_pattern = ('')
local grade3_pattern = ('')
local grade4_pattern = ('')
local grade5_pattern = ('')
local grade6_pattern = ('')
local secondary_pattern = ('')
local jinmeiyo_kanji_pattern = ('')
local hyogaiji_pattern = ('')

-- Determines the grade number of a kanji by testing it against the
-- file-level grade patterns.
--
-- kanji: a string, or a frame table whose first positional argument is the
--        kanji.
-- Returns the number paired with the first pattern that matches, tested in
-- the order 9, 8, 7, 6, 5, 4, 3, 2, 1, or false when none matches.
function export.kanji_grade(kanji)
	-- A frame's `args` is a table; the kanji itself is its first entry.
	if type(kanji) == "table" then
		kanji = kanji.args[1]
	end

	-- Order matters: the first matching pattern decides the result.
	local checks = {
		{ hyogaiji_pattern, 9 },
		{ jinmeiyo_kanji_pattern, 8 },
		{ secondary_pattern, 7 },
		{ grade6_pattern, 6 },
		{ grade5_pattern, 5 },
		{ grade4_pattern, 4 },
		{ grade3_pattern, 3 },
		{ grade2_pattern, 2 },
		{ grade1_pattern, 1 },
	}
	for _, check in ipairs(checks) do
		if find(kanji, check[1]) then
			return check[2]
		end
	end

	return false
end

return export