Modül:ja

Modül belgelemesi
Bu belgeleme Modül:ja/belge (düzenle | geçmiş) sayfasından yansıtılmaktadır. Arayüz düzenleyicilerinin deney yapabilmeleri için ayrıca Modül:ja/deneme tahtası sayfası kullanılabilir.
Bu modül şu Lua modüllerini kullanıyor:
-- Table of functions and data returned by this module (see `return export` at the end).
local export = {}
-- Local aliases for the Scribunto Unicode-aware string helpers and text utilities.
local find = mw.ustring.find
local length = mw.ustring.len
local trim = mw.text.trim
local split = mw.text.split
local sub, gsub = mw.ustring.sub, mw.ustring.gsub
local match, gmatch = mw.ustring.match, mw.ustring.gmatch
local to_cp, to_char = mw.ustring.codepoint, mw.ustring.char

-- Script and language objects looked up by code from the project's own modules.
local Jpan = require("Modül:alfabeler").getirKodaGore("Jpan")
local lang = require("Modül:diller").getirKodaGore("ja")

-- note that arrays loaded by mw.loadData cannot be directly used by gsub
local data = mw.loadData("Modül:ja/veri")

-- Unicode normalization often converts these to the corresponding CJK Unified Ideographs characters
-- The value is the range text "U+F900-U+FAD9", meant to be spliced into a character class.
local compat_ideo = mw.ustring.char(0xF900) .. "-" .. mw.ustring.char(0xFAD9)

-- Re-export selected kanji lists from the loaded data module so that
-- callers can reach them as export.data.<name>.
export.data = {}
for _, list_name in ipairs({
	"joyo_kanji",
	"jinmeiyo_kanji",
	"grade1",
	"grade2",
	"grade3",
	"grade4",
	"grade5",
	"grade6",
}) do
	export.data[list_name] = data[list_name]
end

-- Converts every hiragana character in `text` to the corresponding katakana.
-- @param text  a string, or a frame-like table whose first positional
--              argument (args[1]) holds the string
-- @return the converted string (gsub's match count is dropped by the parentheses)
function export.hira_to_kata(text)
	if type(text) == "table" then text = text.args[1] end

	-- Hiragana U+3041 (ぁ) .. U+3096 (ゖ) sit exactly 96 code points below
	-- katakana U+30A1 (ァ) .. U+30F6 (ヶ); only that range may be shifted.
	return (gsub(text, '[ぁ-ゖ]', function(char) return to_char(to_cp(char) + 96) end))
end

-- Converts every katakana character in `text` to the corresponding hiragana.
-- @param text  a string, or a frame-like table whose first positional
--              argument (args[1]) holds the string
-- @return the converted string (gsub's match count is dropped by the parentheses)
function export.kata_to_hira(text)
	if type(text) == "table" then text = text.args[1] end

	-- Katakana U+30A1 (ァ) .. U+30F6 (ヶ) sit exactly 96 code points above
	-- hiragana U+3041 (ぁ) .. U+3096 (ゖ); characters outside that range
	-- (e.g. the long-vowel mark ー) are left untouched.
	return (gsub(text, '[ァ-ヶ]', function(char) return to_char(to_cp(char) - 96) end))
end

-- Converts fullwidth ASCII variants and the ideographic space in `text`
-- to their ordinary (halfwidth) ASCII counterparts.
-- @param text  a string, or a frame-like table whose first positional
--              argument (args[1]) holds the string
-- @return the converted string (gsub's match count is dropped by the parentheses)
function export.fullwidth_to_halfwidth(text)
	if type(text) == "table" then text = text.args[1] end

	-- The ideographic space (U+3000) is not part of the contiguous fullwidth
	-- block, so it is mapped to an ASCII space separately.
	text = gsub(text, '　', ' ')
	-- Fullwidth forms U+FF01 (！) .. U+FF5E (～) are ASCII U+0021 .. U+007E
	-- shifted up by 65248 (0xFEE0).
	return (gsub(text, '[！-～]', function(char) return to_char(to_cp(char) - 65248) end))
end

-- Romanizes kana text.
--
-- Outline of the visible steps: markup (bold, <u>, fullwidth percent, latin
-- runs) is first swapped for ㊟…㊟ placeholder tokens; the text is converted
-- to katakana, run through a per-character substitution and through
-- fullwidth_to_halfwidth; small kana, chouonpu, sokuon, the particles は/へ
-- and sh/ch/ts are then post-processed; macrons are applied; finally the
-- placeholders are turned back into markup.  The legacy converter in
-- Modül:ja/k2r-old is also run, and both results are written with mw.log
-- when they differ.
--
-- @param text     a string, or a frame-like table (see NOTE below)
-- @param options  optional table; fields read here: no_diacritics,
--                 keep_period, hist
-- @return the romanized string
--
-- NOTE(review): many patterns and table constructors in this function look
-- truncated in this copy: empty '' patterns, empty '()' captures, key-less
-- constructors such as {='aa',...}, and bare `text.args` / `data.kr` /
-- `escape` where an index would be expected.  Bracketed text appears to have
-- been lost; restore it from the upstream module before relying on this code.
function export.kana_to_romaji(text, options)
	-- options: no_diacritics, keep_period, hist

	-- NOTE(review): presumably a positional argument was indexed here — confirm
	if type(text) == "table" then
		text = text.args
	end

	if not options then options = {} end

	-- remembered only for the (currently commented-out) tracking branches at the end
	local tracking_has_percent = find(text, '%%')
	-- result of the legacy converter, kept for the comparison at the end
	local text_old = trim(require('Modül:ja/k2r-old').kana_to_romaji(text, options.no_diacritics, options.keep_period))

	-- conversions
	text = gsub(text, '(%-)()$', '%1㊟㈛㊟%2') -- は as suffix and appearing at the end of string
	text = gsub(text, '(%-)() ', '%1㊟㈛㊟%2 ') -- は as suffix and appearing mid-sentence
	text = gsub(text, '％', '㊟㌫㊟') -- at ], for example; avoid collision with % used in our ruby syntax
	text = gsub(text, '\'\'\'', '㊟⒝㊟')
	text = gsub(text, '<u>', '㊟㋑⒰㊟')
	text = gsub(text, '</u>', '㊟㋺⒰㊟')

	-- avoid tampering with existing latin text: store it away
	-- NOTE(review): the gmatch pattern and the `escape` assignment look truncated
	-- (presumably a latin character class and an index by `id`) — confirm
	local escape = {}
	local id = 0
	for latin in gmatch(text, "+") do
		escape = latin
		text = gsub(text, latin, "㊟㊕㊕㊟" .. id .. "㊟㊕㊕㊟")
		id = id + 1
	end

	-- special preformatting
	text = gsub(text, 'ヶげつ', 'かげつ')
	text = gsub(text, 'ヶ(㊟+㊟)げつ', 'か%1げつ') -- 「'''ヶ'''げつ」
	text = gsub(text, 'ヶ', 'が')
	-- iteration marks: ゝ repeats the previous character, ゞ repeats it with a combining dakuten
	text = gsub(text, '(.)ゝ', '%1%1')
	text = gsub(text, '(.)ゞ', function(char) return mw.ustring.toNFC(char .. char .. '゙') end) -- unicode hax

	-- ]
	-- only when options.hist is set: pre-romanize づ/ぢ/を and insert a ㊟⒳㊟ marker
	if options.hist then
		text = gsub(text, 'づ', 'du')
		text = gsub(text, 'ぢ', 'di')
		text = gsub(text, 'を', 'wo')
		text = gsub(text, '()', '㊟⒳㊟%1')
	end

	text = export.hira_to_kata(text)
	-- NOTE(review): presumably a per-character lookup in data.kr — confirm
	text = gsub(text, '.', function(char) return data.kr or char end)
	text = export.fullwidth_to_halfwidth(text)

	-- only when options.hist is set: split "ou" with a period and drop the ㊟⒳㊟ markers again
	if options.hist then
		text = gsub(text, 'ou', 'o.u')
		text = gsub(text, '()㊟⒳㊟', '') -- くゐやう kwyau
		text = gsub(text, '㊟⒳㊟', '') -- ゑつ wetsu
	end

	-- markup
	text = gsub(text, '%%', '.') -- ruby "percent sign" syntax
	text = gsub(text, '()%.', '%1') -- 「し を ぼっ.す」; 「るい%じん%えん」→「rui.jin¤.en¤」

	-- 「テェェェ」→「テェーー」 (avoid funky romaji effected by the "(テュ→)teユ→tyu" line below)
	-- each run of a repeated small vowel keeps its first character; the rest become ー
	text = gsub(text, '(ァ)(ァ+)', function(a,b) return a .. mw.ustring.rep('ー', length(b)) end)
	text = gsub(text, '(ィ)(ィ+)', function(a,b) return a .. mw.ustring.rep('ー', length(b)) end)
	text = gsub(text, '(ゥ)(ゥ+)', function(a,b) return a .. mw.ustring.rep('ー', length(b)) end)
	text = gsub(text, '(ェ)(ェ+)', function(a,b) return a .. mw.ustring.rep('ー', length(b)) end)
	text = gsub(text, '(ォ)(ォ+)', function(a,b) return a .. mw.ustring.rep('ー', length(b)) end)

	-- (ゲェ→)geェ→gee (note that this causes things like ウゥ→ū and ギィ→gī)
	text = gsub(text, '', {='aa',='ii',='uu',='ee',='oo',})

	-- (クヮ→)kuヮ→kwa
	text = gsub(text, '()', {='wa',})

	-- (クァ→)kuァ→kwa, (トァ→)toァ→twa, (ウィ→)uィ→wi
	text = gsub(text, '()', {='wa',='wi',='we',='wo',})
	-- (ツァ→)cwa→ca
	text = gsub(text, '()w', '%1')

	-- (テュ→)teユ→tyu, (ギェ→)giェ→gye
	text = gsub(text, '()', {='ya',='yu',='ye',='yo',})
	-- (ジュ→)jyu→ju
	text = gsub(text, '()y', '%1')

	-- (ティ→)teィ→ti (essentially forget about the vowel in between)
	text = gsub(text, '()', {='a',='i',='u',='e',='o',})

	-- chouonpu and sokuon
	-- repeated until no ー and no ッ followed by spaces/placeholder remains,
	-- since each pass resolves only the marks whose neighbour is already romanized
	while find(text, 'ー') or find(text, 'ッ *') or find(text, 'ッ㊟+㊟') do
		text = gsub(text, '()ー', '%1%1')
		text = gsub(text, 'ッ( *)()', '%2%1%2')
		text = gsub(text, 'ッ(㊟+㊟)()', '%2%1%2')
	end
	-- deal with leftover sokuon not used as geminate
	text = gsub(text, 'ッ', 'h')

	-- (ん→)n¤
	text = gsub(text, '¤()', "'%1")
	text = gsub(text, '¤', '')

	-- は
	text = gsub(text, "()ha()", "%1wa%2")
	text = gsub(text, "()ha$", "%1wa")
	text = gsub(text, "^ha()", "wa%1")
	-- へ
	text = gsub(text, "()he()", "%1e%2")
	text = gsub(text, "()he$", "%1e")
	text = gsub(text, "^he()", "e%1")
	-- change only when
	--   ① not flanked by a-z or a period ("^sore wa nani$", "^hyappou no .he hitotsu$")
	--   ② at the end of the string and not preceded by a-z or a period ("^are wa$")
	--   ③ at the beginning of the string and not followed by a-z or a period ("^he ikou$")
	-- this also means that "^ha$" becomes "ha"
	-- period can be used next to the kana (either side) to force the "dumb" romanization (i.e. "ha", "he")

	-- fix sh, ch, ts
	-- NOTE(review): the pattern and both lookup tables look truncated — confirm
	text = gsub(text, '(*)()', function(geminate,main)
		--「めちゃ」→「mecha」
		--「めっちゃ」→「metcha」
		--「めっっちゃ」→「mettcha」
		local corresp_geminate_form = {='s',='t',='t'}
		local corresp_main = {='sh',='ch',='ts'}
		return (geminate and mw.ustring.rep(corresp_geminate_form, length(geminate))) .. corresp_main
	end
	)

	-- macrons
	if not options.no_diacritics then
		text = gsub(text, 'oo', 'ō')
		text = gsub(text, 'aa', 'ā')
		text = gsub(text, 'ee', 'ē')
		text = gsub(text, 'ou', 'ō')
		text = gsub(text, 'uu', 'ū')
		text = gsub(text, 'ii', 'ī')
	end

	-- remove markup and convert real periods
	-- 。 becomes '.' wrapped in ◆/◇ markers that the next step resolves
	if not options.keep_period then
		text = gsub(text, '%.', '')
		text = gsub(text, '。', '◆.◇')
	end

	-- resolve the ◆/◇ markers: adjacent pairs vanish, ◆ swallows surrounding
	-- spaces, ◇ collapses surrounding spaces into a single space
	text = gsub(text, '◇◆', '')
	text = gsub(text, '◆◇', '')
	text = gsub(text, ' *◆ *', '')
	text = gsub(text, ' *◇ *', ' ')

	-- restore latin text
	-- NOTE(review): the callback returns the whole `escape` value; presumably it
	-- was indexed by the captured id — confirm
	text = gsub(text, "㊟㊕㊕㊟(%d+)㊟㊕㊕㊟", function(id) return escape end)

	-- clean up spaces
	text = trim(text)
	text = gsub(text, ' +', ' ')

	-- uppercase markup
	text = gsub(text, "(%^)(㊟⒝㊟)", "%2%1") -- move ^ to an effective position if placed before bold markup
	text = gsub(text, "(%^)( )", "%2%1") -- same but with spaces
	text = gsub(text, '%^(.)', mw.ustring.upper) -- uppercase conversion

	-- clean up spaces again
	text = gsub(text, ' +', ' ')

	-- conversions
	-- turn the placeholder tokens inserted at the top back into real markup
	text = gsub(text, '㊟⒝㊟', '\'\'\'')
	text = gsub(text, '㊟㋑⒰㊟', '<u>')
	text = gsub(text, '㊟㋺⒰㊟', '</u>')
	text = gsub(text, '㊟㈛㊟', '')
	text = gsub(text, '㊟㌫㊟', '%')

	-- comparison with old kana_to_romaji() code
	text_old = gsub(text_old, '%(ba%)', ' (ba)') -- avoid flooding the tracking template with na-adjectives. ← this really should be looked at though
	text_old = gsub(text_old, ' ”', '”') -- and spacing around quotation marks
	-- all tracking calls are commented out; only the two mw.log lines take effect
	if text ~= text_old then
		if mw.ustring.lower(text) == mw.ustring.lower(text_old) then
			--require('Modül:debug').track('ja/k2r diff caps')
		elseif find(text_old, 'ッ') then
			--require('Modül:debug').track('ja/k2r diff w xtu')
		elseif tracking_has_percent then
			--require('Modül:debug').track('ja/k2r diff pc')
		else
			--require('Modül:debug').track('ja/k2r diff')
		end
		mw.log('new]' .. text .. '[')
		mw.log('old]' .. text_old .. '[')
	end

	-- NOTE(review): with an empty pattern this condition is always true; the
	-- body is commented out, so it currently has no effect
	if find(text, '') then
		--require('Modül:debug').track('ja/k2r failure')
	end

	return text
end

-- Removes spaces, hyphens, periods, "&nbsp;" entities and apostrophes
-- from the input.
-- Intended to be used when checking manual romaji, so that spaces or
-- hyphens inserted in manual romaji do not make it appear "wrong".
-- @param f  a string, or a frame-like table whose first positional
--           argument (args[1]) holds the string
-- @return the stripped string
function export.rm_spaces_hyphens(f)
	-- An explicit branch rather than `a and b or c`: with the ternary idiom a
	-- missing args[1] would silently fall through to the frame table itself.
	local text = f
	if type(f) == 'table' then
		text = f.args[1]
	end
	text = gsub(text, ' ', '')
	-- '-' is a pattern quantifier, so it is escaped to match a literal hyphen
	text = gsub(text, '%-', '')
	text = gsub(text, '%.', '')
	text = gsub(text, '&nbsp;', '')
	text = gsub(text, '\'', '')
	return text
end

-- Converts romaji text to katakana.
--
-- Visible steps: a per-character substitution, doubled consonants (and "tc")
-- rewritten to a sokuon ッ plus the consonant, table-driven replacements for
-- ts-/sh-/ch-/n- syllables, a further function-driven substitution, and
-- finally any remaining bare vowels mapped to ウ/オ/イ/エ/ア.
--
-- @param f  a string, or a frame-like table (see NOTE below)
-- @return the converted string
--
-- NOTE(review): several patterns and every table constructor in this function
-- look truncated in this copy (key-less `{='ツ',...}`, patterns missing their
-- character classes, bare `f.args` / `data.rd` / `data.rk`).  Bracketed text
-- appears to have been lost; restore it from the upstream module.
function export.romaji_to_kata(f)
	-- NOTE(review): presumably a positional argument was indexed here — confirm
	local text = type(f) == 'table' and f.args or f
	-- NOTE(review): presumably a per-character lookup in data.rd — confirm
	text = gsub(text, '.', function (char) return data.rd or char end)
	-- geminate consonants: the first of the pair becomes a sokuon
	text = gsub(text, 'kk', 'ッk')
	text = gsub(text, 'ss', 'ッs')
	text = gsub(text, 'tt', 'ッt')
	text = gsub(text, 'pp', 'ッp')
	text = gsub(text, 'bb', 'ッb')
	text = gsub(text, 'dd', 'ッd')
	text = gsub(text, 'gg', 'ッg')
	text = gsub(text, 'jj', 'ッj')
	text = gsub(text, 'tc', 'ッc')
	text = gsub(text, 'tsyu', 'ツュ')
	-- NOTE(review): these look like syllable lookups whose vowel class and
	-- table keys were lost — confirm
	text = gsub(text, 'ts', {='ツ',='ツォ',='ツィ',='ツェ',='ツァ'})
	text = gsub(text, 'sh', {='シュ',='ショ',='シ',='シェ',='シャ'})
	text = gsub(text, 'ch', {='チュ',='チョ',='チ',='チェ',='チャ'})
	text = gsub(text, "n?", {='ヌ',='ノ',='ニ',='ネ',='ナ',='ン',='ン'})
	-- NOTE(review): presumably a syllable pattern with a lookup in data.rk — confirm
	text = gsub(text, '?', function (char) return data.rk or char end)
	-- any vowel still left in latin letters stands alone
	text = gsub(text, 'u', 'ウ')
	text = gsub(text, 'o', 'オ')
	text = gsub(text, 'i', 'イ')
	text = gsub(text, 'e', 'エ')
	text = gsub(text, 'a', 'ア')
	return text
end

-- Determines which script types occur in the input.
-- expects: any mix of kanji and kana
-- e.g. given イギリス人, it returns Kana+Hani
-- @param f  a string, or a frame-like table whose first positional
--           argument (args[1]) holds the string
-- @return the names of the detected scripts joined with '+', in the fixed
--         order Hira, Kana, Hani, Romaji, Number, Abbreviation
--
-- NOTE(review): every pattern passed to find() below is the empty string in
-- this copy, which matches any input, so all six names are always reported.
-- The character classes appear to have been lost and are left untouched
-- here; restore them from the upstream module.
function export.script(f)
	-- declared local: these two names were previously leaked as globals
	local text = f
	if type(f) == 'table' then
		text = f.args[1]
	end
	local script = {}

	if find(text, '') or find(text, '') then
		table.insert(script, 'Hira')
	end
	-- TODO: there are two kanas.  This should insert Kata.
	if find(text, '') or find(text, '') then
		table.insert(script, 'Kana')
	end
	-- 一 is unicode 4e00, previously used 丁 is 4e01
	if find(text, '') then
		table.insert(script, 'Hani')
	end
	-- matching %a should have worked but matched the end of every string
	if find(text, '') then
		table.insert(script, 'Romaji')
	end
	if find(text, '') then
		table.insert(script, 'Number')
	end
	if find(text, '') then
		table.insert(script, 'Abbreviation')
	end

	return table.concat(script, '+')
end

-- Counts the morae in a kana string.
-- when counting morae, most small hiragana belong to the previous mora,
-- so for purposes of counting them, they can be removed and the characters
-- can be counted to get the number of morae.  The exception is small tsu,
-- so data.nonmora_to_empty maps all small hiragana except small tsu.
-- @param text  a string, or a frame-like table whose first positional
--              argument (args[1]) holds the string
-- @return the number of characters left after the removals
function export.count_morae(text)
	if type(text) == "table" then
		text = text.args[1]
	end
	-- convert kata to hira (hira is untouched)
	text = export.kata_to_hira(text)
	-- remove all of the small hiragana such as ょ except small tsu;
	-- the table must be indexed per character, otherwise the callback would
	-- hand the whole table to gsub as a replacement value
	text = gsub(text, '.', function (char) return data.nonmora_to_empty[char] or char end)
	-- remove zero-width spaces
	-- NOTE(review): the literal below is an invisible character copied
	-- verbatim — confirm which code point it is
	text = gsub(text, '‎', '')
	-- return number of characters, which should be the number of morae
	return length(text)
end

-- accepts: any mix of kana
-- returns: a hiragana sort key designed for WMF software
-- this is like sort() but doesn't return |sort=sortkey,
-- just the sort key itself, but unlike sort(), this
-- replaces the long vowel mark with its vowel
--
-- @param text  a string, or a frame-like table (see NOTE below)
-- @return the sort key string
--
-- NOTE(review): in this copy the strip pattern is empty and `text.args`,
-- `data.dakuten`, `data.handakuten` and `data.tenconv` are used without an
-- index; bracketed text appears to have been lost — restore from upstream.
function export.jsort(text)
	-- NOTE(review): presumably a positional argument was indexed here — confirm
	if type(text) == "table" then
		text = text.args
	end
	local textsub = ''
	local convertedten = ''
	-- `result` is never read in this function
	local result = ''
	local len = 1

	-- remove western spaces, hyphens, and periods
	-- diff=41967612: also remove caret
	-- NOTE(review): the character class is missing, so this is currently a no-op
	text = gsub(text, '', '')

	text = export.kata_to_hira(text)

	-- if the first character has dakuten, replace it with the corresponding
	-- character without dakuten and add an apostrophe to the end, e.g.
	-- がす > かす'
	-- the test maps the first character through data.dakuten and checks for ''
	-- NOTE(review): presumably data.dakuten was indexed by char — confirm
	if gsub(sub(text,1,1),'.',function (char) return data.dakuten or char end) == '' then
		len = length(text)
		textsub = sub(text,2,len)
		convertedten = gsub(sub(text,1,1),'.',function (char) return data.tenconv or char end)
		text = (convertedten .. textsub .. "'")
	else
		-- similar thing but with handuken and two apostrophes, e.g. ぱす -> はす''
		if gsub(sub(text,1,1),'.',function (char) return data.handakuten or char end) == '' then
			len = length(text)
			textsub = sub(text,2,len)
			convertedten = gsub(sub(text,1,1),'.',function (char) return data.tenconv or char end)
			text = (convertedten .. textsub .. "''")
		end
	end
	-- replace the long vowel mark with the vowel that it stands for
	-- each key of data.longvowels is used as a pattern, its value as the replacement
	for key,value in pairs(data.longvowels) do
		text = gsub(text,key,value)
	end
	return text
end

-- returns a sort key with |sort= in front, e.g.
-- |sort=はつぐん' if given ばつぐん
--
-- @param f  a string, or a frame-like table (see NOTE below)
-- @return the "|sort=..." string; it stays empty when none of the branches
--         below appends anything
--
-- NOTE(review): in this copy the pattern used to compute `kyreplace` is
-- empty and `f.args`, `data.dakuten`, `data.handakuten` and `data.tenconv`
-- are used without an index; bracketed text appears to have been lost —
-- restore from upstream.
function export.sort(f)
	-- NOTE(review): presumably a positional argument was indexed here — confirm
	local text = type(f) == 'table' and f.args or f
	local textsub = ''
	local convertedten = ''
	local result = ''
	local len = 1
	local kyreplace = ''
	-- what remains of the text after deleting everything the pattern matches
	-- NOTE(review): the character class is missing — confirm what was stripped
	kyreplace = gsub(text,'', '')
	-- nothing left over: start the result with the "|sort=" prefix right away
	if kyreplace == '' then
		result = ('|' .. 'sort' .. '=')
	end
	text = export.kata_to_hira(text)
	-- first character maps to '' via data.dakuten: swap it via data.tenconv
	-- and append one apostrophe
	if gsub(sub(text,1,1),'.',function (char) return data.dakuten or char end) == '' then
		-- add the prefix here only if it was not already added above
		if kyreplace == '' then else result = ('|' .. 'sort' .. '=') end
		len = length(text)
		textsub = sub(text,2,len)
		convertedten = gsub(sub(text,1,1),'.',function (char) return data.tenconv or char end)
		result = (result .. convertedten .. textsub .. "'")
	else
		-- same with data.handakuten and two apostrophes
		if gsub(sub(text,1,1),'.',function (char) return data.handakuten or char end) == '' then
			if kyreplace == '' then else result = ('|' .. 'sort' .. '=') end
			len = length(text)
			textsub = sub(text,2,len)
			convertedten = gsub(sub(text,1,1),'.',function (char) return data.tenconv or char end)
			result = (result .. convertedten .. textsub .. "''")
		else
			-- no mark on the first character: emit the text only when the
			-- prefix was added above
			if kyreplace == '' then
				result = (result .. text)
			end
		end
	end
	return result
end

-- returns the "stem" of a verb or -i adjective, that is the term minus the final character
-- @param f  a frame-like table whose first positional argument (args[1])
--           holds the term
-- @return the term without its last character
function export.definal(f)
	-- args is a table; the term itself is its first positional entry
	local term = f.args[1]
	return sub(term, 1, length(term) - 1)
end

-- Strips ruby markup characters from `text` and returns the result as a
-- single value (the parentheses drop gsub's match count).
-- NOTE(review): the pattern is empty in this copy, so the call currently
-- returns `text` unchanged; the character class listing the markup
-- characters appears to have been lost — restore from upstream.
function export.remove_ruby_markup(text)
	return (string.gsub(text, "", ""))
end

-- do the work of Template:ja-kanji
--
-- Builds the header shown on a kanji entry: the character itself in a large
-- span, a parenthesised note about its grade and (if given) its
-- shinjitai/kyujitai counterpart, followed by category links.
--
-- @param frame  the invoking frame; the arguments are read from its parent
--               frame
-- @return the wikitext followed by the category links, or nothing when the
--         page-name check fails
--
-- NOTE(review): in this copy the page-name pattern is empty, the argument
-- lookups read `args or args or ""` with their keys missing, and most
-- inserted strings are reduced to "]" — link and category text appears to
-- have been lost.  Restore from the upstream module.
function export.kanji(frame)
	local PAGENAME = mw.title.getCurrentTitle().text
	-- only do this if this entry is a kanji page and not some user's page
	if match(PAGENAME, "") then
		local args = frame:getParent().args
		-- NOTE(review): presumably each of these indexed a named or positional
		-- template argument — confirm
		local grade = args or args or ""
		local rs = args or ""
		local shin = args or ""
		local kyu = args or ""
		local head = args or args or ""

		-- output fragments; both lists are concatenated at the end
		local wikitext = {}
		local categories = {}

		-- `catsort` is not read again in the visible code
		local catsort = (rs ~= "") and rs or PAGENAME

		-- display the kanji itself at the top at 275% size
		table.insert(wikitext, '<div><span lang="ja" class="Jpan" style="font-size:275%; line-height: 100%;">' .. (args or PAGENAME) .. '</span></div>')

		-- display information for the grade

		-- if grade was not specified, determine it now
		-- kanji_grade returns a number or false; tostring makes it comparable
		-- with the string literals below
		if grade == "" then
			grade = tostring(export.kanji_grade(PAGENAME))
		end

		table.insert(wikitext, "(''")
		if grade == "1" then table.insert(wikitext, "]")
		elseif grade == "2" then table.insert(wikitext, "]")
		elseif grade == "3" then table.insert(wikitext, "]")
		elseif grade == "4" then table.insert(wikitext, "]")
		elseif grade == "5" then table.insert(wikitext, "]")
		elseif grade == "6" then table.insert(wikitext, "]")
		elseif grade == "7" or grade == "c" then table.insert(wikitext, "]")
		elseif grade == "8" or grade == "n" then table.insert(wikitext, "]")
		elseif grade == "9" or grade == "uc" then table.insert(wikitext, "]")
		elseif grade == "0" or grade == "r" then table.insert(wikitext, "]")
		else
			-- unrecognised grade value: only a category entry is added
			table.insert(categories, "]")
		end

		-- link to shinjitai if shinjitai was specified, and link to kyujitai if kyujitai was specified

		if kyu ~= "" then
			table.insert(wikitext, ",&nbsp;")
			table.insert(wikitext, '] kanji, ] biçimi <span lang="ja" class="Jpan">]</span>')
		elseif shin ~= "" then
			table.insert(wikitext, ",&nbsp;")
			table.insert(wikitext, '] kanji, ] biçimi <span lang="ja" class="Jpan">]</span>')
		end
		table.insert(wikitext, "'')")

		-- add categories
		table.insert(categories, "]")
		table.insert(categories, "]")
		if grade == "1" then table.insert(categories, "]")
		elseif grade == "2" then table.insert(categories, "]")
		elseif grade == "3" then table.insert(categories, "]")
		elseif grade == "4" then table.insert(categories, "]")
		elseif grade == "5" then table.insert(categories, "]")
		elseif grade == "6" then table.insert(categories, "]")
		elseif grade == "7" or grade == "c" then table.insert(categories, "]")
		elseif grade == "8" or grade == "n" then table.insert(categories, "]")
		elseif grade == "9" or grade == "uc" then table.insert(categories, "]")
		elseif grade == "0" or grade == "r" then table.insert(categories, "]")
		end

		-- error category
		if rs == "" then table.insert(categories, "]") end

		-- wikitext fragments are joined directly, categories one per line
		return table.concat(wikitext, "") .. table.concat(categories, "\n")
	end
end

-- Patterns consulted by export.kanji_grade, one per grade/category.
-- NOTE(review): all nine are the empty pattern in this copy, which matches
-- any string; presumably each was a character class built from the kanji
-- lists in `data` — restore from the upstream module.
local grade1_pattern = ('')
local grade2_pattern = ('')
local grade3_pattern = ('')
local grade4_pattern = ('')
local grade5_pattern = ('')
local grade6_pattern = ('')
local secondary_pattern = ('')
local jinmeiyo_kanji_pattern = ('')
local hyogaiji_pattern = ('')

-- Classifies a kanji by testing it against the file-level grade patterns.
-- @param kanji  a string, or a frame-like table whose first positional
--               argument (args[1]) holds the string
-- @return 9 (hyogaiji), 8 (jinmeiyo), 7 (secondary), 6..1 (grade 6..1),
--         or false when no pattern matches
function export.kanji_grade(kanji)
	if type(kanji) == "table" then
		kanji = kanji.args[1]
	end

	-- The order matters: the first pattern that matches decides the result.
	if find(kanji, hyogaiji_pattern) then return 9
	elseif find(kanji, jinmeiyo_kanji_pattern) then return 8
	elseif find(kanji, secondary_pattern) then return 7
	elseif find(kanji, grade6_pattern) then return 6
	elseif find(kanji, grade5_pattern) then return 5
	elseif find(kanji, grade4_pattern) then return 4
	elseif find(kanji, grade3_pattern) then return 3
	elseif find(kanji, grade2_pattern) then return 2
	elseif find(kanji, grade1_pattern) then return 1
	end

	return false
end

return export
Modül:ja

Enciclo

Wikious

Sapientia

Scientia

Boobota

Anandapedia

Sagapedia

Wikithot