Module:Jpan-sortkey

Hello, you have come here looking for the meaning of the word Module:Jpan-sortkey. In DICTIOUS you will not only get to know all the dictionary meanings for the word Module:Jpan-sortkey, but we will also tell you about its etymology, its characteristics and you will know how to say Module:Jpan-sortkey in singular and plural. Everything you need to know about the word Module:Jpan-sortkey you have here. The definition of the word Module:Jpan-sortkey will help you to be more precise and correct when speaking or writing your texts. Knowing the definition of Module:Jpan-sortkey, as well as those of other words, enriches your vocabulary and provides you with more and better linguistic resources.

This module will sort text in the Japanese script. It is used to sort Southern Amami Ōshima, Japanese, Hachijō, Kikai, Translingual, Miyako, Old Japanese, Okinoerabu, Northern Amami Ōshima, Yaeyama, Okinawan, Tokunoshima, Kunigami, Yonaguni, and Yoron. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{sortkey}}. Within a module, use Module:languages#Language:makeSortKey.

For testcases, see Module:Jpan-sortkey/testcases.

Functions

makeSortKey(text, lang, sc)
Generates a sortkey for a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the sort fails, returns nil.

local require = require
local require_when_needed = require("Module:require when needed")

local export = {}

local concat = table.concat
local find_templates = require_when_needed("Module:template parser", "find_templates")
local get_by_code = require_when_needed("Module:languages", "getByCode")
local get_section = require_when_needed("Module:pages", "get_section")
local Hani_sort = require_when_needed("Module:Hani-sortkey", "makeSortKey")
local Hira_sort = require("Module:Hira-sortkey").makeSortKey
local insert = table.insert
local toNFC = mw.ustring.toNFC
local track = require_when_needed("Module:debug/track")
local ugsub = mw.ustring.gsub
local umatch = mw.ustring.match
local usub = require_when_needed("Module:string utilities", "sub")

-- Shared character-range data for Japanese text. mw.loadData returns a
-- read-only table that is loaded once per page render rather than once per
-- module invocation.
-- NOTE(review): each field is presumably a character-class fragment intended
-- to be spliced inside a "[...]" ustring pattern -- confirm against
-- Module:ja/data/range.
local range = mw.loadData("Module:ja/data/range")
local kanji_pattern = range.kanji
local ideograph_pattern = range.ideograph
local kana_graph_pattern = range.kana_graph
local latin_pattern = range.latin

-- Generates a sortkey for `text`.
--
-- For every language except "mul", while `text` still contains characters
-- that cannot be sorted as kana, the page titled `text` is loaded and its
-- section for the language is searched for a kana reading, which then replaces
-- `text`. A reading is taken from (in order of appearance in the section):
--   * a headword template (`<lang>-head`, `<lang>-noun`, `head`, ...);
--   * otherwise the first `<lang>-kanjitab` template, either through its
--     `sortkey` parameter or by stitching the per-kanji readings together with
--     the non-kanji parts of the title.
-- The result is passed through the hiragana sort module, and any remaining
-- non-space, non-punctuation runs are passed through the Han sort module.
--
-- Parameters:
--   text  the text to sort (a page title when a reading has to be looked up)
--   lang  language code (string); "mul" disables the reading lookup
--   sc    script code, forwarded unchanged to the Hira/Hani sort modules
-- Returns: the sortkey string.
function export.makeSortKey(text, lang, sc)
	-- Determine reading.
	-- `seen_pages` records every title already looked up, so that a reading
	-- which points back at an earlier title cannot cause an endless loop.
	local seen_pages, langname = {}
	-- NOTE(review): the exact character set that triggers a lookup (in
	-- particular the leading "0-9") should be confirmed against the data in
	-- Module:ja/data/range and the module's testcases.
	while (
		lang ~= "mul" and
		not seen_pages[text] and
		umatch(text, "[0-9" .. kanji_pattern .. ideograph_pattern .. kana_graph_pattern .. latin_pattern .. "]")
	) do
		-- `repeat ... until true` runs once; `break` inside it abandons the
		-- current lookup and returns control to the `while` condition.
		repeat
			langname = langname or get_by_code(lang):getCanonicalName()
			seen_pages[text] = true
			-- mw.title.new returns nil for an invalid title, and getContent
			-- returns nil for a page that does not exist; neither is an error
			-- here, there is simply no reading to find.
			local title = mw.title.new(toNFC(text))
			local content = title and title:getContent()
			if not content then
				break
			end
			content = get_section(content, langname, 2)
			if not content then
				break
			end
			local kanjitab, br
			for template in find_templates(content) do
				local name = template:get_name()
				if (
					name == lang .. "-head" or
					name == lang .. "-pos"
				) then
					-- NOTE(review): these templates take the part of speech
					-- first, so the reading is read from the second positional
					-- argument -- confirm.
					local reading = template:get_arguments()[2]
					if reading ~= nil then
						text = reading
						br = true
						break
					end
				elseif (
					name == lang .. "-noun" or
					name == lang .. "-verb" or
					name == lang .. "-adj" or
					name == lang .. "-phrase" or
					name == lang .. "-verb form" or
					name == lang .. "-verb-suru" or
					name == lang .. "-see" or
					name == lang .. "-see-kango" or
					name == lang .. "-gv"
				) then
					-- NOTE(review): the reading is read from the first
					-- positional argument -- confirm.
					local reading = template:get_arguments()[1]
					if reading ~= nil then
						text = reading
						br = true
						break
					end
				elseif (
					name == "head" or
					name == "head-lite"
				) then
					local args = template:get_arguments()
					-- Only consider generic headword templates for this language.
					if args[1] == lang then
						for i, arg in ipairs(args) do
							-- A "kana" label is followed by the kana form itself.
							if arg == "kana" then
								local kana = args[i + 1]
								if kana then
									text = kana
									br = true
									-- Leaves only the argument scan; the
									-- template loop carries on, but `br`
									-- suppresses the kanjitab fallback.
									break
								end
							end
						end
					end
				elseif not kanjitab and name == lang .. "-kanjitab" then
					-- Remember only the first kanjitab in the section.
					kanjitab = template:get_arguments()
				end
			end
			if kanjitab and not br then
				track{"Jpan-sortkey/kanjitab", "Jpan-sortkey/kanjitab/" .. lang}
				if kanjitab.sortkey then
					text = kanjitab.sortkey
					-- The `while` condition decides whether the explicit
					-- sortkey needs another lookup.
					break
				end
				-- extract kanji and non-kanji
				local kanji = {}
				local non_kanji = {}
				
				-- Walk over every kanji in the title. non_kanji[n] receives the
				-- text between kanji n-1 and kanji n (possibly ""), so
				-- non_kanji always has one more entry than kanji.
				local kanji_border = 1
				ugsub(text, "()([" .. kanji_pattern .. "])()", function(p1, w1, p2)
					insert(non_kanji, usub(text, kanji_border, p1 - 1))
					kanji_border = p2
					insert(kanji, w1)
				end)
				insert(non_kanji, usub(text, kanji_border))
				-- 々 (iteration mark) stands for the kanji before it.
				for i, v in ipairs(kanji) do
					if v == "々" then kanji[i] = kanji[i - 1] end
				end
				-- process readings
				-- NOTE(review): `readings` is filled in but never read again in
				-- this function; only `readings_actual` feeds the sortkey.
				local readings = {}
				local readings_actual = {}
				local reading_length_total = 0
				for i in ipairs(kanjitab) do
					-- A positional argument is kana optionally followed by the
					-- number of kanji the reading spans.
					local reading_kana, reading_length = umatch(kanjitab[i] or "", "^([^0-9]*)([0-9]*)$")
					reading_kana = reading_kana ~= "" and reading_kana or nil
					reading_length = reading_kana and tonumber(reading_length) or 1

					insert(readings, {reading_kana, reading_length})
					reading_length_total = reading_length_total + reading_length
					for _ = reading_length_total + 1, #kanji do
						insert(readings, {nil, 1})
					end
					if reading_kana then
						-- k<i> overrides the reading; o<i> is okurigana
						-- appended after it.
						local actual_reading = kanjitab["k" .. i]
						local okurigana = kanjitab["o" .. i]
						readings_actual[i] = {(actual_reading or reading_kana) .. (okurigana or ""), reading_length}
					else
						readings_actual[i] = {nil, 1}
					end
				end
				-- Interleave: leading non-kanji text, then for each reading its
				-- kana followed by the non-kanji text after the kanji it covers.
				local sortkey = {non_kanji[1]}
				local id = 1
				for _, v in ipairs(readings_actual) do
					id = id + v[2]
					-- "-" marks a reading that contributes nothing.
					v[1] = v[1] ~= "-" and v[1]
					insert(sortkey, (v[1] or "") .. (non_kanji[id] or ""))
				end
				sortkey = concat(sortkey)
				if sortkey ~= "" then
					text = sortkey
				end
			end
		until true
	end
	
	-- Use hiragana sort, after removing spaces and formatting characters.
	text = Hira_sort(ugsub(text, "[%s%p]+", ""), lang, sc)
	
	-- Run through Hani sort, to catch any stray kanji. This shouldn't happen but often does, and we still want to handle them sensibly in the time before the entry is fixed. Exclude spaces and punctuation, since otherwise Hani_sort automatically removes them.
	local ret = ugsub(text, "[^%s%p]+", function(str)
		return Hani_sort(str, lang, sc)
	end)
	
	-- A change made by the Hani pass means kanji survived the reading lookup.
	if not (lang == "mul" or ret == text) then
		track{"Jpan-sortkey/fallback", "Jpan-sortkey/fallback/" .. lang}
	end
	
	return ret
end

return export