Module:User:Theknightwho/scripts/charToScript

Hello, you have come here looking for the meaning of the word Module:User:Theknightwho/scripts/charToScript. In DICTIOUS you will not only get to know all the dictionary meanings for the word Module:User:Theknightwho/scripts/charToScript, but we will also tell you about its etymology, its characteristics and you will know how to say Module:User:Theknightwho/scripts/charToScript in singular and plural. Everything you need to know about the word Module:User:Theknightwho/scripts/charToScript you have here. The definition of the word Module:User:Theknightwho/scripts/charToScript will help you to be more precise and correct when speaking or writing your texts. Knowing the definition of Module:User:Theknightwho/scripts/charToScript, as well as those of other words, enriches your vocabulary and provides you with more and better linguistic resources.


-- Table of functions exported by this module; it is returned at the end of the file.
local subexport = {}

-- Scribunto's Unicode-aware counterpart of string.char: builds a UTF-8 string from codepoint numbers.
local cp = mw.ustring.char
-- Scribunto helper that splits a string on a pattern and returns the pieces as a table.
local split = mw.text.split

--[=[
	Takes a codepoint or a character and finds the script code(s) (if any) that are appropriate for it based on the codepoint, using the data module [[Module:User:Theknightwho/recognition data]].
	
	By default, it returns only the first script code if there are multiple matches (i.e. the code we take to be the default). If `all_scripts` is set, then a table of all matching codes is returned.
]=]

-- Memoisation store used by charToScript.
local memo = {}

-- Script recognition data, loaded through mw.loadData (which hands back a read-only table).
local charToScriptData = mw.loadData("Module:User:Theknightwho/recognition data")
-- Looks up the script code(s) for a single character.
--
-- `char` is either a one-character UTF-8 string or a codepoint number.
-- `all_scripts`: when truthy, the whole list of matching codes is returned;
-- otherwise only the first (default) code is returned.
-- A character with no match yields "None" (or {"None"} with `all_scripts`).
-- Raises an error if `char` is a multi-character string or is neither a string
-- nor a number.
function subexport.charToScript(char, all_scripts)
	local t = type(char)
	if t == "string" then
		-- Two bytes that are not UTF-8 continuation bytes (\128-\191) mean the
		-- string holds more than one character.
		if char:find("[^\128-\191][\128-\191]*[^\128-\191]") then
			error("bad argument #1 to 'charToScript' (expected a single character)")
		end
	elseif t == "number" then
		char = cp(char)
	else
		error(("bad argument #1 to 'charToScript' (expected string or a number, got %s)")
			:format(t))
	end
	
	-- Results are cached per character; the cache always stores the full list
	-- so both calling modes can share it.
	local codes = memo[char]
	if not codes then
		-- Walk the recognition data one byte at a time, descending a level
		-- for every byte until a string of script codes is reached.
		local data = charToScriptData
		for byte in char:gmatch(".") do
			local new_data = data[byte]
			if new_data == nil then
				-- No exact key for this byte: try the multi-byte keys.
				-- NOTE(review): assumes such keys are the inside of a Lua
				-- character class (e.g. a byte range) - confirm against the
				-- data module.
				for k, v in pairs(data) do
					if type(k) == "string" and #k > 1 and byte:find("[" .. k .. "]") then
						new_data = v
						break
					end
				end
			end
			if not new_data then
				break
			elseif type(new_data) == "string" then
				codes = split(new_data, "%s*,%s*")
				break
			end
			data = new_data
		end
		
		-- Covers unmatched bytes, the empty string, and walks that run out
		-- of bytes before reaching a leaf.
		codes = codes or {"None"}
		memo[char] = codes
	end
	
	if all_scripts then
		return codes
	end
	return codes[1]
end

--[=[
	Finds the best script for a string in a language-agnostic way.
	
	Converts each character to a codepoint. Increments the counter for the script code if the codepoint is in the list of individual characters, or if it is in one of the defined ranges in the 4096-character block that it belongs to.
	
	Each script has a two-part counter, for primary and secondary matches. Primary matches are when the script is the first one listed; otherwise, it's a secondary match. When comparing scripts, first the total of both are compared (i.e. the overall number of matches). If these are the same, the number of primary and then secondary matches are used as tiebreakers. For example, this is used to ensure that `Grek` takes priority over `polytonic` if no characters which exclusively match `polytonic` are found, as `Grek` is a subset of `polytonic`.
]=]
-- Picks the script that best matches `text` without reference to a language.
--
-- Every character of the plain-text form of `text` is passed to
-- subexport.charToScript, and each returned script code gets a primary match
-- (when listed first) or a secondary match (otherwise). The script with the
-- highest total wins; ties are broken on primary, then secondary matches.
-- Returns whatever require("Module:scripts").getByCode gives for the winning
-- code, using "None" when nothing was counted.
function subexport.findBestScriptWithoutLang(text)
	local min = math.min
	
	-- Each counter is {primary, secondary}. `<` orders counters by total
	-- first, then by primary matches, then by secondary matches.
	local weights_mt = {
		__lt = function(a, b)
			local total_a, total_b = a[1] + a[2], b[1] + b[2]
			if total_a ~= total_b then
				return total_a < total_b
			elseif a[1] ~= b[1] then
				return a[1] < b[1]
			end
			-- Equal counters compare as not-less-than, as `<` requires.
			return a[2] < b[2]
		end
	}
	
	-- `scripts` maps a script code to its counter.
	-- NOTE(review): an earlier comment here said Jpan and Kore are handled as
	-- special cases; no such handling exists in this function - confirm
	-- whether it is expected elsewhere.
	local scripts = {}
	
	text = require("Module:utilities").get_plaintext(text)
	
	-- One UTF-8 character per iteration: a non-continuation byte followed by
	-- any continuation bytes (\128-\191).
	for character in text:gmatch("[^\128-\191][\128-\191]*") do
		for i, script in ipairs(subexport.charToScript(character, true)) do
			local weights = scripts[script]
			if not weights then
				weights = setmetatable({0, 0}, weights_mt)
				scripts[script] = weights
			end
			-- Slot 1 counts primary matches; every later position in the
			-- list counts as secondary (slot 2).
			local weight = min(i, 2)
			weights[weight] = weights[weight] + 1
		end
	end
	
	local bestScript
	local greatestCount
	for script, count in pairs(scripts) do
		if (not greatestCount) or greatestCount < count then
			bestScript = script
			greatestCount = count
		end
	end
	
	bestScript = bestScript or "None"
	
	return require("Module:scripts").getByCode(bestScript)
end

-- Hand the table of exported functions (charToScript, findBestScriptWithoutLang) to the caller.
return subexport