Module:User:RichardW57/20230306/Module:headword

Hello, you have come here looking for the meaning of the word Module:User:RichardW57/20230306/Module:headword. In DICTIOUS you will not only get to know all the dictionary meanings for the word Module:User:RichardW57/20230306/Module:headword, but we will also tell you about its etymology, its characteristics and you will know how to say Module:User:RichardW57/20230306/Module:headword in singular and plural. Everything you need to know about the word Module:User:RichardW57/20230306/Module:headword you have here. The definition of the word Module:User:RichardW57/20230306/Module:headword will help you to be more precise and correct when speaking or writing your texts. Knowing the definition of Module:User:RichardW57/20230306/Module:headword, as well as those of other words, enriches your vocabulary and provides you with more and better linguistic resources.


-- Table of public functions; it is returned at the end of the module.
local export = {}

-- Local aliases for the mw.ustring library functions used throughout this file.
local rfind = mw.ustring.find
local rgmatch = mw.ustring.gmatch
local rsubn = mw.ustring.gsub
local ulen = mw.ustring.len
local unfc = mw.ustring.toNFC
local uupper = mw.ustring.upper

-- Shared data for headword handling, loaded with mw.loadData().
local m_data = mw.loadData("Module:headword/data")

-- Title object of the current page, as returned by mw.title.getCurrentTitle().
local title = mw.title.getCurrentTitle()

-- Fields of the data module kept in locals for convenience.
-- NOTE(review): these look like lookup tables (keyed by part-of-speech name or
-- by language/script code) -- confirm against Module:headword/data.
local isLemma = m_data.lemmas
local isNonLemma = m_data.nonlemmas
local notranslit = m_data.notranslit
local toBeTagged = m_data.toBeTagged

-- If set to true, categories always appear, even in non-mainspace pages
local test_force_categories = false

-- Same as rsubn(), but yields only the substituted string and drops the
-- replacement count.
local function rsub(term, foo, bar)
	-- The enclosing parentheses truncate the call's results to the first one.
	return (rsubn(term, foo, bar))
end

-- Record a tracking hit under "headword/<page>". When `code` is given, an
-- additional hit is recorded under "headword/<page>/<code>". Always returns true.
local function track(page, code)
	local base = "headword/" .. page
	local do_track = require("Module:debug/track")

	if not code then
		do_track(base)
		return true
	end

	-- Both the generic and the code-specific page are passed in one list.
	do_track({base, base .. "/" .. code})
	return true
end

-- Report whether `text` contains at least one character belonging to the
-- script whose code is `script_code`.
--
-- Returns true or false for valid arguments. Returns nil (after logging a
-- message) when either argument is not a string. Returns false when the
-- script is unknown or defines no characters.
local function test_script(text, script_code)
	if type(text) ~= "string" or type(script_code) ~= "string" then
		mw.log("Parameters to test_script were incorrect.")
		return nil
	end

	local sc = require("Module:scripts").getByCode(script_code)
	local characters = sc and sc:getCharacters()
	if not characters then
		return false
	end

	-- Only look at alphanumeric characters.
	text = rsub(text, "%W", "")

	-- `characters` is the body of a character class, so it has to be wrapped in
	-- brackets. Searching for the empty pattern "" (as the code did before)
	-- matches every string and made this function return true unconditionally.
	return rfind(text, "[" .. characters .. "]") ~= nil
end


-- Pattern matching a run of whitespace and/or punctuation, i.e. a candidate
-- word break. (The character class had been lost, leaving a bare "+" that
-- only matched a literal plus sign.)
local spacingPunctuation = "[%s%p]+"
--[[ List of punctuation or spacing characters that are found inside of words.
	 Used to exclude characters from the regex above. ]]
local wordPunc = "-־׳״'.·*’་•:"
-- Pattern matching a run of characters that are NOT word-internal punctuation.
-- The leading "-" of wordPunc directly follows "^" in the class, so it is
-- taken literally rather than as a range operator.
local notWordPunc = "[^" .. wordPunc .. "]+"


-- Tell whether `head` counts as multiword under the algorithm used in
-- full_headword(): it does if any run matched by `spacingPunctuation`
-- contains something matched by `notWordPunc`.
function export.head_is_multiword(head)
	local is_multiword = false

	for separator_run in rgmatch(head, spacingPunctuation) do
		if rfind(separator_run, notWordPunc) then
			is_multiword = true
			break
		end
	end

	return is_multiword
end


-- Add links to a multiword head.
--
-- The whole head is wrapped in [[...]], and inside every run of spacing or
-- punctuation the link is closed before, and reopened after, each stretch of
-- characters that are not word-internal punctuation. Every word thus ends up
-- in its own link while the separators stay outside the links.
-- Returns the linked head as a single string.
function export.add_multiword_links(head)
	-- Called with one run of spacing/punctuation; puts the non-word-internal
	-- parts of that run outside the surrounding links.
	local function workaround_to_exclude_chars(s)
		return rsub(s, notWordPunc, "]]%1[[")
	end

	-- The link brackets had been lost here, leaving `head = "]"`, which threw
	-- the input away entirely.
	head = "[[" .. rsub(head, spacingPunctuation, workaround_to_exclude_chars) .. "]]"
	--[=[
	use this when workaround is no longer needed:

	head = "[[" .. rsub(head, WORDBREAKCHARS, "]]%1[[") .. "]]"
	]=]

	-- Remove any empty links, which could have been created above
	-- at the beginning or end of the string.
	head = head:gsub("%[%[%]%]", "")
	return head
end


-- Truthy when the current page must not receive certain categories: a
-- main-namespace page whose title starts with "Unsupported titles/", or an
-- Appendix page whose title starts with "Gestures/".
local function non_categorizable()
	if title:inNamespace("") then
		local unsupported_at = title.text:find("^Unsupported titles/")
		if unsupported_at then
			return unsupported_at
		end
	end

	return title:inNamespace("Appendix") and title.text:find("^Gestures/")
end


-- Normalise the `data` table in place before formatting.
--
--   * makes sure data.heads, data.translits and data.transcriptions are tables;
--   * replaces empty heads with a default head derived from the page title;
--   * adds maintenance and "multiword terms" categories to data.categories;
--   * turns each transliteration into a {display = ..., is_manual = ...} table,
--     generating one automatically where none was supplied.
--
-- `postype` is the value returned by export.pos_lemma_or_nonlemma().
-- Raises an error if data.id is present but is not a string.
--
-- NOTE(review): the per-element indexes ([i], [1]) and the per-language
-- lookups ([data.lang:getCode()]) in this function had been lost and were
-- restored from context -- confirm against the upstream module.
local function preprocess(data, postype)
	local langname = data.lang:getCanonicalName()
	local langcode = data.lang:getCode()

	-- Tracking pages used below: headword/heads-not-table,
	-- headword/translits-not-table, headword/transcriptions-not-table.
	if type(data.heads) ~= "table" then
		if data.heads then
			track("heads-not-table")
		end

		data.heads = { data.heads }
	end

	if type(data.translits) ~= "table" then
		if data.translits then
			track("translits-not-table")
		end

		data.translits = { data.translits }
	end

	if type(data.transcriptions) ~= "table" then
		if data.transcriptions then
			track("transcriptions-not-table")
		end

		data.transcriptions = { data.transcriptions }
	end

	if not data.heads or #data.heads == 0 then
		data.heads = {""}
	end

	-- Determine if term is reconstructed
	local is_reconstructed = false --data.lang:getType() == "reconstructed"
	--	or title.nsText == "Reconstruction"

	-- Create a default headword.
	local subpagename = title.subpageText:gsub("^Unsupported titles/", "")
	local pagename = title.text
	local default_head
	if is_reconstructed then
		default_head = require("Module:utilities").plain_gsub(pagename, langname .. "/", "")
	else
		default_head = subpagename
	end

	local unmodified_default_head = default_head

	-- Add links to multi-word page names when appropriate
	if not m_data.no_multiword_links[langcode] and not is_reconstructed and export.head_is_multiword(default_head) then
		default_head = export.add_multiword_links(default_head)
	end

	if is_reconstructed then
		default_head = "*" .. default_head
	end

	-- If using a discouraged character sequence, add to maintenance category
	for i, script in ipairs(data.sc) do
		if script:hasNormalizationFixes() == true then
			local normalized = unfc(data.heads[i])
			if script:fixDiscouragedSequences(normalized) ~= normalized then
				table.insert(data.categories, "Pages using discouraged character sequences")
			end
		end
	end

	-- If a head is the empty string "", then replace it with the default
	for i, head in ipairs(data.heads) do
		if head == "" then
			head = default_head
		else
			if head == default_head and langname == "English" then
				table.insert(data.categories, langname .. " terms with redundant head parameter")
			end
		end
		-- Store into slot i; assigning to data.heads itself replaced the list with a string.
		data.heads[i] = head
	end

	-- If the first head is multiword (after removing links), maybe insert into "LANG multiword terms"
	if (not data.nomultiwordcat) and postype == "lemma" and data.sc[1]:hasSpaces() and (not m_data.no_multiword_cat[langcode]) then
		-- Check for spaces or hyphens, but exclude prefixes and suffixes.
		-- Use the pagename, not the head= value, because the latter may have extra
		-- junk in it, e.g. superscripted text that throws off the algorithm.
		-- NOTE(review): the character classes of both patterns were lost; the
		-- values below follow the upstream module (whitespace, hyphen and the
		-- Ethiopic wordspace) -- confirm.
		local checkpattern = ".[%s%-፡]."
		if m_data.hyphen_not_multiword_sep[langcode] then
			-- Exclude hyphens if the data module states that they should for this language
			checkpattern = ".[%s፡]."
		end
		if rfind(unmodified_default_head, checkpattern) and not non_categorizable() then
			table.insert(data.categories, langname .. " multiword terms")
		end
	end

	-- Wrap every manually supplied transliteration. Existing keys are only
	-- reassigned, which is allowed during a pairs() traversal.
	for i, val in pairs(data.translits) do
		data.translits[i] = {display = val, is_manual = true}
	end

	-- Make transliterations
	for i, head in ipairs(data.heads) do
		local translit = data.translits[i]

		-- Try to generate a transliteration if necessary
		if translit and translit.display == "-" then
			-- "-" explicitly suppresses the transliteration.
			translit = nil
		elseif not translit and not notranslit[langcode] and data.sc[i]:isTransliterated() then
			translit = (data.lang:transliterate(require("Module:links").remove_links(head), data.sc[i]))
			translit = translit and mw.text.trim(translit)

			-- There is still no transliteration?
			-- Add the entry to a cleanup category.
			if not translit then
				translit = "<small>transliteration needed</small>"
				table.insert(data.categories, "Requests for transliteration of " .. langname .. " terms")
			end

			if translit then
				translit = {display = translit, is_manual = false}
			end
		end

		-- Link to the transliteration entry for languages that require this
		if translit and data.lang:link_tr() then
			translit.display = require("Module:links").full_link{
				term = translit.display,
				lang = data.lang,
				sc = require("Module:scripts").getByCode("Latn"),
				tr = "-"
				}
		end

		data.translits[i] = translit
	end

	if data.id and type(data.id) ~= "string" then
		error("The id in the data table should be a string.")
	end
end


-- Format a headword with transliterations
--
-- Formats every entry of data.heads in place (links, display text, language
-- and script tagging), then appends the formatted transliterations and
-- transcriptions in parentheses. Returns the resulting wikitext string.
-- Alternatives are joined with " <i>or</i> ".
--
-- NOTE(review): the [i] indexes in this function had been lost and were
-- restored from context -- confirm against the upstream module.
local function format_headword(data)
	local m_scriptutils = require("Module:script utilities")

	-- Are there non-empty transliterations?
	-- Need to do it this way because translits[1] might be nil while translits[2] is not
	local has_translits = false
	local has_manual_translits = false

	-- Format the headwords
	for i, head in ipairs(data.heads) do
		local sc = data.sc[i]
		local translit = data.translits[i]
		local transcription = data.transcriptions[i]

		if translit or transcription then
			has_translits = true
		end
		if translit and translit.is_manual or transcription then
			has_manual_translits = true
		end

		-- Apply processing to the headword, for formatting links and such
		-- NOTE(review): this condition was syntactically broken in the file;
		-- the form below is the reconstruction that is consistent with what
		-- was left of the line -- confirm.
		if head:find("[[", nil, true) and (not sc or sc:getCode() ~= "Imag") then
			head = require("Module:links").language_link({term = head, lang = data.lang}, false)
		else
			head = data.lang:makeDisplayText(head, sc, true)
		end

		-- Add language and script wrapper
		-- Only the first head is given data.id.
		if i == 1 then
			head = m_scriptutils.tag_text(head, data.lang, sc, "head", nil, data.id)
		else
			head = m_scriptutils.tag_text(head, data.lang, sc, "head", nil)
		end

		data.heads[i] = head
	end

	local translits_formatted = ""

	if has_manual_translits then
		-- Tracking pages: headword/has-manual-translit and
		-- headword/has-manual-translit/LANGCODE
		track("has-manual-translit", data.lang:getCode())
	end

	if has_translits then

		-- Format the transliterations
		local translits = data.translits
		local transcriptions = data.transcriptions

		if translits then
			-- using pairs() instead of ipairs() in case there is a gap
			for i, _ in pairs(translits) do
				if type(i) == "number" then
					translits[i] = m_scriptutils.tag_translit(translits[i].display, data.lang:getCode(), "head", nil, translits[i].is_manual)
				end
			end
		end

		if transcriptions then
			for i, _ in pairs(transcriptions) do
				if type(i) == "number" then
					transcriptions[i] = m_scriptutils.tag_transcription(transcriptions[i], data.lang:getCode(), "head")
				end
			end
		end

		-- Merge transliteration i and transcription i into a single string.
		for i = 1, math.max(#translits, #transcriptions) do
			-- Named `pieces` so that it no longer shadows the outer
			-- `translits_formatted` string.
			local pieces = {}
			table.insert(pieces, translits[i] and translits[i] or "")
			table.insert(pieces, (translits[i] and transcriptions[i]) and " " or "")
			table.insert(pieces, transcriptions[i] and "/" .. transcriptions[i] .. "/" or "")
			data.translits[i] = table.concat(pieces)
		end

		translits_formatted = " (" .. table.concat(data.translits, " <i>or</i> ") .. ")"

		local transliteration_page = mw.title.new(data.lang:getCanonicalName() .. " transliteration", "Wiktionary")

		if transliteration_page then
			-- Reading .exists may raise an error, hence the pcall().
			local success, exists = pcall(function () return transliteration_page.exists end)
			if success and exists then
				-- NOTE(review): the link literal was lost; the text below
				-- follows the upstream module -- confirm.
				translits_formatted = " [[Wiktionary:" .. data.lang:getCanonicalName() .. " transliteration|•]]" .. translits_formatted
			end
		end
	end

	return table.concat(data.heads, " <i>or</i> ") .. translits_formatted
end


-- Format the gender/number specifications in data.genders.
--
-- Returns "&nbsp;" followed by the formatted genders, or "" when there are
-- none. Categories produced by the gender module are appended to
-- data.categories.
local function format_genders(data)
	if not data.genders or #data.genders == 0 then
		return ""
	end

	local pos_for_cat
	-- NOTE(review): both table lookups below had lost their index; the keys
	-- (language code, part of speech without "reconstructed ") were restored
	-- from context -- confirm against Module:headword/data.
	if not data.nogendercat and not m_data.no_gender_cat[data.lang:getCode()] then
		-- Parentheses keep only the substituted string from gsub().
		local pos_category = (data.pos_category:gsub("^reconstructed ", ""))
		pos_for_cat = m_data.pos_for_gender_number_cat[pos_category]
	end

	local text, cats = require("Module:gender and number").format_genders(data.genders, data.lang, pos_for_cat)
	for _, cat in ipairs(cats) do
		table.insert(data.categories, cat)
	end
	return "&nbsp;" .. text
end


-- Format one inflection: its label followed by all of its forms.
--
-- `parts` is a list of forms (strings, or tables with fields such as term,
-- alt, q/qualifiers, qq, refs, label, ...) that also carries named fields of
-- its own (label, accel, sc, request, enable_auto_translit). Every list
-- element is replaced in place by its formatted wikitext.
-- Returns "<i>LABEL</i>" followed by the formatted forms, or by a request
-- notice when there are no forms and parts.request is set.
local function format_inflection_parts(data, parts)
	for j, part in ipairs(parts) do
		-- A bare string is shorthand for {term = string}.
		if type(part) ~= "table" then
			part = {term = part}
		end

		local left_qualifiers, right_qualifiers
		local reftext

		left_qualifiers = part.q and #part.q > 0 and part.q or part.qualifiers and #part.qualifiers > 0 and part.qualifiers
		if left_qualifiers then
			left_qualifiers = require("Module:qualifier").format_qualifier(left_qualifiers) .. " "

			-- Tracking page: headword/qualifier
			track("qualifier")
		end
		right_qualifiers = part.qq and #part.qq > 0 and part.qq
		if right_qualifiers then
			right_qualifiers = " " .. require("Module:qualifier").format_qualifier(right_qualifiers)

			-- Tracking page: headword/qualifier
			track("qualifier")
		end
		if part.refs and #part.refs > 0 then
			local refs = {}
			for _, ref in ipairs(part.refs) do
				if type(ref) ~= "table" then
					ref = {text = ref}
				end
				local refargs
				if ref.name or ref.group then
					refargs = {name = ref.name, group = ref.group}
				end
				table.insert(refs, mw.getCurrentFrame():extensionTag("ref", ref.text, refargs))
			end
			reftext = table.concat(refs)
		end

		local partaccel = part.accel
		local face = part.hypothetical and "hypothetical" or "bold"

		-- Here the final part 'or data.nolink' allows to have 'nolink=true'
		-- right into the 'data' table to disable links of the entire headword
		-- when inflected forms aren't entry-worthy, e.g.: in Vulgar Latin
		local nolink = part.hypothetical or part.nolink or data.nolink

		local separator = part.separator or j > 1 and " <i>or</i> " -- use "" to request no separator

		if part.label then
			-- There should be a better way of italicizing a label. As is, this isn't customizable.
			part = "<i>" .. part.label .. "</i>"
		else
			-- Convert the term into a full link
			-- Don't show a transliteration here, the consensus seems to be not to
			-- show them in headword lines to avoid clutter.
			part = require("Module:links").full_link(
				{
					term = not nolink and part.term or nil,
					alt = part.alt or (nolink and part.term or nil),
					lang = part.lang or data.lang,
					-- data.sc is a list of scripts; fall back to the first one.
					sc = part.sc or parts.sc or (not part.lang and data.sc[1]),
					id = part.id,
					genders = part.genders,
					tr = part.translit or (not (parts.enable_auto_translit or data.inflections.enable_auto_translit) and "-" or nil),
					ts = part.transcription,
					accel = parts.accel or partaccel,
				},
				face,
				false
				)
		end

		if left_qualifiers then
			part = left_qualifiers .. part
		end
		if reftext then
			part = part .. reftext
		end
		if right_qualifiers then
			part = part .. right_qualifiers
		end
		if separator then
			part = separator .. part
		end

		-- Store into slot j; assigning to `parts` itself replaced the list
		-- (and its label) with a string.
		parts[j] = part
	end

	local parts_output = ""

	if #parts > 0 then
		parts_output = " " .. table.concat(parts)
	elseif parts.request then
		-- NOTE(review): the text inside <small> was lost; "[please provide]"
		-- follows the upstream module -- confirm.
		parts_output = " <small>[please provide]</small>"
			.. require("Module:utilities").format_categories(
				{"Requests for inflections in " .. data.lang:getCanonicalName() .. " entries"},
				-- Was the undefined global `lang`, i.e. always nil.
				data.lang,
				nil,
				nil,
				data.force_cat_output or test_force_categories,
				data.sc[1]
				)
	end

	return "<i>" .. parts.label .. "</i>" .. parts_output
end

-- Format the inflections following the headword
--
-- Every entry of data.inflections is replaced in place by its formatted
-- text. Returns " (" .. entries joined by ", " .. ")", or "" when there are
-- no inflections.
local function format_inflections(data)
	if not data.inflections or #data.inflections == 0 then
		return ""
	end

	-- Format each inflection individually
	for key, infl in ipairs(data.inflections) do
		-- Store into slot `key`; assigning to data.inflections itself replaced
		-- the list with a string and broke the table.concat() below.
		data.inflections[key] = format_inflection_parts(data, infl)
	end

	return " (" .. table.concat(data.inflections, ", ") .. ")"
end


-- Return "lemma" if the given POS is a lemma, "non-lemma form" if a non-lemma form, or nil
-- if unknown. The POS passed in must be in its plural form ("nouns", "prefixes", etc.).
-- If you have a POS in its singular form, pluralize it first with a pluralize() helper
-- that knows when to add '-s' and when to add '-es' (the module reference was lost from
-- this comment; presumably Module:string utilities -- confirm).
--
-- If `best_guess` is given and the POS is in neither the lemma nor non-lemma list, guess
-- based on whether it ends in " forms"; otherwise, return nil.
--
-- NOTE(review): the lookup keys had been lost, which left the tables themselves
-- being tested (always truthy, so "lemma" was always returned). The keys below
-- -- the POS as given, without a leading "reconstructed ", and without a leading
-- "mutated " -- follow the upstream module; confirm.
function export.pos_lemma_or_nonlemma(plpos, best_guess)
	-- Parentheses keep only the substituted string from gsub().
	local unreconstructed = (plpos:gsub("^reconstructed ", ""))
	local unmutated = (plpos:gsub("^mutated ", ""))

	-- Is it a lemma category?
	if isLemma[plpos] or isLemma[unreconstructed] then
		return "lemma"
	-- Is it a nonlemma category?
	-- A mutated form counts as non-lemma even when its base POS is a lemma.
	elseif isNonLemma[plpos]
		or isNonLemma[unreconstructed]
		or isLemma[unmutated]
		or isNonLemma[unmutated] then
		return "non-lemma form"
	elseif best_guess then
		return plpos:find(" forms$") and "non-lemma form" or "lemma"
	else
		return nil
	end
end


-- Build the visible headword line: the formatted heads with transliterations,
-- then genders, then inflections, then the tracking categories gathered here.
--
-- Side effects: adds the part-of-speech, script and lemma/non-lemma
-- categories to data.categories and runs preprocess() on `data`.
local function show_headword_line(data)
	local namespace = title.nsText
	local langname = data.lang:getCanonicalName()

	-- Check the namespace against the language type
--	if namespace == "" then
--		local langtype = data.lang:getType()
--		if langtype == "reconstructed" then
--			error("Entries in " .. langname .. " must be placed in the Reconstruction: namespace")
--		elseif langtype == "appendix-constructed" then
--			error("Entries in " .. langname .. " must be placed in the Appendix: namespace")
--		end
--	end

	local tracking_categories = {}

	if not data.noposcat then
		local pos_category = langname .. " " .. data.pos_category
		-- FIXME: Why is this special case here? Please add an explanatory comment.
		if pos_category ~= "Translingual Han characters" then
			table.insert(data.categories, 1, pos_category)
		end
	end

	if data.sccat and data.sc then
		for i, script in ipairs(data.sc) do
			table.insert(data.categories, langname .. " " .. data.pos_category
				.. " in " .. script:getDisplayForm())
		end
	end

	-- Is it a lemma category?
	local postype = export.pos_lemma_or_nonlemma(data.pos_category)
	if not postype then
		-- We don't know what this category is, so tag it with a tracking category.
		-- Tracking pages: headword/unrecognized pos (and per-language and per-POS variants)
		table.insert(tracking_categories, "head tracking/unrecognized pos")
		track("unrecognized pos", data.lang:getCode())
		track("unrecognized pos/pos/" .. data.pos_category, data.lang:getCode())
	elseif not data.noposcat then
		table.insert(data.categories, 1, langname .. " " .. postype .. "s")
	end

	-- Preprocess
	preprocess(data, postype)

	if namespace == "" -- and data.lang:getType() ~= "reconstructed"
	then
		local m_links = require("Module:links")
		for i, head in ipairs(data.heads) do
			-- NOTE(review): the script index was lost here; [i] (the script
			-- of this head) was restored from context -- confirm.
			if title.prefixedText ~= m_links.getLinkPage(m_links.remove_links(head), data.lang, data.sc[i]) then
				-- Tracking page: headword/pagename spelling mismatch
				track("pagename spelling mismatch", data.lang:getCode())
				break
			end
		end
	end

	-- Format and return all the gathered information
	-- data.sc is a list of scripts; the first one is handed to format_categories().
	return
		format_headword(data) ..
		format_genders(data) ..
		format_inflections(data) ..
		require("Module:utilities").format_categories(
			tracking_categories, data.lang, data.sort_key, nil,
			data.force_cat_output or test_force_categories, data.sc[1]
			)
end

-- Main entry point: produce the complete headword line for an entry,
-- including its categories.
--
-- `data` is a table of properties. This function reads data.lang (a language
-- object, required), data.heads (list of heads, optional), data.sc (a single
-- script object, optional), data.categories, data.pos_category,
-- data.sort_key, data.force_cat_output and data.nopalindromecat; further
-- fields are read by the helpers it calls. `data` is modified in place.
--
-- Returns the wikitext of the headword line followed by the formatted
-- categories. Raises an error when `data` is a language object, when
-- data.lang is missing or is not a language object, when data.heads is
-- neither nil nor a table, when data.sc is not a single script object, or
-- when no part-of-speech category can be determined.
--
-- NOTE(review): the [1]/[i] indexes and the character classes in this
-- function had been lost and were restored from context -- confirm against
-- the upstream module.
function export.full_headword(data)
	local tracking_categories = {}

	local pagename = title.text
	local fullPagename = title.fullText
	local namespace = title.nsText

	if data.getCanonicalName then
		error("In full_headword(), the first argument `data` needs to be a Lua object (table) of properties, not a language object")
	end

	if not data.lang or type(data.lang) ~= "table" or not data.lang.getCode then
		error("In full_headword(), the first argument `data` needs to be a Lua object (table) and `data.lang` must be a language object")
	end

	if not data.heads then
		-- This is the same practice as the main headword template.
		track("heads-is-nil")
		data.heads = {}
	elseif type(data.heads) ~= "table" then
		error("In full_headword(), `data.heads` must be an array of heads but is a " .. type(data.heads))
	elseif data.heads[1] == "" then
		-- This practice is nasty and needs to be eradicated.
		track("heads-is-blank-string")
		if #data.heads > 1 then
			track("heads-is-blank-string-and-multiple")
		end
	end

	-- Try to detect the script(s) if not provided.
	if data.sc then
		if data.sc._type == "script object" then
			data.sc = {data.sc}
		else
			error("No support now or ever for passing in an array of script objects in `data.sc`; this will be supported using headword objects in `data.heads`")
		end
	end

	-- FIXME! Checking for #data.sc ~= #data.heads and silently overwriting it is bad software
	-- practice. I will be ripping out and rewriting this code. --ben
	if (not data.sc) or #data.sc == 0 or (#data.sc ~= #data.heads and (#data.sc > 1 or #data.heads > 1)) then
		data.sc = {}
		if #data.heads > 1 then
			for i, head in ipairs(data.heads) do
				data.sc[i] = data.lang:findBestScript(head)
			end
		else
			-- With no head, or an empty one, detect the script from the page name.
			data.sc[1] = data.lang:findBestScript(data.heads[1] ~= "" and data.heads[1] or pagename)
		end
	else
		-- Track uses of sc parameter
		local best = data.lang:findBestScript(pagename)
		track("sc")

		if data.sc[1]:getCode() == best:getCode() then
			track("sc/redundant", data.sc[1]:getCode())
		else
			track("sc/needed", data.sc[1]:getCode())
		end
	end

	-- Assumes that the scripts in "toBeTagged" will never occur in the Reconstruction namespace.
	-- (FIXME: Don't make assumptions like this, and if you need to do so, throw an error if the assumption is violated.)
	-- Avoid tagging ASCII as Hani even when it is tagged as Hani in the headword. The check for ASCII
	-- might need to be expanded to a check for any Latin characters and whitespace or punctuation. Where there are
	-- multiple headwords, use the script for the first. (FIXME: Why are we using the script for the
	-- first headword for all of them rather than the correct per-headword script? Please either expand this comment
	-- explaining why, or correct it if it's a problem.)
	local first_sc_code = data.sc[1]:getCode()
	-- NOTE(review): the character class of this pattern was lost; given the
	-- comment above it is taken to be "entirely ASCII" -- confirm.
	local ascii_only = pagename:find("^[%z\1-\127]+$")
	local displayTitle
	if (namespace == "" and toBeTagged[first_sc_code] and not ascii_only)
			or (first_sc_code == "Jpan" and (test_script(pagename, "Hira") or test_script(pagename, "Kana")))
			or (first_sc_code == "Kore" and (test_script(pagename, "Hang"))) then
		displayTitle = '<span class="' .. first_sc_code .. '">' .. pagename .. '</span>'
	-- Keep Han entries region-neutral in the display title.
	elseif namespace == "" and not ascii_only
			and (first_sc_code == "Hant" or first_sc_code == "Hans") then
		displayTitle = '<span class="Hani">' .. pagename .. '</span>'
	elseif namespace == "Reconstruction" then
		-- `matched` is now a local; it used to be assigned as a global.
		local matched
		displayTitle, matched = rsubn(
			fullPagename,
			"^(Reconstruction:[^/]+/)(.+)$",
			function(before, term)
				return before ..
					require("Module:script utilities").tag_text(
						term,
						data.lang,
						data.sc[1]
					)
			end
		)

		if matched == 0 then
			displayTitle = nil
		end
	end

	if displayTitle then
		local frame = mw.getCurrentFrame()
		frame:callParserFunction(
			"DISPLAYTITLE",
			displayTitle
		)
	end

	if data.force_cat_output then
		-- Tracking page: headword/force cat output
		track("force cat output")
	end

	-- Were any categories specified?
	if data.categories and #data.categories > 0 then
		-- The language name is escaped because it is used inside Lua patterns;
		-- names containing magic characters such as "-" would otherwise never
		-- match literally.
		local lang_name = require("Module:pattern utilities").pattern_escape(data.lang:getCanonicalName())
		for _, cat in ipairs(data.categories) do
			-- Does the category begin with the language name? If not, tag it with a tracking category.
			if not rfind(cat, "^" .. lang_name) then
				mw.log(cat, data.lang:getCanonicalName())
				table.insert(tracking_categories, "head tracking/no lang category")

				-- Tracking page: headword/no lang category
				track("no lang category", data.lang:getCode())
			end
		end

		-- With no explicit part of speech, derive it from the first category
		-- ("LANGNAME POS") and remove that category from the list.
		local first_cat = data.categories[1]
		if not data.pos_category and first_cat:find("^" .. lang_name) then
			data.pos_category = (first_cat:gsub("^" .. lang_name .. " ", ""))
			table.remove(data.categories, 1)
		end
	end

	if not data.pos_category then
		error(
			'No valid part-of-speech categories were found in the list '
			.. 'of categories passed to the function "full_headword". '
			.. 'The part-of-speech category should consist of a language\'s '
			.. 'canonical name plus a part of speech.'
			)
	end

	-- Categorise for unusual characters
	local standard = data.lang:getStandardCharacters()

	if standard then
		if ulen(title.subpageText) ~= 1 and not non_categorizable() then
			-- `standard` is the body of a character class: visit every
			-- character of the title that falls outside it.
			for character in rgmatch(title.subpageText, "([^" .. standard .. "])") do
				-- Prefer the uppercase form unless that form is a standard character.
				local upper = uupper(character)
				if not rfind(upper, "[" .. standard .. "]") then
					character = upper
				end
				table.insert(
					data.categories,
					data.lang:getCanonicalName() .. " terms spelled with " .. character
				)
			end
		end
	end

	-- Categorise for palindromes
	if not data.nopalindromecat and title.nsText ~= "Reconstruction" and ulen(title.subpageText) > 2
		and require("Module:palindromes").is_palindrome(
			title.subpageText, data.lang, data.sc[1]
			) then
		table.insert(data.categories, data.lang:getCanonicalName() .. " palindromes")
	end

	-- This may add more categories (e.g. gender categories), so make sure it gets
	-- evaluated first.
	local text = show_headword_line(data)
	return
		text ..
		require("Module:utilities").format_categories(
			data.categories, data.lang, data.sort_key, nil,
			data.force_cat_output or test_force_categories, data.sc[1]
			) ..
		require("Module:utilities").format_categories(
			tracking_categories, data.lang, data.sort_key, nil,
			data.force_cat_output or test_force_categories, data.sc[1]
			)
end

-- Hand the table of public functions to whoever require()s this module.
return export