Module:User:Theknightwho/cmn-pron

Hello, you have come here looking for the meaning of the word Module:User:Theknightwho/cmn-pron. In DICTIOUS you will not only get to know all the dictionary meanings for the word Module:User:Theknightwho/cmn-pron, but we will also tell you about its etymology, its characteristics and you will know how to say Module:User:Theknightwho/cmn-pron in singular and plural. Everything you need to know about the word Module:User:Theknightwho/cmn-pron you have here. The definition of the word Module:User:Theknightwho/cmn-pron will help you to be more precise and correct when speaking or writing your texts. Knowing the definition of Module:User:Theknightwho/cmn-pron, as well as those of other words, enriches your vocabulary and provides you with more and better linguistic resources.


local concat = table.concat
local error = error
local explode = require("Module:string utilities").explode_utf8
local insert = table.insert
local rawget = rawget
local rawset = rawset
local select = select
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
local type = type
local _lower = string.lower
local _upper = string.upper
local ulower = string.ulower
local uupper = string.uupper

local data = mw.loadData("Module:User:Theknightwho/cmn-pron/data")
local num_to_segment = data.num_to_segment
local segment_to_num = data.segment_to_num

local export = {}

-- Lowercases `str`. Strings longer than one byte are passed to `ulower`
-- (string.ulower); anything shorter goes to the plain string.lower.
-- Returns nil when `str` is nil or false.
local function lower(str)
	if not str then
		return nil
	end
	local fold = #str > 1 and ulower or _lower
	return fold(str) or nil
end

-- Uppercases `str`. Strings longer than one byte are passed to `uupper`
-- (string.uupper); anything shorter goes to the plain string.upper.
-- Returns nil when `str` is nil or false.
local function upper(str)
	if not str then
		return nil
	end
	local fold = #str > 1 and uupper or _upper
	return fold(str) or nil
end

-- Nil-tolerant wrapper around string.match: returns the first result of
-- matching `pat` against `str`, or nil when `str` is not a string or the
-- pattern does not match.
local function match(str, pat)
	if type(str) ~= "string" then
		return nil
	end
	return str:match(pat) or nil
end

local m_parser = require("Module:parser")
local Parser = m_parser.Parser

-- Modified read method to add keep_capital parameter.
-- Returns the element of `self.text` that sits `delta` positions away from
-- `self.head` (`delta` defaults to 0), or "" when nothing is stored there.
-- The result is lowercased unless `keep_capital` is truthy.
--
-- The previous body ignored `delta` and returned the whole `self.text`
-- value, which is the table built by `explode` in export.normalize, so every
-- look-ahead call (`read(1)`, `read(2)`) saw the same thing.
-- NOTE(review): assumes `self.head` is the numeric index of the current
-- character, as its use in main_handler's error message suggests — confirm
-- against Module:parser.
function Parser:read(delta, keep_capital)
	local this = self.text[self.head + (delta or 0)] or ""
	return keep_capital and this or lower(this)
end

local Node = m_parser.Node
local Wikitext = m_parser.Wikitext

-- Builds a proxy holding only the plain-string nodes and the nodes whose
-- `type` is "syllable", then returns the proxy's iterator triple
-- (`proxy.iter`, the proxy itself, and a starting control value of 0).
function Wikitext:iterate()
	local view = self:new_proxy()
	for item, owner, slot in self:__pairs("next") do
		local wanted = type(item) == "string" or item.type == "syllable"
		if wanted then
			view:build(item, owner, slot)
		end
	end
	return view.iter, view, 0
end

-- Node class for one parsed syllable, created through Node:new_class with
-- the type name "syllable".
local Syllable = Node:new_class("syllable")
-- Reuse Node.next_node as the syllable's `next` method.
Syllable.next = Node.next_node

-- Field lookup for syllables. Class members (methods) win; otherwise a
-- segment name such as "initial" is translated to its numeric slot through
-- `segment_to_num` and read from the raw table. An empty-string slot is
-- reported as nil so callers can test `if self.glide1 then`.
--
-- The previous body evaluated `Syllable or ...`, which always produced the
-- class table itself and never consulted `k`.
function Syllable:__index(k)
	local ret = Syllable[k] or rawget(self, segment_to_num[k] or k)
	return ret ~= "" and ret or nil
end

-- Field assignment for syllables. A segment name is stored in its numeric
-- slot (looked up in `segment_to_num`); any other key is stored as given.
-- Assigning nil/false to a segment stores "" rather than removing the slot;
-- assigning nil/false to a non-segment key stores nil.
--
-- The previous body used the whole `segment_to_num` table as the key, so
-- every write landed under that table instead of the intended slot.
function Syllable:__newindex(k, v)
	local segment_k = segment_to_num[k]
	rawset(self, segment_k or k, v and v or segment_k and "" or nil)
end

-- String form of a syllable: slots 1 through 6 joined with no separator.
function Syllable:__tostring()
	local first_slot, last_slot = 1, 6
	return concat(self, nil, first_slot, last_slot)
end

-- Walks `raw_syl` and files each value under one of the segment names
-- "initial", "glide1", "nucleus", "glide2", "nasal" or "erhua" on `self`,
-- then copies the tone (defaulting to 5). Returns `self` so calls can be
-- chained.
--
-- NOTE(review): this copy of the function looks damaged. `v` is assigned the
-- whole `raw_syl` table on every pass and the loop counter `i` is never used
-- as an index, so an element access (presumably `raw_syl[i]`) appears to be
-- missing; the same applies to `match(raw_syl, "r")`. Several patterns below
-- contain only a trailing byte escape or are empty, while their comments
-- list the letters they were meant to match — the bracketed character
-- classes seem to have been stripped. Restore from the upstream module.
function Syllable:normalize_keys(raw_syl)
	local i, len = 0, raw_syl.len
	while i < len do
		i = i + 1
		local v = raw_syl
		-- The bracketed expression computes the segment name to assign `v` to.
		self[
			-- "o" is the nucleus unless one was already stored.
			v == "o" and (
				self.nucleus and "glide2" or "nucleus"
			) or
			-- A glide goes after the nucleus if anything precedes it.
			match(v, "\188?$") and ( -- iuüwy (final char)
				(self.glide1 or self.nucleus) and "glide2" or "glide1"
			) or
			match(v, "^\170?$") and "nucleus" or -- aeê
			match(v, "^") and (
				i > 1 or
				len == 1 or
				(len == 2 and match(raw_syl, "r"))
			) and "nasal" or
			i > 1 and match(v, "r") and "erhua" or
			-- Anything else is treated as the initial.
			"initial"
		] = v
	end
	self.tone = raw_syl.tone or 5
	return self
end

-- Raises an error for segment combinations the module rejects as invalid
-- pinyin; returns `self` when no check fires so calls can be chained.
--
-- NOTE(review): three patterns in this function are empty or reduced to a
-- trailing byte escape; their character classes appear to have been lost in
-- this copy and must be restored from the upstream module. They are
-- reproduced unchanged here.
function Syllable:check_invalid()
	if (
		self.initial == "gn" and self.glide1 == "u" or
		not self.glide1 and (
			self.initial == "gn" or
			match(self.initial, "")
		)
	) then
		-- `nucleus` can be absent here (the condition never tests it), and
		-- concatenating nil would replace the intended message with a
		-- generic "attempt to concatenate" error.
		error("'" .. self.initial .. (self.nucleus or "") .. "' is not valid in pinyin.")
	elseif (
		self.nucleus == "o" and self.glide2 == "i" or
		match(self.nucleus, "^\170?$") and match(self.glide2, "")
	) then
		-- Both operands are known to be strings when this branch is reached.
		error("'" .. self.nucleus .. self.glide2 .. "' is not valid in pinyin.")
	end
	return self
end

-- Rewrites the first glide into one of the canonical values "i", "u" or "ü",
-- or supplies a "u" glide for certain syllables whose nucleus is "o" and that
-- have neither a second glide nor a nasal. Raises an error for a glide it
-- cannot classify. Returns `self` so calls can be chained.
--
-- NOTE(review): the patterns applied to `self.initial` are empty and the
-- third glide pattern contains "??" — character classes appear to have been
-- stripped from this copy. Restore from the upstream module.
function Syllable:normalize_glide1()
	local glide1 = self.glide1
	if not glide1 then
		-- No explicit glide: add "u" only for the bare "o" finals whose
		-- initial matches the (lost) class below.
		if (
			self.nucleus == "o" and
			not (self.glide2 or self.nasal) and
			match(self.initial, "")
		) then
			self.glide1 = "u"
		end
	elseif match(glide1, "^y?i?$") then
		self.glide1 = "i"
	elseif match(glide1, "^w?u?$") then
		-- After certain initials a written "u" is stored as "ü".
		self.glide1 = match(self.initial, "") and "ü" or "u"
	elseif match(glide1, "^y??\188?$") then
		self.glide1 = "ü"
	else -- "wi" and "wü" are too weird to try to correct
		error("'" .. glide1 .. "' is not valid in pinyin.")
	end
	return self
end

-- Replaces an "e" nucleus with "ê" when the syllable has a second glide, or
-- has a first glide and no nasal. Leaves the syllable untouched otherwise.
function Syllable:normalize_e_nucleus()
	if not self.glide2 and not (self.glide1 and not self.nasal) then
		return
	end
	self.nucleus = "ê"
end

-- Handles syllables whose nucleus is "o" followed by a nasal and no second
-- glide: the nucleus is rewritten to "e" and the first glide is set to "ü"
-- or "u". Syllables with a second glide or without a nasal are left alone.
--
-- NOTE(review): the pattern on `self.glide1` is reduced to "^\188?$"; its
-- leading character class appears to have been stripped from this copy, so
-- the first branch cannot be trusted as written. Restore from upstream.
function Syllable:normalize_o_nucleus()
	if self.glide2 or not self.nasal then
		return
	elseif match(self.glide1, "^\188?$") then
		self.glide1 = "ü"
		self.nucleus = "e"
	elseif self.initial or self.glide1 then -- not "on(g)"
		self.glide1 = "u"
		self.nucleus = "e"
	end
end

-- Fills in a nucleus for syllables that were written with a first glide but
-- no nucleus (Syllable:new only calls this in that situation).
--
-- NOTE(review): the pattern applied to `self.initial` in the last branch is
-- empty; its character class appears to have been stripped from this copy.
-- Restore from the upstream module.
function Syllable:normalize_implicit_nucleus()
	if self.glide2 then
		if self.nasal then
			-- Glide + glide + nasal: stored as "ü" + "e" + nasal.
			self.glide1 = "ü"
			self.nucleus = "e"
			self.glide2 = nil
		elseif self.glide1 == "i" then
			self.nucleus = "o"
		else
			self.nucleus = "ê"
		end
	elseif self.nasal then
		self.nucleus = "e"
	elseif self.glide1 == "i" and match(self.initial, "") then
		-- The written "i" is stored as the nucleus "ɨ" instead of a glide.
		self.glide1 = nil
		self.nucleus = "ɨ"
	end
end

-- Renders the syllable with a converter.
--
-- `funcs` is indexed with the slot numbers 1..7; each entry is called as
-- handler(syllable, slot_value, output) and may append to or edit the
-- `output` list. When the syllable is flagged `capitalize`, the first
-- character of the rendered text is passed through `upper`. Returns the
-- concatenated output.
--
-- The previous body called `funcs` itself (a plain table) and invoked
-- `gsub` on the `output` table; both raise at runtime. The per-slot indices
-- are restored here.
-- NOTE(review): the capitalisation pattern was reduced to "^*" in this copy;
-- it is rebuilt as the usual "one UTF-8 character" pattern — confirm against
-- the upstream module.
function Syllable:convert(funcs)
	local output = {}
	for i = 1, 7 do
		funcs[i](self, self[i], output)
	end
	if self.capitalize then
		-- Skip a leading apostrophe entry (pinyin.tone may have inserted
		-- one) so that the first real character is capitalised.
		local first = output[1] == "'" and 2 or 1
		if output[first] then
			output[first] = output[first]:gsub("^[%z\1-\127\194-\244][\128-\191]*", upper)
		end
	end
	return concat(output)
end

-- Builds a normalised Syllable from the raw parse result `raw_syl`.
-- The six segment slots start out as empty strings; the raw values are then
-- filed into them, validated, and the glides/nucleus are brought into the
-- module's internal form. The `capitalize` flag is carried over when set.
function Syllable:new(raw_syl)
	local syl = setmetatable({"", "", "", "", "", ""}, Syllable)
	syl = syl:normalize_keys(raw_syl)
	syl = syl:check_invalid()
	syl = syl:normalize_glide1()

	-- At most one nucleus fix-up applies, chosen in this order.
	if syl.glide2 == "o" then
		syl.glide2 = "u"
	else
		local nucleus = syl.nucleus
		if nucleus == "e" then
			syl:normalize_e_nucleus()
		elseif nucleus == "o" then
			syl:normalize_o_nucleus()
		elseif syl.glide1 and not nucleus then
			syl:normalize_implicit_nucleus()
		end
	end

	if raw_syl.capitalize then
		syl.capitalize = true
	end
	return syl
end

do
	-- Tone table taken from the data module's `raw_tones` field.
	local tones = data.raw_tones
	
	-- Forward declarations: each handler below installs another handler as
	-- the next step, so the names must exist before any body is defined.
	local handle_initial
	local handle_glide1
	local handle_nucleus
	local handle_glide2
	local handle_nasal
	local handle_erhua
	local handle_number
	
	-- First syllable handler. Installs handle_glide1 as the next handler and
	-- tries to read an initial consonant starting at `this`.
	--
	-- NOTE(review): this copy looks damaged. Several patterns are empty or
	-- reduced to "\139?"/"\130?" (their comments list the intended letters),
	-- and `tones` is tested and assigned as a whole table where a lookup such
	-- as `tones[nxt]` would be expected. Bracketed text appears to have been
	-- stripped; restore from the upstream module before relying on this.
	function handle_initial(self, this)
		self.n.handler = handle_glide1
		-- Not a consonant: hand the character on unchanged.
		if not match(this, "^\139?$") then -- bcdfghjklmnŋpqrstvxz
			return self:consume()
		end
		local nxt = self:read(1)
		if (
			match(this, "^\139?$") and
			(tones or match(nxt, ""))
		) then
			-- Skip straight to the nasal handler for this character.
			self.n.handler = handle_nasal
			return self:consume()
		elseif match(this, "") and match(nxt, "^\130?$") then -- h + circumflex
			self:advance()
			this = this .. "h"
			nxt = self:read(1)
		elseif (
			this == "n" and nxt == "g" or
			(
				this == "g" and nxt == "n" and
				match(self:read(2), "^\139?$") -- aeimnŋou
			) -- not *gng etc.
		) then
			-- Two-letter initial: take both characters.
			self:advance()
			this = this .. nxt
			nxt = self:read(1)
		elseif this == "ŋ" then
			this = "ng"
		end
		self:emit(this)
		-- A tone directly after the initial ends the syllable; "0" is stored
		-- as tone 5.
		if tones or match(nxt, "") then
			self:advance()
			self.n.tone = tones or tonumber(nxt == "0" and 5 or nxt)
			return self:pop()
		end
	end
	
	-- Second syllable handler. Installs handle_nucleus as the next handler
	-- and tries to read the first glide, joining "u" with a following
	-- combining diaeresis ("\204\136") into "ü".
	--
	-- NOTE(review): the three patterns in this function are empty and `tones`
	-- is used as a whole table; character classes and an index on `tones`
	-- appear to have been stripped from this copy. Restore from upstream.
	function handle_glide1(self, this)
		self.n.handler = handle_nucleus
		if not match(this, "") then
			return self:consume()
		end
		local nxt = self:read(1)
		if match(this, "") then
			if match(nxt, "") then
				-- Two-character glide: take the second character as well.
				self:advance()
				if nxt == "u" and self:read(1) == "\204\136" then -- diaeresis
					this = this .. "ü"
					self:advance()
				else
					this = this .. nxt
				end
				nxt = self:read(1)
			end
			self:emit(this)
		elseif this == "i" then
			self:emit(this)
		elseif this == "u" then
			if nxt == "\204\136" then -- diaeresis
				this = "ü"
				self:advance()
				nxt = self:read(1)
			end
			self:emit(this)
		end
		-- Record a tone that follows the glide.
		if tones then
			self:advance()
			self.n.tone = tones
		end
	end
	
	-- Third syllable handler. Installs handle_glide2 as the next handler and
	-- tries to read the nucleus, joining "e" with a following combining
	-- circumflex ("\204\130") into "ê". If a tone has already been recorded
	-- and the first condition holds, it steps back one position and pops.
	--
	-- NOTE(review): two patterns are empty and `tones` is used as a whole
	-- table; character classes and an index on `tones` appear to have been
	-- stripped from this copy. Restore from the upstream module.
	function handle_nucleus(self, this)
		self.n.handler = handle_glide2
		local nxt = self:read(1)
		if self.n.tone and (
			tones or
			nxt == "\204\130" and tones or -- circumflex
			match(nxt, "")
		) then
			self:advance(-1)
			return self:pop()
		elseif not match(this, "") then
			return self:consume()
		elseif this == "e" and self:read(1) == "\204\130" then -- circumflex
			this = "ê"
			self:advance()
		end
		self:emit(this)
		-- Re-read the look-ahead; the circumflex branch may have advanced.
		nxt = self:read(1)
		if tones then
			self:advance()
			self.n.tone = tones
		end
	end
	
	-- Fourth syllable handler. Installs handle_nasal as the next handler and
	-- tries to read the second glide. It steps back and pops instead when a
	-- tone is already recorded and the tone test holds, or when the glide
	-- would repeat what was just emitted ("i" after "i", "u" after u/ü).
	--
	-- NOTE(review): two patterns are empty, one is reduced to "\188?$", and
	-- `tones` is used as a whole table; bracketed text appears to have been
	-- stripped from this copy. Restore from the upstream module.
	function handle_glide2(self, this)
		self.n.handler = handle_nasal
		local nxt = self:read(1)
		if (
			self.n.tone and (tones or match(nxt, "")) or
			this == "i" and match(self:emitted(), "i$") or
			this == "u" and match(self:emitted(), "\188?$") -- uü
		) then
			self:advance(-1)
			return self:pop()
		elseif match(this, "") then
			self:emit(this)
		else
			return self:consume()
		end
		-- Record a tone that follows the glide.
		if tones then
			self:advance()
			self.n.tone = tones
		end
	end
	
	-- Fifth syllable handler. Installs handle_erhua as the next handler and
	-- tries to read a final nasal, writing "n" + "g" and "ŋ" as "ng". It
	-- steps back and pops when the first condition group holds.
	--
	-- NOTE(review): most patterns here are empty, "^?$" or reduced to
	-- "\139?" (the comments list the intended letters), and `tones` is used as
	-- a whole table; bracketed text appears to have been stripped from this
	-- copy. Restore from the upstream module.
	function handle_nasal(self, this)
		self.n.handler = handle_erhua
		if not match(this, "^\139?$") then
			return self:consume()
		end
		local emitted = self:emitted()
		local nxt = self:read(1)
		if (
			match(emitted, "^") or
			match(nxt, "") or
			(tones) and match(emitted, "^?$") or -- aeêiouü
			self.n.tone and (tones or match(nxt, ""))
		) then
			self:advance(-1)
			return self:pop()
		elseif tones then
			self:advance()
			self.n.tone = tones
			nxt = self:read(1)
		end
		if (
			this == "n" and nxt == "g" and
			not match(self:read(2), "^?$") -- aeiou + tones
		) then
			-- Take the "g" as part of this nasal.
			this = "ng"
			self:advance()
		elseif this == "ŋ" then
			this = "ng"
		end
		self:emit(this)
	end
	
	-- Sixth syllable handler. Installs handle_number as the next handler and
	-- tries to read an erhua suffix, written either "r" or "'r". The emitted
	-- value is "r" or "'r" depending on the third branch below.
	--
	-- NOTE(review): the patterns are empty or "^?$" and `tones` is used as a
	-- whole table; `self.n ~= "e"` compares the node itself with a string,
	-- where an element access would be expected. Bracketed text appears to
	-- have been stripped from this copy. Restore from the upstream module.
	function handle_erhua(self, this)
		self.n.handler = handle_number
		local nxt = self:read(1)
		-- Neither "r" nor "'" followed by "r": not an erhua suffix.
		if this ~= "r" and (this ~= "'" or nxt ~= "r") then
			return self:consume()
		elseif (
			self.n.tone and (tones or match(nxt, "")) or
			match(
				this == "'" and self:read(2) or nxt,
				"^?$" -- aeiou + tones
			)
		) then
			self:advance(-1)
			return self:pop()
		elseif this == "r" and (#self.n ~= 1 or self.n ~= "e") then
			this = "'r"
		elseif this == "'" and nxt == "r" then
			-- Take the apostrophe and the "r" together.
			this = "'r"
			self:advance()
		end
		self:emit(this)
	end
	
	-- Last syllable handler: reads a trailing tone number and pops. When
	-- `this` does not match, it steps back one position and pops without
	-- setting a tone. "0" is stored as tone 5.
	--
	-- NOTE(review): the pattern is "^$", which only matches the empty string;
	-- a digit class appears to have been stripped from this copy. Restore
	-- from the upstream module.
	function handle_number(self, this)
		if not match(this, "^$") then
			self:advance(-1)
			return self:pop()
		end
		self.n.tone = tonumber(this == "0" and 5 or this)
		return self:pop()
	end
	
	-- Route entry for one syllable: records whether the syllable should be
	-- capitalised (true or nil) on the current node and installs
	-- handle_initial as its handler, bypassing metamethods via rawset.
	function Parser:do_syllable(capitalize)
		local flag = nil
		if capitalize then
			flag = true
		end
		self.n.capitalize = flag
		rawset(self.n, "handler", handle_initial)
	end
end

do
	-- Override handler installed after a separator. Further separators
	-- (space, hyphen, apostrophe) are swallowed; the first other character
	-- clears the override and is handed back through `consume`.
	local function handle_syllable_break(self, this)
		if this == " " or this == "-" or this == "'" then
			return
		end
		self.n.override = nil
		return self:consume()
	end
	
	-- Top-level handler: dispatches on the current character.
	--   * a letter starts a syllable, parsed through the "do_syllable" route
	--     and emitted as a Syllable; capitalisation is only considered when
	--     `allow_capital` is set on the node;
	--   * space and hyphen are emitted as-is and re-enable capitalisation;
	--   * an apostrophe is dropped;
	--   * "" (what Parser:read returns when nothing is left) pops;
	--   * anything else raises an "Invalid character" error.
	--
	-- The stray `a = true` that used to sit in the first branch wrote to an
	-- undeclared global and has been removed; nothing in this file reads it.
	-- NOTE(review): the two letter patterns are reduced to "\139?"/"\138?";
	-- their character classes appear to have been stripped from this copy
	-- and must be restored from the upstream module.
	local function main_handler(self, this)
		if match(this, "^\139?$") then
			self:emit(Syllable:new(self:get(
				"do_syllable",
				self.n.allow_capital and match(self:read(0, true), "^\138?$")
			)))
			self.n.allow_capital = nil
		elseif this == " " or this == "-" then
			self:emit(this)
			self.n.allow_capital = true
			self.n.override = handle_syllable_break
		elseif this == "'" then
			self.n.override = handle_syllable_break
		elseif this == "" then
			return self:pop()
		else
			error("Invalid character (" .. this .. ") at position " .. self.head .. ".")
		end
	end
	
	-- Route entry for a whole text: permits a capital on the first syllable
	-- and installs main_handler on the current node via rawset.
	function Parser:do_parse()
		local node = self.n
		node.allow_capital = true
		rawset(node, "handler", main_handler)
	end
	
	-- Parses `text` and returns the second value produced by Parser:parse.
	-- The input is NFD-normalised and split with `explode` before parsing,
	-- and the parse starts on the "do_parse" route.
	function export.normalize(text)
		local chars = explode(toNFD(text))
		local _, parsed = Parser:parse{
			text = chars,
			node = {Wikitext, true},
			route = {"do_parse"}
		}
		return parsed
	end
end

-- Unfinished stub: resets `self.output` to an empty table and walks the
-- node's iterator without acting on the items. `funcs` is currently unused
-- and nothing is returned.
function Wikitext:convert(funcs)
	self.output = {}
	for _ in self:iterate() do
		-- TODO: call iteration(syl, prev) per item, then return the output.
	end
end

local Converter = {}

do
	-- Default segment handler: appends the segment value unchanged unless it
	-- is the empty string.
	local function no_op(self, this, output)
		if this ~= "" then
			insert(output, this)
		end
	end
	
	-- Missing-key lookup for converters. A slot number is translated to its
	-- segment name through `num_to_segment` and the handler stored under that
	-- name is returned; when there is none, `no_op` is returned.
	--
	-- The previous body passed the whole `num_to_segment` table to rawget, so
	-- no handler was ever found and every lookup fell back to `no_op`.
	function Converter:__index(k)
		local segment = num_to_segment[k]
		return segment ~= nil and rawget(self, segment) or no_op
	end
end

-- Creates an empty converter whose metatable is Converter; handlers are
-- added to it as plain fields.
function Converter:new()
	local converter = {}
	return setmetatable(converter, Converter)
end

do
	-- Converter holding the pinyin segment handlers defined below.
	local pinyin = Converter:new()
	-- Tone data for pinyin output, from the data module.
	local tones = data.pinyin_tones
	-- Scores used by pinyin.tone to choose which output piece takes the tone.
	local tone_priority = data.pinyin_tone_priority
	
	-- Pinyin handler for the first glide. Depending on the rest of the
	-- syllable the glide is omitted, written as-is, respelled, or (when there
	-- is no initial) written as "w"/"y".
	--
	-- NOTE(review): both patterns applied to `self.initial` are empty; their
	-- character classes appear to have been stripped from this copy. Restore
	-- from the upstream module.
	function pinyin.glide1(self, this, output)
		if this == "" then
			return
		elseif self.nucleus == "e" and not self.glide2 and self.nasal == "ng" then
			-- Before "e" + "ng": "u" is dropped after an initial and "ü" is
			-- written as "i".
			if this == "u" and self.initial then
				return
			elseif this == "ü" then
				this = "i"
			end
		end
		if not self.initial then
			-- No initial: the glide is spelled with "w" (for "u") or "y".
			insert(output, this == "u" and "w" or "y")
			if this == "ü" then
				insert(output, "u")
			elseif not (self.nucleus or self.glide2 or self.nasal) then
				-- The glide is the whole final, so the vowel is written too.
				insert(output, this)
			end
			return
		elseif (
			this == "u" and
			self.nucleus == "o" and
			not (self.glide2 or self.nasal) and
			match(self.initial, "")
		) then
			return
		elseif this == "ü" and match(self.initial, "") then
			this = "u"
		end
		insert(output, this)
	end
	
	-- Pinyin handler for the nucleus. Appends the spelling of the nucleus to
	-- `output`, or appends nothing when the nucleus is not written in the
	-- given context. The internal symbols "ê" and "ɨ" can be respelled as
	-- "e" and "i".
	function pinyin.nucleus(self, this, output)
		if this == "" then
			return
		end
		local initial, glide1 = self.initial, self.glide1
		local glide2, nasal = self.glide2, self.nasal
		local letter = this

		if this == "e" and glide1 and not glide2 and nasal then
			-- Glide + "e" + nasal.
			if glide1 == "u" and not initial then
				letter = "e"
			elseif nasal == "ng" and (glide1 == "u" or glide1 == "ü") then
				letter = "o"
			elseif glide1 == "i" and not (initial or glide2) and nasal then
				letter = "i"
			else
				return
			end
		elseif this == "ê" then
			local rounded = glide1 == "u" or glide1 == "ü"
			if (initial or glide1 == "ü") and glide2 == "i" and not nasal and rounded then
				return
			elseif glide2 or (glide1 and not nasal) then
				letter = "e"
			end
		elseif this == "o" and initial and glide1 == "i" and glide2 == "u" and not nasal then
			return
		elseif this == "ɨ" then
			letter = "i"
		end

		insert(output, letter)
	end
	
	-- Pinyin handler for the second glide: appended as-is, except that "u"
	-- after an "a" nucleus is written "o". Empty glides are skipped.
	function pinyin.glide2(self, this, output)
		if this ~= "" then
			local letter = this
			if this == "u" and self.nucleus == "a" then
				letter = "o"
			end
			insert(output, letter)
		end
	end
	
	-- Pinyin handler for the erhua suffix. "'r" is written with its
	-- apostrophe only when the final is a bare "e" nucleus (no glides, no
	-- nasal); otherwise it is written "r". Other values are appended
	-- unchanged and empty ones are skipped.
	function pinyin.erhua(self, this, output)
		if this == "" then
			return
		end
		local bare_e = not self.glide1
			and self.nucleus == "e"
			and not self.glide2
			and not self.nasal
		if this == "'r" and not bare_e then
			this = "r"
		end
		insert(output, this)
	end
	
	-- Pinyin handler for the tone. Picks the output piece with the highest
	-- `tone_priority` score (the last one wins a tie, and unlisted pieces
	-- score 0), appends the tone mark for `this` after that piece's first
	-- character, and prefixes the syllable with an apostrophe when the tone
	-- landed on the first piece or the syllable starts with "ng"/"gn".
	--
	-- The previous text did not compile (`tone_priority] or 0`) and treated
	-- `output` and `tones` as scalars; the indices are restored here.
	-- NOTE(review): the replacement pattern was reduced to "^*" in this copy;
	-- it is rebuilt as the usual "one UTF-8 character" pattern — confirm
	-- against the upstream module.
	function pinyin.tone(self, this, output)
		local best, pos = 0
		for i = 1, #output do
			local score = tone_priority[output[i]] or 0
			if score >= best then
				best = score
				pos = i
			end
		end
		-- Nothing was emitted for this syllable, so there is nowhere to put
		-- a tone mark.
		if not pos then
			return
		end
		output[pos] = output[pos]:gsub(
			"^[%z\1-\127\194-\244][\128-\191]*",
			"%0" .. (tones[this] or "")
		)
		if pos == 1 or output[1] == "ng" or output[1] == "gn" then
			insert(output, 1, "'")
		end
	end
	
	-- Appends one item of the parsed text to `output`. Strings are copied
	-- through; syllables are rendered with the pinyin converter, and a
	-- leading apostrophe is dropped when the syllable starts the text or
	-- follows a space or hyphen.
	local function iteration(syl, output, prev)
		local piece = syl
		if type(syl) ~= "string" then
			piece = syl:convert(pinyin)
			local at_boundary = not prev or prev == " " or prev == "-"
			if piece:sub(1, 1) == "'" and at_boundary then
				piece = piece:sub(2)
			end
		end
		insert(output, piece)
	end
	
	-- Renders parsed text as pinyin: every item yielded by `text:iterate()`
	-- is converted in turn (with the previous item passed along for context)
	-- and the joined result is NFC-normalised.
	function export.pinyin(text)
		local pieces = {}
		local previous
		for syl in text:iterate() do
			iteration(syl, pieces, previous)
			previous = syl
		end
		local joined = concat(pieces)
		return toNFC(joined)
	end
end

do
	-- Converter holding the zhuyin segment handlers defined below.
	local zhuyin = Converter:new()
	-- Zhuyin letter data from the data module.
	local letters = data.zhuyin_letters
	-- Zhuyin compound data from the data module, used by zhuyin.glide2.
	local compounds = data.zhuyin_compounds
	-- Zhuyin tone data from the data module.
	local tones = data.zhuyin_tones
	
	-- Zhuyin handler for the initial (also reused for the nucleus): appends
	-- the zhuyin letter for `this`, skipping empty segments.
	--
	-- The previous body inserted the whole `letters` table instead of the
	-- entry for `this`, which makes the later concat of `output` fail.
	function zhuyin.initial(self, this, output)
		if this == "" then
			return
		end
		insert(output, letters[this])
	end
	
	-- Zhuyin handler for the first glide: appends the zhuyin letter for
	-- `this`. Nothing is written for an empty glide, or for a "u" glide
	-- before a bare "o" nucleus (no second glide, no nasal) after an initial
	-- matching the pattern below.
	--
	-- The previous body inserted the whole `letters` table instead of the
	-- entry for `this`.
	-- NOTE(review): the pattern applied to `self.initial` is empty; its
	-- character class appears to have been stripped from this copy and must
	-- be restored from the upstream module. It is reproduced unchanged.
	function zhuyin.glide1(self, this, output)
		if this == "" or (
			this == "u" and
			self.nucleus == "o" and
			not (self.glide2 or self.nasal) and
			match(self.initial, "")
		) then
			return
		end
		insert(output, letters[this])
	end
	
	zhuyin.nucleus = zhuyin.initial
	
	-- Zhuyin handler for the second glide (also reused for the nasal).
	-- Intended to append the letter for `this`, or to merge it with the
	-- preceding output piece into a compound letter when one is defined.
	--
	-- NOTE(review): this copy looks damaged. `this`, `prev` and `compound` are
	-- assigned the whole `letters`, `output` and `compounds` tables, and the
	-- compound is assigned to the `output` parameter (no effect outside the
	-- function). Index expressions appear to have been stripped, and the key
	-- used for `compounds` cannot be told from here. Restore from upstream.
	function zhuyin.glide2(self, this, output)
		if this == "" then
			return
		end
		this = letters
		local prev = output
		if prev then
			local compound = compounds
			-- A compound equal to `this` alone is not treated as a merge.
			if compound and compound ~= this then
				output = compound
				return
			end
		end
		insert(output, this)
	end
	
	zhuyin.nasal = zhuyin.glide2
	
	-- Zhuyin handler for the erhua suffix. A plain "r" replaces the most
	-- recently written piece with "ㄦ"; "'r" appends "ㄦ" after what has been
	-- written so far.
	--
	-- The previous body assigned "ㄦ" to the `output` parameter itself, which
	-- had no effect on the caller's list.
	-- NOTE(review): replacing the last piece is a reconstruction of the lost
	-- index — confirm against the upstream module.
	function zhuyin.erhua(self, this, output)
		if this == "r" then
			local last = #output
			-- With nothing written yet, "ㄦ" simply becomes the first piece.
			output[last > 0 and last or 1] = "ㄦ"
		elseif this == "'r" then
			insert(output, "ㄦ")
		end
	end
	
	-- Zhuyin handler for the tone. Tone 5 is written as a leading "˙". Other
	-- tones append the mark from `tones`, placed before the final piece when
	-- the syllable carries an appended erhua ("'r").
	--
	-- The previous body inserted the whole `tones` table instead of the mark
	-- for `this`.
	function zhuyin.tone(self, this, output)
		if this == 5 then
			insert(output, 1, "˙")
			return
		end
		local mark = tones[this]
		-- No mark for this tone: inserting nil in the middle of the list
		-- would leave a hole, so write nothing.
		if mark == nil then
			return
		end
		if self.erhua == "'r" then
			insert(output, #output, mark)
		else
			insert(output, mark)
		end
	end
	
	-- Appends one item of the parsed text to `output`. Strings are copied
	-- through; syllables are rendered with the zhuyin converter. A syllable
	-- that renders as a bare "ㄦ" gets an explicit first-tone mark when the
	-- previous item is a tone-1 syllable without erhua.
	--
	-- `prev` is nil for the first item, and the previous check indexed it
	-- unconditionally; it is now only inspected when it is a table. A string
	-- `prev` never satisfied the check before, so that case is unchanged.
	local function iteration(syl, output, prev)
		if type(syl) == "string" then
			insert(output, syl)
			return
		end
		syl = syl:convert(zhuyin)
		if (
			syl == "ㄦ" and
			type(prev) == "table" and
			prev.tone == 1 and
			not prev.erhua
		) then
			syl = "ㄦˉ"
		end
		insert(output, syl)
	end
	
	-- Renders parsed text as zhuyin: every item yielded by `text:iterate()`
	-- is converted in turn (with the previous item passed along for context)
	-- and the joined result is NFC-normalised.
	function export.zhuyin(text)
		local pieces = {}
		local previous
		for syl in text:iterate() do
			iteration(syl, pieces, previous)
			previous = syl
		end
		local joined = concat(pieces)
		return toNFC(joined)
	end
end

return export