Module:za-pron

Hello, you have come here looking for the meaning of the word Module:za-pron. In DICTIOUS you will not only get to know all the dictionary meanings for the word Module:za-pron, but we will also tell you about its etymology, its characteristics and you will know how to say Module:za-pron in singular and plural. Everything you need to know about the word Module:za-pron you have here. The definition of the word Module:za-pron will help you to be more precise and correct when speaking or writing your texts. Knowing the definition of Module:za-pron, as well as those of other words, enriches your vocabulary and provides you with more and better linguistic resources.

This module is in beta stage.
Its interface has been stabilised, but the module may still contain errors. Do not deploy widely until the module has been tested.

local export = {}

local m_str_utils = require("Module:string utilities")

local find = m_str_utils.find
local gmatch = m_str_utils.gmatch
local gsub = m_str_utils.gsub
local lower = m_str_utils.lower
local match = m_str_utils.match
local reverse = m_str_utils.reverse
local upper = m_str_utils.upper

local lang = require("Module:languages").getByCode("za")

-- FIXME: needs rewrite 
-- FIXME: 老壯文 seems to omit tone marks from new Mandarin borrowings (])

-- https://en.wikipedia.org/wiki/Standard_Zhuang
-- https://baike.baidu.com/item/壮语/7703463
-- 在线学壮文 https://web.archive.org/web/0/http://www.gxmyw.com.cn/plus/list.php?tid=21
-- 基础壮文学习系列:壮文标点符号与书写规则 https://web.archive.org/web/0/http://www.gxmyw.com.cn/wsxzw/2013/1017/57.html

-- IPA values for syllable initials; export.convert reads this table when
-- scheme == 'IPA'. Entries are grouped by blank lines as in the original.
-- NOTE(review): every entry below has lost its key (the bracketed `[...]`
-- part appears to have been stripped when this copy was extracted), so the
-- table is not valid Lua as shown — restore the keys from the upstream module.
local initialConv = {
	   = 'p',
	  = 'ɓ',
	   = 'm',
	   = 'f',
	   = 'β',
	  = 'pʲ',
	  = 'mʲ',

	   = 't',
	  = 'ɗ',
	   = 'n',
	   = 'l',
	   = 'θ',

	  = 'ɲ',
	   = 'ɕ',
	   = 'j',

	   = 'k',
	  = 'ŋ',
	   = 'ɣ',
	  = 'kʲ',
	 = 'ŋʷ',
	  = 'kʷ',

	    = 'ʔ',
	   = 'h',
}
	-- ???

-- IPA values for syllable vowels. Each entry carries two fields:
--   alone  - value export.convert uses when the syllable's final is ''
--   wfinal - value export.convert uses when the syllable has a final
-- A field set to `false` presumably marks a combination that does not occur
-- (see the "cannot be w/ final" / "cannot be w/o final" notes after the
-- table) — TODO confirm.
-- NOTE(review): the entry keys (bracketed `[...]`) are missing from this
-- copy, so the table is not valid Lua as shown — restore from upstream.
local vowelConv = {
	   = { alone = 'a', wfinal = 'aː' },
	   = { alone = 'e', wfinal = 'eː' },
	   = { alone = 'i', wfinal = 'i' },
	   = { alone = 'o', wfinal = 'oː' },
	   = { alone = 'u', wfinal = 'u' },
	   = { alone = 'ɯ', wfinal = 'ɯ' },

	  = { alone = 'aːi', wfinal = false },
	  = { alone = 'ei', wfinal = false },
	  = { alone = 'oːi', wfinal = false },
	  = { alone = 'uːi', wfinal = false },
	  = { alone = 'ɯːi', wfinal = false },

	  = { alone = 'ai', wfinal = 'a' },
	  = { alone = false, wfinal = 'iː' },
	  = { alone = false, wfinal = 'o' },
	  = { alone = false, wfinal = 'uː' },
	  = { alone = false, wfinal = 'ɯː' },

	  = { alone = 'aːu', wfinal = false },
	 = { alone = 'au', wfinal = false },
	  = { alone = 'eːu', wfinal = false },
	  = { alone = 'iu', wfinal = false },
	  = { alone = 'ou', wfinal = false },

	  = { alone = 'aɯ', wfinal = false },
}
	-- ??
	-- w/ final only: e?
	-- cannot be w/ final: ai, ei, oi, ui, wi, au, aeu, eu, iu, ou, aw // e?
	-- cannot be w/o final: ie, oe, ue // e

-- IPA values for syllable finals; export.convert reads this table when
-- scheme == 'IPA'. The first entry yields the empty string; 'p', 't' and 'k'
-- each appear twice, so two different keys presumably share each stop
-- (TODO confirm against upstream).
-- NOTE(review): the entry keys (bracketed `[...]`) are missing from this
-- copy, so the table is not valid Lua as shown — restore from upstream.
local finalConv = {
	   = '',
	  = 'm',

	  = 'n',
	 = 'ŋ',
	  = 'p',
	  = 'p',
	  = 't',
	  = 't',
	  = 'k',
	  = 'k',
}
	-- ?g?

-- Tone-letter (contour) strings used for the IPA output of export.convert.
-- The trailing comment on each line gives the contour in digits and, for some
-- entries, a letter (z, j, x, q, h).
-- The keys are presumably the tone identifiers produced by fix() and
-- export.convert ('1'..'8', plus '7:' which convert assigns for tone 7 with a
-- long vowel) — TODO confirm.
-- NOTE(review): the entry keys (bracketed `[...]`) are missing from this
-- copy, so the table is not valid Lua as shown — restore from upstream.
local toneConv = {
	   = '˨˦', --24
	  = '˧˩', --31 z
	  = '˥', --55 j
	  = '˦˨', --42 x
	  = '˧˥', --35 q
	  = '˧', --33 h

	  = '˥', --55
	 = '˧˥', --35
	  = '˧', --33
}

-- Replacement table passed to gsub in fix() to turn matched characters into
-- the tone digits '1'..'6'.
-- The keys are presumably the tone letters of the orthography — TODO confirm.
-- NOTE(review): the entry keys (bracketed `[...]`) are missing from this
-- copy, so the table is not valid Lua as shown — restore from upstream.
local toneConvToNumbers = {
	   = '1',
	  = '2',
	  = '3',
	  = '4',
	  = '5',
	  = '6',
}

-- Tone suffixes used by export.convert for the 'hyphenation' scheme: either
-- one of the letters z, j, x, q, h or the empty string.
-- The keys are presumably tone digits (first group) and the extra tone
-- identifiers added by fix() (second group) — TODO confirm.
-- NOTE(review): the entry keys (bracketed `[...]`) are missing from this
-- copy, so the table is not valid Lua as shown — restore from upstream.
local toneConvFromNumbers = {
	  = '',
	  = 'z',
	  = 'j',
	  = 'x',
	  = 'q',
	  = 'h',

	  = '',
	  = '',
	  = '',
}

-- Consonant replacements applied by export.convert to both the initial and
-- the final when scheme == 'old'; consonants without an entry are kept as-is
-- there via `or`.
-- Going by the `_1957` suffix this is presumably the 1957 orthography —
-- TODO confirm.
-- NOTE(review): the entry keys (bracketed `[...]`) are missing from this
-- copy, so the table is not valid Lua as shown — restore from upstream.
local consonantConv_1957 = {
	  = 'ƃ',
	  = 'ƌ',
	  = 'ŋ',
	 = 'ŋv',
}

-- Vowel replacements for the 'old' scheme; export.convert passes this table
-- as the replacement argument of two gsub calls on the vowel.
-- NOTE(review): the entry keys (bracketed `[...]`) are missing from this
-- copy, so the table is not valid Lua as shown — restore from upstream.
local vowelConv_1957 = {
	 = 'ɵ',
	 = 'ə',
	  = 'ɯ',
}

-- Tone suffixes used by export.convert for the 'old' scheme: either a tone
-- letter (ƨ, з, ч, ƽ, ƅ) or the empty string.
-- The keys are presumably the same tone identifiers as in
-- toneConvFromNumbers — TODO confirm.
-- NOTE(review): the entry keys (bracketed `[...]`) are missing from this
-- copy, so the table is not valid Lua as shown — restore from upstream.
local toneConv_1957 = {
	  = '',
	 = 'ƨ',
	 = 'з',
	 = 'ч',
	 = 'ƽ',
	 = 'ƅ',

	  = '',
	  = '',
	  = '',
}

-- Normalises `text` word by word for export.convert: tone letters are turned
-- into tone digits, syllable boundaries are worked out using a temporary '|'
-- marker, a tone digit is placed in front of every boundary that lacks one,
-- and the markers are removed again before the pieces are re-joined.
--
-- @param text  string to process
-- @return      the processed string (all words concatenated in order)
--
-- NOTE(review): most patterns in this copy look truncated — e.g. '\'?+*',
-- '' and '$' — as if their bracketed character classes were stripped during
-- extraction. The comments below describe only what the remaining code shows;
-- restore the patterns from the upstream module before changing any logic.
local function fix(text)
	local output = {}

	-- Each match is split into an optional leading apostrophe, the word
	-- itself, and whatever trails it (`nonword`).
	for word in gmatch(text, '\'?+*') do
		local apostrophe, word, nonword = match(word, '(\'?)(+)(*)')

		-- Replace matched characters through the toneConvToNumbers table.
		word = gsub(word, '', toneConvToNumbers) -- excludes h which is ambiguously tone or consonant

		-- /CV-CV/...=<CVCV>...
		-- /CVC-V/...=<CVC'V>...
		-- regex (pattern?) wildcards are greedy from the beginning of the string
		-- so counteract this by reversing the string
		-- so if we look for "()" it will first match what was originally the last CVC sequence
		-- (or something)
		word = reverse(word)
		-- Append '|' after every match on the reversed string and prefix one
		-- more; after the second reverse the markers sit in front of the
		-- matched groups and at the end of the word.
		word = '|' .. gsub(word, '(g??)(??+)(???)', '%1%2%3|')
		-- "+" seems to be needed after ""
		-- correct: "daeuz"→"daeuz" wrong: "daeuz"→"da|euz"
		word = reverse(word)
		mw.log('za1>' .. word)

		-- fix bad initial consonant: "|hya"→"h|ya", "|ngya"→"n|gya"
		-- The callback moves the boundary one capture to the right; when it
		-- returns nothing, gsub leaves the match unchanged.
		-- NOTE(review): `not initialConv` is always false for a table, so this
		-- was presumably an indexed lookup (initialConv[...]) whose index was
		-- lost in extraction — TODO confirm.
		word = gsub(word, '(|)()()(?)()', function(x,a,b,c,d)
			if not initialConv then
				return a..x..b..c..d
			end
		end)
		word = gsub(word, '(+)(g?)(|)', function(v,c,x)
			-- if there is a final consonant,
			if c ~= '' then
				-- and vowel sequence is not a sequence that only appears before finals,
				if not match(v, '^e?$') then
					-- detect valid ...VC sequence at end of string
					return reverse(gsub(reverse(v..c..x), '(|)(+)(e?)', '%1%2%3|'))
				end
			end
		end)
		-- Special case: move the boundary so that 'g' closes the previous
		-- syllable and 'vu' opens the next one.
		word = gsub(word, '|gvu', 'g|vu')
		mw.log('za2>' .. word)

		-- An 'h' directly before a boundary is treated as the tone letter and
		-- becomes tone digit 6.
		word = gsub(word, 'h|', '6|')
		-- For each match that ends at a boundary, append a tone digit:
		-- '7' or '8' depending on how the match ends (the second test
		-- excludes endings in 'ng'), otherwise '1'.
		-- NOTE(review): both '$' patterns have lost their character class, so
		-- the exact endings cannot be read from this copy.
		word = gsub(word, '(+)|', function(a)
			if match(a, '$') then
				return a..'7|'
			elseif match(a, '$') and not match(a, 'ng$') then
				return a..'8|'
			else
				return a..'1|'
			end
		end)
		mw.log('za3>' .. word)

		-- Drop the temporary boundary markers and restore the surrounding
		-- apostrophe / trailing text.
		table.insert(output, apostrophe .. gsub(word, '|', '') .. nonword)
	end

	return table.concat(output)
end

-- Converts Zhuang text into one of several representations.
--
-- @param text     string to convert, or a table with an `args` field (in
--                 which case all three parameters are taken from it)
-- @param scheme   one of 'IPA', 'old', 'hyphenation', 'tone_numbers',
--                 'raw_syllables'; any other value raises an error once a
--                 syllable is processed
-- @param new_bor  when truthy, tone '1' is changed to '5' for the 'IPA' and
--                 'tone_numbers' schemes
-- @return         a string for every scheme except 'raw_syllables', which
--                 returns the table of syllable strings unjoined
--
-- NOTE(review): this copy has lost its bracketed expressions. The table
-- lookups (`initialConv`, `vowelConv.alone`, `toneConv`, ...) were presumably
-- indexed by initial/vowel/final/tone, `text.args` was presumably indexed
-- three different ways, and several patterns are missing their character
-- classes. Restore them from the upstream module before editing.
function export.convert(text, scheme, new_bor)
	-- Invocation with a frame-like table: pull the parameters out of .args.
	if type(text) == "table" then
		text, scheme, new_bor = text.args, text.args, text.args
	end
	local converted = {}

	-- Leading text captured before processing; it is prepended again for the
	-- 'old', 'hyphenation' and 'tone_numbers' schemes (not for 'IPA').
	local extra_pre = match(text, '^*')

	-- Insert tone digits so that every syllable ends in one.
	text = fix(text)

	mw.log('za4>' .. text)

	for syllable in gmatch(text, '+%d*') do
		-- Split into initial, vowel, final, one tone digit and trailing extra.
		local initial, vowel, final, tone, extra = match(syllable, '^(???)(??)(?g?)(%d)(*)$')
		
		local caps = false
		mw.log('za5>' .. initial, vowel, final, tone, extra)

		-- Remember the match so the first character can be upper-cased again
		-- later, and work on lower-case parts from here on.
		if find(initial .. vowel .. final, '') then
			caps = true
			initial, vowel, final = lower(initial), lower(vowel), lower(final)
		end

		if scheme == 'IPA' then
			initial = initialConv
			-- Open syllables use the `alone` value, closed ones `wfinal`;
			-- the test runs before `final` is overwritten on the next line.
			vowel = final == '' and vowelConv.alone or vowelConv.wfinal
			final = finalConv
			-- Tone 7 with a long vowel gets its own identifier '7:'.
			if tone == '7' and find(vowel, 'ː') then
				tone = '7:'
			elseif new_bor and tone == '1' then
				tone = '5'
			end

			tone = toneConv

			-- `extra` is not included in the IPA output.
			syllable = initial .. vowel .. final .. tone

			table.insert(converted, syllable)
		elseif scheme == 'old' then
			-- Fall back to the unchanged consonant when there is no entry.
			initial = consonantConv_1957 or initial
			vowel = gsub(vowel, 'e', vowelConv_1957)
			vowel = gsub(vowel, 'w', vowelConv_1957)
			final = consonantConv_1957 or final
			tone = toneConv_1957

			-- Two open-syllable vowels get a different spelling.
			if vowel == 'ə' and final == '' then
				vowel = 'əi'
			elseif vowel == 'aɯ' and final == '' then
				vowel = 'əɯ'
			end

			syllable = initial .. vowel .. final .. tone .. extra
			if caps then syllable = gsub(syllable, '^(.)', upper) end

			table.insert(converted, syllable)
		elseif scheme == 'hyphenation' then
			tone = toneConvFromNumbers

			-- Apostrophes are dropped from the trailing text.
			extra = gsub(extra, '\'', '')
			syllable = initial .. vowel .. final .. tone .. extra
			if caps then syllable = gsub(syllable, '^(.)', upper) end

			table.insert(converted, syllable)
		elseif scheme == 'tone_numbers' then
			if new_bor and tone == '1' then
				tone = '5'
			end

			extra = gsub(extra, '\'', '')
			-- The tone digit is emitted as HTML superscript.
			syllable = initial .. vowel .. final .. '<sup>' .. tone .. '</sup>' .. extra
			if caps then syllable = gsub(syllable, '^(.)', upper) end

			table.insert(converted, syllable)
		elseif scheme == 'raw_syllables' then
			-- Keep the syllable exactly as fix() produced it.
			table.insert(converted, syllable)
		else
			error('Convert to what representation?')
		end
	end

	-- Join the per-syllable results according to the scheme.
	if scheme == 'IPA' then
		converted = '/' .. table.concat(converted, ' ') .. '/'
	elseif scheme == 'old' then
		converted = extra_pre .. table.concat(converted, '')
		-- Removes an apostrophe (literal, then the &#39; entity) that follows
		-- the captured text. NOTE(review): the capture "()" is empty in this
		-- copy; it presumably held a character class — TODO confirm.
		converted = mw.ustring.gsub(mw.ustring.gsub(converted, "()'", "%1"), "()&#39;", "%1")
	elseif scheme == 'hyphenation' then
		-- Syllables are separated by '‧' and all spaces are removed.
		converted = gsub(extra_pre .. table.concat(converted, '‧'), ' ', '')
	elseif scheme == 'tone_numbers' then
		converted = extra_pre .. table.concat(converted, '')
	elseif scheme == 'raw_syllables' then
		-- (pass)
	end

	return converted
end

-- Template entry point: builds the pronunciation lines for a Zhuang term.
-- Reads the arguments of the parent frame through Module:parameters, falls
-- back to the current page title when no text is given, and returns three
-- items joined by '\n* ': the qualified IPA, the tone-number form and the
-- hyphenation.
--
-- @param frame  frame object; only frame:getParent().args is read
-- @return       string of the three items joined with '\n* '
--
-- NOTE(review): the keys of `params` and the indexes on `args` are missing
-- from this copy (presumably stripped bracketed expressions), so which
-- argument feeds `text` and which feeds `new_bor` cannot be read from here —
-- restore from the upstream module.
function export.show(frame)
	local params = {
		 = { },
		 = { type = "boolean" },
	}
	local args = require("Module:parameters").process(frame:getParent().args, params)

	local text, new_bor = args, args
	-- Default to the title of the page being rendered.
	if not text then text = mw.title.getCurrentTitle().text end

	local ret = {}

	-- Item 1: accent qualifier "Standard Zhuang" followed by the IPA.
	table.insert(
		ret,
		require("Module:accent qualifier").format_qualifiers(lang, {"Standard Zhuang"}) ..
		" " ..
		require("Module:IPA").format_IPA_full {
			lang = lang,
			items = {{ pron = export.convert(text, "IPA", new_bor) }}
		}
	)

	-- Item 2: spelling with superscript tone numbers.
	table.insert(
		ret,
		'Tone numbers: ' ..
		export.convert(text, 'tone_numbers', new_bor)
	)

	-- Item 3: hyphenation.
	-- NOTE(review): the trailing ']' looks like the remainder of markup that
	-- was stripped from this copy — TODO confirm against upstream.
	table.insert(
		ret,
		'Hyphenation: ' ..
		export.convert(text, 'hyphenation', new_bor) ..
		']'
	)

	return table.concat(ret, '\n* ')
end

-- Template helper that returns 'y' or the empty string for the given text:
-- '' when the first pattern matches, 'y' when only the second matches, and
-- '' otherwise.
--
-- @param frame  frame object; frame.args is used as the text
-- @return       'y' or ''
--
-- NOTE(review): both patterns are empty in this copy and `frame.args` has no
-- index — the bracketed character classes and the argument index were
-- presumably stripped during extraction. Judging by the function name and the
-- CJK remark below, the first test presumably rejects non-Latin characters
-- and the second accepts Latin ones — TODO confirm against upstream.
function export.is_latin(frame)
	local text = frame.args
	if find(text, '') then
		return ''
	elseif find(text, '') then
		return 'y'
	else
		return '' -- CJK is too much of a pain to detect
	end
end

return export