Module:km-translit

Hello, you have come here looking for the meaning of the word Module:km-translit. In DICTIOUS you will not only find all the dictionary meanings of the word Module:km-translit, but we will also tell you about its etymology and its characteristics, and you will learn how to say Module:km-translit in the singular and plural. Everything you need to know about the word Module:km-translit is here. The definition of the word Module:km-translit will help you to be more precise and correct when speaking or writing your texts. Knowing the definition of Module:km-translit, as well as those of other words, enriches your vocabulary and provides you with more and better linguistic resources.

This module is in beta stage.
Its interface has been stabilised, but the module may still contain errors. Do not deploy widely until the module has been tested.

This module will transliterate Khmer language text per WT:KM TR. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:km-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the transliteration fails, returns nil.

-- Module table; public functions are attached to it and it is returned at the end of the file.
local export = {}
-- Local aliases for the mw.ustring functions used throughout the module.
local toNFC = mw.ustring.toNFC
local gsub = mw.ustring.gsub
local len = mw.ustring.len
local match = mw.ustring.match
local sub = mw.ustring.sub

-- Consonant table: Khmer consonant -> { romanised consonant, vowel class }.
-- The second element ("a" or "o") is the class used to choose between the two
-- readings stored for each vowel in vowel_conv.
-- NOTE(review): the keys were missing from this listing (every entry started
-- with a bare "="), which is a syntax error. They are restored here in Unicode
-- order U+1780..U+17A2, which is the only ordering consistent with the
-- romanisations. The "" key (an absent consonant) and the "ប៉" key (BA +
-- MUUSIKATOAN, romanised "p") are inferred from how export.tr indexes this
-- table -- confirm against the upstream module.
local cons_conv = {
	["ក"] = { "k", "a" },
	["ខ"] = { "kh", "a" },
	["គ"] = { "k", "o" },
	["ឃ"] = { "kh", "o" },
	["ង"] = { "ng", "o" },
	["ច"] = { "ch", "a" },
	["ឆ"] = { "chh", "a" },
	["ជ"] = { "ch", "o" },
	["ឈ"] = { "chh", "o" },
	["ញ"] = { "nh", "o" },
	["ដ"] = { "d", "a" },
	["ឋ"] = { "th", "a" },
	["ឌ"] = { "d", "o" },
	["ឍ"] = { "th", "o" },
	["ណ"] = { "n", "a" },
	["ត"] = { "t", "a" },
	["ថ"] = { "th", "a" },
	["ទ"] = { "t", "o" },
	["ធ"] = { "th", "o" },
	["ន"] = { "n", "o" },
	["ប"] = { "b", "a" },
	["ផ"] = { "ph", "a" },
	["ព"] = { "p", "o" },
	["ភ"] = { "ph", "o" },
	["ម"] = { "m", "o" },
	["យ"] = { "y", "o" },
	["រ"] = { "r", "o" },
	["ល"] = { "l", "o" },
	["វ"] = { "v", "o" },
	["ឝ"] = { "sh", "a" },
	["ឞ"] = { "ss", "o" },
	["ស"] = { "s", "a" },
	["ហ"] = { "h", "a" },
	["ឡ"] = { "l", "a" },
	["អ"] = { "ʼ", "a" },
	-- Empty string: lets an absent (optional) consonant slot be looked up safely.
	[""] = { "", "" },

	-- BA followed by the consonant shifter MUUSIKATOAN is read "p".
	["ប៉"] = { "p", "a" },
}

-- Two-consonant clusters beginning with ហ that are romanised as one letter.
-- NOTE(review): the keys were missing from this listing (syntax error). They are
-- restored as "ហ" + second consonant WITHOUT the coeng sign (U+17D2), on the
-- assumption that export.tr looks this table up with the two captured
-- consonants concatenated -- confirm against the upstream module.
local digraph = {
	["ហគ"] = "g", ["ហន"] = "n", ["ហម"] = "m", ["ហល"] = "l", ["ហវ"] = "f", ["ហស"] = "z",
}

-- Independent vowel letters -> romanisation. Applied to the whole text near the
-- end of export.tr via a table-valued gsub replacement.
-- NOTE(review): the keys were missing from this listing (syntax error). They are
-- restored in Unicode order U+17A5..U+17B3 (15 letters), which matches both the
-- romanisations and the 15 "indep_vowel" entries of char_type -- confirm against
-- the upstream module.
local indep_vowel = {
	["ឥ"] = "ʼĕ", ["ឦ"] = "ʼei",
	["ឧ"] = "ʼŏ", ["ឨ"] = "ʼŏk", ["ឩ"] = "ʼŭ", ["ឪ"] = "ʼŏu",
	["ឫ"] = "rœ̆", ["ឬ"] = "rœ",
	["ឭ"] = "lœ̆", ["ឮ"] = "lœ",
	["ឯ"] = "ʼé", ["ឰ"] = "ʼai", ["ឱ"] = "ʼaô", ["ឲ"] = "ʼaô", ["ឳ"] = "ʼâu",
}

-- Vowel table: each entry holds two romanisations of one vowel spelling.
-- NOTE(review): both the outer keys and the inner keys are missing from this
-- listing, so the table does not parse as shown. Presumably the outer key is the
-- Khmer vowel sequence and the inner keys are the consonant classes "a" and "o"
-- (first and second value respectively) -- TODO restore from the upstream module.
local vowel_conv = {
	 = {  = "â",  = "ô" }, 
	 = {  = "a",  = "éa" }, 
	 = {  = "ĕ",  = "ĭ" }, 
	 = {  = "ei",  = "i" }, 
	 = {  = "œ̆",  = "œ̆" }, 
	 = {  = "œ",  = "œ" }, 
	 = {  = "ŏ",  = "ŭ" }, 
	 = {  = "o",  = "u" }, 
	 = {  = "uŏ",  = "uŏ" }, 
	 = {  = "aeu",  = "eu" }, 
	 = {  = "eua",  = "eua" }, 
	 = {  = "iĕ",  = "iĕ" }, 
	 = {  = "é",  = "é" }, 
	 = {  = "ê",  = "ê" }, 
	 = {  = "ai",  = "ey" }, 
	 = {  = "aô",  = "oŭ" }, 
	 = {  = "au",  = "ŏu" }, 
	 = {  = "om",  = "ŭm" }, 
	 = {  = "âm",  = "um" }, 
	 = {  = "ăm",  = "ŏâm" }, 
	 = {  = "ăng",  = "eăng" }, 
	 = {  = "ăh",  = "eăh" }, 
	 = {  = "ŏh",  = "uh" }, 
	 = {  = "éh",  = "éh" }, 
	 = {  = "aŏh",  = "uŏh" }, 
	 = {  = "ĕh",  = "ĭh" },
	 = {  = "ĕh",  = "ĭh" },
	 = {  = "aʼ",  = "éaʼ" },
	-- NOTE(review): "font-color" is not a CSS property ("color" is); left as-is here.
	 = {  = '<span style="font-color:#DCDCDC">â</span>',  = '<span style="font-color:#DCDCDC">ô</span>' },
}

-- Character classification used by the syllable splitter in export.tr:
-- every Khmer character it cares about is mapped to a type name.
-- NOTE(review): the keys were missing from this listing (syntax error). They are
-- restored in Unicode order U+1780..U+17DD, skipping U+17A3/U+17A4 and
-- U+17B4/U+17B5. The entry counts and line grouping of the original (35
-- consonants, 15 independent vowels, 13 + 1 + 2 + 3 vowel marks, 2 shifters,
-- ...) line up with that order exactly. The final "ZWS" key is the zero-width
-- space U+200B that export.tr inserts as a syllable separator; it is written
-- with byte escapes so it stays visible -- confirm against the upstream module.
local char_type = {
	["ក"] = "consonant", ["ខ"] = "consonant", ["គ"] = "consonant", ["ឃ"] = "consonant", ["ង"] = "consonant",
	["ច"] = "consonant", ["ឆ"] = "consonant", ["ជ"] = "consonant", ["ឈ"] = "consonant", ["ញ"] = "consonant",
	["ដ"] = "consonant", ["ឋ"] = "consonant", ["ឌ"] = "consonant", ["ឍ"] = "consonant", ["ណ"] = "consonant",
	["ត"] = "consonant", ["ថ"] = "consonant", ["ទ"] = "consonant", ["ធ"] = "consonant", ["ន"] = "consonant",
	["ប"] = "consonant", ["ផ"] = "consonant", ["ព"] = "consonant", ["ភ"] = "consonant", ["ម"] = "consonant",
	["យ"] = "consonant", ["រ"] = "consonant", ["ល"] = "consonant", ["វ"] = "consonant", ["ឝ"] = "consonant",
	["ឞ"] = "consonant", ["ស"] = "consonant", ["ហ"] = "consonant", ["ឡ"] = "consonant", ["អ"] = "consonant",
	["ឥ"] = "indep_vowel", ["ឦ"] = "indep_vowel", ["ឧ"] = "indep_vowel",
	["ឨ"] = "indep_vowel", ["ឩ"] = "indep_vowel", ["ឪ"] = "indep_vowel", ["ឫ"] = "indep_vowel", ["ឬ"] = "indep_vowel",
	["ឭ"] = "indep_vowel", ["ឮ"] = "indep_vowel", ["ឯ"] = "indep_vowel", ["ឰ"] = "indep_vowel", ["ឱ"] = "indep_vowel",
	["ឲ"] = "indep_vowel", ["ឳ"] = "indep_vowel",
	["ា"] = "vowel_sign", ["ិ"] = "vowel_sign", ["ី"] = "vowel_sign", ["ឹ"] = "vowel_sign", ["ឺ"] = "vowel_sign",
	["ុ"] = "vowel_sign", ["ូ"] = "vowel_sign", ["ួ"] = "vowel_sign", ["ើ"] = "vowel_sign", ["ឿ"] = "vowel_sign",
	["ៀ"] = "vowel_sign", ["េ"] = "vowel_sign", ["ែ"] = "vowel_sign",
	["ៃ"] = "terminating_vowel",
	["ោ"] = "vowel_sign", ["ៅ"] = "vowel_sign",
	["ំ"] = "terminating_vowel", ["ះ"] = "terminating_vowel", ["ៈ"] = "terminating_vowel",
	["៉"] = "consonant_shift", ["៊"] = "consonant_shift",
	["់"] = "terminating_sign",
	["៌"] = "sign", ["៍"] = "sign", ["៎"] = "sign", ["៏"] = "sign", ["័"] = "sign", ["៑"] = "sign",
	["្"] = "combining_sign",
	["៓"] = "sign",
	["។"] = "punctuation", ["៕"] = "punctuation",
	["៖"] = "sign",
	["ៗ"] = "punctuation", ["៘"] = "punctuation", ["៙"] = "punctuation", ["៚"] = "punctuation", ["៛"] = "punctuation",
	["ៜ"] = "sign", ["៝"] = "sign",
	-- U+200B ZERO WIDTH SPACE, spelled as UTF-8 bytes E2 80 8B.
	["\226\128\139"] = "ZWS",
}

-- Numeral table: Khmer numerals -> ASCII digits, applied by a table-valued gsub
-- at the start of export.tr.
-- NOTE(review): the keys were missing from this listing (syntax error). The two
-- runs of 0-9 are restored as the Khmer digits U+17E0..U+17E9 and the Khmer
-- divination numerals U+17F0..U+17F9 -- confirm against the upstream module.
local sp_symbols = {
	["០"] = "0", ["១"] = "1", ["២"] = "2", ["៣"] = "3", ["៤"] = "4",
	["៥"] = "5", ["៦"] = "6", ["៧"] = "7", ["៨"] = "8", ["៩"] = "9",
	["៰"] = "0", ["៱"] = "1", ["៲"] = "2", ["៳"] = "3", ["៴"] = "4",
	["៵"] = "5", ["៶"] = "6", ["៷"] = "7", ["៸"] = "8", ["៹"] = "9",
}

-- Transliterates Khmer `text` and returns the result normalised with toNFC.
-- @param text  string to transliterate
-- @param lang  language code; only used to find a script when `sc` is not given
-- @param sc    script code (optional); resolved through Module:scripts when given
-- NOTE(review): in this listing the bracketed parts of the code appear to have
-- been lost (table indexes such as the one after `char_type` below, which leaves
-- an unbalanced `]`, and pattern character classes, which leaves empty patterns
-- like the first gsub). The function cannot run as shown -- restore it from the
-- upstream module before use.
-- NOTE(review): vowel_found, base, cons_shifter and vowel_class are assigned
-- without `local`, so they are globals.
function export.tr(text, lang, sc)
	-- Resolve the script object, either from the language or from the given code.
	if not sc then
		sc = require("Module:languages").getByCode(lang):findBestScript(text)
	else
		sc = require("Module:scripts").getByCode(sc)
	end
	-- Pre-process the text with the script object's own normalisation methods.
	text = sc:fixDiscouragedSequences(text)
	text = sc:toFixedNFD(text)
	-- Table-valued replacement through sp_symbols (the pattern is empty in this listing).
	text = gsub(text, '', sp_symbols)
	-- The next four substitutions insert a separator character into the text.
	-- NOTE(review): presumably a zero-width space, matching the 'ZWS' type tested below -- confirm.
	text = gsub(text, '(.)្(.្.)', '%1​%2')
	text = gsub(text, '(្)()', '​%1%2')
	text = gsub(text, '()(្?)', '%1​%2')
	text = gsub(text, '(.៍)', '​%1')
	
	-- Each matched chunk is split into syllables, romanised, and substituted back into text.
	for word in mw.ustring.gmatch(text, '+') do
		local original_text = word
		local c, chartype, syl, curr_syl = {}, {}, {}, {}
		-- `progress` is the splitter state: none / initial / initial_combining / vowel / coda / coda_combining.
		local progress = 'none'

		-- Split the chunk into characters and classify them through char_type.
		for i = 1, len(word) do
			c = sub(word, i, i)
			chartype = char_type]
		end
		
		-- Walk the characters (one extra step flushes the last syllable), building
		-- curr_syl and appending finished syllables to syl.
		for i = 1, #c + 1 do
			local next_types = {}
			if i == #c + 1 or chartype == 'ZWS' then
				progress = 'none'
				table.insert(syl, table.concat(curr_syl, ""))
				curr_syl = {}
			elseif progress == 'none' then
				if chartype == 'consonant' then
					table.insert(curr_syl, c)
					progress = 'initial'
				else
					table.insert(syl, c)
				end
			elseif progress == 'initial' then
				if chartype == 'combining_sign' then
					table.insert(curr_syl, c)
					progress = 'initial_combining'
				elseif chartype == 'sign' or chartype == 'consonant_shift' then
					table.insert(curr_syl, c)
				elseif chartype == 'vowel_sign' then
					table.insert(curr_syl, c)
					progress = 'vowel'
				elseif chartype == 'terminating_vowel' then
					-- The sequence 'ាំង' is special-cased: the syllable is kept open instead of being closed.
					if c .. c .. (c or '') == 'ាំង' and (i == #c - 1 or (i > #c + 1 and chartype == 'consonant')) then
						table.insert(curr_syl, c)
						progress = 'vowel'
					else
						table.insert(curr_syl, c)
						table.insert(syl, table.concat(curr_syl, ""))
						curr_syl = {}
						progress = 'none'
					end
				elseif chartype == 'consonant' then
					-- Look ahead, collecting character types into next_types until a vowel-like
					-- type or a boundary type is met, to decide whether this consonant is a
					-- coda of the current syllable or the start of a new one.
					vowel_found = false
					local j, skipped = i, 0
					while not vowel_found do
						if not chartype or chartype == 'punctuation' or chartype == 'indep_vowel' or chartype == 'terminating_sign' or chartype == 'ZWS' then
							skipped = 1
							break
						elseif chartype == 'consonant' or chartype == 'combining_sign' or (chartype == 'sign' and c ~= '័') then
							table.insert(next_types, chartype)
						else
							vowel_found = true
						end
						j = j + 1
					end
					if skipped ~= 0 or match(table.concat(next_types, " "), 'consonant s?i?g?n? ?consonant') then
						table.insert(curr_syl, c)
						progress = 'coda'
					else
						table.insert(syl, table.concat(curr_syl, ""))
						curr_syl = {c}
						progress = 'initial'
					end
				else
					table.insert(syl, c)
					progress = 'none'
				end
			elseif progress == 'initial_combining' then
				if chartype == 'consonant' then
					table.insert(curr_syl, c)
					progress = 'initial'
				else
					table.insert(syl, c)
					progress = 'none'
				end
			elseif progress == 'vowel' then
				-- Same handling as the 'initial' state for terminating vowels and consonants.
				if chartype == 'vowel_sign' then
					table.insert(curr_syl, c)
				elseif chartype == 'terminating_vowel' then
					if c .. c .. (c or '') == 'ាំង' and (i == #c - 1 or (i > #c + 1 and chartype == 'consonant')) then
						table.insert(curr_syl, c)
						progress = 'vowel'
					else
						table.insert(curr_syl, c)
						table.insert(syl, table.concat(curr_syl, ""))
						curr_syl = {}
						progress = 'none'
					end
				elseif chartype == 'consonant' then
					vowel_found = false
					local j, skipped = i, 0
					while not vowel_found do
						if not chartype or chartype == 'punctuation' or chartype == 'indep_vowel' or chartype == 'terminating_sign' or chartype == 'ZWS' then
							skipped = 1
							break
						elseif chartype == 'consonant' or chartype == 'combining_sign' or (chartype == 'sign' and c ~= '័') then
							table.insert(next_types, chartype)
						else
							vowel_found = true
						end
						j = j + 1
					end
					if skipped ~= 0 or match(table.concat(next_types, " "), 'consonant s?i?g?n? ?consonant') then
						table.insert(curr_syl, c)
						progress = 'coda'
					else
						table.insert(syl, table.concat(curr_syl, ""))
						curr_syl = {c}
						progress = 'initial'
					end
				else
					table.insert(syl, c)
					progress = 'none'
				end
			elseif progress == 'coda' then
				if chartype == 'combining_sign' then
					table.insert(curr_syl, c)
					progress = 'coda_combining'
				elseif chartype == 'sign' or chartype == 'terminating_sign' then
					table.insert(curr_syl, c)
				else
					-- Anything else closes the current syllable.
					table.insert(syl, table.concat(curr_syl, ""))
					curr_syl = {}
					if chartype == 'consonant' then
						table.insert(curr_syl, c)
						progress = 'initial'
					else
						table.insert(syl, c)
						progress = 'none'
					end
				end
			elseif progress == 'coda_combining' then
				if chartype == 'consonant' then
					table.insert(curr_syl, c)
					progress = 'coda'
				else
					table.insert(syl, table.concat(curr_syl, ""))
					curr_syl = {}
					progress = 'none'
				end
			end
		end

		-- Romanise the collected syllables.
		for i = 1, #syl do
			-- A syllable containing '៍' is wrapped in <small><del>...</del></small>
			-- after a per-character cons_conv lookup, and the loop stops there.
			if match(syl, '៍') then
				syl = '<small><del>' .. gsub(syl, '.', function(consonant)
					if cons_conv then
						return cons_conv
					end end) .. '</del></small>'
				break
			end
			-- Drop a trailing '់'.
			syl = gsub(syl, '់$', '')
			
			-- Decompose the syllable into initials, consonant shifters, vowel, codas and an
			-- optional '៖', then rebuild it from the conversion tables.
			syl = gsub(syl, '^()្?(?)(?)(??)(?)(?៉?)្?(?)(៖?)$', function(initial_a, initial_b, cons_shifter_a, vowel, cons_shifter_b, coda_a, coda_b, optional_sign)
				-- With no shifter, vowel or coda and no '្' in the syllable, the second
				-- consonant is treated as the coda rather than as part of the initial.
				if cons_shifter_a .. cons_shifter_b .. vowel .. coda_a .. coda_b == '' and initial_b ~= '' and not match(syl, '្') then
					coda_a = initial_b
					initial_b = ''
				end
				base = initial_a
				if initial_b ~= '' and not match(initial_b, '') then
					base = initial_b
				end
				if vowel .. coda_a .. coda_b == 'ាំង' then
					vowel, coda_a, coda_b = 'ាំង', '', ''
				end
				optional_sign = gsub(optional_sign, '៖', 'ː')
				
				-- Choose the vowel class: from cons_conv when there is no shifter,
				-- otherwise '៉' forces 'a' and '៊' forces 'o'.
				cons_shifter = cons_shifter_a .. cons_shifter_b
				if cons_shifter == '' and cons_conv then
					vowel_class = cons_conv
				elseif cons_shifter == '៉' then
					vowel_class = 'a'
				elseif cons_shifter == '៊' then
					vowel_class = 'o'
				else
					-- Any other shifter combination: return the pieces unconverted.
					return initial_a .. initial_b .. cons_shifter .. vowel .. coda_a .. coda_b .. optional_sign
				end
				
				if digraph and (digraph or (cons_conv and cons_conv)) and vowel_conv then
					return digraph .. vowel_conv .. (digraph or cons_conv .. cons_conv) .. optional_sign
			
				elseif cons_conv and cons_conv and vowel_conv and cons_conv and cons_conv then
					return cons_conv .. cons_conv .. vowel_conv .. cons_conv .. cons_conv .. optional_sign
				end	end)
			
			-- NOTE(review): as shown this is a self-assignment; the indexes are missing.
			if syl == 'ៗ' and i > 1 then
				syl = syl
			end
		end
		-- Substitute the romanised chunk for the original one in the text.
		-- NOTE(review): original_text is passed to gsub as a pattern, not as plain text.
		word = table.concat(syl, "")
		text = gsub(text, original_text, word)
	end
	
	-- Table-valued replacement of single characters through indep_vowel.
	text = gsub(text, '.', indep_vowel)
	-- Replace a ' ៗ' by a repetition of the captured text that precedes it.
	text = gsub(text, '(*) ៗ', '%1 %1')
	
	return toNFC(text)
	-- To do: other signs
end	

return export