Modül:pi-alfabeçeviri

Modül belgelemesi
Bu belgeleme Modül:pi-alfabeçeviri/belge (düzenle | geçmiş) sayfasından yansıtılmaktadır. Arayüz düzenleyicilerinin deney yapabilmeleri için ayrıca Modül:pi-alfabeçeviri/deneme tahtası sayfası kullanılabilir.
Bu modül şu Lua modüllerini kullanıyor: Modül:Brah-alfabeçeviri, Modül:sa-alfabeçeviri, Modül:si-alfabeçeviri
Bu Lua modülü, şu şablonların çıktısını vermektedir:
{{pi-ad}}
{{pi-belirteç}}
{{pi-ön ad}}
-- Table of public functions; it is what this module returns to its callers.
local export = {}
-- Local alias for Scribunto's mw.ustring.gsub, used for the substitutions in this file.
local gsub = mw.ustring.gsub

-- Transliterate `text`, written in script `sc`, into Latin letters.
--
-- Parameters:
--   text    - string to transliterate.
--   lang    - language code; in this function it is only compared with 'sa',
--             and it is passed on unchanged to the delegated modules.
--   sc      - script code.  'Brah', 'Deva' and 'Sinh' are delegated to the
--             tr() function of other modules; 'Beng', 'Mymr', 'Lana', 'Khmr',
--             'Thai' and 'Laoo' are converted with the tables defined below.
--   options - optional table.  options.impl == 'yes' sets explicit = false and
--             options.impl == 'no' sets explicit = true; options.y (Lao only)
--             selects 'yaa' or 'yung' handling.
-- Returns the converted text, or nil when `sc` is none of the codes above.
--
-- `explicit` controls the final substitution: when it is true a consonant
-- without a vowel mark is emitted bare, otherwise 'a' is appended to it.
--
-- NOTE(review): in this copy the key expressions of the table constructors,
-- the table index expressions (e.g. `consonants..d`, `local val = tt`) and
-- several character classes inside pattern strings (e.g. the empty pattern in
-- dc) appear to be missing, so the function is not valid Lua as shown.
-- Restore them from the upstream module before deploying; nothing here tries
-- to guess the missing text.
function export.trwo(text, lang, sc, options)
	if (sc == 'Brah') then
		text = require('Modül:Brah-alfabeçeviri').tr(text, lang, sc)
	elseif (sc == 'Deva') then
		text = require('Modül:sa-alfabeçeviri').tr(text, lang, sc)
	elseif (sc == 'Sinh') then
		text = require('Modül:si-alfabeçeviri').tr(text, lang, sc)
	elseif sc == 'Beng' or sc == 'Mymr' or sc == 'Lana' or sc == 'Khmr'
		or sc == 'Thai' or sc == 'Laoo'
	then
		local u = mw.ustring.char
		local function dc(x) -- Use this to make marks legible.  The name 'dc' means 'drop carrier'.
			return gsub(x, "", "") -- These are the letter ka in the 9 supported Indic scripts.
		end
		local consonants = { -- And parts 1 of NFC-multipart independent vowels!
-- Bengali
			='k', ='kh', ='g', ='gh', ='ṅ',
			='c', ='ch', ='j', ='jh', ='ñ', 
			='ṭ', ='ṭh', ='ḍ', ='ḍh', ='ṇ', 
			='t', ='th', ='d', ='dh', ='n', 
			='p', ='ph', ='b', ='bh', ='m',
			='y', ='r', ='l', ='ḷ', -- xx='v',
			='ś', ='ṣ', ='s', ='h',
			='v', ='v',  = 'v',
-- Myanmar
			='k', ='kh', ='g', ='gh', ='ṅ',
			='c', ='ch', ='j', ='jh', ='ñ', 
			='ṭ', ='ṭh', ='ḍ', ='ḍh', ='ṇ', 
			='t', ='th', ='d', ='dh', ='n', 
			='p', ='ph', ='b', ='bh', ='m',
			='y', ='r', ='l', ='v', ='ḷ',
			='ś', ='ṣ', ='s', ='h',
		    ='ññ', ='ss',  = 'ʼ',
    -- Subscript consonants: 103B..103E, 105E..1060
			='y', ='r', ='v', ='h',
			='n', ='m', ='l',
    -- Mon Pali consonants
			='ṅ', ='jh',
    -- Shan (Pali) consonants - Excludes SHAN THA, MEDIAL WA, SIGN SHAW
			='k', ='kh', ='g', ='gh', -- ='ṅ',
			='c', ='ch', ='j', ='jh', ='ñ', 
			='ṭ', ='ṭh', ='ḍ', ='ḍh', ='ṇ', 
--			='t', ='th',
			='d', ='dh', ='n', 
--			='p',
			='ph', ='b', ='bh', -- ='m',
--			='y', ='r', ='l', ='v',
			='ḷ',
--			='ś', ='ṣ', ='s',
			='h',
--		    ='ññ', ='ss',
			 = 'ʼ',
	-- Other first parts of independent vowels.
			='i', ='u',
    -- Lanna
			='k', ='kh', ='g', ='gh', ='ṅ',
			='c', ='ch', ='j', ='jh', ='ñ', 
			='ṭ', ='ṭh', ='ḍ', ='ḍh', ='ṇ', 
			='t', ='th', ='d', ='dh', ='n', 
			='p', ='p', ='ph', ='b', ='bh', ='m',
			='y', ='r', ='l', ='v', ='ḷ',
			='ś', ='ṣ', ='s', ='h',
			 = 'ss',  = 'ʼ',  = 'ū',
    -- Subscript consonants: 1A55, 1A56, 1A5B to 1A5E
			='r', ='l', ='ṭh', ='m',
			='p', ='s', 
-- Khmer
			='k', ='kh', ='g', ='gh', ='ṅ',
			='c', ='ch', ='j', ='jh', ='ñ', 
			='ṭ', ='ṭh', ='ḍ', ='ḍh', ='ṇ', 
			='t', ='th', ='d', ='dh', ='n', 
			='p', ='ph', ='b', ='bh', ='m',
			='y', ='r', ='l', ='v', ='ḷ',
			='ś', ='ṣ', ='s', ='h',  = 'ʼ',
-- Thai
			='k', ='kh', ='g', ='gh', ='ṅ',
			='c', ='ch', ='j', ='jh', ='ñ', 
			='ṭ', ='ṭh', ='ḍ', ='ḍh', ='ṇ', 
			='t', ='th', ='d', ='dh', ='n', 
			='p', ='ph', ='b', ='bh', ='m',
			='y', ='r', ='l', ='v', ='ḷ',
			='ś', ='ṣ', ='s', ='h',  = '', --  = 'ʼ',

-- Lao
			='k', ='kh', ='g', ='gh', ='ṅ',
			='c', ='ch', ='j', ='jh', ='ñ', 
			='ṭ', ='ṭh', ='ḍ', ='ḍh', ='ṇ', 
			='t', ='th', ='d', ='dh', ='n', 
			='p', ='ph', ='b', ='bh', ='m',
			='y', ='y', ='r', ='l', ='v', ='ḷ',
			='ś', ='ṣ', ='s', ='h',  = '', --  = 'ʼ',
			='d',
		}

		local diacritics = {
-- Bengali - only NFC needed
			='ā', ='i', ='ī', ='u', ='ū', ='ṛ', ='ṝ', 
			='ḷ', ='ḹ', ='e', ='ai', ='o', ='au',  ='',
-- Myanmar
			='ā', ='ā', ='i', ='ī', ='u', ='ū',
			='ṛ', ='ṝ',  ='ḷ', ='ḹ',
			='e', ='ai',
		-- The following are multicharacter!
			='o', ='au',  ='', ='o', ='au',
			='',   ='',
	-- Mon - treatment of Sanskrit au is to be determined!
			='ī',
	-- Shan
			='ā', ='o',
-- Lanna
			='ā', ='ā', ='i', ='ī', ='u', ='ū',
			='ṛ', ='ḷ', -- Syllabic consonants may be very wrong!
			='e', ='ai', ='o',
    -- The next two rows are multicharacter!
			='o', ='au', ='au',  = 'au',
			='o', ='au', ='au',  = 'au',
			='', ='', ='',
-- Khmer
			='ā', ='i', ='ī', ='u', ='ū',
			='ṛ', ='ṝ', ='ḷ', ='ḹ', -- Multipart
			='e', ='ai', ='o', ='au',  ='', ='',
-- Thai
			='a', ='ā', ='i', ='ī', ='u', ='ū',
			='ṛ', ='ṝ', ='ḷ', ='ḹ', -- Multipart
			='e', ='ai', ='o', ='au',  ='', ='', ='a',
			='iṃ', -- Induced by jackbooted I/O
-- Lao
			='a', ='ā', ='i', ='ī', ='u', ='ū',
--			='ṛ', ='ṝ', ='ḷ', ='ḹ', -- Multipart
			='e', ='ai', ='o', ='au',  ='', ='',
			='a',
			='iṃ', -- Induced by jackbooted I/O
-- Results of subscripts - for 2nd level special subscripts.
			='ṭ', ='n', ='p', ='m', 
			='y', ='r', ='l', ='w', ='s', ='h',
		}

		local tt = {
	-- Bengali independent vowels
			='a', ='ā', ='i', ='ī', ='u', ='ū', ='ṛ', ='ṝ',
			='ḷ', ='ḹ', ='e', ='ai', ='o', ='au', 
	-- chandrabindu, anusvara, visarga & avagraha
			='m̐', ='ṃ', ='ḥ', ='’',
	--numerals
			='0', ='1', ='2', ='3', ='4', ='5', ='6', ='7', ='8', ='9',
	-- Myanmar independent vowels
			='a', ='ā', ='i', ='ī', ='u', ='ū', ='ṛ', ='ṝ',
			='ḷ', ='ḹ', ='e', ='ai', ='o', ='au', -- 2 of these are multi-character keys!
		-- Mon
			 = 'ī',  = 'ū',  = 'e',
		-- Shan
			 = 'a',  = 'ā',  = 'i',  = 'ī',  = 'u',  = 'ū',
			 = 'e',  = 'o',  = 'ai',  = 'au',
	-- chandrabindu, anusvara, visarga & avagraha
--			='m̐', 
			='ṃ', ='ḥ',
--  		='’',
	--numerals
			='0', ='1', ='2', ='3', ='4', ='5', ='6', ='7', ='8', ='9',
	-- Lanna independent vowels
			='a', ='ā', ='i', ='ī', ='u', ='ū', ='ṛ', -- ='ṝ',
			='ḷ',
--			='ḹ',
			='e', ='o',  = 'o', ='o',  = 'au',  = 'ai',
--			='ai', ='au', 
	-- chandrabindu, anusvara, visarga & avagraha
--			='m̐',
			='ṃ', ='ḥ', ='ṅ',
--			='’',
	--numerals
			='0', ='1', ='2', ='3', ='4', ='5', ='6', ='7', ='8', ='9',
			='0', ='1', ='2', ='3', ='4', ='5', ='6', ='7', ='8', ='9',
	-- Khmer independent vowels
			='a', ='ā', ='i', ='ī', ='u', ='ū', ='ṛ', ='ṝ',
			='ḷ', ='ḹ', ='e', ='ai', ='o', ='o', ='au', 
	-- chandrabindu, anusvara, visarga & avagraha
--			='m̐',
			='ṃ', ='ḥ',
--			='’',
	--numerals
			='0', ='1', ='2', ='3', ='4', ='5', ='6', ='7', ='8', ='9',
			='0', ='1', ='2', ='3', ='4', ='5', ='6', ='7', ='8', ='9',
-- Thai miscellanea
	-- independent vowels
			='ṛ', ='ṝ', ='ḷ', ='ḹ',
	-- chandrabindu, anusvara, visarga & avagraha
--			='m̐',
			='ṃ', ='ḥ',
--			='’',
	--numerals
			='0', ='1', ='2', ='3', ='4', ='5', ='6', ='7', ='8', ='9',
-- Lao miscellanea
	-- chandrabindu, anusvara, visarga & avagraha
--			='m̐',
			='ṃ', ='ḥ',
--			='’',
	--numerals
			='0', ='1', ='2', ='3', ='4', ='5', ='6', ='7', ='8', ='9',
-- All scripts
	--punctuation        
    		='.', ='.', ='.', ='.', ='.', ='.', --double danda
			='.', ='.', ='.', ='.', ='.', ='.', ='.', --danda
    --Vedic extensions
    		='x', ='f',
    --Om
--  		='oṃ',
    --reconstructed
    		 = '',
		}
    -- Also handle subscript consonants encoded as marks.
		local S =	dc('ကျကြကွကှကၞကၟကၠ').. -- Myanmar subscripts
					dc('ᨠᩕᨠᩖᨠᩛᨠᩜᨠᩝᨠᩞ')     -- Lanna subscripts
    -- consonants and part 1 of NFC-multi-part independent vowels.
		local C =	'[কখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলवळশষসহৰৱ'.. -- Bengali
					'ကခဂဃငစဆဇဈဉဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝဠၐၑသဟညဿအ'.. -- Myanmar Part 1
  				'ၚၛၵၶၷꧠၸꧡၹꧢၺꩦꩧꩨꩩꧣၻꩪၼၽၿꧤꩮႁဢဣဥ'.. -- Myanmar Part 2 (Mon and Shan)
				'ᨠᨡᨣᨥᨦᨧᨨᨩᨫᨬᨭᨮᨯᨰᨱᨲᨳᨴᨵᨶᨷᨸᨹᨻᨽᨾᨿᩁᩃᩅᩊᩆᩇᩈᩉᩔᩋᩐ'.. -- Lanna
				'កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឡឝសឞហអ'.. -- Khmer
				'กขคฆงจฉชฌญฏฐฑฒณตถทธนปผพภมยรลวศษสหฬอฤฦ'.. -- Thai.
				'ກຂຄຆງຈຉຊຌຎຏຐຑຒຓຕຖທຘນປຜພຠມຍຢຣລວຨຩສຫຬອ'.. -- Lao
				S..']?' -- And allow Bengali nukta or necessary ZWJ.

-- One character diacritics 
		local dia = 
			dc('[কাকিকীকুকূকৃকৄকৢকৣকেকৈকোকৌক্'.. -- Bengali
				'ကာကါကိကီကုကူကၖကၗကၘကၙကေကဲက္က်ကဳကႃ'.. -- Myanmar
				'ᨠᩣᨠᩤᨠᩥᨠᩦᨠᩩᨠᩪᩂᩄᨠᩮᨠᩱᨠᩰᨠ᩠ᨠ᩺ᨠ᩼'.. -- Lanna
				'ᨠᩫᩢ'..           -- Lanna diacritics in second or third place.
				'កាកិកីកុកូកេកៃកោកៅក្ក៑'.. -- Khmer
				'กักุิกูีเโไาๅฤฦกฺกึก์'.. -- Thai
				'ກັກຸິກູີເກົາໂໄກ຺ກຶກ໌'.. -- Lao
			']')
		-- diax: one capture group matching up to four optional marks from dia.
		local diax = {}
		local ti = table.insert;
		ti(diax, '(')
		ti(diax, dia) ti(diax, '?')
		ti(diax, dia) ti(diax, '?')
		ti(diax, dia) ti(diax, '?')
		ti(diax, dia) ti(diax, '?)')
		diax = table.concat(diax)
		-- explicit stays nil unless options.impl is 'yes' or 'no'; the
		-- Thai/Lao branch below may still set it from the text itself.
		local explicit = nil
		if options and options.impl then
			if options.impl == 'yes' then
				explicit = false
			elseif options.impl == 'no' then
				explicit = true
			end
		end
		if sc == 'Khmr' then
			-- Pattern variables are declared local so they do not leak into
			-- the global environment.
			local dep_liquid = '('..C..dc(')(ក្)')..'()'; -- Avoid gsub in gsub bug.
			text = gsub(
				text, dep_liquid,
				function(c, j, d) return consonants..d end
			)
-- Regularise robat
			local robat_fix3 = '('..C..dc('ក្')..C..dc('ក្')..C..')('..dc('ក៌)')
			local robat_fix2 = '('..C..dc('ក្')..C..')('..dc('ក៌)')
			local robat_fix1 = '('..C..')('..dc('ក៌)')
			local derobatted = 'រ្'..'%1'
			text = gsub(text, robat_fix3, derobatted)
			text = gsub(text, robat_fix2, derobatted)
			text = gsub(text, robat_fix1, derobatted)
		elseif sc == 'Thai' or sc == 'Laoo' then
			local match = mw.ustring.match
			local v1 = dc('')
			local v2 = dc('')
			local va = dc('')
			if lang == 'sa' then
				if match(text, v1) then explicit = true end -- SARA A is visarga!
			else
				if match(text, v2) then explicit = true end
			end
			local yLao, nuktaed
			if sc == 'Laoo' then
				-- An explicit options.y wins; otherwise yLao is 'yaa' only
				-- when the text contains the letter tested below.
				if options and options.y then
					if options.y == 'yaa' or options.y == 'ຢ' then
						yLao = 'yaa'
					elseif options.y == 'yung' or options.y == 'ຍ' then
						yLao = 'yung'
					end
				end
				if not yLao then
					if match(text, 'ຢ') then
						yLao = 'yaa'
					else
						yLao = 'yung'
					end
				end
				if explicit then
					nuktaed = match(text, u(0xeba))
				else
					-- NOTE(review): both branches are identical in this copy;
					-- presumably they differed before the text was damaged.
					local str
					if lang == 'sa' then
						str = dc('ກ຺')
					else
						str = dc('ກ຺')
					end
					nuktaed = match(text, str)
				end
				if nuktaed then -- Convert to extended alphabet
					local rs = {
							 = "ຆ",  = "ຉ",  = "ຌ",  = "ຎ",
							 = "ຏ",  = "ຐ",  = "ຑ",  = "ຓ",
							 = "ຘ",  = "ຠ",  = "ຬ",  = dc("ໍກ"),
						}
					text = gsub(text, ''..u(0xeba)..'?', rs)
					explicit = true
				end
				if yLao == 'yaa' then text = gsub(text, 'ຍ', 'ຎ') end
			end
			if match(text, va) then explicit = false end
			if explicit == nil then
-- It looks as though gsub (from dc()) and match interfere, so need local variables.
				local ngf1=dc('$')
				local ngf2=dc(' ')
				if (match(text, ngf1) or match(text, ngf2)) then explicit = true end
			end
			if not explicit then
				diax = gsub(diax, '', 'า') -- Treat as ฤ and ฦ consonants.
				local search = '()('..C..')()'
				local longswap = function(p, c, v) return c..v..p end 
				text = gsub(text, search, longswap)
				text = gsub(text, search, longswap)
			end
			if false and sc == 'Laoo' then -- Keep around for future debugging
				local nr = 'F'
				if nuktaed then nr = 'T' end
				if yLao then
					nr = nr .. yLao
				else
					nr = nr .. 'y?'
				end
				nr = nr .. '-'
				if not options or not options.impl then
					if explicit == nil then
						text = 'GN-'..nr..text
					elseif explicit == false then
						text = 'GI-'..nr..text
					else
						text = 'GE-'..nr..text
					end
				elseif options.impl == 'both' then
					if explicit == nil then
						text = 'N-'..nr..text
					elseif explicit == false then
						text = 'I-'..nr..text
					else
						text = 'E-'..nr..text
					end
				elseif options.impl == 'yes' then
					text = 'Y-'..nr..text
				elseif options.impl == 'no' then
					text = 'N-'..nr..text
				elseif options.impl then
					text = options.impl .. nr .. text
				end
			end
			local pair = '()('..C..')'
			text = gsub(text, pair, '%2%1')
			if explicit and lang ~= 'sa' then -- SARA A is a vowel.
				diax = '([ะະ'..string.sub(diax,3)
			end
			if explicit and sc == 'Laoo' then -- Clean up clusters
				local ass = {
						 = "ຄຄ",  = "ຈຈ",  = "ຊຊ", 
						 = "ຕຕ",  = "ຕຖ",  = "ທທ",  = "ສສ",
						 = "ຎຈ",  = "ຎຉ",  = "ຎຊ",
						 = "ນຈ",  = "ຎຉ",  = "ຎຊ",  = "ຎຎ",
						 = "ປປ",  = "ປຜ",  = "ພພ",  = "ປ",
					}
				text = gsub(text, '?', ass)
			end
		end
		if sc == 'Lana' then
-- Disambiguate lanna combining loop below.
			local cl_search = dc('ᨠᩛ')
			text = gsub(text, cl_search,
				{='ᨲ᩠ᨳ', ='ᨻ᩠ᨻ', ='ᨾ᩠ᨻ'})  
		end
		if sc == 'Mymr' or sc == 'Lana' then
			local fn = function(c, d) return consonants..d end
			local search = '('..C..')()'
			text = gsub(text, search, fn);
			text = gsub(text, search, fn); -- and again
		end
		if sc == 'Beng' then
-- Aberrant conversion:
			text = gsub(text, u(0x09b2, 0x9cd, 0x9bc, 0x9cd, 0x9b9), 'ḷহ') -- raw
			text = gsub(text, u(0x09b2, 0x9bc, 0x9cd, 0x9cd, 0x9b9), 'ḷহ') -- NFC
-- Proper conversion:
			text = gsub(text, u(0x09b2, 0x9cd, 0x9bc, 0x9b9), 'ḷহ') -- not NFC
--			text = gsub(text, u(0x09b2, 0x9bc, 0x9cd, 0x9b9), 'ḷহ') -- NFC
		end
		-- Main pass: each optional consonant plus its run of marks is replaced
		-- by the value the callback returns.
		-- NOTE(review): `tt`, `consonants` and `diacritics` are used here
		-- without an index; presumably they were table lookups - confirm
		-- against the upstream module.
		text = gsub(text, '('..C..')'..diax,
			function(c, d)
				local val = tt
				if val then return val end
				local cn = consonants
				if not cn then return 'X('..c..')' end
				if d ~= "" then        
					return cn .. (diacritics or 'NIL('..d..')')
				elseif explicit then
					return cn
				else
					return cn .. 'a'
				end
			end
		)
		-- Any remaining single character is looked up in tt.
		text = gsub(text, '.', tt)
-- Bodge alphabetic Thai and Lao anusvara
		if explicit and text then
			text = gsub(text, 'ṅ$', 'ṃ')
			text = gsub(text, 'ṅ()', 'ṃ%1')
		end
	else	
		text = nil -- Not ready for use yet!
	end
	return text
end

-- Transliterate `text` using the default (empty) option set.
-- Thin wrapper: forwards text, lang and sc to export.trwo together with a
-- fresh empty options table and returns whatever export.trwo returns.
function export.tr(text, lang, sc)
	local default_options = {}
	return export.trwo(text, lang, sc, default_options)
end

return export
Modül:pi-alfabeçeviri

Enciclo

Wikious

Sapientia

Scientia

Boobota

Anandapedia

Sagapedia

Wikithot