-- Bu Lua modülü, şu şablonların çıktısını vermektedir:
-- {{pi-ad}}
-- {{pi-belirteç}}
-- {{pi-ön ad}}
-- Module table; the public functions are attached to it below and it is returned at the end of the file.
local export = {}
-- Local alias for mw.ustring.gsub, used by the substitutions in this module.
local gsub = mw.ustring.gsub
--- Transliterate `text` into Latin script, with options.
-- Brah, Deva and Sinh text is delegated to other transliteration modules.
-- Beng, Mymr, Lana, Khmr, Thai and Laoo text is handled here by lookup
-- tables plus script-specific preprocessing. Any other script yields nil.
-- @param text     the string to transliterate
-- @param lang     language code; 'sa' changes some Thai/Lao decisions
-- @param sc       script code (e.g. 'Beng', 'Mymr', 'Lana', 'Khmr', 'Thai', 'Laoo')
-- @param options  optional table. `impl` = 'yes' forces implicit vowels
--                 (explicit = false), 'no' forces explicit vowels
--                 (explicit = true). `y` ('yaa'/'ຢ' or 'yung'/'ຍ') selects
--                 the Lao letter convention for y.
-- @return the transliterated string, or nil for unsupported scripts
-- NOTE(review): several table keys, index expressions and bracketed pattern
-- classes in this function look truncated (e.g. entries written `='k'`);
-- verify them against the authoritative copy of the module before deploying.
function export.trwo(text, lang, sc, options)
    if (sc == 'Brah') then
        text = require('Modül:Brah-alfabeçeviri').tr(text, lang, sc)
    elseif (sc == 'Deva') then
        text = require('Modül:sa-alfabeçeviri').tr(text, lang, sc)
    elseif (sc == 'Sinh') then
        text = require('Modül:si-alfabeçeviri').tr(text, lang, sc)
    elseif sc == 'Beng' or sc == 'Mymr' or sc == 'Lana' or sc == 'Khmr'
        or sc == 'Thai' or sc == 'Laoo'
    then
        local u = mw.ustring.char
        local function dc(x) -- Use this to make marks legible. The name 'dc' means 'drop carrier'.
            return gsub(x, "", "") -- These are the letter ka in the 9 supported Indic scripts.
        end
        -- Consonant letters (and first parts of multipart vowels) mapped to their romanisation.
        local consonants = { -- And parts 1 of NFC-multipart independent vowels!
            -- Bengali
            ='k', ='kh', ='g', ='gh', ='ṅ',
            ='c', ='ch', ='j', ='jh', ='ñ',
            ='ṭ', ='ṭh', ='ḍ', ='ḍh', ='ṇ',
            ='t', ='th', ='d', ='dh', ='n',
            ='p', ='ph', ='b', ='bh', ='m',
            ='y', ='r', ='l', ='ḷ', -- xx='v',
            ='ś', ='ṣ', ='s', ='h',
            ='v', ='v', = 'v',
            -- Myanmar
            ='k', ='kh', ='g', ='gh', ='ṅ',
            ='c', ='ch', ='j', ='jh', ='ñ',
            ='ṭ', ='ṭh', ='ḍ', ='ḍh', ='ṇ',
            ='t', ='th', ='d', ='dh', ='n',
            ='p', ='ph', ='b', ='bh', ='m',
            ='y', ='r', ='l', ='v', ='ḷ',
            ='ś', ='ṣ', ='s', ='h',
            ='ññ', ='ss', = 'ʼ',
            -- Subscript consonants: 103B..103E, 105E..1060
            ='y', ='r', ='v', ='h',
            ='n', ='m', ='l',
            -- Mon Pali consonants
            ='ṅ', ='jh',
            -- Shan (Pali) consonants - Excludes SHAN THA, MEDIAL WA, SIGN SHAW
            ='k', ='kh', ='g', ='gh', -- ='ṅ',
            ='c', ='ch', ='j', ='jh', ='ñ',
            ='ṭ', ='ṭh', ='ḍ', ='ḍh', ='ṇ',
            -- ='t', ='th',
            ='d', ='dh', ='n',
            -- ='p',
            ='ph', ='b', ='bh', -- ='m',
            -- ='y', ='r', ='l', ='v',
            ='ḷ',
            -- ='ś', ='ṣ', ='s',
            ='h',
            -- ='ññ', ='ss',
            = 'ʼ',
            -- Other first parts of independent vowels.
            ='i', ='u',
            -- Lanna
            ='k', ='kh', ='g', ='gh', ='ṅ',
            ='c', ='ch', ='j', ='jh', ='ñ',
            ='ṭ', ='ṭh', ='ḍ', ='ḍh', ='ṇ',
            ='t', ='th', ='d', ='dh', ='n',
            ='p', ='p', ='ph', ='b', ='bh', ='m',
            ='y', ='r', ='l', ='v', ='ḷ',
            ='ś', ='ṣ', ='s', ='h',
            = 'ss', = 'ʼ', = 'ū',
            -- Subscript consonants: 1A55, 1A56, 1A5B to 1A5E
            ='r', ='l', ='ṭh', ='m',
            ='p', ='s',
            -- Khmer
            ='k', ='kh', ='g', ='gh', ='ṅ',
            ='c', ='ch', ='j', ='jh', ='ñ',
            ='ṭ', ='ṭh', ='ḍ', ='ḍh', ='ṇ',
            ='t', ='th', ='d', ='dh', ='n',
            ='p', ='ph', ='b', ='bh', ='m',
            ='y', ='r', ='l', ='v', ='ḷ',
            ='ś', ='ṣ', ='s', ='h', = 'ʼ',
            -- Thai
            ='k', ='kh', ='g', ='gh', ='ṅ',
            ='c', ='ch', ='j', ='jh', ='ñ',
            ='ṭ', ='ṭh', ='ḍ', ='ḍh', ='ṇ',
            ='t', ='th', ='d', ='dh', ='n',
            ='p', ='ph', ='b', ='bh', ='m',
            ='y', ='r', ='l', ='v', ='ḷ',
            ='ś', ='ṣ', ='s', ='h', = '', -- = 'ʼ',
            -- Lao
            ='k', ='kh', ='g', ='gh', ='ṅ',
            ='c', ='ch', ='j', ='jh', ='ñ',
            ='ṭ', ='ṭh', ='ḍ', ='ḍh', ='ṇ',
            ='t', ='th', ='d', ='dh', ='n',
            ='p', ='ph', ='b', ='bh', ='m',
            ='y', ='y', ='r', ='l', ='v', ='ḷ',
            ='ś', ='ṣ', ='s', ='h', = '', -- = 'ʼ',
            ='d',
        }
        -- Dependent marks (vowel signs, viramas, subscripts) mapped to their romanisation.
        local diacritics = {
            -- Bengali - only NFC needed
            ='ā', ='i', ='ī', ='u', ='ū', ='ṛ', ='ṝ',
            ='ḷ', ='ḹ', ='e', ='ai', ='o', ='au', ='',
            -- Myanmar
            ='ā', ='ā', ='i', ='ī', ='u', ='ū',
            ='ṛ', ='ṝ', ='ḷ', ='ḹ',
            ='e', ='ai',
            -- The following are multicharacter!
            ='o', ='au', ='', ='o', ='au',
            ='', ='',
            -- Mon - treatment of Sanskrit au is to be determined!
            ='ī',
            -- Shan
            ='ā', ='o',
            -- Lanna
            ='ā', ='ā', ='i', ='ī', ='u', ='ū',
            ='ṛ', ='ḷ', -- Syllabic consonants may be very wrong!
            ='e', ='ai', ='o',
            -- The next two rows are multicharacter!
            ='o', ='au', ='au', = 'au',
            ='o', ='au', ='au', = 'au',
            ='', ='', ='',
            -- Khmer
            ='ā', ='i', ='ī', ='u', ='ū',
            ='ṛ', ='ṝ', ='ḷ', ='ḹ', -- Multipart
            ='e', ='ai', ='o', ='au', ='', ='',
            -- Thai
            ='a', ='ā', ='i', ='ī', ='u', ='ū',
            ='ṛ', ='ṝ', ='ḷ', ='ḹ', -- Multipart
            ='e', ='ai', ='o', ='au', ='', ='', ='a',
            ='iṃ', -- Induced by jackbooted I/O
            -- Lao
            ='a', ='ā', ='i', ='ī', ='u', ='ū',
            -- ='ṛ', ='ṝ', ='ḷ', ='ḹ', -- Multipart
            ='e', ='ai', ='o', ='au', ='', ='',
            ='a',
            ='iṃ', -- Induced by jackbooted I/O
            -- Results of subscripts - for 2nd level special subscripts.
            ='ṭ', ='n', ='p', ='m',
            ='y', ='r', ='l', ='w', ='s', ='h',
        }
        -- Whole-unit replacements: independent vowels, signs, numerals and punctuation.
        local tt = {
            -- Bengali independent vowels
            ='a', ='ā', ='i', ='ī', ='u', ='ū', ='ṛ', ='ṝ',
            ='ḷ', ='ḹ', ='e', ='ai', ='o', ='au',
            -- chandrabindu, anusvara, visarga & avagraha
            ='m̐', ='ṃ', ='ḥ', ='’',
            --numerals
            ='0', ='1', ='2', ='3', ='4', ='5', ='6', ='7', ='8', ='9',
            -- Myanmar independent vowels
            ='a', ='ā', ='i', ='ī', ='u', ='ū', ='ṛ', ='ṝ',
            ='ḷ', ='ḹ', ='e', ='ai', ='o', ='au', -- 2 of these are multi-character keys!
            -- Mon
            = 'ī', = 'ū', = 'e',
            -- Shan
            = 'a', = 'ā', = 'i', = 'ī', = 'u', = 'ū',
            = 'e', = 'o', = 'ai', = 'au',
            -- chandrabindu, anusvara, visarga & avagraha
            -- ='m̐',
            ='ṃ', ='ḥ',
            -- ='’',
            --numerals
            ='0', ='1', ='2', ='3', ='4', ='5', ='6', ='7', ='8', ='9',
            -- Lanna independent vowels
            ='a', ='ā', ='i', ='ī', ='u', ='ū', ='ṛ', -- ='ṝ',
            ='ḷ',
            -- ='ḹ',
            ='e', ='o', = 'o', ='o', = 'au', = 'ai',
            -- ='ai', ='au',
            -- chandrabindu, anusvara, visarga & avagraha
            -- ='m̐',
            ='ṃ', ='ḥ', ='ṅ',
            -- ='’',
            --numerals
            ='0', ='1', ='2', ='3', ='4', ='5', ='6', ='7', ='8', ='9',
            ='0', ='1', ='2', ='3', ='4', ='5', ='6', ='7', ='8', ='9',
            -- Khmer independent vowels
            ='a', ='ā', ='i', ='ī', ='u', ='ū', ='ṛ', ='ṝ',
            ='ḷ', ='ḹ', ='e', ='ai', ='o', ='o', ='au',
            -- chandrabindu, anusvara, visarga & avagraha
            -- ='m̐',
            ='ṃ', ='ḥ',
            -- ='’',
            --numerals
            ='0', ='1', ='2', ='3', ='4', ='5', ='6', ='7', ='8', ='9',
            ='0', ='1', ='2', ='3', ='4', ='5', ='6', ='7', ='8', ='9',
            -- Thai miscellanea
            -- independent vowels
            ='ṛ', ='ṝ', ='ḷ', ='ḹ',
            -- chandrabindu, anusvara, visarga & avagraha
            -- ='m̐',
            ='ṃ', ='ḥ',
            -- ='’',
            --numerals
            ='0', ='1', ='2', ='3', ='4', ='5', ='6', ='7', ='8', ='9',
            -- Lao miscellanea
            -- chandrabindu, anusvara, visarga & avagraha
            -- ='m̐',
            ='ṃ', ='ḥ',
            -- ='’',
            --numerals
            ='0', ='1', ='2', ='3', ='4', ='5', ='6', ='7', ='8', ='9',
            -- All scripts
            --punctuation
            ='.', ='.', ='.', ='.', ='.', ='.', --double danda
            ='.', ='.', ='.', ='.', ='.', ='.', ='.', --danda
            --Vedic extensions
            ='x', ='f',
            --Om
            -- ='oṃ',
            --reconstructed
            = '',
        }
        -- Also handle subscript consonants encoded as marks.
        local S = dc('ကျကြကွကှကၞကၟကၠ').. -- Myanmar subscripts
            dc('ᨠᩕᨠᩖᨠᩛᨠᩜᨠᩝᨠᩞ') -- Lanna subscripts
        -- consonants and part 1 of NFC-multi-part independent vowels.
        local C = '[কখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলवळশষসহৰৱ'.. -- Bengali
            'ကခဂဃငစဆဇဈဉဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝဠၐၑသဟညဿအ'.. -- Myanmar Part 1
            'ၚၛၵၶၷꧠၸꧡၹꧢၺꩦꩧꩨꩩꧣၻꩪၼၽၿꧤꩮႁဢဣဥ'.. -- Myanmar Part 2 (Mon and Shan)
            'ᨠᨡᨣᨥᨦᨧᨨᨩᨫᨬᨭᨮᨯᨰᨱᨲᨳᨴᨵᨶᨷᨸᨹᨻᨽᨾᨿᩁᩃᩅᩊᩆᩇᩈᩉᩔᩋᩐ'.. -- Lanna
            'កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឡឝសឞហអ'.. -- Khmer
            'กขคฆงจฉชฌญฏฐฑฒณตถทธนปผพภมยรลวศษสหฬอฤฦ'.. -- Thai.
            'ກຂຄຆງຈຉຊຌຎຏຐຑຒຓຕຖທຘນປຜພຠມຍຢຣລວຨຩສຫຬອ'.. -- Lao
            S..']?' -- And allow Bengali nukta or necessary ZWJ.
        -- One character diacritics
        local dia =
            dc('[কাকিকীকুকূকৃকৄকৢকৣকেকৈকোকৌক্'.. -- Bengali
            'ကာကါကိကီကုကူကၖကၗကၘကၙကေကဲက္က်ကဳကႃ'.. -- Myanmar
            'ᨠᩣᨠᩤᨠᩥᨠᩦᨠᩩᨠᩪᩂᩄᨠᩮᨠᩱᨠᩰᨠ᩠ᨠ᩺ᨠ᩼'.. -- Lanna
            'ᨠᩫᩢ'.. -- Lanna diacritics in second or third place.
            'កាកិកីកុកូកេកៃកោកៅក្ក៑'.. -- Khmer
            'กักุิกูีเโไาๅฤฦกฺกึก์'.. -- Thai
            'ກັກຸິກູີເກົາໂໄກ຺ກຶກ໌'.. -- Lao
            ']')
        -- Build a capture matching up to four consecutive optional diacritics.
        local diax = {}
        local ti = table.insert;
        ti(diax, '(')
        ti(diax, dia) ti(diax, '?')
        ti(diax, dia) ti(diax, '?')
        ti(diax, dia) ti(diax, '?')
        ti(diax, dia) ti(diax, '?)')
        diax = table.concat(diax)
        -- Tri-state flag: nil = undecided, true = vowels are written explicitly
        -- (no implicit 'a' is added), false = bare consonants get an implicit 'a'.
        local explicit = nil
        if options and options.impl then
            if options.impl == 'yes' then
                explicit = false
            elseif options.impl == 'no' then
                explicit = true
            end
        end
        if sc == 'Khmr' then
            local dep_liquid = '('..C..dc(')(ក្)')..'()'; -- Avoid gsub in gsub bug.
            text = gsub(
                text, dep_liquid,
                function(c, j, d) return consonants..d end
            )
            -- Regularise robat
            local robat_fix3 = '('..C..dc('ក្')..C..dc('ក្')..C..')('..dc('ក៌)')
            local robat_fix2 = '('..C..dc('ក្')..C..')('..dc('ក៌)')
            local robat_fix1 = '('..C..')('..dc('ក៌)')
            local derobatted = 'រ្'..'%1'
            -- Longest cluster first, so a shorter pattern cannot split a longer cluster.
            text = gsub(text, robat_fix3, derobatted)
            text = gsub(text, robat_fix2, derobatted)
            text = gsub(text, robat_fix1, derobatted)
        elseif sc == 'Thai' or sc == 'Laoo' then
            local match = mw.ustring.match
            local v1 = dc('')
            local v2 = dc('')
            local va = dc('')
            if lang == 'sa' then
                if match(text, v1) then explicit = true end -- SARA A is visarga!
            else
                if match(text, v2) then explicit = true end
            end
            local yLao, nuktaed
            if sc == 'Laoo' then
                -- Honour an explicit request for the y convention, else infer it from the text.
                if options and options.y then
                    if options.y == 'yaa' or options.y == 'ຢ' then
                        yLao = 'yaa'
                    elseif options.y == 'yung' or options.y == 'ຍ' then
                        yLao = 'yung'
                    end
                end
                if not yLao then
                    if match(text, 'ຢ') then
                        yLao = 'yaa'
                    else
                        yLao = 'yung'
                    end
                end
                if explicit then
                    nuktaed = match(text, u(0xeba))
                else
                    local str
                    if lang == 'sa' then
                        str = dc('ກ຺')
                    else
                        str = dc('ກ຺')
                    end
                    nuktaed = match(text, str)
                end
                if nuktaed then -- Convert to extended alphabet
                    local rs = {
                        = "ຆ", = "ຉ", = "ຌ", = "ຎ",
                        = "ຏ", = "ຐ", = "ຑ", = "ຓ",
                        = "ຘ", = "ຠ", = "ຬ", = dc("ໍກ"),
                    }
                    text = gsub(text, ''..u(0xeba)..'?', rs)
                    explicit = true
                end
                if yLao == 'yaa' then text = gsub(text, 'ຍ', 'ຎ') end
            end
            if match(text, va) then explicit = false end
            if explicit == nil then
                -- It looks as though gsub (from dc()) and match interfere, so need local variables.
                local ngf1=dc('$')
                local ngf2=dc(' ')
                if (match(text, ngf1) or match(text, ngf2)) then explicit = true end
            end
            if not explicit then
                diax = gsub(diax, '', 'า') -- Treat as ฤ and ฦ consonants.
                local search = '()('..C..')()'
                local longswap = function(p, c, v) return c..v..p end
                text = gsub(text, search, longswap)
                text = gsub(text, search, longswap)
            end
            if false and sc == 'Laoo' then -- Keep around for future debugging
                local nr = 'F'
                if nuktaed then nr = 'T' end
                if yLao then
                    nr = nr .. yLao
                else
                    nr = nr .. 'y?'
                end
                nr = nr .. '-'
                if not options or not options.impl then
                    if explicit == nil then
                        text = 'GN-'..nr..text
                    elseif explicit == false then
                        text = 'GI-'..nr..text
                    else
                        text = 'GE-'..nr..text
                    end
                elseif options.impl == 'both' then
                    if explicit == nil then
                        text = 'N-'..nr..text
                    elseif explicit == false then
                        text = 'I-'..nr..text
                    else
                        text = 'E-'..nr..text
                    end
                elseif options.impl == 'yes' then
                    text = 'Y-'..nr..text
                elseif options.impl == 'no' then
                    text = 'N-'..nr..text
                elseif options.impl then
                    text = options.impl .. nr .. text
                end
            end
            local pair = '()('..C..')'
            text = gsub(text, pair, '%2%1')
            if explicit and lang ~= 'sa' then -- SARA A is a vowel.
                diax = '([ะະ'..string.sub(diax,3)
            end
            if explicit and sc == 'Laoo' then -- Clean up clusters
                local ass = {
                    = "ຄຄ", = "ຈຈ", = "ຊຊ",
                    = "ຕຕ", = "ຕຖ", = "ທທ", = "ສສ",
                    = "ຎຈ", = "ຎຉ", = "ຎຊ",
                    = "ນຈ", = "ຎຉ", = "ຎຊ", = "ຎຎ",
                    = "ປປ", = "ປຜ", = "ພພ", = "ປ",
                }
                text = gsub(text, '?', ass)
            end
        end
        if sc == 'Lana' then
            -- Disambiguate lanna combining loop below.
            local cl_search = dc('ᨠᩛ')
            text = gsub(text, cl_search,
                {='ᨲ᩠ᨳ', ='ᨻ᩠ᨻ', ='ᨾ᩠ᨻ'})
        end
        if sc == 'Mymr' or sc == 'Lana' then
            local fn = function(c, d) return consonants..d end
            local search = '('..C..')()'
            text = gsub(text, search, fn);
            text = gsub(text, search, fn); -- and again
        end
        if sc == 'Beng' then
            -- Aberrant conversion:
            text = gsub(text, u(0x09b2, 0x9cd, 0x9bc, 0x9cd, 0x9b9), 'ḷহ') -- raw
            text = gsub(text, u(0x09b2, 0x9bc, 0x9cd, 0x9cd, 0x9b9), 'ḷহ') -- NFC
            -- Proper conversion:
            text = gsub(text, u(0x09b2, 0x9cd, 0x9bc, 0x9b9), 'ḷহ') -- not NFC
            -- text = gsub(text, u(0x09b2, 0x9bc, 0x9cd, 0x9b9), 'ḷহ') -- NFC
        end
        -- Main pass: romanise each consonant together with its trailing diacritics.
        text = gsub(text, '('..C..')'..diax,
            function(c, d)
                local val = tt
                if val then return val end
                local cn = consonants
                if not cn then return 'X('..c..')' end
                if d ~= "" then
                    return cn .. (diacritics or 'NIL('..d..')')
                elseif explicit then
                    return cn
                else
                    return cn .. 'a'
                end
            end
        )
        -- Final pass: replace any remaining single characters found in tt.
        text = gsub(text, '.', tt)
        -- Bodge alphabetic Thai and Lao anusvara
        if explicit and text then
            text = gsub(text, 'ṅ$', 'ṃ')
            text = gsub(text, 'ṅ()', 'ṃ%1')
        end
    else
        text = nil -- Not ready for use yet!
    end
    return text
end
--- Transliterate `text` using the default behaviour.
-- Convenience entry point: forwards to export.trwo with an empty options table.
-- @param text  the string to transliterate
-- @param lang  language code, passed through unchanged
-- @param sc    script code, passed through unchanged
-- @return whatever export.trwo returns for these arguments
function export.tr(text, lang, sc)
    local default_options = {}
    return export.trwo(text, lang, sc, default_options)
end
-- Return the module table so that callers of require() receive export.tr and export.trwo.
return export