-- Authors: JavaScript ئاسۆ; Lua Ghybu, Calak
local export = {}
local gsub = mw.ustring.gsub
local U = mw.ustring.char
-- One-to-one letter table, applied by tr_word via gsub(word, '.', mapping)
-- after its context-dependent rules have run. 'و' and 'ی' are intentionally
-- absent: tr_word rewrites every occurrence of them before consulting this table.
--
-- NOTE(review): the keys were missing in the copy under review (each entry was
-- a bare `= "x"`, which is not valid Lua). They have been restored from the
-- Latin values and from the letters tr_word names explicitly (ڕ, ئ, ه, ە);
-- the inventory looks like the Central Kurdish (Sorani) alphabet. Confirm
-- against the authoritative copy of this module.
local mapping = {
	["ا"] = "a",  -- U+0627
	["ب"] = "b",  -- U+0628
	["چ"] = "ç",  -- U+0686
	["ج"] = "c",  -- U+062C
	["د"] = "d",  -- U+062F
	["ە"] = "e",  -- U+06D5
	["ێ"] = "ê",  -- U+06CE
	["ف"] = "f",  -- U+0641
	["گ"] = "g",  -- U+06AF
	["ھ"] = "h",  -- U+06BE
	["ه"] = "h",  -- U+0647
	["ح"] = "ḧ",  -- U+062D
	["ژ"] = "j",  -- U+0698
	["ک"] = "k",  -- U+06A9
	["ڵ"] = "ll", -- U+06B5
	["ل"] = "l",  -- U+0644
	["م"] = "m",  -- U+0645
	["ن"] = "n",  -- U+0646
	["ۆ"] = "o",  -- U+06C6
	["پ"] = "p",  -- U+067E
	["ق"] = "q",  -- U+0642
	["ر"] = "r",  -- U+0631
	["ڕ"] = "r",  -- U+0695; tr_word turns a non-initial ڕ into "rr" beforehand
	["س"] = "s",  -- U+0633
	["ش"] = "ş",  -- U+0634
	["ت"] = "t",  -- U+062A
	["ڤ"] = "v",  -- U+06A4
	["خ"] = "x",  -- U+062E
	["غ"] = "ẍ",  -- U+063A
	["ز"] = "z",  -- U+0632
	["ئ"] = "",   -- U+0626; tr_word turns a non-initial ئ into "'" beforehand
	["ع"] = "'",  -- U+0639
	["\226\128\140"] = "", -- U+200C ZWNJ (zero-width non-joiner), written as UTF-8 bytes so it stays visible
	["ـ"] = "",   -- U+0640 kashida, no sound
	-- numerals (Arabic-Indic digits, U+0660..U+0669)
	["١"] = "1",
	["٢"] = "2",
	["٣"] = "3",
	["٤"] = "4",
	["٥"] = "5",
	["٦"] = "6",
	["٧"] = "7",
	["٨"] = "8",
	["٩"] = "9",
	["٠"] = "0",
	-- persian variants to numerals (Extended Arabic-Indic digits, U+06F0..U+06F9)
	["۱"] = "1",
	["۲"] = "2",
	["۳"] = "3",
	["۴"] = "4",
	["۵"] = "5",
	["۶"] = "6",
	["۷"] = "7",
	["۸"] = "8",
	["۹"] = "9",
	["۰"] = "0",
}
-- punctuation (leave on separate lines)
-- Arabic-script punctuation and its Latin replacement, applied by tr_word via
-- gsub(word, '.', punctuation) before any other rule.
--
-- NOTE(review): the keys were missing in the copy under review (each entry was
-- a bare `= "x"`, which is not valid Lua). They have been restored from the
-- per-line comments; the pairing of « / » with the opening / closing curly
-- quote in particular should be confirmed against the authoritative copy.
local punctuation = {
	["؟"] = "?", -- question mark (U+061F)
	["،"] = ",", -- comma (U+060C)
	["؛"] = ";", -- semicolon (U+061B)
	["«"] = '“', -- quotation mark (U+00AB)
	["»"] = '”', -- quotation mark (U+00BB)
	["٪"] = "%", -- percent (U+066A)
	["؉"] = "‰", -- per mille (U+0609)
	["٫"] = ".", -- decimals (U+066B)
	["٬"] = ",", -- thousand (U+066C)
}
-- translit
-- Transliterate one whitespace-free word from Arabic script to Latin.
--
-- Order of operations: map Arabic punctuation to Latin, set aside one trailing
-- punctuation mark, normalise heh+ZWNJ to ae, turn two diacritics into "i",
-- resolve the ambiguous letters 'و' and 'ی' from their context, apply the
-- one-to-one `mapping` table, insert "i" into consonant clusters, and finally
-- re-attach the punctuation mark.
--
-- Parameter: word (string). Returns: the transliterated string.
--
-- NOTE(review): many patterns below contain an empty capture `()`. In a Lua
-- pattern that is a *position* capture (it yields a number, not a character),
-- whereas the inline comments describe character-class matches (vowel,
-- non-letter, consonant, ...). The bracketed classes appear to have been lost
-- from this copy and cannot be reconstructed from what is visible here, so
-- those pattern strings are left exactly as found. They MUST be restored from
-- the authoritative copy before this function can produce correct output.
local function tr_word(word)
	-- Replace Arabic-script punctuation with its Latin counterpart.
	word = gsub(word, '.', punctuation)

	-- Set aside a single trailing punctuation mark so the letter rules never
	-- see it. `ponct` is local: the earlier code assigned it as a global.
	-- NOTE(review): the earlier pattern was a bare '$', which matches every
	-- word and made the last character get appended twice. '%p$' is a
	-- reconstruction; in mw.ustring, %p covers Unicode punctuation, which
	-- includes every replacement value in `punctuation`.
	local ponct = ''
	if mw.ustring.find(word, '%p$') then
		ponct = mw.ustring.sub(word, -1)
		word = mw.ustring.sub(word, 1, -2)
	end

	-- U+0647 (Arabic letter heh) + U+200C (zero-width non-joiner)
	--   ↓
	-- U+06D5 (Arabic letter ae)
	-- The ZWNJ is written as UTF-8 byte escapes so it cannot be dropped
	-- invisibly; a bare heh is left alone and later becomes "h" via `mapping`.
	word = gsub(word, 'ه\226\128\140', "ە") --correct unicode for letter ە

	-- diacritics
	word = gsub(word, 'ْ', "i") -- U+0652, Arabic sukun
	word = gsub(word, 'ِ', "i") -- U+0650, Arabic kasra

	--managing 'و' and 'ی'
	-- NOTE(review): empty `()` = stripped character class, see header.
	word = gsub(word, 'و()', "w%1") --و + vowel => w (e.g. wan)
	word = gsub(word, 'ی()', "y%1") --ی + vowel => y (e.g. yas)
	word = gsub(word, '()و', "%1w") --vowel + و => w (e.g. kew)
	word = gsub(word, '()ی', "%1y") --vowel + ی => y (e.g. bey)
	word = gsub(word, '^و$', "û") --a word consisting only of 'و' => û (=and)
	word = gsub(word, '()و', "%1w") --non-letter + 'و' => w (e.g. wtar)
	word = gsub(word, '^و', "w") --first 'و' => w (e.g. wtar)
	word = gsub(word, 'یو', "îw") --'ی' + 'و' => îw (e.g. nîw)
	word = gsub(word, '()یی', "%1îy") --'ی' + 'ی' => îy (e.g. kanîy)
	word = gsub(word, 'وی', "uy") --'و' + 'ی' => uy (e.g. buyn)
	word = gsub(word, 'وو', "û") --'و' + 'و' => û (e.g. bû)
	-- Whatever is still left of the two letters is read as a vowel.
	word = gsub(word, 'ی', "î")
	word = gsub(word, 'و', "u")
	word = gsub(word, 'uu', "û") --'و' + 'و' => û (e.g. bû)

	-- These two must run before `mapping`, which gives ڕ => "r" and ئ => "".
	word = gsub(word, '()ڕ', "%1rr") --when 'ڕ' not at the beginning of a word => rr
	word = gsub(word, '()ئ', "%1'") --when 'ئ' not at the beginning of a word => '

	-- Remaining letters, digits, ZWNJ and kashida: plain table lookup.
	word = gsub(word, '.', mapping)

	--insert i where applicable
	-- NOTE(review): empty `()` = stripped character class, see header.
	word = gsub(word, 'll', "Ľ") -- temporary conversion to avoid seeing ll as 2 letters
	word = gsub(word, 'rr', "Ŕ") -- temporary conversion to avoid seeing rr as 2 letters
	word = gsub(word, '()()()()', "%1%2i%3%4") --e.g. grft -> grift
	word = gsub(word, '()()()()$', "%1%2%3i%4") --e.g. cejnt -> cejnit
	word = gsub(word, '()()()', "%1i%2%3") --e.g. wrd -> wird
	word = gsub(word, '()()()', "%1i%2%3") --e.g. prd -> pird
	word = gsub(word, '()()$', "%1i%2") --like above
	word = gsub(word, '()()()()', "%1%2i%3%4") --repeat the latter expression, in case skipped
	word = gsub(word, '()()()$', "%1%2i%3") --repeat the latter expression, in case skipped
	word = gsub(word, '^()()()', "%1i%2%3") --e.g. ktk -> kitk
	word = gsub(word, '^()()$', "%1i%2") --e.g. ktk -> kitk
	word = gsub(word, '()()()()', "%1%2i%3%4") --e.g. ktk -> kitk
	word = gsub(word, '()()()$', "%1%2i%3") --e.g. ktk -> kitk
	word = gsub(word, '()()$', "%1%2i") --e.g. j -> ji
	word = gsub(word, '^()$', "%1i") --e.g. j -> ji
	-- Rules that were already disabled in the copy under review:
	--word = gsub(word, '()()()', "%1%2i%3") --e.g. bra -> bira
	--word = gsub(word, '^()()', "%1i%2") --e.g. bra -> bira
	--word = gsub(word, '()()', "%1i%2") --e.g. aşkra -> aşkira
	--word = gsub(word, 'si()', "s%1") -- sp, st cluster
	word = gsub(word, 'Ľ', "ll") --revert the temporary conversion
	word = gsub(word, 'Ŕ', "rr") --revert the temporary conversion

	-- Re-attach the punctuation mark that was set aside at the start.
	return word .. ponct
end
-- Transliterate a run of text word by word.
--
-- `text` is split on runs of whitespace (pattern '%s+'), each piece is passed
-- through tr_word, and the results are joined with a single space, so any run
-- of whitespace in the input comes out as one space.
--
-- Parameters:
--   text (string): the text to transliterate.
--   lang, sc: accepted but not read by this function.
-- Returns: the transliterated string.
function export.tr(text, lang, sc)
	local words = mw.text.split(text, '%s+')
	-- Overwrite each slot in place. The earlier code assigned the result to
	-- the table variable itself, replacing the table with a string and making
	-- table.concat fail.
	for index, word in ipairs(words) do
		words[index] = tr_word(word)
	end
	return table.concat(words, ' ')
end
return export