-- String helpers come from the shared string utilities module; `char` and
-- `gsub` are aliased locally because they are used throughout this file.
local m_str_utils = require("Module:string utilities")
local U = m_str_utils.char
local gsub = m_str_utils.gsub
-- Table returned at the end of the file; the public function is attached to it.
local export = {}
-- Vowel and gemination marks, built from their code points
-- (U+064E, U+0650, U+064F, U+0651).
local zabar = U(0x64E)
local zer = U(0x650)
local pesh = U(0x64F)
local tashdid = U(0x651) -- also called shadda
-- Mark written as a literal rather than via U(); used in one "e" rule in export.tr.
local jazm = "ْ"
local he = "ہ"
-- Punctuation characters. Some are written with % escapes, presumably so the
-- string can be spliced into a Lua pattern character class -- TODO confirm,
-- no use of this constant is visible in this file.
local punctuation = "%-:%(%)%*&٫؛؟،ـ«\".\'!»٪؉۔"
-- Extended Arabic-Indic digits (1-9 followed by 0).
local numbers = "۱۲۳۴۵۶۷۸۹۰"
-- Individual letters referred to by name in the substitution rules.
local ain = 'ع'
local alif = 'ا'
local ye = 'ی'
local ye2 = 'ے'
local ye3 = 'ئ'
local vaw = "و"
local nasal = 'ں'
local nunghunna = 'ن٘'
local aspirate = 'ھ'
-- Character sets. `nonhe` and `nonye` are `consonants` with ہ and ی removed
-- respectively; `anything` is `consonants` plus ا ے ؤ.
-- NOTE(review): `nonvaw` omits و but, unlike the other sets, also omits the
-- leading ب -- confirm whether that is intentional.
-- NOTE(review): none of the sets from `consonants` down to `ZZP` (nor `he`,
-- `punctuation`, `numbers`, `ye3`, `nasal`, `nunghunna`) is referenced by the
-- code visible in this file; they were presumably meant to be spliced into
-- the patterns in export.tr.
local consonants = "بٻپتٹثجڄچحخدڊڈڋݙذرڑزژسشصضطظعغفقکݢگڳلࣇمنݨوہھئیں"
local nonhe = "بٻپتٹثجڄچحخدڊڈڋݙذرڑزژسشصضطظعغفقکݢگڳلࣇمنݨوھئیں"
local nonye = "بٻپتٹثجڄچحخدڊڈڋݙذرڑزژسشصضطظعغفقکݢگڳلࣇمنݨوہھئں"
local nonvaw = "ٻپتٹثجڄچحخدڊڈڋݙذرڑزژسشصضطظعغفقکݢگڳلࣇمنݨہھئیں"
local anything = "بٻپتٹثجڄچحخدڊڈڋݙذرڑزژسشصضطظعغفقکݢگڳلࣇمنݨوہھئیںاےؤ"
local vowels = "ایئےۓوؤ"
local indvowels = "آایےوؤ"
local semivowel = "یو"
local hes = "ہح"
-- Combining marks; `ZZP` is a three-mark subset (the name suggests
-- zabar/zer/pesh -- TODO confirm).
local diacritics = "ًَُِّْٰ"
local ZZP = "َُِ"
-- Lookup table passed as the replacement argument of the final
-- single-character substitution in export.tr: source character -> romanised
-- output string.
-- NOTE(review): in this copy every entry begins with `=` and has no key, so
-- the table constructor does not parse as Lua. The source-script keys
-- (presumably of the form ["x"] = ...) appear to have been lost and must be
-- restored from a known-good revision before this module can load.
local mapping = {
= 'ā', = 'b', = 'b̤', = 'p', = 't', = 'ṭ', = 's̱',
= 'j', = 'j̈', = 'c', = 'ḥ', = 'x',
= 'd', = 'ḍ', = 'd̤', = 'ẕ', = 'r', = "ṛ", = 'z', = 'ž',
= 's', = 'ś', = 'ṣ', = 'ẓ',
= 't̤', = 'z̤', = 'ʻ', = 'ġ', = 'f', = 'q',
= 'k', = 'g', = 'g̈', = 'ŋ', = 'ṇ', = 'ḷ',
= 'l', = 'm', = 'n', = 'v', = 'h', = 'y', = ".", = 'ṉ',
-- 'H' is a placeholder; export.tr turns it into an apostrophe at the very end
= 'H',
-- this character is dropped from the output
= '',
-- diacritics
= "a",
= "i",
= "u",
= "", -- also sukun - no vowel
= "-", -- ZWNJ (zero-width non-joiner)
-- ligatures
= "lā",
= "allāh",
-- kashida
= "-", -- kashida, no sound
-- numerals
= "1", = "2", = "3", = "4", = "5",
= "6", = "7", = "8", = "9", = "0",
-- punctuation (leave on separate lines)
= "?", -- question mark
= ",", -- comma
= ";", -- semicolon
= '“', -- quotation mark
= '”', -- quotation mark
= "%", -- percent
= "‰", -- per mille
= ".", -- decimals
= ",", -- thousand
}
--- Transliterate `text` by running an ordered series of substitutions over it
-- and then mapping each remaining character through `mapping`.
-- The order of the substitutions matters: later rules operate on the output
-- of earlier ones.
-- @param text string to transliterate
-- @param lang accepted but not read anywhere in this function
-- @param sc accepted but not read anywhere in this function
-- @return the transformed string
--
-- NOTE(review): many patterns below contain a bare `()` where a captured
-- character class would be expected. In standard Lua patterns an empty `()`
-- is a position capture (it yields a number, not text), so as written `%1` /
-- `%2` would insert positions into the output. Together with the unused
-- character-set constants at the top of the file, this suggests the bracketed
-- classes were lost from this copy -- restore them from a known-good revision.
-- (Assumes m_str_utils.gsub follows Lua pattern semantics -- TODO confirm.)
function export.tr(text, lang, sc)
--define the "end" of a word
-- A literal "#" in the input is swapped for a placeholder first, because "#"
-- is used below as the word-boundary marker; it is restored near the end.
text = gsub(text, "#", "HASHTAG")
text = gsub(text, " | ", "# | #")
text = gsub(text, "\n" , "#".."\n" .. "#")
-- NOTE(review): the pattern here is an empty capture; see the note above.
text = gsub(text, "()" , "#".."%1" .. "#")
text = "##" .. gsub(text, " ", "# #") .. "##"
-- hashtags now mark the beginning and end of a word
-- EXCEPTIONS - leave as they are, unless they have been sorted out elsewhere
text = gsub(text, "الله", "allāh")
-- diacritics
-- The aspirate letter is consumed and an "h" is emitted between the two
-- captures that precede it.
text = gsub(text, '()' .. '()' .. aspirate, "%1h%2")
-- Initial alif
text = gsub(text, alif .. '()', "%1")
text = gsub(text, "#" .. alif .. vaw, "o")
text = gsub(text, "#" .. alif .. ye .. '()', "e")
text = gsub(text, "#" .. alif .. ye .. alif, "eyā")
-- Tashdeed
-- Each rule removes the tashdid and doubles the first capture (or, in the ye
-- rule, emits "yy").
text = gsub(text, '()' .. tashdid, "%1%1")
text = gsub(text, '()' .. tashdid .. '()', "%1%1%2")
text = gsub(text, '()' .. ye .. '()' .. tashdid, "%1yy%2")
-- For some reason the tashdeed gets pushed after the other diacritics, so this line is necessary for tashdeed to work with other diacritics
text = gsub(text, '()' .. '()' .. tashdid, "%1%1%2")
-- tanween diacritic / no need to mess about
-- All three forms (mark + alif, alif + mark, mark alone) come out as "an".
text = gsub(text, '()' .. 'ً' .. alif, "%1an")
text = gsub(text, alif .. 'ً', "an")
text = gsub(text, '()' .. 'ً', "%1an")
-- tall zabar -- / no need to mess about
text = gsub(text, '()' .. 'ٰ', "á")
text = gsub(text, '()' .. 'ٰ' .. '()', "%1á")
-- ‘ain
-- NOTE(review): the replacement on the next line appears to use a different
-- apostrophe-like character from the ʻ used in the following rules and in
-- `mapping` -- confirm whether the output should be consistent.
text = gsub(text, alif .. ain , "ā‘")
text = gsub(text, ain .. alif .. '()', "ʻā%1")
text = gsub(text, '()' .. '(?)' .. ain, "%1%2ʻ")
text = gsub(text, ain .. zer .. '()', "ʻi%1")
text = gsub(text, ain .. pesh .. '()', "ʻu%1")
text = gsub(text, ain .. zer .. ye .. '()', "ʻī%1")
text = gsub(text, ain .. pesh .. vaw .. '()', "ʻū%1")
--- alif
text = gsub(text, '()' .. alif, "%1ā")
-- vaw
-- medial/final consonants
--- (e) -- works
text = gsub(text, '()' .. ye .. jazm .. '()', "%1e%2")
text = gsub(text, '()' .. ye .. '()', "%1e%2")
-- NOTE(review): the next two statements are identical in this copy, so the
-- second cannot match anything the first left behind; they presumably
-- differed before the patterns were truncated.
text = gsub(text, '()' .. ye .. alif, "%1eyā")
text = gsub(text, '()' .. ye .. alif, "%1eyā")
--- he
--- vaw
--- ye
-- zabar followed by ye2 becomes "ai"; any remaining ye2 becomes "e".
text = gsub(text, zabar .. ye2, "ai")
text = gsub(text, ye2, "e")
-- get rid of hashtags (not needed)
text = gsub(text, "#", "")
-- restore literal "#" characters that were protected at the start
text = gsub(text, "HASHTAG", "#")
-- Look every remaining single character up in `mapping`.
text = gsub(text, '.', mapping)
-- Latin "n" still followed by the combining mark seen in `nunghunna`
-- (presumably left over after the lookup above) becomes "ṉ".
text = gsub(text, 'n٘', "ṉ")
-- Replace the 'H' placeholder with an apostrophe.
text = gsub(text, 'H', "'")
--
return text
end
return export