local m_str_utils = require("Module:string utilities")
local U = m_str_utils.char
local gsub = m_str_utils.gsub
-- Module table returned at the end of the file; `tr` is attached to it below.
local export = {}
-- Short-vowel marks and the gemination mark, built from their code points
-- (U+064E, U+0650, U+064F, U+0651).
local zabar = U(0x64E)
local zer = U(0x650)
local pesh = U(0x64F)
local tashdid = U(0x651) -- also called shadda
-- "No vowel" mark, written here as a literal combining character.
local jazm = "ْ"
local he = "ہ"
-- Punctuation and digit characters of the script.
-- NOTE(review): neither `punctuation` nor `numbers` is referenced anywhere in
-- the visible code - confirm whether a pattern that used them has been lost.
local punctuation = "%-:%(%)%*&٫؛؟،ـ«\".\'!»٪؉۔"
local numbers = "۱۲۳۴۵۶۷۸۹۰"
-- Individual letters that the rules in export.tr refer to by name.
local ain = 'ع'
local alif = 'ا'
local ye = 'ی'
local ye2 = 'ے'
local ye3 = 'ئ'
local vaw = "و"
local nasal = 'ں'
local nunghunna = 'ن٘'
local aspirate = 'ھ'
-- Character-set strings, by their shape meant to be spliced into "[...]"
-- pattern classes. `nonhe`, `nonye` and `nonvaw` are `consonants` with one
-- letter left out.
-- NOTE(review): `nonvaw` also lacks the leading "ب" that the other three
-- lists start with - confirm whether that is intentional.
-- NOTE(review): none of the strings from `consonants` down to `ZZP` (nor
-- `ye3` / `nasal` above) is referenced in the visible code; see the note at
-- the top of export.tr.
local consonants = "بٻپتٹثجڄچحخدڊڈڋݙذرڑزژسشصضطظعغفقکݢگڳلࣇمنݨوہھئی"
local nonhe = "بٻپتٹثجڄچحخدڊڈڋݙذرڑزژسشصضطظعغفقکݢگڳلࣇمنݨوھئی"
local nonye = "بٻپتٹثجڄچحخدڊڈڋݙذرڑزژسشصضطظعغفقکݢگڳلࣇمنݨوہھئ"
local nonvaw = "ٻپتٹثجڄچحخدڊڈڋݙذرڑزژسشصضطظعغفقکݢگڳلࣇمنݨہھئی"
local vowels = "ایئےۓوؤ"
local indvowels = "آایےوؤ"
local semivowel = "یو"
local hes = "ہح"
local diacritics = "ًَُِّْٰ"
local ZZP = "َُِ"
-- Per-character replacement table. export.tr applies it near its end with
-- gsub(text, '.', mapping), i.e. as a lookup on each single character that the
-- context-sensitive rules have left untouched.
-- NOTE(review): every entry below begins with `=`; the `["..."]` key that
-- should precede each value is not present in this text, so the table cannot
-- be parsed as written. Restore the keys from the module's revision history
-- before making any other change - they cannot be inferred safely from the
-- values alone.
local mapping = {
= 'ā', = 'b', = 'b̤', = 'p', = 't', = 'ṭ', = 's̱',
= 'j', = 'j̈', = 'c', = 'ḥ', = 'x',
= 'd', = 'ḍ', = 'd̤', = 'ẕ', = 'r', = "ṛ", = 'z', = 'ž',
= 's', = 'ś', = 'ṣ', = 'ẓ',
= 't̤', = 'z̤', = 'ʻ', = 'ġ', = 'f', = 'q',
= 'k', = 'g', = 'g̈', = 'ŋ', = 'ṇ', = 'ḷ',
= 'l', = 'm', = 'n', = 'v', = 'h', = 'y', = ".", = 'ṉ',
-- 'H' is a temporary placeholder; export.tr turns it into an apostrophe in
-- one of its last steps.
= 'H',
= '',
-- diacritics
= "a",
= "i",
= "u",
= "", -- also sukun - no vowel
= "-", -- ZWNJ (zero-width non-joiner)
-- ligatures
= "lā",
= "allāh",
-- kashida
= "-", -- kashida, no sound
-- numerals
= "1", = "2", = "3", = "4", = "5",
= "6", = "7", = "8", = "9", = "0",
-- punctuation (leave on separate lines)
= "?", -- question mark
= ",", -- comma
= ";", -- semicolon
= '“', -- quotation mark
= '”', -- quotation mark
= "%", -- percent
= "‰", -- per mille
= ".", -- decimals
= ",", -- thousand
}
--- Transliterate Perso-Arabic-script text into Latin script.
-- The text is pushed through a fixed sequence of gsub substitutions; the order
-- matters, because later rules rely on what earlier rules have already turned
-- into Latin letters. "#" is used internally as a word-boundary marker.
-- @param text the string to transliterate
-- @param lang not used in the body
-- @param sc not used in the body
-- @return the transliterated string
--
-- NOTE(review): many patterns below contain a bare `()` (an empty capture) or
-- a trailing `""` where a bracketed character class would be expected, and the
-- class strings defined at file level (`consonants`, `vowels`, `punctuation`,
-- ...) are not referenced anywhere in the visible code. It looks as though the
-- `[...]` parts of these patterns have been lost - verify every such line
-- against the module's revision history before relying on it.
function export.tr(text, lang, sc)
-- Define the "end" of a word.
-- A literal "#" in the input is parked as "HASHTAG" so that "#" is free to
-- serve as the boundary marker; it is restored just before the final lookup.
text = gsub(text, "#", "HASHTAG")
text = gsub(text, " | ", "# | #")
text = gsub(text, "\n" , "#".."\n" .. "#")
-- NOTE(review): as written this pattern is only an empty capture; presumably
-- it was meant to wrap each punctuation character in "#" - TODO confirm.
text = gsub(text, "()" , "#".."%1" .. "#")
text = "##" .. gsub(text, " ", "# #") .. "##"
-- hashtags now mark the beginning and end of a word
-- EXCEPTIONS - leave as they are, unless they have been sorted out elsewhere
text = gsub(text, '()' .. nunghunna, "%1ṉ")
text = gsub(text, "الله", "allāh")
-- diacritics
text = gsub(text, '()' .. '()' .. aspirate, "%1h%2")
text = gsub(text, pesh .. vaw .. jazm .. "", "ū")
text = gsub(text, zabar .. vaw .. jazm .. "", "au")
text = gsub(text, zer .. ye .. '()', "ī%1")
text = gsub(text, zer .. ye .. "#", "ī")
text = gsub(text, zer .. ye .. tashdid .. alif, "iyyā")
-- NOTE(review): unlike the "ī%1" rule above, the capture is not re-emitted
-- here - confirm that dropping it is intended.
text = gsub(text, zabar .. ye .. '()', "ai")
text = gsub(text, jazm .. alif, "ā")
-- Initial alif
text = gsub(text, alif .. zabar, "a")
text = gsub(text, alif .. zer .. ye .. jazm .. "", "ī")
text = gsub(text, alif .. ye .. alif .. "", "eyā")
text = gsub(text, alif .. zer, "i")
text = gsub(text, alif .. vaw .. jazm .. "", "o")
text = gsub(text, alif .. ye .. '()', "e%1")
text = gsub(text, alif .. ye .. jazm .. "", "e")
text = gsub(text, alif .. pesh .. vaw .. jazm .. "", "u")
text = gsub(text, alif .. pesh, "u")
-- Tashdeed: the captured character before the mark is written twice
text = gsub(text, '()' .. tashdid, "%1%1")
text = gsub(text, '()' .. tashdid .. '()', "%1%1%2")
text = gsub(text, '()' .. ye .. '()' .. tashdid, "%1yy%2")
text = gsub(text, '()' .. vaw .. '()' .. tashdid, "%1ww%2")
-- For some reason the tashdeed gets pushed after the other diacritics, so this line is necessary for tashdeed to work with other diacritics
text = gsub(text, '()' .. '()' .. tashdid, "%1%1%2")
-- tanween diacritic / no need to mess about
text = gsub(text, '()' .. 'ً' .. alif, "%1an")
text = gsub(text, alif .. 'ً', "an")
text = gsub(text, '()' .. 'ً', "%1an")
-- tall zabar -- / no need to mess about
-- NOTE(review): the first rule drops its capture and the second drops %2 -
-- confirm against the intended output.
text = gsub(text, '()' .. 'ٰ', "á")
text = gsub(text, '()' .. 'ٰ' .. '()', "%1á")
-- ‘ain
-- NOTE(review): the replacement on the next line appears to use a different
-- quote-like character from the "ʻ" used by the other ain rules and by the
-- mapping table - confirm which one is intended.
text = gsub(text, alif .. ain , "ā‘")
text = gsub(text, ain .. alif .. '()', "ʻā%1")
text = gsub(text, '()' .. '(?)' .. ain, "%1%2ʻ")
text = gsub(text, ain .. zer .. '()', "ʻi%1")
text = gsub(text, ain .. pesh .. '()', "ʻu%1")
text = gsub(text, ain .. zer .. ye .. '()', "ʻī%1")
text = gsub(text, ain .. pesh .. vaw .. '()', "ʻū%1")
--- alif
-- NOTE(review): the next pattern has no capture although the replacement
-- uses %1 - presumably a leading class capture is missing.
text = gsub(text, "" .. zabar .. alif, "%1ā")
text = gsub(text, '()' .. alif, "%1ā")
text = gsub(text, '()' .. tashdid .. alif, "%1%1ā")
-- Word-initial alif followed by a vowel letter ("#" is the word boundary)
text = gsub(text, "#" .. alif .. ye, "ī")
text = gsub(text, "#" .. alif .. vaw, "o")
text = gsub(text, "#" .. alif .. ye2, "e")
-- vaw
text = gsub(text, vaw .. '()', "v%1")
text = gsub(text, '()' .. vaw .. jazm, "%1o")
-- medial/final consonants
--- (e) -- works
text = gsub(text, '()' .. ye .. jazm .. '()', "%1e%2")
text = gsub(text, '()' .. ye .. '()', "%1e%2")
--- he
text = gsub(text, zabar .. he .. '()', "ah%1")
text = gsub(text, '()' .. zabar .. he, "%1ah")
text = gsub(text, '()' .. he .. "#", "%1ā")
text = gsub(text, '()' .. he .. he .. "#", "%1hā")
text = gsub(text, '()' .. jazm .. he .. he .. "#", "%1hā")
text = gsub(text, '()' .. vaw .. he .. "#", "%1oh")
-- NOTE(review): identical, as written, to the first word-final he rule four
-- lines up - confirm whether the two originally used different classes.
text = gsub(text, '()' .. he .. "#", "%1ā")
--- vaw
text = gsub(text, '()' .. vaw .. jazm, "%1o")
text = gsub(text, '()' .. tashdid .. vaw, "%1%1o")
--- ye
text = gsub(text, '()' .. ye .. '()', "%1ī%2")
text = gsub(text, '()' .. ye .. "#", "%1ī")
text = gsub(text, '()' .. ye2, "%1e")
text = gsub(text, '()' .. zabar .. ye2, "%1ai")
text = gsub(text, '()' .. ye .. '()', "%1e%2")
-- NOTE(review): as written the next three rules share one pattern, so the
-- first would consume every match and the last two lines are identical -
-- presumably each used a different class; confirm.
text = gsub(text, '()' .. ye .. alif, "%1eyā")
text = gsub(text, '()' .. ye .. alif, "%1yā")
text = gsub(text, '()' .. ye .. alif, "%1yā")
text = gsub(text, "#" .. ye .. alif, "yā")
-- get rid of hashtags (not needed)
text = gsub(text, "#", "")
-- restore any literal "#" that was parked at the start
text = gsub(text, "HASHTAG", "#")
-- look every remaining single character up in the mapping table
text = gsub(text, '.', mapping)
-- final clean-up of sequences produced by the rules above
text = gsub(text, 'ا', "ā")
text = gsub(text, 'īā', "iyā")
text = gsub(text, 'ehh', "eh")
text = gsub(text, 'īhh', "īh")
text = gsub(text, 'n٘', "ṉ")
text = gsub(text, 'ṉhh', "ṉh")
text = gsub(text, 'yṉ', "eṉ")
-- the 'H' placeholder becomes an apostrophe
text = gsub(text, 'H', "'")
--
return text
end
-- Hand the module table (carrying `tr`) back to whoever requires this module.
return export