local export = {}
local m_string_utils = require("Module:string utilities")
local gcodepoint = m_string_utils.gcodepoint
local rfind = m_string_utils.find
local rsubn = m_string_utils.gsub
local rmatch = m_string_utils.match
local rsplit = m_string_utils.split
local U = m_string_utils.char
-- Named character constants. Values built with U(...) are given by Unicode
-- code point; the rest are written as literals.

-- Tanwin marks, in the order an / un / in.
local fatHataan, Dammataan, kasrataan = U(0x64B), U(0x64C), U(0x64D)
-- Short-vowel marks (U+064E, U+0650, U+064F).
local zabar, zer, pesh = U(0x64E), U(0x650), U(0x64F)
-- tashdid is also called shadda; jazm is U+0652.
local tashdid, jazm = U(0x651), U(0x652)
local he = U(0x647)
local zwnj = U(0x200C) -- zero-width non-joiner
local highhmz = U(0x654)
-- Directional formatting marks: left-to-right, then right-to-left.
local lrm, rlm = U(0x200E), U(0x200F)

-- Letters written as literals.
local alif = "ا" -- plain alif
local alif_madd = "آ" -- alif with madda
local hamza = "ء" -- stand-alone hamza
local yaa = "ی" -- farsi ye
local hamza_yaa = "ئ" -- hamza on a ye seat
local hamza_waaw = "ؤ" -- hamza on a waw seat
local waaw = "و" -- waw

-- Remaining code-point constants, then the Urdu-style final ye.
local dagger_alif, marbuta = U(0x670), U(0x629)
local returning_yaa = "ے" -- baRi ye
-- Transliteration table: each entry assigns a Latin-script output string to
-- one source character (or ligature / punctuation mark / digit).
-- NOTE(review): in this copy every entry starts with `=` and has no `[key]`
-- part, so the table does not parse as written. Presumably the bracketed
-- keys were lost in transit — restore them from the canonical source before
-- relying on this table; the grouping comments below describe the intended
-- sections.
local mapping = {
= "ā",
= "b",
= "p",
= "t",
= "s",
= "j",
= "č",
= "h",
= "x",
= "d",
= "z",
= "r",
= "z",
= "ž",
= "s",
= "š",
= "s",
= "z",
= "t",
= "z",
= "ğ",
= "f",
= "q",
= "k",
= "g",
= "l",
= "m",
= "n",
= "ō",
= "ē",
= ".",
= "h",
= "'",
= "'",
= "'",
= "'",
= "'",
-- diacritics
= "a",
= "i",
= "u",
= "an",
= "in",
= "un",
= "", -- also sukun - no vowel
= "-", -- ZWNJ (zero-width non-joiner)
= "-yi",
-- ligatures
= "lā",
= "allāh",
-- kashida
= "‐", -- kashida, no sound
-- alif_wasla
= "", -- nothing
-- numerals
= "1",
= "2",
= "3",
= "4",
= "5",
= "6",
= "7",
= "8",
= "9",
= "0",
-- punctuation (leave on separate lines)
= "?", -- question mark
= ",", -- comma
= ";", -- semicolon
= "“", -- quotation mark
= "”", -- quotation mark
= "%", -- percent
= "‰", -- per mille
= ".", -- decimals
= ",", -- thousands separator
-- regional characters (FOR VERY SPECIFIC USE CASES)
= "ṭ",
= "ṭ",
= "ḍ",
= "ḍ",
-- balti
-- can't do anything about ژ because it conflicts with persian
= "ž",
= "č̣",
= "ṛ",
= "dz",
= "ṣ",
= "ng",
= "ny",
= "h",
= "e",
}
-- Character inventories, kept as plain strings so they can be concatenated
-- into larger strings (presumably Lua pattern character classes — confirm
-- against the code that consumes them).
local sun_letters = "تثدذرزسشصضطظلن"
-- Punctuation inventory; pattern-magic characters are already %-escaped.
-- NOTE(review): `%(%)%*` looks like it may once have contained an escaped
-- bracket pair — confirm against the canonical source.
local punctuation = ":%(%)%*&٫؛؟،ـ«\".'!»٪؉۔`,/–—%{%}"
local numbers = "۱۲۳۴۵۶۷۸۹۰"
local balticonsonants = "ڃڇڑڗݜݨݩǩ" -- for any other languages using this module
local consonants_needing_vowels = "بپتټٹثجچحخدډڈذرزژسشصضطظعغفقکگلمنؤهئء" .. balticonsonants
-- Right-hand inventory: the consonants plus alif-madda, waaw and yaa.
-- The previous code concatenated `malif`, a name that is never declared in
-- this module, so it evaluated to nil and raised "attempt to concatenate a
-- nil value" while the module was loading. `alif_madd` is the declared
-- constant that best matches the name.
-- NOTE(review): confirm alif_madd (not plain alif) is the intended member.
local rconsonants = consonants_needing_vowels .. alif_madd .. "وی"
local lconsonants = consonants_needing_vowels -- yaa and waaw can be vowels w/o diacritics
-- Characters treated like whitespace: %s, apostrophe and double quote.
local space_like = "%s'" .. '"'
-- NOTE(review): empty as found; the name suggests a bracketed class built
-- from space_like — confirm before use.
local space_like_class = ""