This module will transliterate Pashto language text per WT:PS TR.
The module should preferably not be called directly from templates or other modules.
To use it from a template, use {{xlit}}.
Within a module, use Module:languages#Language:transliterate.
For testcases, see Module:ps-translit/testcases.
tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang. When the transliteration fails, returns nil.
-- Module preamble: string helpers, the export table, and the character
-- constants shared by the transliteration rules.
-- (A stray "." that preceded `local` on the first line was removed; it made
-- the statement a syntax error.)
local m_str_utils = require("Module:string utilities")
-- `U` is used below to build a character from its code point; `gsub` is the
-- substitution routine used by every rule in export.tr.
local U = m_str_utils.char
local gsub = m_str_utils.gsub
local export = {}

-- Vowel diacritics, named with their Pashto names.
local zwar = U(0x64E) -- U+064E (Arabic fatha)
local zer = U(0x650) -- U+0650 (Arabic kasra)
local pesh = U(0x64F) -- U+064F (Arabic damma)
local tashdid = U(0x651) -- also called shadda
local zwarakay = U(0x659) -- Pashto /ə/
-- Written as a literal rather than via U(); presumably the sukun mark
-- (U+0652) -- TODO confirm the exact code point.
local jazm = "ْ"

-- Individual letters referenced by the rules.
local he = "ه"
local ain = 'ع'
local alif = 'ا'
local ye = 'ي'
local ye2 = "ےی" -- note: a two-character string, not a single letter
local ye3 = 'ې'
local waw = "و"
local nasal = 'ں'

-- Letter sets. NOTE(review): these look intended for use inside pattern
-- character classes ("[" .. set .. "]"); confirm how export.tr consumes them.
-- Beware that `consonants` and `consonantS` differ only in letter case.
local consonants = "بپتټثجځچڅحخدډذرړزژږسشښصضطظعغفقکګلمنڼوه"
-- Same as `consonants` but without waw (و).
local consonantS = "بپتټثجځچڅحخدډذرړزژږسشښصضطظعغفقکګلمنڼه"
-- Same as `consonants` with ي appended.
local consonantS2 = "بپتټثجځچڅحخدډذرړزژږسشښصضطظعغفقکګلمنڼوهي"
local vowels = "اآیېيۍئےو"
-- Same as `vowels` but without alif (ا).
local vowels2 = "آیېيۍئےو"
local semivowels = "وي"
local hes = "هح"
-- Combining marks; `ZZPZ` is a shorter set of marks than `diacritics`.
local diacritics = "َُِّْٰٙ"
local ZZPZ = "َُِٙ"
-- Lookup table used as the replacement argument of the final per-character
-- gsub in export.tr: each source character is meant to map to its Latin
-- transliteration.
-- NOTE(review): every entry below has the form `= value` with no key, which
-- is not valid Lua. It looks as if the bracketed keys (`["x"] = ...`) were
-- lost from this copy -- restore them from the canonical module before use.
local mapping = {
= 'â', = 'b', = 'p', = 't', = 'ṭ', = 's̱',
= 'j', = 'ź', = 'č', = 'ś', = 'ḥ', = 'x',
= 'd', = 'ḍ', = 'ẕ', = 'r', = "ṛ", = 'z', = 'ž', = 'ǵ',
= 's', = 'š', = 'x̌', = 'ṣ', = 'ẓ',
= 't̤', = 'z̤', = 'ʻ', = 'ǧ', = 'f', = 'q',
= 'k', = 'g', = 'ṇ',
= 'l', = 'm', = 'n', = 'w', = 'h', = 'y', = 'ṉ',
= "wə", = "əy", = 'əy', = 'e', = 'ə', = 'y', = 'y',
-- diacritics
= "a",
= "ĭ",
= "ŭ",
= "ə",
= "", -- also sukun - no vowel
= "-", -- ZWNJ (zero-width non-joiner)
-- ligatures
= "lā",
= "allāh",
-- kashida
= "-", -- kashida, no sound
-- numerals
= "1", = "2", = "3", = "4", = "5",
= "6", = "7", = "8", = "9", = "0",
-- punctuation (leave on separate lines)
= "?", -- question mark
= ",", -- comma
= ";", -- semicolon
= '“', -- quotation mark
= '”', -- quotation mark
= "%", -- percent
= "‰", -- per mille
= ".", -- decimals
= ",", -- thousand
}
--- Transliterate Pashto text into Latin script.
-- The function is an ordered chain of gsub substitutions followed by a
-- per-character lookup in `mapping` and a few Latin-side clean-ups. Order
-- matters: each rule sees the output of the rules before it.
-- NOTE(review): many patterns below consist of a bare '()' (in Lua patterns an
-- empty capture yields a position, not text) or end in `.. ""`. It looks as if
-- bracketed character classes were lost from this copy -- confirm every such
-- pattern against the canonical module before relying on it.
-- @param text the string to transliterate
-- @param lang language code (not read anywhere in this body)
-- @param sc script code (not read anywhere in this body)
-- @return the transliterated string
function export.tr(text, lang, sc)
--define the "end" of a word
-- Protect literal "#" first, because "#" is used as the word-boundary marker.
text = gsub(text, "#", "HASHTAG")
text = gsub(text, " | ", "# | #")
text = gsub(text, "\n" , "#".."\n" .. "#")
text = "##" .. gsub(text, " ", "# #") .. "##"
-- hashtags now mark the beginning and end of a word
-- EXCEPTIONS - leave as they are, unless they have been sorted out elsewhere
text = gsub(text, "ن٘", "ṉ")
-- NOTE(review): this emits "allâh" while the ligature entry in `mapping` is
-- "allāh" -- confirm which spelling is intended.
text = gsub(text, "الله", "allâh")
-- diacritics
text = gsub(text, pesh .. waw .. jazm .. "", "u")
text = gsub(text, jazm .. alif, "â")
-- Initial alif
-- Longer alif sequences are handled before their shorter prefixes.
text = gsub(text, alif .. zwar .. '()', "a%1")
text = gsub(text, alif .. zer .. ye .. jazm .. "", "i")
text = gsub(text, alif .. zer, "ĭ")
text = gsub(text, alif .. waw .. jazm .. "", "o")
text = gsub(text, alif .. pesh .. waw, "u")
-- NOTE(review): as written this cannot match, because the previous rule has
-- already rewritten every alif+pesh+waw sequence.
text = gsub(text, alif .. pesh .. waw .. jazm .. "", "u")
text = gsub(text, alif .. pesh, "ŭ")
-- Tashdeed
-- The replacements repeat a capture ("%1%1") to double it.
text = gsub(text, '()' .. tashdid, "%1%1")
text = gsub(text, '()' .. tashdid .. '()', "%1%1%2")
text = gsub(text, '()' .. ye .. '()' .. tashdid, "%1yy%2")
text = gsub(text, '()' .. waw .. '()' .. tashdid, "%1ww%2")
-- For some reason the tashdeed gets pushed after the other diacritics, so this line is necessary for tashdeed to work with other diacritics
text = gsub(text, '()' .. '()' .. tashdid, "%1%1%2")
-- tanween diacritic / no need to mess about
text = gsub(text, '()' .. 'ً' .. alif, "%1an")
text = gsub(text, alif .. 'ً', "an")
text = gsub(text, '()' .. 'ً', "%1an")
-- tall zwar -- / no need to mess about
text = gsub(text, '()' .. 'ٰ', "á")
text = gsub(text, '()' .. 'ٰ' .. '()', "%1á")
-- ‘ain
-- NOTE(review): the next replacement uses ‘ whereas the other ʻain rules emit
-- ʻ -- confirm which character is intended.
text = gsub(text, alif .. ain , "â‘")
text = gsub(text, ain .. alif .. '()', "ʻâ%1")
text = gsub(text, '()' .. ain .. he, "%1ʻa")
text = gsub(text, '()' .. '(?)' .. ain, "%1%2ʻ")
text = gsub(text, ain .. zer .. '()', "ʻĭ%1")
text = gsub(text, ain .. pesh .. '()', "ʻŭ%1")
text = gsub(text, ain .. zer .. ye .. '()', "ʻi%1")
text = gsub(text, ain .. pesh .. waw .. '()', "ʻu%1")
--- alif
text = gsub(text, '()' .. zwar .. alif, "%1â")
text = gsub(text, '()' .. alif, "%1â")
text = gsub(text, '()' .. tashdid .. alif, "%1%1â")
-- Word-initial alif + vowel letter ("#" is the word-boundary marker; it is
-- consumed by these replacements).
text = gsub(text, "#" .. alif .. ye, "i")
text = gsub(text, "#" .. alif .. waw, "o")
text = gsub(text, "#" .. alif .. ye3, "e")
text = gsub(text, '()' .. alif .. ye .. waw, "%1âyo")
-- waw
text = gsub(text, waw .. '()', "w%1")
text = gsub(text, 'ُو', "u")
-- medial/final consonants
--- (e) -- works
text = gsub(text, '()' .. ye .. jazm .. '()', "%1i%2")
--- he
-- Rules ending in "#" apply only at the end of a word and consume the marker.
text = gsub(text, '()' .. zwar .. he .. zer .. ye, "%1ahi")
text = gsub(text, '()' .. zwar .. he .. alif, "%1ahâ")
text = gsub(text, zwar .. he .. '()', "ah%1")
text = gsub(text, '()' .. zwar .. he, "%1ah")
text = gsub(text, '()' .. he .. "#", "%1a")
text = gsub(text, jazm .. waw .. he, "wa")
text = gsub(text, jazm .. ye .. he, "ya")
text = gsub(text, '()' .. '()' .. he .. "#", "%1%2a")
text = gsub(text, '()' .. waw .. he .. "#", "%1oh")
text = gsub(text, '()' .. ye .. he .. "#", "%1ih")
--- waw
text = gsub(text, '()' .. waw, "%1o")
text = gsub(text, '()' .. tashdid .. waw, "%1%1o")
text = gsub(text, zer .. ye .. waw, "io")
text = gsub(text, '()' .. ye .. waw .. jazm, "%1iw")
text = gsub(text, '()' .. ye .. waw .. "#", "%1yo")
text = gsub(text, '()' .. ye .. waw .. '()', "%1yo%2")
text = gsub(text, '()' .. '()' .. waw, "%1%2o")
text = gsub(text, ye .. waw, "yo")
-- NOTE(review): as written this cannot match, because the previous rule has
-- already rewritten every remaining ye+waw sequence.
text = gsub(text, "#" .. ye .. waw, "yo")
text = gsub(text, '()' .. tashdid .. zer .. ye .. jazm .. alif, "%1%1iyâ")
text = gsub(text, '()' .. zer .. ye .. alif, "%1iâ")
--- ye
--text = gsub(text, '()' .. ye .. waw .. ye, "%1iwi")
--text = gsub(text, '()' .. waw .. ye .. jazm, "%1oy")
text = gsub(text, ye .. zwar .. alif, "yâ")
text = gsub(text, '()' .. ye .. '()', "%1i%2")
text = gsub(text, '()' .. ye .. "#", "%1i")
-- The next three rules insert "a" between captures.
-- NOTE(review): with empty captures these patterns match at every position;
-- presumably each '()' originally held a character class -- TODO confirm.
text = gsub(text, '()' .. '()', "%1a%2")
text = gsub(text, '()' .. '()' .. '()', "%1%2a%3")
text = gsub(text, "#" .. '()' .. '()', "%1a%2")
-- get rid of hashtags (not needed)
text = gsub(text, "#", "")
-- Restore the literal "#" characters protected at the top of the function.
text = gsub(text, "HASHTAG", "#")
-- Per-character lookup of whatever is left in `mapping` (assuming gsub follows
-- string.gsub table-replacement semantics, characters without an entry are
-- kept as they are).
text = gsub(text, '.', mapping)
-- Latin-side clean-up: merge short vowel + glide/long vowel pairs.
text = gsub(text, 'ĭy', "i")
text = gsub(text, 'ŭw', "u")
text = gsub(text, 'ĭi', "i")
text = gsub(text, 'ŭu', "u")
-- Drop an alif left directly before a Latin "a", then merge a doubled "a".
text = gsub(text, "اa", "a")
text = gsub(text, 'aa', "â")
--
return text
end
-- Module return value: the table exposing `tr`.
return export