This module will transliterate Classical Persian text.
The module should preferably not be called directly from templates or other modules.
To use it from a template, use {{xlit}}.
Within a module, use Module:languages#Language:transliterate.
For testcases, see Module:fa-cls-translit/testcases.
tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang. When the transliteration fails, returns nil.
This template transliterates Persian based on how it would be read in Classical Persian. Keep in mind that this template should be used with classical-style vocalization; a guide explaining Classical vocalization can be seen here.
This template may also be used for most eastern dialects of Persian, which do not have any standard vocalizations, but commonly use the classical vocalization. However, it will show vowel length for all dialects, even for dialects that no longer possess vowel length, such as Hazaragi.
This template should not be used for Modern Iranian Persian, for which Module:fa-ira-translit should be used instead. However, classical Iranian texts should use this module (all Iranian texts before the 13th or 16th century).
Test:
گُرُسْنَه چِه کَارِی کُنَد چِهِل نَر، کِه دَهْ لَک بَرْآیَد بَر او بیخَبَر، کِه پَیْمَانشِکَسْت بیدَرَنْگ آمَدَنْد، مِیَانِ تیغ و تِیر و تُفَنْگ آمَدَنْد
Result:
gurusna či kārī kunad čihil nar, ki dah lak bar'āyad bar ō bē-xabar, ki paymān-šikast bē-darang āmadand, miyān-i tēğ u tīr u tufang āmadand
Expected:
gurusna či kārī kunad čihil nar, ki dah lak bar-āyad bar ō bē-xabar, ki paymān-šikast bē-darang āmadand, miyān-i tēğ u tīr u tufang āmadand
2 of 74 tests failed. (refresh)
Text | Expected | Actual | Differs at | |
---|---|---|---|---|
سَرْاَنْجَام | sar-anjām | sar-anjām | ||
کُروز | kurōz | kurōz | ||
دَهْ | dah | dah | ||
دَه | da | da | ||
سُؤَال | su'āl | su'āl | ||
کُرُوز | kurūz | kurūz | ||
وَاوْ | wāw | wāw | ||
نَوْروز | nawrōz | nawrōz | ||
قَهْوَهاِی | qahwa-ī | qahwa-ī | ||
قَهْوَهیِی | qahwa-yī | qahwa-yī | ||
خْوَانْدَن | xwāndan | xwāndan | ||
خْویش | xwēš | xwēš | ||
خْوَد | xwad | xwad | ||
چَامَهسَرَایِی | čāma-sarāyī | čāma-sarāyī | ||
طَنِین | tanīn | tanīn | ||
لِهٰذَا | lihāzā | lihāzā | ||
قَهْرًا | qahran | qahran | ||
عَصاً | asan | asan | ||
خَانَه | xāna | xāna | ||
کورِیَایِ شُمَالِی | kōriyā-yi šumālī | kōriyā-yi šumālī | ||
ضَمَّه | zamma | zamma | ||
ضَمِّهْ | zammih | zammih | ||
کِه | ki | ki | ||
کِهْ | kih | kih | ||
اَرْمَنِسْتَان | armanistān | armanistān | ||
بَاکُو | bākū | bākū | ||
کَسی | kasē | kasē | ||
بَرَادَرِ بُزُرْگ | barādar-i buzurg | barādar-i buzurg | ||
قُرُونِ وُسْطیٰ | qurūn-i wustā | qurūn-i wustā | ||
دَر-آمَد | dar-āmad | dar-āmad | ||
بَازِیِ شَطْرَنْج | bāzī-yi šatranj | bāzī-yi šatranj | ||
ایرَانِیَان | ērāniyān | ērāniyān | ||
سُؤَال | su'āl | su'āl | ||
صُبَاح | subāh | subāh | ||
صُبْح | subh | subh | ||
صُبْه | subh | subh | ||
دُروغ گویْ | durōğ gōy | durōğ gōy | ||
او | ō | ō | ||
وَ | wa | wa | ||
و | u | u | ||
بَه نَامِ خُدَا | ba nām-i xudā | ba nām-i xudā | ||
جَوَانِی | jawānī | jawānī | ||
شَاهْنَامَه | šāhnāma | šāhnāma | ||
زِنْدَگِی | zindagī | zindagī | ||
زِنْدَهگِی | zinda-gī | zinda-gī | ||
میوَهٔ جَاپَانِی | mēwa-yi jāpānī | mēwa-yi jāpānī | ||
نُوید | nuwēd | nuwēd | ||
دُخْتَرَْبَچَّه | duxtar-bačča | duxtar-bačča | ||
کِیَه | kiya | kiya | ||
کُرُوَاسِیَا | kuruwāsiyā | kuruwāsiyā | ||
مِیَایِین | miyāyīn | miyāyīn | ||
مْیَایین | myāyēn | myāyēn | ||
طِلَّا | tillā | tillā | ||
لیکِن | lēkin | lēkin | ||
بَچَّهٔ لَطِیفَه کَلَان اَسْت | bačča-yi latīfa kalān ast | bačča-yi latīfa kalān ast | ||
مَعْرُوف و مَجْهُول | ma'rūf u majhūl | ma'rūf u majhūl | ||
مَعْرُوف وَ مَجْهُول | ma'rūf wa majhūl | ma'rūf wa majhūl | ||
اَرمنستان | (nil) | (nil) | N/A | |
باکو | (nil) | (nil) | N/A | |
تصویر | (nil) | tswēr | N/A | |
کسی | (nil) | (nil) | N/A | |
برادر بزرگ | (nil) | (nil) | N/A | |
قرون وسطی | (nil) | (nil) | N/A | |
وَٱللّٰه | wal-lāh | wal-lāh | ||
کَسے | kasē | kasē | ||
کَٹَه | kaṭa | kaṭa | ||
آیَةُاللّٰه | āyatu-l-lāh | āyatu-l-lāh | ||
فِالْحَال | fi-l-hāl | fi-l-hāl | ||
بویِ تُو | bō-yi tū | bō-yi tū | ||
بِسْمِ اللّٰهِ الْرَّحْمٰنِ الْرَّحِیم | bismi l-lāhi r-rahmāni r-rahīm | bismi l-lāhi r-rahmāni r-rahīm | ||
اِیَالَاتِ مُتَّحِدَه | iyālāt-i muttahida | iyālāt-i muttahida | ||
دَارُ الخَلَافَه | dāru l-xalāfa | dāru l-xalāfa | ||
اَبُو الهَوْد | abū l-hawd | abū l-hawd | ||
یی | yē | ēy | 1 |
-- Authors: Sameerhameedy
-- Shorthands for the Unicode-aware Scribunto string functions used throughout.
local U = mw.ustring.char
local gsub = mw.ustring.gsub
-- Table returned by this module; `export.tr` is attached to it below.
local export = {}
-- Tanwin marks; the pre-pass rewrites each one to a short vowel followed by nun.
local fatHataan = U(0x64B) -- اً, tanvin-e nasb (تنوین نصب)
local Dammataan = U(0x64C) -- un
local kasrataan = U(0x64D) -- in
-- Short-vowel marks.
local zabar = U(0x64E) -- U+064E FATHA
local zer = U(0x650) -- U+0650 KASRA
local pesh = U(0x64F) -- U+064F DAMMA
local tashdid = U(0x651) -- also called shadda
local jazm = "ْ" -- sukun: marks a consonant with no following vowel
local he = "ه"
local zwnj = U(0x200C) -- zero-width non-joiner
local highhmz = U(0x654) -- U+0654 HAMZA ABOVE
local lrm = U(0x200e) -- left-to-right mark
local rlm = U(0x200f) -- right-to-left mark
-- Letter sets, concatenated into pattern strings elsewhere in the module.
local balticons = "ڃڇڑڗݜݨݩǩ"
local consonants = "بپتټٹثجچحخدډڈذرزژسشصضطظعغفقکگلمنؤهئء" .. balticons
local consonants2 = "ءبپتټٹثجچحخدډڈذرزژسشصضطظعغفقکگلمنوؤهیئywة" .. balticons -- including semivowels
local vowels = "āēīōū"
local semivowel = "یو"
local hes = "هح"
local diacritics = "َُِّْٰ"
local ZZP = "َُِ"
local alif_wasla = "ٱ"
-- Characters treated like a space: whitespace, apostrophe, double quote.
local space_like = "%s'" .. '"'
-- NOTE(review): this is an empty string, so every pattern that embeds it as
-- "(" .. space_like_class .. ")" becomes a bare "()" position capture.
-- It presumably should be a bracketed class built from `space_like` — confirm
-- against the upstream module before relying on those patterns.
local space_like_class = ""
--- The characters ټ ٹ ډ ڈ ے are included only for Mughal Persian and Hazaragi.
-- Per-character replacement table handed to `mw.ustring.gsub(text, ".", mapping)`
-- at the end of `export.tr`.
-- NOTE(review): every entry below has lost its `[key]` part (each line starts
-- with a bare `=`), so this table constructor does not parse as written. The
-- keys must be restored from the upstream module; only the values and the
-- section comments survive here.
local mapping = {
= "ā",
= "b",
= "p",
= "t",
= "s",
= "j",
= "č",
= "h",
= "x",
= "d",
= "z",
= "r",
= "z",
= "ž",
= "s",
= "š",
= "s",
= "z",
= "t",
= "z",
= "ğ",
= "f",
= "q",
= "k",
= "g",
= "l",
= "m",
= "n",
= "ō",
= "ē",
= ".",
= "h",
= "'",
= "'",
= "'",
= "'",
= "'",
-- diacritics
= "a",
= "i",
= "u",
= "", -- also sukun - no vowel
= "-", -- ZWNJ (zero-width non-joiner)
= "-yi",
-- ligatures
= "lā",
= "allāh",
-- kashida
= "-", -- kashida, no sound
-- alif_wasla
= "", -- nothing
-- numerals
= "1",
= "2",
= "3",
= "4",
= "5",
= "6",
= "7",
= "8",
= "9",
= "0",
-- punctuation (leave on separate lines)
= "?", -- question mark
= ",", -- comma
= ";", -- semicolon
= "“", -- quotation mark
= "”", -- quotation mark
= "%", -- percent
= "‰", -- per mille
= ".", -- decimals
= ",", -- thousands
-- regional characters (FOR VERY SPECIFIC USECASES)
= "ṭ",
= "ṭ",
= "ḍ",
= "ḍ",
-- balti
-- can't do anything about ژ because it conflicts with persian
= "ž",
= "č̣",
= "ṛ",
= "dz",
= "ṣ",
= "ng",
= "ny",
= "h",
= "e",
}
-- Punctuation characters, pre-escaped with % where they are magic in Lua patterns.
local punctuation = ":%(%)%*&٫؛؟،ـ«\".'!»٪؉۔`,/–—%{%}"
-- Extended Arabic-Indic digits.
local numbers = "۱۲۳۴۵۶۷۸۹۰"
-- Individual letters referenced by name in the substitution rules.
local ain = "ع"
local alif = "ا"
local malif = "آ" -- alif with madda
local hamza = "ء"
local ye = "ی"
local ye2 = "ئ" -- ye with hamza above
local vao = "و"
local dagger_alif = U(0x670) -- U+0670 superscript alef
local marbuta = U(0x629) -- U+0629 teh marbuta
local te = "ت"
local ye3 = "ے" -- normalised to `ye` in the pre-pass
local laam = "ل"
-- NOTE(review): this is an empty string, so "(" .. vowel .. ")" is a bare "()"
-- position capture wherever it is used. It presumably should be a character
-- class of vowels — confirm against the upstream module.
local vowel = ""
local sun_letters = "تثدذرزسشصضطظلن"
-- Ordered list of {pattern, replacement} pairs that `export.tr` applies to the
-- input before it checks whether the text is vocalized. Order matters: later
-- rules match on the "l-" / "-" markers that earlier rules insert.
-- NOTE(review): many patterns below contain a bare "()" or an empty "" where a
-- bracketed character class would be expected. In Lua patterns "()" is a
-- position capture (it yields a number, not text) and "" matches at every
-- position, so these rules do not do what their comments describe as written.
-- The classes look like they were stripped in extraction; restore them from
-- the upstream module.
local before_diacritic_checking_subs = {
------------ transformations prior to checking for diacritics --------------
{ U(0x06E5), "و" }, -- U+06E5 ARABIC SMALL WAW -> regular waw
{ U(0x06E6), "ی" }, -- U+06E6 ARABIC SMALL YEH -> regular ye
{ "ہ", he }, -- get rid of balti he (allows balti to transliterate)
{ "ک" .. highhmz, "ǩ" },
-- move tashdid in front of the captured item (capture class missing, see note above)
{ "()" .. tashdid, tashdid .. "%1" },
-- tanwin written with alif becomes zabar + nun
{ alif .. fatHataan, zabar .. "ن" },
{ fatHataan .. alif, zabar .. "ن" },
-- dagger alif is rewritten to zabar + alif in its various contexts
{ jazm .. ye .. dagger_alif, jazm .. ye .. zabar .. alif },
{ zabar .. "" .. dagger_alif, zabar .. alif },
{ ye .. dagger_alif, zabar .. alif }, -- the first letter is U+06CC
{ ye3, ye },
-- NOTE(review): empty pattern; would insert ye2 at every position as written
{ "", ye2 },
-- kashida
{ "^" .. "ـ" .. zabar .. alif , "ـ" .. malif },
{ "^" .. "ـ" .. "()" , "ـ" .. alif .. "%1" },
{ zabar .. dagger_alif, zabar .. alif },
{ dagger_alif, zabar .. alif },
{ fatHataan, zabar .. "ن" }, -- fatḥatan
{ Dammataan, pesh .. "ن" }, -- ḍammatan
{ kasrataan, zer .. "ن" }, -- kasratan
-- allah ligatures and arabic al
{ alif_wasla .. laam , "l-" },
{ alif_wasla, "" },
{ "(" .. tashdid .. "?" .. ")" .. alif .. laam .. jazm .. "?" .. "()", "%1-l-%2" },
{ "(" .. tashdid .. "?" .. "" .. ")" .. alif .. laam .. jazm .. "?" .. "()", "%1-l-%2" },
{ "(" .. tashdid .. "?" .. "" .. space_like_class .. ")" .. alif .. laam .. jazm .. "?" .. "()", "%1l-%2" },
{ "(" .. tashdid .. "?" .. "" .. "" .. space_like_class .. ")" .. alif .. laam .. jazm .. "?" .. "()", "%1l-%2" },
{ marbuta .. "()" .. alif .. laam , te .. "%1-" .. laam .. "%-" },
{ "l%-" .. "()" .. tashdid, "%1" .. jazm .. "-%1" },
{ "l%-" .. laam .. tashdid, laam .. laam },
{ "l%-" .. laam, laam .. laam },
{ "l%-", laam .. "-" },
-- teh marbuta becomes te when something follows it, otherwise he
{ marbuta .. "()" .. alif, te .. "%1-" },
{ marbuta .. "()", te .. "%1" },
{ marbuta, he },
{
"(["
.. consonants2
.. "]["
.. ZZP
.. "])("
.. space_like_class
.. ")"
.. alif
.. laam
.. "(["
.. jazm
.. laam
.. "])",
"%1%2" .. laam .. "%3",
},
{ laam .. laam .. tashdid, laam .. tashdid },
-- use jazm/sukoon to prevent this conversion
{ "(خ)" .. vao .. zabar .. alif, "%1" .. zabar .. alif },
{ "(خ)" .. vao .. zabar, "%1" .. pesh },
{ "(خ)" .. vao .. ye .. "()", "%1" .. ye .. "%2" },
-- izāfa
{ zwnj, "-" },
{ jazm .. alif, jazm .. "-" .. alif }, -- vowel killing, invisible ZWNJ
{ zabar .. jazm, "-" }, -- vowel killing, invisible ZWNJ
}
-- Ordered list of {pattern, replacement} pairs used by `has_diacritics`: each
-- rule deletes (or shrinks) one kind of sequence, and the text counts as fully
-- vocalized only if nothing is left after all rules have run.
-- NOTE(review): most patterns here are an empty "" or a bare "()" — the
-- bracketed character classes appear to have been stripped in extraction. As
-- written, "" matches at every position and "()" is a position capture, so
-- these rules cannot be trusted until restored from the upstream module.
local has_diacritics_subs = {
-- this ensures allah ligatures and al- work
{ "l%-", "" },
{ "" .. jazm .. "%-" , "" },
{ "" .. "()" .. space_like_class .. alif .. laam , "" },
-- remove punctuation and tashdid
{ "", "" },
{ "$", "" },
{ "(" .. space_like_class .. ")", "%1" },
{ "%-", "-" },
-- these are required for arabic al- to work
{ "" .. "()" .. alif .. laam, laam },
{ "()%-" .. alif .. laam, laam },
-- remove CV pairs
-- consonants paired to alif
{ "" .. jazm, "" },
{ "" .. jazm .. malif, "" },
{ "" .. zabar .. alif, "" },
-- consonants paired to a semivowel
{
"()()",
"%1%2",
},
{ "", "" },
{ "", "" },
{ "", "" },
{ malif, "" }, -- counts as a CV pair
{ jazm .. alif .. "", "" },
{ "", "" },
{ "", "" },
-- remove numbers, hamzatu l-waṣl, alif madda and ZWNJ
{ "", "" },
{ "%s", "" },
{ "%-", "" },
{ "", "" },
{ "(" .. vowel .. ")", "" },
}
--- Report whether `text` is fully vocalized.
-- Directional marks are stripped first (their presence is tracked), then every
-- rule in `has_diacritics_subs` is applied in order; the text is considered
-- vocalized when nothing is left over.
-- @param text string, already normalised by `before_diacritic_checking_subs`
-- @return boolean true if the reduced text is empty
local function has_diacritics(text)
	local stripped, mark_count = gsub(text, "[" .. lrm .. rlm .. "]", "")
	-- Only track when an LRM/RLM was really present. The previous empty
	-- pattern matched at every position, so the count was always positive.
	if mark_count > 0 then
		require("Module:debug").track("fa-translit/lrm or rlm")
	end
	for _, sub in ipairs(has_diacritics_subs) do
		stripped = gsub(stripped, unpack(sub))
	end
	return #stripped == 0
end
--- Transliterate vocalized Classical Persian text into Latin script.
-- @param text string to transliterate, or a frame-like table whose
--             `args[1]`..`args[5]` hold text, lang, sc, omit_i3raab and
--             force_translit (empty strings are treated as absent)
-- @param lang language code; accepted for interface compatibility, not read here
-- @param sc   script code; accepted for interface compatibility, not read here
-- @return the transliteration, or nil when the text is not fully vocalized
--         and force_translit was not supplied
--
-- NOTE(review): many patterns in the body contain a bare "()" where a bracketed
-- character class would be expected. In Lua patterns "()" is a position
-- capture, so those lines do not behave as their comments describe; the
-- classes look like they were lost in extraction and must be restored from the
-- upstream module. They are left untouched here.
function export.tr(text, lang, sc)
	-- Declared local so the frame branch below does not create globals and the
	-- later `force_translit` read does not fall through to the global table.
	-- omit_i3raab is accepted positionally but never read.
	local omit_i3raab, force_translit
	if type(text) == "table" then
		local function f(x)
			return (x ~= "") and x or nil
		end
		text, lang, sc, omit_i3raab, force_translit =
			f(text.args[1]), f(text.args[2]), f(text.args[3]), f(text.args[4]), f(text.args[5])
	end
	-- Each entry is a {pattern, replacement} pair.
	for _, sub in ipairs(before_diacritic_checking_subs) do
		text = gsub(text, sub[1], sub[2])
	end
	if not force_translit and not has_diacritics(text) then
		require("Module:debug").track("fa-translit/lacking diacritics")
		return nil
	end
	--define the "end" of a word
	-- literal '#' in the input is parked as HASHTAG so '#' can serve as the word-boundary marker
	text = gsub(text, "#", "HASHTAG")
	text = gsub(text, "^", "#")
	text = gsub(text, "$", "#")
	text = gsub(text, " | ", "# | #")
	text = gsub(text, "%s", "# #")
	text = gsub(text, "\n", "#" .. "\n" .. "#")
	text = gsub(text, "()", "#" .. "%1" .. "#")
	text = "##" .. gsub(text, " ", "# #") .. "##"
	text = gsub(text, "%-", "#-#")
	-- hashtags now mark the beginning and end of a word
	--character reformatting and exceptions
	text = gsub(text, highhmz, "#" .. highhmz .. "#")
	--this ensures "and" is transliterated as a short vowel
	text = gsub(text, "#" .. vao .. "#", "#u#")
	text = gsub(text, "#" .. vao .. jazm .. malif, "#w-" .. malif )
	-- prevent izafa from converting until later
	-- Tashdeed
	text = gsub(text, "()" .. tashdid, "%1%1")
	text = gsub(text, "()" .. tashdid .. "()", "%1%1%2")
	text = gsub(text, "()" .. "()" .. tashdid, "%1%1%2")
	text = gsub(text, ye .. "()" .. tashdid, "yy%1")
	text = gsub(text, vao .. "()" .. tashdid, "ww%1")
	text = gsub(text, ye .. tashdid .. "()", "yy%1")
	text = gsub(text, vao .. tashdid .. "()", "ww%1")
	-- distinguish initial alif from vowel alif
	text = gsub(text, "()" .. zabar .. alif, "%1ā")
	text = gsub(text, "()" .. alif, "%1ā")
	text = gsub(text, jazm .. malif, "'ā") -- invisible ZWNJ
	text = gsub(text, "()" .. malif, "%1'ā")
	text = gsub(text, alif .. ye, "ē")
	text = gsub(text, alif .. vao, "ō")
	text = gsub(text, alif .. zer .. ye, "ī")
	text = gsub(text, alif .. pesh .. vao, "ū")
	text = gsub(text, tashdid .. alif, tashdid .. "ā")
	-- convert semi vowels
	text = gsub(text, ye .. "ā", "yā")
	text = gsub(text, vao .. "ā", "wā")
	text = gsub(text, vao .. "()", "w%1")
	text = gsub(text, ye .. "()", "y%1")
	text = gsub(text, ye .. "()()", "ē%1%2")
	text = gsub(text, vao .. "()()", "ō%1%2")
	text = gsub(text, "()" .. ye .. "()", "%1y%2")
	text = gsub(text, "()" .. vao .. "()", "%1w%2")
	text = gsub(text, "()" .. ye .. "()", "%1y%2")
	text = gsub(text, "()" .. vao .. "()", "%1w%2")
	-- conversions for vaav/waaw/vao
	text = gsub(text, pesh .. vao, "ū")
	text = gsub(text, vao .. "()", "w%1")
	text = gsub(text, "(" .. vowel .. ")" .. vao, "%1w")
	-- conversions for ye
	text = gsub(text, zer .. ye, "ī")
	text = gsub(text, ye .. "()", "y%1")
	text = gsub(text, "(" .. vowel .. ")" .. ye, "%1y")
	--Alif with short vowel
	text = gsub(text, alif .. "()", "%1")
	-- final changes
	-- izafa
	text = gsub(text, "ē" .. zer .. "#", "ē-yi#")
	text = gsub(text, zer .. "y" .. zer .. "#", "ī-yi#")
	text = gsub(text, "()" .. "y" .. zer .. "#", "%1-yi#")
	text = gsub(text, "()" .. zer .. "#", "%1-i#")
	text = gsub(text, '("\'")' .. "##" .. zer .. "#", "%1-i#")
	-- do not count zer as izafa before silent alif
	text = gsub(text, "%-i" .. "##" .. "(" .. space_like_class .. ")" .. "##" .. "(" .. jazm .. "#%-#" .. ")", "i%1%2")
	text = gsub(text, "%-i" .. "#%-#" .. "(" .. "#%-#" .. ")", "i-%1")
	-- he deletion
	text = gsub(text, "()" .. he .. "#" .. zwnj, "%1-")
	text = gsub(text, "()" .. he .. "#", "%1#")
	text = gsub(text, "#" .. ain , "#")
	-- get rid of hashtags (not needed)
	text = gsub(text, "#", "")
	text = gsub(text, "HASHTAG", "#")
	-- directional marks carry no sound; byte-level gsub is enough for fixed strings
	text = string.gsub(text, lrm, "")
	text = string.gsub(text, rlm, "")
	-- convert all characters
	text = mw.ustring.gsub(text, ".", mapping)
	-- alif
	-- Final corrections
	text = mw.ustring.gsub(text, "āa", "ā")
	text = mw.ustring.gsub(text, "aaa", "ā")
	text = mw.ustring.gsub(text, "āā", "ā")
	text = mw.ustring.gsub(text, "aa", "ā")
	text = mw.ustring.gsub(text, "ī" .. "()", "iy%1")
	text = mw.ustring.gsub(text, "ū" .. "()", "uw%1")
	text = mw.ustring.toNFC(text)
	return text
end
return export