15 of 75 tests failed. (refresh)
| | Text | Expected | Actual | Differs at |
---|---|---|---|---|
![]() | سَرْاَنْجَام | sar-anjām | sar-anjām | |
![]() | کُروز | kurōz | kurwz | 4 |
![]() | دَهْ | dah | dah | |
![]() | دَه | da | da | |
![]() | سُؤَال | su'āl | su'āl | |
![]() | کُرُوز | kurūz | kurūz | |
![]() | وَاوْ | wāw | wāw | |
![]() | نَوْروز | nawrōz | nawrwz | 5 |
![]() | قَهْوَهاِی | qahwa-ī | qahwa-ī | |
![]() | قَهْوَهیِی | qahwa-yī | qahwa-yī | |
![]() | خْوَانْدَن | xwāndan | xwāndan | |
![]() | خْویش | xwēš | xwyš | 3 |
![]() | خْوَد | xwad | xwad | |
![]() | چَامَهسَرَایِی | čāma-sarāyī | čāma-sarāyī | |
![]() | طَنِین | tanīn | tanīn | |
![]() | لِهٰذَا | lihāzā | lihāzā | |
![]() | قَهْرًا | qahran | qahran | |
![]() | عَصاً | asan | asan | |
![]() | خَانَه | xāna | xāna | |
![]() | کورِیَایِ شُمَالِی | kōriyā-yi šumālī | kwriyā-yi šumālī | 2 |
![]() | ضَمَّه | zamma | zamma | |
![]() | ضَمِّهْ | zammih | zammih | |
![]() | کِه | ki | ki | |
![]() | کِهْ | kih | kih | |
![]() | اَرْمَنِسْتَان | armanistān | armanistān | |
![]() | بَاکُو | bākū | bākū | |
![]() | کَسی | kasē | kasy | 4 |
![]() | بَرَادَرِ بُزُرْگ | barādar-i buzurg | barādar-i buzurg | |
![]() | قُرُونِ وُسْطیٰ | qurūn-i wustā | qurūn-i wustā | |
![]() | دَر-آمَد | dar-āmad | dar-āmad | |
![]() | بَازِیِ شَطْرَنْج | bāzī-yi šatranj | bāzī-yi šatranj | |
![]() | ایرَانِیَان | ērāniyān | ērāniyān | |
![]() | سُؤَال | su'āl | su'āl | |
![]() | صُبَاح | subāh | subāh | |
![]() | صُبْح | subh | subh | |
![]() | صُبْه | subh | subh | |
![]() | دُروغ گویْ | durōğ gōy | durwğ gwy | 4 |
![]() | او | ō | ō | |
![]() | وَ | wa | wa | |
![]() | و | u | u | |
![]() | بَه نَامِ خُدَا | ba nām-i xudā | ba nām-i xudā | |
![]() | جَوَانِی | jawānī | jawānī | |
![]() | شَاهْنَامَه | šāhnāma | šāhnāma | |
![]() | زِنْدَگِی | zindagī | zindagī | |
![]() | زِنْدَهگِی | zinda-gī | zinda-gī | |
![]() | میوَهٔ جَاپَانِی | mēwa-yi jāpānī | mywa-yi jāpānī | 2 |
![]() | نُوید | nuwēd | nuwyd | 4 |
![]() | دُخْتَرَْبَچَّه | duxtar-bačča | duxtar-bačča | |
![]() | کِیَه | kiya | kiya | |
![]() | کُرُوَاسِیَا | kuruwāsiyā | kuruwāsiyā | |
![]() | مِیَایِین | miyāyīn | miyāyīn | |
![]() | مْیَایین | myāyēn | myāyyn | 5 |
![]() | طِلَّا | tillā | tillā | |
![]() | لیکِن | lēkin | lykin | 2 |
![]() | بَچَّهٔ لَطِیفَه کَلَان اَسْت | bačča-yi latīfa kalān ast | bačča-yi latīfa kalān ast | |
![]() | مَعْرُوف و مَجْهُول | ma'rūf u majhūl | ma'rūf u majhūl | |
![]() | مَعْرُوف وَ مَجْهُول | ma'rūf wa majhūl | ma'rūf wa majhūl | |
![]() | اَرمنستان | (nil) | (nil) | N/A |
![]() | باکو | (nil) | (nil) | N/A |
![]() | تصویر | (nil) | tswyr | N/A |
![]() | کسی | (nil) | (nil) | N/A |
![]() | برادر بزرگ | (nil) | (nil) | N/A |
![]() | قرون وسطی | (nil) | (nil) | N/A |
![]() | وَٱللّٰه | wal-lāh | wal-lāh | |
![]() | کَسے | kasē | kasy | 4 |
![]() | کَٹَه | kaṭa | kaṭa | |
![]() | آیَةُاللّٰه | āyatu-l-lāh | āyatu-l-lāh | |
![]() | فِالْحَال | fi-l-hāl | fi-l-hāl | |
![]() | بویِ تُو | bō-yi tū | bw-yi tū | 2 |
![]() | بِسْمِ اللّٰهِ الْرَّحْمٰنِ الْرَّحِیم | bismi l-lāhi r-rahmāni r-rahīm | bismi l-lāhi r-rahmāni r-rahīm | |
![]() | اِیَالَاتِ مُتَّحِدَه | iyālāt-i muttahida | iyālāt-i muttahida | |
![]() | دَارُ الخَلَافَه | dāru l-xalāfa | dāru l-xalāfa | |
![]() | اَبُو الهَوْد | abū l-hawd | abū l-hawd | |
![]() | یی | yē | yy | 2 |
![]() | ویژَه | vēža | wyža | 1 |
-- Authors: Sameerhameedy
-- Short aliases for the Unicode-aware MediaWiki string helpers used below.
local U = mw.ustring.char
local gsub = mw.ustring.gsub
-- Module table; it is returned at the end of the file.
local export = {}
-- Arabic-script combining marks, mostly built from their code points.
local fatHataan = U(0x64B) -- اً, tanvin-e nasb (تنوین نصب)
local Dammataan = U(0x64C) -- un
local kasrataan = U(0x64D) -- in
local zabar = U(0x64E) -- U+064E ARABIC FATHA
local zer = U(0x650) -- U+0650 ARABIC KASRA
local pesh = U(0x64F) -- U+064F ARABIC DAMMA
local tashdid = U(0x651) -- also called shadda
local jazm = "ْ" -- written as a literal; referred to as jazm/sukoon in the rule comments below
local he = "ه"
local zwnj = U(0x200C) -- U+200C ZERO WIDTH NON-JOINER
local highhmz = U(0x654) -- U+0654 ARABIC HAMZA ABOVE
local lrm = U(0x200e) -- left-to-right mark
local rlm = U(0x200f) -- right-to-left mark
-- Extra letters appended to both consonant sets below.
local balticons = "ڃڇڑڗݜݨݩǩ"
local consonants = "بپتټٹثجچحخدډڈذرزژسشصضطظعغفقکگلمنؤهئء" .. balticons
local consonants2 = "ءبپتټٹثجچحخدډڈذرزژسشصضطظعغفقکگلمنوؤهیئywة" .. balticons -- including semivowels
local vowels = "āēīōū" -- Latin long-vowel letters
local semivowel = "یو"
local hes = "هح"
local diacritics = "َُِّْٰ"
local ZZP = "َُِ" -- presumably the three short-vowel marks (zabar, pesh, zer) — confirm
local alif_wasla = "ٱ"
-- Pattern fragment: whitespace class, apostrophe and double quote.
local space_like = "%s'" .. '"'
-- NOTE(review): this is an empty string, so every pattern that concatenates it
-- gets nothing added. It looks like a bracketed class built from `space_like`
-- was intended — confirm against the upstream module.
local space_like_class = ""
--- Per-character lookup table: export.tr passes it as the replacement argument
--- of its final `gsub(text, ".", mapping)` call.
--- NOTE(review): every entry below appears without its `[key]`, which is not
--- valid Lua table syntax as written; the keys must be restored from the
--- upstream module before this file can load.
--- The characters ټ ٹ ډ ڈ ے are included only for Mughal Persian and Hazaragi.
local mapping = {
= "ā",
= "b",
= "p",
= "t",
= "s",
= "j",
= "č",
= "h",
= "x",
= "d",
= "z",
= "r",
= "z",
= "ž",
= "s",
= "š",
= "s",
= "z",
= "t",
= "z",
= "ğ",
= "f",
= "q",
= "k",
= "g",
= "l",
= "m",
= "n",
= "w",
= "y",
= ".",
= "h",
= "'",
= "'",
= "'",
= "'",
= "'",
-- diacritics
= "a",
= "i",
= "u",
= "", -- also sukun - no vowel
= "-", -- ZWNJ (zero-width non-joiner)
= "-yi",
-- ligatures
= "lā",
= "allāh",
-- kashida
= "-", -- kashida, no sound
-- alif_wasla
= "", -- nothing
-- numerals
= "1",
= "2",
= "3",
= "4",
= "5",
= "6",
= "7",
= "8",
= "9",
= "0",
-- punctuation (leave on separate lines)
= "?", -- question mark
= ",", -- comma
= ";", -- semicolon
= "“", -- quotation mark
= "”", -- quotation mark
= "%", -- percent
= "‰", -- per mille
= ".", -- decimals
= ",", -- thousands
-- regional characters (FOR VERY SPECIFIC USE CASES)
= "ṭ",
= "ṭ",
= "ḍ",
= "ḍ",
-- balti
-- can't do anything about ژ because it conflicts with Persian
= "ž",
= "č̣",
= "ṛ",
= "dz",
= "ṣ",
= "ng",
= "ny",
= "h",
= "e",
}
-- Punctuation characters written as a Lua pattern fragment (magic characters
-- such as parentheses, `*` and braces are %-escaped).
local punctuation = ":%(%)%*&٫؛؟،ـ«\".'!»٪؉۔`,/–—%{%}"
-- Persian digits 1-9 followed by 0.
local numbers = "۱۲۳۴۵۶۷۸۹۰"
-- Individual letters referenced by name in the substitution rules.
local ain = "ع"
local alif = "ا"
local malif = "آ" -- alif with madda
local hamza = "ء"
local ye = "ی"
local ye2 = "ئ"
local vao = "و"
local dagger_alif = U(0x670) -- U+0670 ARABIC LETTER SUPERSCRIPT ALEF
local marbuta = U(0x629) -- U+0629 ARABIC LETTER TEH MARBUTA
local te = "ت"
local ye3 = "ے" -- rewritten to `ye` by before_diacritic_checking_subs
local laam = "ل"
-- NOTE(review): this is an empty string, so `"(" .. vowel .. ")"` in later
-- patterns is just `"()"`. A character class of vowels was presumably intended
-- — confirm against the upstream module.
local vowel = ""
local sun_letters = "تثدذرزسشصضطظلن"
-- Ordered list of { pattern, replacement } pairs. export.tr loops over this
-- list with gsub before it calls has_diacritics, so later rules see the output
-- of earlier ones and the order matters.
-- NOTE(review): several patterns contain an empty `()` capture or an empty
-- string literal where a bracketed character class would be expected; in a Lua
-- pattern `()` is a position capture. These look like classes that were lost —
-- confirm against the upstream module before relying on any individual rule.
local before_diacritic_checking_subs = {
------------ transformations prior to checking for diacritics --------------
{ "()" .. tashdid, tashdid .. "%1" },
-- alif + fatHataan (in either order) becomes zabar + nun
{ alif .. fatHataan, zabar .. "ن" },
{ fatHataan .. alif, zabar .. "ن" },
-- dagger alif is rewritten to zabar + alif
{ jazm .. ye .. dagger_alif, jazm .. ye .. zabar .. alif },
{ zabar .. "" .. dagger_alif, zabar .. alif },
{ ye .. dagger_alif, zabar .. alif }, -- the first letter is U+06CC
{ ye3, ye },
{ "", ye2 },
-- kashida
{ "^" .. "ـ" .. zabar .. alif , "ـ" .. malif },
{ "^" .. "ـ" .. "()" , "ـ" .. alif .. "%1" },
{ zabar .. dagger_alif, zabar .. alif },
{ dagger_alif, zabar .. alif },
{ fatHataan, zabar .. "ن" }, -- fatḥatan
{ Dammataan, pesh .. "ن" }, -- ḍammatan
{ kasrataan, zer .. "ن" }, -- kasratan
-- allah ligatures and arabic al
-- these rules introduce a temporary Latin "l-" marker that later rules
-- in this same list consume
{ alif_wasla .. laam , "l-" },
{ alif_wasla, "" },
{ "(" .. tashdid .. "?" .. ")" .. alif .. laam .. jazm .. "?" .. "()", "%1-l-%2" },
{ "(" .. tashdid .. "?" .. "" .. ")" .. alif .. laam .. jazm .. "?" .. "()", "%1-l-%2" },
{ "(" .. tashdid .. "?" .. "" .. space_like_class .. ")" .. alif .. laam .. jazm .. "?" .. "()", "%1l-%2" },
{ "(" .. tashdid .. "?" .. "" .. "" .. space_like_class .. ")" .. alif .. laam .. jazm .. "?" .. "()", "%1l-%2" },
{ marbuta .. "()" .. alif .. laam , te .. "%1-" .. laam .. "%-" },
{ "l%-" .. "()" .. tashdid, "%1" .. jazm .. "-%1" },
{ "l%-" .. laam .. tashdid, laam .. laam },
{ "l%-" .. laam, laam .. laam },
{ "l%-", laam .. "-" },
-- te marbuta: becomes te in the rules that match it with a following
-- capture, otherwise he
{ marbuta .. "()" .. alif, te .. "%1-" },
{ marbuta .. "()", te .. "%1" },
{ marbuta, he },
-- a consonants2 letter plus a ZZP mark, then alif + laam followed by jazm or
-- laam: the alif is dropped
{
"(["
.. consonants2
.. "]["
.. ZZP
.. "])("
.. space_like_class
.. ")"
.. alif
.. laam
.. "(["
.. jazm
.. laam
.. "])",
"%1%2" .. laam .. "%3",
},
{ laam .. laam .. tashdid, laam .. tashdid },
-- use jazm/sukoon to prevent this conversion
{ "(خ)" .. vao .. zabar .. alif, "%1" .. zabar .. alif },
{ "(خ)" .. vao .. zabar, "%1" .. pesh },
{ "(خ)" .. vao .. ye .. "()", "%1" .. ye .. "%2" },
-- izāfa
{ zwnj, "-" },
{ jazm .. alif, jazm .. "-" .. alif }, -- vowel killing, invisible ZWNJ
{ zabar .. jazm, "-" }, -- vowel killing, invisible ZWNJ
}
-- Ordered list of { pattern, replacement } pairs used only by has_diacritics.
-- Each rule deletes (or shrinks) a piece of the text; has_diacritics then
-- reports success when nothing at all is left.
-- NOTE(review): many entries here are `{ "", "" }` or contain an empty `()`
-- capture; they look like patterns whose bracketed character classes were
-- lost. As written, an empty pattern matches at every position and removes
-- nothing — confirm against the upstream module.
local has_diacritics_subs = {
-- this ensure allah ligatures and al- work
{ "l%-", "" },
{ "" .. jazm .. "%-" , "" },
{ "" .. "()" .. space_like_class .. alif .. laam , "" },
-- remove punctuation and tashdid
{ "", "" },
{ "$", "" },
{ "(" .. space_like_class .. ")", "%1" },
{ "%-", "-" },
-- these are required for arabic al- to work
{ "" .. "()" .. alif .. laam, laam },
{ "()%-" .. alif .. laam, laam },
-- remove CV pairs
-- consonants paired to alif
{ "" .. jazm, "" },
{ "" .. jazm .. malif, "" },
{ "" .. zabar .. alif, "" },
-- consonants paired to a semivowel
{
"()()",
"%1%2",
},
{ "", "" },
{ "", "" },
{ "", "" },
{ malif, "" }, -- counts as a CV pair
{ jazm .. alif .. "", "" },
{ "", "" },
{ "", "" },
-- remove numbers, hamzatu l-waṣl, alif madda and ZWNJ
{ "", "" },
{ "%s", "" },
{ "%-", "" },
{ "", "" },
{ "(" .. vowel .. ")", "" },
}
--- Report whether `text` is completely consumed by the `has_diacritics_subs`
-- rules.
--
-- Left-to-right and right-to-left marks are stripped first; if any were
-- present the tracking key "fa-translit/lrm or rlm" is recorded. Every rule in
-- `has_diacritics_subs` is then applied in order.
--
-- @param text string to check (already preprocessed by the caller)
-- @return true when nothing remains after all rules have run, false otherwise
local function has_diacritics(text)
	local count
	-- Match the two directional marks as a character class. The previous empty
	-- pattern matched at every position, so `count` was always positive (the
	-- tracker fired on every call) and no mark was actually removed.
	text, count = gsub(text, "[" .. lrm .. rlm .. "]", "")
	if count > 0 then
		require("Module:debug").track("fa-translit/lrm or rlm")
	end
	for _, sub in ipairs(has_diacritics_subs) do
		text = gsub(text, unpack(sub))
	end
	return #text == 0
end
--- Transliterate vocalised Persian text into Latin script.
--
-- May be called directly with a string, or with a frame-like table whose
-- `args[1]`, `args[2]`, `args[3]` and `args[5]` supply text, lang, sc and
-- force_translit respectively (empty strings count as absent). The fourth
-- argument is accepted for call compatibility but is not used here.
--
-- @param text string to transliterate, or a table with an `args` field
-- @param lang language code; not read by this function
-- @param sc script code; not read by this function
-- @return the transliterated string, or nil when the text does not pass
--         `has_diacritics` and force_translit was not supplied
--
-- NOTE(review): several patterns below contain an empty `()` capture or
-- concatenate the empty `vowel` / `space_like_class` strings; they look like
-- patterns whose bracketed character classes were lost. They are reproduced
-- unchanged here — confirm each against the upstream module.
function export.tr(text, lang, sc)
	-- Kept local so repeated calls do not leak state through a global.
	local force_translit
	if type(text) == "table" then
		local args = text.args
		local function f(x)
			return (x ~= "") and x or nil
		end
		-- Each positional argument is read by its own index.
		text, lang, sc = f(args[1]), f(args[2]), f(args[3])
		force_translit = f(args[5])
	end

	-- Each rule is a { pattern, replacement } pair.
	for _, sub in ipairs(before_diacritic_checking_subs) do
		text = gsub(text, sub[1], sub[2])
	end

	if not force_translit and not has_diacritics(text) then
		require("Module:debug").track("fa-translit/lacking diacritics")
		return nil
	end

	--define the "end" of a word
	-- literal "#" is protected first so it can be restored at the very end
	text = gsub(text, "#", "HASHTAG")
	text = gsub(text, "^", "#")
	text = gsub(text, "$", "#")
	text = gsub(text, " | ", "# | #")
	text = gsub(text, "%s", "# #")
	text = gsub(text, "\n", "#" .. "\n" .. "#")
	text = gsub(text, "()", "#" .. "%1" .. "#")
	text = "##" .. gsub(text, " ", "# #") .. "##"
	text = gsub(text, "%-", "#-#")
	-- hashtags now mark the beginning and end of a word

	--character reformatting and exceptions
	text = gsub(text, highhmz, "#" .. highhmz .. "#")
	--this ensures "and" is transliterated as a short vowel
	text = gsub(text, "#" .. vao .. "#", "#u#")
	text = gsub(text, "#" .. vao .. jazm .. malif, "#w-" .. malif )
	-- prevent izafa from converting until later

	-- Tashdeed
	text = gsub(text, "()" .. tashdid, "%1%1")
	text = gsub(text, "()" .. tashdid .. "()", "%1%1%2")
	text = gsub(text, "()" .. "()" .. tashdid, "%1%1%2")
	text = gsub(text, ye .. "()" .. tashdid, "yy%1")
	text = gsub(text, vao .. "()" .. tashdid, "ww%1")
	text = gsub(text, ye .. tashdid .. "()", "yy%1")
	text = gsub(text, vao .. tashdid .. "()", "ww%1")

	-- distinguish initial alif from vowel alif
	text = gsub(text, "()" .. zabar .. alif, "%1ā")
	text = gsub(text, "()" .. alif, "%1ā")
	text = gsub(text, jazm .. malif, "'ā") -- invisible ZWNJ
	text = gsub(text, "()" .. malif, "%1'ā")
	text = gsub(text, alif .. ye, "ē")
	text = gsub(text, alif .. vao, "ō")
	text = gsub(text, alif .. zer .. ye, "ī")
	text = gsub(text, alif .. pesh .. vao, "ū")
	text = gsub(text, tashdid .. alif, tashdid .. "ā")

	-- convert semi vowels
	text = gsub(text, ye .. "ā", "yā")
	text = gsub(text, vao .. "ā", "wā")
	text = gsub(text, vao .. "()", "w%1")
	text = gsub(text, ye .. "()", "y%1")
	text = gsub(text, ye .. "()()", "ē%1%2")
	text = gsub(text, vao .. "()()", "ō%1%2")
	text = gsub(text, "()" .. ye .. "()", "%1y%2")
	text = gsub(text, "()" .. vao .. "()", "%1w%2")
	text = gsub(text, "()" .. ye .. "()", "%1y%2")
	text = gsub(text, "()" .. vao .. "()", "%1w%2")

	-- conversions for vaav/waaw/vao
	text = gsub(text, pesh .. vao, "ū")
	text = gsub(text, vao .. "()", "w%1")
	text = gsub(text, "(" .. vowel .. ")" .. vao, "%1w")

	-- conversions for ye
	text = gsub(text, zer .. ye, "ī")
	text = gsub(text, ye .. "()", "y%1")
	text = gsub(text, "(" .. vowel .. ")" .. ye, "%1y")

	--Alif with short vowel
	text = gsub(text, alif .. "()", "%1")

	-- final changes
	-- izafa
	text = gsub(text, "ē" .. zer .. "#", "ē-yi#")
	text = gsub(text, zer .. "y" .. zer .. "#", "ī-yi#")
	text = gsub(text, "()" .. "y" .. zer .. "#", "%1-yi#")
	text = gsub(text, "()" .. zer .. "#", "%1-i#")
	text = gsub(text, '("\'")' .. "##" .. zer .. "#", "%1-i#")
	-- do not count zer as izafa before silent alif
	text = gsub(text, "%-i" .. "##" .. "(" .. space_like_class .. ")" .. "##" .. "(" .. jazm .. "#%-#" .. ")", "i%1%2")
	text = gsub(text, "%-i" .. "#%-#" .. "(" .. "#%-#" .. ")", "i-%1")

	-- he deletion
	text = gsub(text, "()" .. he .. "#" .. zwnj, "%1-")
	text = gsub(text, "()" .. he .. "#", "%1#")
	text = gsub(text, "#" .. ain , "#")

	-- get rid of hashtags (not needed)
	text = gsub(text, "#", "")
	-- restore the literal "#" characters protected at the start
	text = gsub(text, "HASHTAG", "#")
	text = string.gsub(text, lrm, "")
	text = string.gsub(text, rlm, "")

	-- convert all characters
	-- each remaining character is looked up in `mapping`
	text = mw.ustring.gsub(text, ".", mapping)

	-- alif
	-- Final corrections
	text = mw.ustring.gsub(text, "āa", "ā")
	text = mw.ustring.gsub(text, "aaa", "ā")
	text = mw.ustring.gsub(text, "āā", "ā")
	text = mw.ustring.gsub(text, "aa", "ā")
	text = mw.ustring.gsub(text, "ī" .. "()", "iy%1")
	text = mw.ustring.gsub(text, "ū" .. "()", "uw%1")
	text = mw.ustring.toNFC(text)
	return text
end

return export