local tests = require('Module:UnitTests')
local fa_translit = require('Module:User:Babr/fa-translit-fix')
local full_link = require('Module:links').full_link
local fa = require('Module:languages').getByCode('fa')
local rlm = require("Module:string/char")(0x200F) -- right-to-left mark
--- Return the result of `full_link` for `word`.
-- The call uses the module-level `fa` language object and passes
-- `tr = "-"` (presumably this suppresses the link's own transliteration --
-- confirm against Module:links).
-- @param word  term handed to `full_link`
local function link(word)
	local link_data = {
		lang = fa,
		term = word,
		tr = "-",
	}
	return full_link(link_data)
end
--TO DO
--- Check a single transliteration example.
-- Calls `fa_translit.tr(arab, 'fa', 'Persian')` and passes its result to
-- `self:equals` together with `roman`; the first argument given to
-- `equals` is `link(arab)`.
-- @param arab   input text handed to the transliteration module
-- @param roman  expected result (nil for some examples)
function tests:do_test_translit(arab, roman)
	local label = link(arab)
	local actual = fa_translit.tr(arab, 'fa', 'Persian')
	self:equals(label, actual, roman)
end
--- Transliteration examples for Persian.
-- Builds a list of { input, expected } pairs and passes it to `self:iterate`
-- together with the method name "do_test_translit" (presumably `iterate`
-- calls that method once per pair -- confirm against Module:UnitTests).
-- Several pairs have nil as the expected value.
function tests:test_translit_persian()
local examples = {
-- The first input has the right-to-left mark `rlm` appended.
{ 'سَرْاَنْجَام' .. rlm, "sar-anjām" },
{ 'کُروز', "kurōz" },
{ 'دَهْ', "dah"}, -- always "h" near a vowel
{ 'دَه', "da"}, -- always "a" when there's no vowel
{ 'سُؤَال', "su'āl" },
{ 'کُرُوز', "kurūz" }, -- this word is intentionally incorrect
{ 'وَاوْ', "wāw" },
{'نَوْروز', "nawrōz"},
{ 'قَهْوَهاِی', "qahwa-ī" }, -- case for هَای as a-'ī; can be changed to yī
{ 'قَهْوَهیِی', "qahwa-yī" }, -- case for هیِی as a-'ī
{'خْوَانْدَن', "xwāndan"}, -- case for خوَا as xwā
{'خْویش', "xwēš"},
{'خْوَد', "xwad"}, -- case for خو as xo (Iranian Persian) and xwad (Classical Persian)
{ 'چَامَهسَرَایِی', "čāma-sarāyī" },
{ 'طَنِین', "tanīn" },
{ 'لِهٰذَا', "lihāzā" }, -- can be lehāzā if majhul diacritics are shown
{ 'قَهْرًا', "qahran" },
{ 'عَصاً', "asan" }, -- either placement works
{ 'خَانَه', "xāna" },
{ 'کورِیَایِ شُمَالِی', "kōriyā-yi šumālī" },
{ 'ضَمَّه', "zamma" },
{ 'ضَمِّهْ', "zammih" }, -- force inclusion of the final -h with diacritics
{ 'کِه', "ki"},
{ 'کِهْ', "kih" }, -- another example of forcing the h
{ 'اَرْمَنِسْتَان', "armanistān" },
{ 'بَاکُو', "bākū" },
{ 'کَسی', "kasē"}, -- word-final -ē
{"بَرَادَرِ بُزُرْگ", "barādar-i buzurg"}, -- izafa/ezafe marked with a dash (-)
{'قُرُونِ وُسْطیٰ', "qurūn-i wustā" }, -- a dagger alif case
{'دَر-آمَد', "dar-āmad" }, -- alif madda test
{ 'بَازِیِ شَطْرَنْج', "bāzī-yi šatranj" },
{ 'ایرَانِیَان', "ērāniyān"},
{ 'سُؤَال', "su'āl"}, -- if majhul diacritics are used then so'āl (same pair as an earlier entry)
{ 'صُبَاح', "subāh" },
{ 'صُبْح', "subh"},
{ 'صُبْه', "subh"}, -- purposefully misspelt
{ "دُروغ گویْ", "durōğ gōy"}, -- semivowels become consonants with jazm
-- NOTE(review): the comment on the next entry talks about "wa", but the
-- entries for the conjunction appear to be the two after it -- confirm
-- whether the comments are shifted by one line.
{ 'او', "ō" }, -- transliteration of wa, beginning of sentence
{ 'وَ', "wa" }, -- transliteration of wa, elsewhere (requires two spaces on both sides)
{ ' و ', " u " },
{ 'بَه نَامِ خُدَا', "ba nām-i xudā"},
{ 'جَوَانِی', "jawānī"},
{ 'شَاهْنَامَه', "šāhnāma"},
{ 'زِنْدَگِی', "zindagī"},
{ 'زِنْدَهگِی', "zinda-gī"},
{ 'میوَهٔ جَاپَانِی', "mēwa-yi jāpānī" },
{ 'نُوید', "nuwēd"}, -- nawīd/navid in modern Persian
{ 'دُخْتَرَْبَچَّه',"duxtar-bačča"},
{ 'کِیَه', "kiya" },
{ 'کُرُوَاسِیَا', "kuruwāsiyā" }, -- moderate vowel test
{ 'مِیَایِین', "miyāyīn"}, -- more complex vowel test
{ 'مْیَایین', "myāyēn"}, -- more complex vowel test
{ 'طِلَّا', "tillā" },
{ 'لیکِن', "lēkin" },
{ 'بَچَّهٔ لَطِیفَه کَلَان اَسْت', "bačča-yi latīfa kalān ast" },
{ 'مَعْرُوف و مَجْهُول', "ma'rūf u majhūl"},
{ 'مَعْرُوف وَ مَجْهُول', "ma'rūf wa majhūl"},
-- The following inputs carry few or no vowel diacritics and expect nil
-- (presumably the module declines to transliterate them -- confirm).
{ 'اَرمنستان', nil },
{ 'باکو', nil },
{ 'تصویر', nil },
{ 'کسی', nil }, -- word-final -ē
{"برادر بزرگ", nil }, -- izafa/ezafe marked with a dash (-)
{'قرون وسطی', nil }, -- a dagger alif
{'وَٱللّٰه', "wal-lāh"}, -- alef wasla
-- uncommon characters
{'کَسے',
'kasē'}, -- treated like normal ye
{'کَٹَه',
'kaṭa'}, -- Hazaragi retroflexes
-- Arabic al-
{'آیَةُاللّٰه',
'āyatu-l-lāh'},
{'فِالْحَال',
'fi-l-hāl'},
{'بویِ تُو',
'bō-yi tū'},
{'بِسْمِ اللّٰهِ الْرَّحْمٰنِ الْرَّحِیم',
'bismi l-lāhi r-rahmāni r-rahīm'},
{'اِیَالَاتِ مُتَّحِدَه',
'iyālāt-i muttahida'},
{'دَارُ الخَلَافَه',
'dāru l-xalāfa'},
{'اَبُو الهَوْد',
'abū l-hawd'},
{'یی', 'yē'},
{'ویژَه', 'vēža'}
}
-- Run every pair in `examples` through the "do_test_translit" method.
self:iterate(examples, "do_test_translit")
end
-- Return the test table (the value obtained from Module:UnitTests, extended
-- with the methods defined in this file) as this module's result.
return tests