-- Language code in page name (User:Sinonquoi/ks) not recognized.
local u = mw.ustring.char -- unicode
local gsub = mw.ustring.gsub -- string manipulation
-- TODO
-- 1. Aspirate aspirables
-- 2. Vowels with fixed characters (ā, ạ̄, ū, o, ō, e, ē, ọ, ọ̄)
-- 3. Initial variants of vowels (with alef)
-- 4. Medial variants of vowels (diacritics or standalone)
-- 5. Final variants of vowels (same as medial except: e, ē)
-- 6. Treat final yē and vāv differently depending on what comes before
-- 7. Treat final hē with vowel diacritic
-- 8. Fixed consonants and provided vowels
-- 9. Disregard hat for nūn
-- 10. Work around hat for rē
-- 11. yē with hat is a palatal
-- 12. Kashmiri yē medial is 'a; final '
-- 12b. Check support for compound words
-- 13. Add vowels to consonants
-- 14. Add vowels to dual role characters
-- Disallow characters or sequences thereof including
-- 'Combined' unicode endpoints as with vav + hamza, ye + hamza, etc.
-- Final consonant + he + short vowel; should be Consonant + short vowel + he
-- Vowel + Vowel
-- Consonant + consonant (-r with ihat, -y with ihat)
-- Character that cannot take hat/ihat + hat/ihat
-- Long standalone vowel carriers at the beginning without ALIF preceding
-- Consonant that is not aspirable preceded by ducheshm
-- How to add vowels
-- Make list of consonants and vowels
-- Check if conditions are met (C+V; alef initial+V; vowel carrier + V)
-- Change
-- Module table returned at the end of the file.
local export = {}
-- helper functions
-- Forward declaration: `disallow` is defined after export.transliterate,
-- which calls it.
local disallow
-- consonants listed as aspirable (see TODO item 1)
local aspirable = "پتٹچژک"
local aspirable_extended = aspirable .. "زجبگدذ"
local ducheshm = "ھ"
-- DIACRITICS
local hat = u(0x065A) -- V
local ihat = u(0x065B) -- inverted V
local hats = hat .. ihat
-- characters that can take an inverted hat
local ihattable = "رنی"
-- important characters
local alif = "ا" -- either long ā post-initially or vowel carrier initially
local ye = "ی" -- either long ī terminally or ē medially, consonant otherwise
local he = "ہ" -- either ø finally before a consonant + short vowel or a consonant otherwise
local vav = "و"
-- short vowel diacritics
local sv_a = u(0x064E) -- zabar (a)
local sv_u = u(0x064F) -- pesh (u)
local sv_i = u(0x0650) -- zer (i)
local sv_a2 = u(0x0654) -- hamza above (ạ)
local sv_u2 = u(0x0655) -- hamza below (ụ)
-- long vowel diacritics
local lv_u2_long = u(0x065F) -- with consonant post-initially, with alif initially (ụ̄)
local lv_u_long = u(0x0657) -- only with vav post-initial, inverted pesh (ū)
local lv_i_long = u(0x0656) -- only with medial ye, subscript alif (ī)
local short_vowel_diacritics = sv_a .. sv_u .. sv_i .. sv_a2 .. sv_u2
local vowel_diacritics = short_vowel_diacritics .. lv_u2_long
-- carrier + diacritic combos
-- will this even work?
local long_u = vav .. lv_u_long -- ū, preceded by consonant medially or ALIF initially
local long_i = ye .. lv_i_long -- ī, preceded by consonant medially or ALIF initially
local long_vowels = long_u .. long_i
local short_o = vav .. ihat -- o, preceded by consonant medially or ALIF initially
local short_ye = ye .. ihat -- e, preceded by consonant medially or ALIF initially
-- ye on its own at the end is ī
-- Aliases of `hat` / `ihat`, kept because the disabled draft rules in
-- export.transliterate refer to these names. They reuse the values defined
-- above instead of repeating the code points, and `hats` is declared only once.
local v_sign = hat -- V
local inverted_v_sign = ihat -- inverted V
-- numerals
-- Lookup table used by export.transliterate to replace each numeral character
-- with its ASCII digit (characters without an entry are left unchanged by gsub).
-- NOTE(review): the keys were missing from this table, which made the
-- constructor a syntax error. They are restored here as the Extended
-- Arabic-Indic digits U+06F0–U+06F9 — confirm this is the intended numeral set.
local numeral_sub = {
	["۰"] = '0', ["۱"] = '1', ["۲"] = '2', ["۳"] = '3', ["۴"] = '4',
	["۵"] = '5', ["۶"] = '6', ["۷"] = '7', ["۸"] = '8', ["۹"] = '9',
}
-- Substitutions to apply before the main conversion; currently empty and unused.
local presubs = {
}
-- local conv = {
-- = 'b', = 'p', = 't', = 'ṭ', = 's',
-- = 'j', = 'c', = 'h', = 'kh',
-- = 'd', = 'ḍ', = 'z',
-- = 'r', = 'ḍ', = 'z', = 'ċ',
-- = 's', = 'ś', = 's', = 'z',
-- = 't', = 'z',
-- = 'ʿ', = 'ġ',
-- = 'f', = 'q',
-- = 'k', = 'g',
-- = 'l', = 'm', = 'n',
-- = 'h',
-- -- treatment varies
-- = 'h',
-- -- extended set
-- = 'y', = 'v',
-- }
-- local vowels_conv = {
-- = 'a', = 'u', = 'i', = 'ạ', = 'ụ', = 'ụ̄',
-- }
-- local consonants = 'بپتٹثجچحخدڈذرڑزژسشصضطظعغفقکگلمنھ'
-- local consonants_extended = 'بپتٹثجچحخدڈذرڑزژسشصضطظعغفقکگلمنھوی'
--- Transliterate `text`.
-- At present only two steps are active: every character with an entry in
-- `numeral_sub` is replaced, and the result is handed to `disallow`.
-- The remaining rules are disabled drafts kept below for reference.
-- @param text input string
-- @return the numeral-converted text, or nil when `disallow` rejects it
function export.transliterate(text)
	-- Map each character through numeral_sub; characters without an entry
	-- are kept as they are.
	local converted = gsub(text, ".", numeral_sub)
	local rejected = disallow(converted)
	if rejected then
		return nil
	end
	-- Disabled draft rules (not executed):
	-- ASPIRATE
	-- text = gsub(text, aspirable .. aspirate_h, "hhhh")
	-- REMOVE HAT FROM NŪN and RĒ
	-- text = gsub(text, '()' .. inverted_v_sign, "%1")
	-- -- C2=r/palatal
	-- text = gsub(text, '()()' .. inverted_v_sign, "%1%2")
	-- -- YĒ with INVERTED HAT
	-- text = gsub(text, 'ی' .. inverted_v_sign, "\'")
	-- -- FINAL HALF-YĒ IS A PALATAL
	-- text = gsub(text, 'ؠ$', "\'")
	-- -- BEFORE A SPACE
	-- text = gsub(text, 'ؠ+', "\' ")
	-- -- MEDIAL HALF-YĒ IS 'a
	-- text = gsub(text, '()ؠ()', "%1\'a%2")
	-- -- CONSONANT + VOWEL
	-- text = gsub(text,
	-- '()()',
	-- function(c,v)
	-- return conv .. vowels_conv
	-- end)
	-- -- text = gsub(text, '()', vowels_conv)
	-- -- FINAL HE + VOWEL
	-- -- text = gsub(text, 'ہ()$', short_vowels)
	-- text = gsub(text, '', conv)
	return converted
end
--- Decide whether `text` must be rejected before transliteration.
-- Every match of the disallowed-sequence pattern is stripped from `text`;
-- if at least one match was removed, the event is tracked and logged as
-- "failed".
-- @param text string to check (export.transliterate passes the
--   numeral-converted input)
-- @return true when `text` is empty after stripping, false otherwise
function disallow(text)
	-- Placeholder pattern, only referenced by the disabled check below
	-- (presumably sequences not allowed word-initially — TODO confirm).
	local bad_initial = ""
	-- if mw.ustring.find(text, bad_initial) then
	-- Placeholder for the pattern of disallowed characters/sequences.
	local bad_sequences = ""
	-- An empty pattern matches the empty string at every position, so gsub
	-- would always report a non-zero count and every call would be tracked
	-- and logged as "failed". Skip the scan until a real pattern is filled in.
	if bad_sequences ~= "" then
		local count
		text, count = gsub(text, bad_sequences, "")
		if count > 0 then
			require("Module:debug").track("failed")
			mw.log("failed")
		end
	end
	return #text == 0
end
return export