-- Language code in page name (User:Santi2222/bsk) not recognized.
-- Helper modules and function aliases used throughout this file.
-- U is called below with Unicode code points to build single characters.
local U = require("Module:string/char")
local m_string_utilities = require("Module:string utilities")
local gsub = mw.ustring.gsub
local rfind = m_string_utilities.find
-- Table returned at the end of the module; public functions are attached to it.
local export = {}
-- Arabic combining marks, named by their code points.
local zabar = U(0x64E) -- U+064E ARABIC FATHA
local zer = U(0x650) -- U+0650 ARABIC KASRA
local pesh = U(0x64F) -- U+064F ARABIC DAMMA
local zwnj = U(0x200C) -- Is this even used in Burushaski?
local tashdid = U(0x651) -- U+0651 ARABIC SHADDA (gemination mark)
local jazm = U(0x652) -- U+0652 ARABIC SUKUN
local ghunna = U(0x658) -- U+0658 ARABIC MARK NOON GHUNNA
-- Character inventories kept as plain strings.
-- NOTE(review): none of the five strings below is referenced in the code
-- visible here; presumably they were interpolated into character classes
-- that are missing from the patterns further down — confirm.
local consonants = "تٹثجحڅدذڎرڑزسغفکنملگݣقعظطڞصضشژڈخچپب"
local cons_lat = "mnṅptṭkqbdḍgcćċjʐsśṣhzġlyỵw" --internally we use ċ/ʐ instead of c̣/j̣
local vowels_lat = "aáeéiíoóuú"
local diacritics = "َُِّْٰ"
-- presumably the three short-vowel marks (zabar, zer, pesh) — TODO confirm
local ZZP = "َُِ"
-- Directional marks; export.tr strips both from the input before mapping.
local lrm = U(0x200e) -- left-to-right mark
local rlm = U(0x200f) -- right-to-left mark
-- Basic character-to-transliteration mapping. export.tr passes this table as
-- the replacement argument of gsub(text, '.', mapping), so each single
-- character of the input is looked up here. Upper-case values such as "A",
-- "H", "W" and "Y" are intermediate placeholders that export.tr rewrites in
-- its later passes.
-- NOTE(review): every entry below is shown without its key (the part before
-- "="); presumably the bracketed source characters were lost when this text
-- was extracted. The table does not parse as shown — restore the keys from
-- the original module before use.
local mapping = {
= 'b', = 'p', = 't', = 'ṭ', = 's',
= 'j', = 'ć', = 'h', = "ch", = 'qh', = "ċ",
= 'd', = 'ḍ', = 'z', = "c", = 'r', = "ṛ", = 'z', = 'ż',
= 's', = "ṣ", = 'ś', = 's', = 'z', = "ċh",
= 't', = 'z', = 'ġ', = 'f', = 'q',
= 'k', = "ṅ", = 'g', = "h",
= 'l', = 'm', = 'n', = "ṇ",
= "ỵ", = "y", --FIXME: is this correct?
= "'", -- FIXME: do we need this?
= "A", = "H", = "W", = "U", = "Ú",
= "Y", = "I", = "Í",
--unambiguous vowels
= "a", = "e", = "áa", = "ée", = "aá", = "eé",
-- diacritics
= "ă",
= "ĭ",
= "ŭ",
= "@", -- also sukun - no vowel. we use a placeholder
= "-", -- ZWNJ (zero-width non-joiner)
-- kashida
= "-", -- kashida, no sound
-- numerals
= "1", = "2", = "3", = "4", = "5",
= "6", = "7", = "8", = "9", = "0",
-- punctuation (leave on separate lines)
= "?", -- question mark
= ".", -- period
= ",", -- comma
= ";", -- semicolon
= '“', -- quotation mark
= '”', -- quotation mark
= "%", -- percent
= "‰", -- per mille
= ".", -- decimals
= ",", -- thousand
}
-- Punctuation characters written as the inside of a Lua character class
-- ("-", "(", ")" and "*" are %-escaped).
-- NOTE(review): not referenced in the code visible here; presumably it was
-- used inside a [...] pattern that is missing — confirm.
local punctuation = "%-:%(%)%*&٫؛؟،ـ«\".\'!»٪؉۔"
-- Letters that export.tr handles specially around tashdid.
local ye = 'ی'
local vao = "و"
-- Aspiration letter; export.tr moves it in front of the captured character
-- that precedes it before the per-character mapping runs.
local aspirate = 'ھ'
-- NOTE(review): not referenced in the code visible here — confirm usage.
local aiu = "āīūآ"
-- declared as local above
-- Checks a transliterated string and returns a boolean: false as soon as
-- either rfind call below finds its pattern, true otherwise. Between the two
-- checks, every match of "()h" is replaced by its first capture.
-- NOTE(review): both rfind patterns are empty strings and "()h" contains an
-- empty capture group as shown here; presumably each held a character class
-- that was lost from this text. The intended validity rules cannot be read
-- off this code — confirm against the original module.
local function is_valid(text)
if rfind(text, "") then
return false
end
text = gsub(text, "()h", "%1")
if rfind(text, "") then
return false
end
return true
end
--- Transliterates Arabic-script text (Burushaski, going by the comments in
-- this file) into Latin script.
--
-- The text is processed in ordered passes: word boundaries are marked with
-- "#", tashdid/aspirate/ghunna sequences are rewritten, directional marks are
-- stripped, every character is looked up in `mapping`, vowel placeholders are
-- resolved (long falling, then long rising, then short), leftover
-- placeholders are converted, and the result is NFC-normalised.
--
-- NOTE(review): several patterns and table constructors below are shown with
-- empty groups ("()", "(?)") or without keys ("{ = ... }"); presumably their
-- bracketed parts were lost when this text was extracted. They are kept
-- exactly as found and must be restored from the original module — confirm.
--
-- @param text string to transliterate, or a table with an `args` field from
--   which the arguments are read (empty strings become nil).
-- @param lang not read by the code visible here.
-- @param sc not read by the code visible here.
-- @return the transliterated string, NFC-normalised.
function export.tr(text, lang, sc)
	-- Declared local so that the table-argument branch below does not create
	-- globals: a force_translit set by one call would otherwise persist and be
	-- read by later calls.
	local omit_i3raab, force_translit
	if type(text) == "table" then
		local function f(x) return (x ~= "") and x or nil end
		-- NOTE(review): all five values read the same `text.args` expression as
		-- shown; presumably each indexed a different argument — confirm.
		text, lang, sc, omit_i3raab, force_translit =
			f(text.args), f(text.args), f(text.args), f(text.args), f(text.args)
	end
	--define the "end" of a word
	text = gsub(text, " | ", "# | #")
	text = gsub(text, "\n" , "#".."\n" .. "#")
	-- NOTE(review): empty capture as shown; presumably a punctuation class.
	text = gsub(text, "()" , "#".."%1" .. "#")
	text = "##" .. gsub(text, " ", "# #") .. "##"
	text = gsub(text, zwnj, "#"..zwnj.."#")
	-- hash signs now mark the beginning and end of a word
	text = gsub(text, 'لا', "ﻻ")
	-- Tashdeed. FIXME: is this used?
	-- The captured character is doubled around the "@" (no-vowel) placeholder.
	text = gsub(text, '()' .. tashdid, "%1@%1")
	text = gsub(text, '()' .. '()' .. tashdid, "%1@%1%2") --if it gets pushed after diacritics
	-- Move the aspirate in front of the captured character that precedes it.
	text = gsub(text, '()' .. aspirate, aspirate.."%1")
	text = gsub(text, ye .. '(?)' .. tashdid, "y@y%1")
	text = gsub(text, vao .. '(?)' .. tashdid, "w@w%1")
	text = gsub(text, "ن" .. ghunna, "ṇ")
	-- convert all characters
	-- Byte-level string.gsub is used for the directional marks; their UTF-8
	-- sequences are plain literals here.
	text = string.gsub(text, lrm, "")
	text = string.gsub(text, rlm, "")
	text = gsub(text, '.', mapping)
	-- vowel fixes, unambiguous vowels have been processed already
	-- Applies three rule tables to `word`: `init` rules anchored after "#",
	-- `final` rules anchored before "#", then `med` rules anywhere. Rules
	-- within one table run in pairs() order, which is unspecified.
	local function process_vowels(word, init, final, med)
		for k, v in pairs(init) do
			word = gsub(word, "#" .. k, "#" .. v)
		end
		for k, v in pairs(final) do
			word = gsub(word, k .. "#", v .. "#")
		end
		for k, v in pairs(med) do
			word = gsub(word, k, v)
		end
		return word
	end
	--long, falling
	local init_f = { = "óo", = "úu", = "ée", = "íi"}
	local final_f = { = "áa", = "úu", = "óo", = "íi"}
	local med_f = { = "úu", = "íi"} --we can't do A-> áa yet, Y -> ée, W -> óo
	text = process_vowels(text, init_f, final_f, med_f)
	--long, rising
	local init_r = { = "oó", = "uú", = "eé", = "ií"}
	local final_r = { = "uú", = "oó", = "ií"}
	local med_r = { = "uú", = "oó", = "ií", = "eé"}
	text = process_vowels(text, init_r, final_r, med_r)
	--short
	local init_s = { = "a", = "o", = "u", = "e", = "i"}
	local final_s = { = "a", = "u", = "o", = "e", = "i"}
	local med_s = { = "a", = "o", = "u", = "e", = "i"}
	text = process_vowels(text, init_s, final_s, med_s)
	--remaining characters
	text = gsub(text, "H", "h")
	text = gsub(text, "A", "áa")
	-- W/Y become consonants w/y before the "@" placeholder and in the captured
	-- contexts below; any W/Y still left afterwards becomes a long vowel.
	text = gsub(text, "W@", "w@")
	text = gsub(text, "Y@", "y@")
	text = gsub(text, "W()", "w%1")
	text = gsub(text, "W()", "w%1")
	text = gsub(text, "Y()", "y%1")
	text = gsub(text, "Y()", "y%1")
	text = gsub(text, "W", "óo")
	text = gsub(text, "Y", "ée")
	-- get rid of hashtags
	text = gsub(text, "#", "")
	-- The validity check currently has no effect: its only statement is
	-- commented out, so an invalid result is still returned.
	if not force_translit and not is_valid(text) then
		--return nil
	end
	--final conversions
	-- Replace the internal single-character stand-ins with their final forms
	-- and drop the "@" no-vowel placeholder.
	text = gsub(text, "ċ","c̣")
	text = gsub(text, "ʐ", "j̣")
	text = gsub(text, "@", "")
	text = mw.ustring.toNFC(text)
	return text
end
return export