local export = {}
local m_data = require('Modül:grc-araçlar/veri')
local tokenize = require('Modül:grc-araçlar').tokenize
local ufind = mw.ustring.find
local ugsub = mw.ustring.gsub
local U = mw.ustring.char
local ulower = mw.ustring.lower
local uupper = mw.ustring.upper
-- Lua pattern matching exactly one UTF-8 encoded character: a lead byte
-- (ASCII, with %z standing for NUL, or a multi-byte lead byte \194-\244)
-- followed by any number of continuation bytes (\128-\191). It lets the
-- byte-oriented string library walk text one character at a time.
local UTF8char = '[%z\1-\127\194-\244][\128-\191]*'
-- Diacritics
-- Table of diacritic characters keyed by name, supplied by the data module.
local diacritics = m_data.named
-- Greek
local acute = diacritics.acute
local grave = diacritics.grave
local circumflex = diacritics.circum
local diaeresis = diacritics.diaeresis
local smooth = diacritics.smooth
local rough = diacritics.rough
local macron = diacritics.macron
local breve = diacritics.breve
local subscript = diacritics.subscript
-- Latin
local hat = diacritics.Latin_circum
-- Pattern: a macron, optionally followed by a diaeresis, directly before the
-- Latin circumflex. Used to detect a macron that must be dropped.
local macron_diaeresis = macron .. diaeresis .. "?" .. hat
-- Pattern: a token that starts with alpha (either case) and ends with the
-- iota subscript. Without the leading alpha class every subscripted vowel
-- would match, not only alpha.
local a_subscript = '^[αΑ].*' .. subscript .. '$'
-- Consonants before which gamma is transliterated as <n>.
local velar = 'κγχξ'
-- Maps a single lowercase Greek character (or combining diacritic) to its
-- Latin transliteration. It is used as the replacement table of gsub, so any
-- character without an entry here is left unchanged.
local tt = {
	-- Vowels
	["α"] = "a",
	["ε"] = "e",
	["η"] = "e" .. macron,
	["ι"] = "i",
	["ο"] = "o",
	["υ"] = "u",
	["ω"] = "o" .. macron,
	-- Consonants
	["β"] = "b",
	["γ"] = "g",
	["δ"] = "d",
	["ζ"] = "z",
	["θ"] = "th",
	["κ"] = "k",
	["λ"] = "l",
	["μ"] = "m",
	["ν"] = "n",
	["ξ"] = "x",
	["π"] = "p",
	["ρ"] = "r",
	["σ"] = "s",
	["ς"] = "s",
	["τ"] = "t",
	["φ"] = "ph",
	["χ"] = "kh",
	["ψ"] = "ps",
	-- Archaic letters
	["ϝ"] = "w", -- digamma
	["ϻ"] = "ś", -- san
	["ϙ"] = "q", -- koppa
	["ϡ"] = "š", -- sampi
	["ͷ"] = "v", -- Pamphylian digamma
	-- Diacritics
	-- unchanged: macron, diaeresis, grave, acute
	[breve] = '',
	[smooth] = '',
	[rough] = '', -- the <h> for rough breathing is added separately in export.tr
	[circumflex] = hat,
	[subscript] = 'i',
}
--- Transliterates polytonic Ancient Greek text into Latin script.
-- @param text  string to transliterate
-- @param lang  language code; only forwarded to Modül:Cprt-translit
-- @param sc    script code; "Cprt" hands the work to Modül:Cprt-translit
-- @return the transliterated string
function export.tr(text, lang, sc)
	-- If the script is given as Cprt, then forward the transliteration to that module.
	-- This should not be necessary, as [[Modül:Cprt-translit]] redirects
	-- to this module only if script is polytonic.
	if sc == "Cprt" then
		-- Report the unexpected call under the tracking key 'grc-translit/Cprt'.
		require('Modül:debug').track('grc-translit/Cprt')
		return require('Modül:Cprt-translit').tr(text, lang, sc)
	end

	-- A lone rough-breathing sign is transliterated as a bare <h>.
	if text == '῾' then
		return 'h'
	end

	--[[
	Replace semicolon or Greek question mark with regular question mark,
	except after an ASCII alphanumeric character (to avoid converting
	semicolons in HTML entities).
	]]
	text = ugsub(text, "([^A-Za-z0-9])[;" .. U(0x37E) .. "]", "%1?")

	-- Handle the middle dot. It is equivalent to semicolon or colon, but semicolon is probably more common.
	text = text:gsub("·", ";")

	local tokens = tokenize(text)

	-- Now read the tokens. ipairs guarantees they are visited in order,
	-- which matters because the output is concatenated in visiting order.
	local output = {}
	for i, token in ipairs(tokens) do
		-- Convert token to lowercase and substitute each character
		-- for its transliteration.
		local translit = ulower(token):gsub(UTF8char, tt)

		local next_token = tokens[i + 1]

		if token == 'γ' and next_token and velar:find(next_token, 1, true) then
			-- γ before a velar should be <n>
			translit = 'n'
		elseif token == 'ρ' and tokens[i - 1] == 'ρ' then
			-- ρ after ρ should be <rh>
			translit = 'rh'
		elseif ufind(token, a_subscript) then
			-- add macron to ᾳ
			translit = ugsub(translit, '([aA])', '%1' .. macron)
		end

		-- Rough breathing: <h> follows rho but precedes a vowel.
		-- Plain find, since the diacritic is a literal and not a pattern.
		if token:find(rough, 1, true) then
			if ufind(token, '^[Ρρ]') then
				translit = translit .. 'h'
			else -- vowel
				translit = 'h' .. translit
			end
		end

		-- Remove macron from a vowel that has a circumflex.
		if ufind(translit, macron_diaeresis) then
			translit = translit:gsub(macron, '')
		end

		-- Capitalize first character of transliteration.
		if token ~= ulower(token) then
			translit = translit:gsub("^" .. UTF8char, uupper)
		end

		output[#output + 1] = translit
	end

	return table.concat(output)
end
return export