-- User:Sarri.greek (CAT) » grk-translit-modern doc » test?
-- 2024.03.06. ]
-- tests at ]
-- This is version of ]
-- See ]
--[=[
* grk-translit-classic = for Ancient Greek grc, Koine, grc-koi, learned Medieval & their dialects, Katharevousa el-kth
-- script polytonic Greek
* grk-translit-modern = for any Medieval Greek gkm, Modern Greek & their dialects,
-- script monotonic or polytonic Greek (any script may be found in quotations)
* Learned Medieval Greek is transliterated exactly as Ancient Greek script (rho with daseia/rough, hypogegrammeni)
* Main Medieval Greek (vulgar) rho was written with or without daseia.
Transliterate like Modern Greek ISO843 (TypeB, slightly more phonemic than TypeA,
i macron ī for eta, o macron ō for omega
with corrections γ=gh, δ=dh, χ=kh as proposed for a mixed type C)
Pronunciation as at ]
In modern, prosody marks are not needed, but are kept for possible examples of metrics in poetry.
CORRECTIONS - PROBLEMS
* add ligatures for quotations only? -- no, we can use param substitute at Template:quote
HOW it is USED?? ] has:
<onlyinclude>{{{{{|safesubst:}}}#invoke:languages/templates|getByCode|{{{1|und}}}|transliterate|{{{{{|safesubst:}}}#invoke:links|remove_links|{{{2}}}}}|{{{sc|}}}|{{{module|}}}}}</onlyinclude>
]=]--
local export = {}

local m_data = require('Module:grc-utilities/data')
-- Break Greek text into units of a single consonant or monophthong letter,
-- or diphthong, with any diacritics.
local tokenize = require('Module:grc-utilities').tokenize

-- Optional short aliases; only referenced by commented-out code in this file.
--local ufind = mw.ustring.find
--local ugsub = mw.ustring.gsub
--local U = mw.ustring.char
--local ulower = mw.ustring.lower
--local uupper = mw.ustring.upper

-- Byte-level Lua pattern that matches exactly one UTF-8 encoded character:
-- a lead byte (NUL, ASCII, or a multi-byte lead 194-244) followed by any
-- number of continuation bytes (128-191). It lets the plain string library
-- (string.gsub) walk a UTF-8 string one character at a time, so each
-- character can be looked up in the transliteration table.
-- A bare '*' here would only ever match a literal asterisk.
local UTF8char = '[%z\1-\127\194-\244][\128-\191]*'

-- Diacritics from Module:grc-utilities/data
local diacritics = m_data.named

-- Greek
local acute = diacritics.acute -- U(0x301) this is okseia ´ and the overall tonos
local grave = diacritics.grave -- U(0x300) this is bareia `
local circumflex = diacritics.circum -- U(0x342) this is perispomeni ῀
	-- Latin_circum = U(0x302)
local diaeresis = diacritics.diaeresis -- U(0x308) these are the dialytika ¨
local smooth = diacritics.smooth -- U(0x313) this is psile ᾿
local rough = diacritics.rough -- U(0x314) this is daseia ῾
local macron = diacritics.macron -- U(0x304) this is macron ˉ , normally not needed, needed exceptionally in quotations
	-- spacing_macron = U(0xAF)
	-- modifier_macron = U(0x2C9)
local breve = diacritics.breve -- U(0x306) this is brachy ˘ , normally not needed
	-- spacing_breve = U(0x2D8)
local subscript = diacritics.subscript -- U(0x345) this is hypogegrammene
	--?? (adscript prosgegrammene is written out with i??) see below, a_subscript
-- The data module ALSO has
	-- coronis = U(0x343)
	-- undertie = U(0x35C) -- actually "combining double breve below"

-- Latin
local hat = diacritics.Latin_circum -- Latin_circum = U(0x302)

-- Pattern for a transliterated vowel that carries a macron and, after an
-- optional diaeresis, a Latin circumflex. It is used to detect macrons that
-- must be dropped because the vowel also has a circumflex.
local macron_diaeresis = macron .. diaeresis .. "?" .. hat
-- Pattern for a token that starts with alpha (either case) and ends with an
-- iota subscript, i.e. ᾳ with or without further diacritics. Without the
-- leading alpha class it would also match ῃ and ῳ.
local a_subscript = '^[αΑ].*' .. subscript .. '$'
-- Velar consonants (only referenced by the classic rules, kept for reference).
local velar = 'κγχξ'
-- Transliteration table: maps one lowercase Greek character (or one combining
-- diacritic) to its Latin transliteration. Keys are lowercase because tokens
-- are lowercased before lookup; capitalization is restored afterwards.
local tt = {
	-- Vowels
	["α"] = "a",
	["ε"] = "e",
	["η"] = "i" .. macron, -- the 'ī' with macron looks bad, like perispomeni / The classic ē reminds more of 'eta'
	["ι"] = "i",
	["ο"] = "o",
	["υ"] = "u",
	["ω"] = "o" .. macron, -- ō

	-- Consonants
	["β"] = "v", -- instead of ancient = b
	["γ"] = "gh", -- instead of g
	["δ"] = "dh", -- instead of d
	["ζ"] = "z",
	["θ"] = "th",
	["κ"] = "k",
	["λ"] = "l",
	["μ"] = "m",
	["ν"] = "n",
	["ξ"] = "ks", --?? instead of x
	["π"] = "p",
	["ρ"] = "r",
	["σ"] = "s",
	["ς"] = "s",
	["τ"] = "t",
	["φ"] = "f", -- instead of latinization ph ?
	["χ"] = "kh",
	["ψ"] = "ps",

	-- Archaic letters (AncGr) -- at modern, may be found in quotations and some, for numbering system
	["ϝ"] = "Ϝ", -- do not transliterate to "w" -- this is always the capital Ϝ
	["ϻ"] = "ϻ", -- do not transliterate to "ś"
	["ϙ"] = "Ϙ", -- do not transliterate to "q" -- this is always the capital Ϙ
	["ϡ"] = "ϡ", -- do not transliterate to "š"
	["ͷ"] = "ͷ", -- do not transliterate to "v" number

	-- special characters, for quotations only
	-- (symbol variants of ordinary letters; treated as incorrect characters
	-- and tracked elsewhere)
	["ϐ"] = "v", -- instead of 'b'
	["ϑ"] = "th",
	["ϰ"] = "k",
	["ϱ"] = "r",
	["ϲ"] = "s",
	["ϕ"] = "f", -- instead of ph

	-- Diacritics
	-- unchanged: macron, diaeresis, grave, acute
	[breve] = '', -- brachy
	[smooth] = '', -- psile
	[rough] = '', -- daseia
	[circumflex] = hat, -- perispomene
	[subscript] = 'i', -- hypogegrammene
}
-- change name from export.tr to export.translit
-- Report whether the token at index `i` begins a word: either nothing
-- precedes it, or the preceding token starts with whitespace or punctuation
-- (space, hyphen, quotation mark, ...).
local function starts_word(tokens, i)
	local prev = tokens[i - 1]
	return prev == nil or mw.ustring.find(prev, "^[%s%p]") ~= nil
end

-- Transliterate Medieval/Modern Greek text (monotonic or polytonic) to Latin.
--
-- Parameters:
--   text  string to transliterate
--   lang  language code; accepted for interface compatibility, not used
--   sc    script code; accepted for interface compatibility, not used
-- Returns the transliterated string.
--
-- Each token produced by `tokenize` is lowercased and mapped character by
-- character through `tt`; the rules below then override that result for
-- digraphs whose value depends on context:
--   μπ        -> "b" at the beginning of a word, "mb" elsewhere
--   γκ        -> "g" at the beginning of a word; elsewhere γ -> "n", κ as usual
--   γγ        -> "ng" (not "ngh")
--   γχ, γξ    -> γ -> "n", second letter as usual ("nkh", "nks")
--   ᾳ         -> "a"
-- The classic (Ancient Greek) rules -- "h" for the rough breathing, "rh" for
-- the second rho of ρρ, macron on ᾳ -- are deliberately NOT applied here.
-- TODO: ντ (and the diphthongs αυ, ευ, ηυ) are not handled yet.
function export.translit(text, lang, sc)
	-- A lone daseia: classic returns "h"; from Medieval Greek onwards nothing.
	if text == '῾' then
		return ''
	end

	--[[
		Replace semicolon or Greek question mark with regular question mark,
		except after an ASCII alphanumeric character (to avoid converting
		semicolons in HTML entities).
		The Greek question mark is built from its code point because the
		literal character is normalized to an ordinary semicolon.
	]]
	text = mw.ustring.gsub(text, "([^A-Za-z0-9])[;" .. mw.ustring.char(0x37E) .. "]", "%1?")

	-- Handle the middle dot. It is equivalent to semicolon or colon, but
	-- semicolon is probably more common.
	text = text:gsub("·", ";")

	local tokens = tokenize(text)
	local output = {}
	-- Set when the current token has already produced the output for the
	-- following token too (second letter of a digraph).
	local skip_next = false

	for i = 1, #tokens do
		local token = tokens[i]
		if skip_next then
			skip_next = false
		else
			-- Compare in lowercase so that capital Μ and Γ follow the same
			-- rules; the capital is restored at the end of the iteration.
			local lower = mw.ustring.lower(token)
			local following = tokens[i + 1]
			local next_lower = following and mw.ustring.lower(following)

			-- Default: substitute each character for its transliteration.
			local translit = lower:gsub(UTF8char, tt)

			if lower == "μ" and next_lower == "π" then
				translit = starts_word(tokens, i) and "b" or "mb"
				skip_next = true
			elseif lower == "γ" then
				if next_lower == "κ" then
					if starts_word(tokens, i) then
						translit = "g"
						skip_next = true
					else
						-- NOTE(review): follows the rule "γ before a velar
						-- should be n, but not at the beginning of a word",
						-- giving "nk"; confirm that medial γκ should not be
						-- "ng" instead.
						translit = "n"
					end
				elseif next_lower == "γ" then
					translit = "ng"
					skip_next = true
				elseif next_lower == "χ" or next_lower == "ξ" then
					-- These clusters are always word-medial.
					translit = "n"
				end
			elseif mw.ustring.find(token, a_subscript) then
				-- ᾳ: the subscript is not written out.
				-- NOTE(review): this also discards any accent on the alpha.
				translit = 'a'
			end

			-- Remove macron from a vowel that has a circumflex.
			if mw.ustring.find(translit, macron_diaeresis) then
				translit = translit:gsub(macron, '')
			end

			-- Capitalize first character of transliteration.
			if token ~= lower then
				translit = translit:gsub("^" .. UTF8char, mw.ustring.upper)
			end

			output[#output + 1] = translit
		end
	end

	return table.concat(output)
end
-- Module_talk:User:Sarri.greek/grk-translit-modern
-- ============= use it with arguments =============== --
-- Entry point for {{#invoke:}}: transliterate the first positional argument.
--
-- Parameters:
--   frame  the frame object passed by #invoke; its own `args` are read
--          (use frame:getParent().args instead when called through a template)
-- Returns the transliteration of argument 1, or '' when it is missing or empty.
function export.get_tr(frame)
	-- local args = frame:getParent().args -- for Templates
	local args = frame.args -- invoke

	-- lemma: the first positional argument, not the whole args table
	local text = args[1] or ''
	if text ~= '' then
		text = export.translit(text)
	end
	return text
end
-- Hand the table of public functions (translit, get_tr) back to the caller
-- that loads this module.
return export
-- check ] for αυ, ευ, ηυ, μπ initial,
-- add ντ initial, γκ initial (we have delta = dh, and gamma = gh)
--[=[
text = gsub(text, "()()()",
function (vowel, upsilon, position)
-- Find next character that is not whitespace or punctuation.
local following = ""
while true do
local next = mw.ustring.sub(text, position, position)
if next == "" then -- reached end of string
break
elseif next:find "" then
position = position + 1
else
following = next
break
end
end
return tt
.. (upsilon == "ύ" and acute or "")
.. ((following == "" or ("θκξπσςτφχψ"):find(following, 1, true)) and "f" or "v")
end)
text = gsub(text, "()()",
function (vowel, ita)
if ita == "ή" then
return tt .. "i" .. diaeresis .. acute
else
return tt .. "i" .. diaeresis
end
end)
text = gsub(text, "",
{ = "oï", = "oḯ",
= "Oï", = "Oḯ"})
text = gsub(text, "",
{ = "ou", = "oú",
= "Ou", = "Oú"})
text = gsub(text, "(.?)()π",
function (before, mi)
if before == "" or before == " " or before == "-" then
if mi == "Μ" then
return before .. "B"
else
return before .. "b"
end
end
end)
]=]--