-- 2024.04.05. ]
--[=[
-- This is a version of el-IPA with accents placed ON the vowels and no syllabification.
-- IPA symbols have no capitals.
-- It is used for the parameter ts=, e.g. in texts, for quotations, in inflectional tables, etc.
]=]--
-- Module table returned to the caller at the end of the file.
local export = {}
--- Template entry point intended to transcribe Greek text to IPA symbols
-- (accents on vowels, no syllabification), for use with the ts= parameter.
--
-- As written, the body is one long if/elseif chain that returns the IPA
-- string of the first rule whose pattern is found in `text`.
--
-- @param frame  frame object; arguments are read from frame:getParent().args
-- @return       the string returned by the first matching branch; nothing is
--               returned explicitly when no branch matches
--
-- NOTE(review): this function reads like an unfinished draft. Issues visible here:
--   * `text` is never assigned in this function, so it is looked up as a global;
--     `args` is read but never used. Presumably `text` should come from `args` - confirm.
--   * Many patterns / comparison strings are empty ("" or ''). An empty pattern
--     matches at the start of any string, so the first such branch would always win.
--     Presumably the character classes were lost - restore them before use.
--   * `punctuation` and `position` are used below but never defined in this function.
--   * A stray `]=]--` after the diacritics locals has no matching opener in this
--     function, which is a syntax error as the code stands.
function export.ts(frame)
local args = frame:getParent().args -- for Templates
-- local args = frame.args -- invoke
-- needed:
-- text = any Greek text
-- result = the transcribed IPA symbols
local following_gr = '' -- the following Greek letter (in the text)
local following_ipa = '' -- the next IPA symbol, in the transcription, not in the text.
local punctuation_gr = "" -- and more
local punctuation_ipa = "" -- and more
--?? do I write return blah, or do I write result = blah
-- args for override (at my ts= ])
-- ============ first do Conditions and Specials, then do isolated letters.
-- space is space
-- NOTE(review): `text` is not defined anywhere above this line.
if mw.ustring.find(text, " ") then return ' '
-- do nothing for diacritics, spirits, accents found alone (some editions place them after a letter)
-- NB: accents should really be applied to the previous vowel; not handled here - do it manually (like the capitals at du Cange)
-- NOTE(review): the patterns below are empty strings; the intended characters are named only in the trailing comments.
elseif mw.ustring.find(text, "") then return '' -- accents tonos=okseia, bareia, perispomene
elseif mw.ustring.find(text, "") then return '' -- with psile
elseif mw.ustring.find(text, "") then return '' -- with daseia
elseif mw.ustring.find(text, "") then return '' -- with dialytics
elseif mw.ustring.find(text, "") then return '' -- apostrophe, curly apostrophe, macron, brachy (breve)
-- punctuation
elseif mw.ustring.find(text, "") then return ',' -- comma
elseif mw.ustring.find(text, "") then return '.' -- full stop (period)
elseif mw.ustring.find(text, "") then return '!' -- exclamation mark
elseif mw.ustring.find(text, "") then return '?' -- question mark -- BUT cf #semicolonalphanumberic
elseif mw.ustring.find(text, "") then return ':' -- colon
elseif mw.ustring.find(text, "") then return ';' -- middot is semicolon
elseif mw.ustring.find(text, "") then return '…' -- hellip
-- brackets (parentheses, square brackets, curly brackets)
-- do not change -- do I have to write this?
-- SPECIAL WORDS ==================== -- AND override needed
-- special words with accent NOT pronounced (monotonic; for polytonic see the AncGr page)
-- ] ]
elseif mw.ustring.find(text, "") then return 'ana'
elseif mw.ustring.find(text, "") then return 'kata'
-- ] or ] has 2 or 3 options. Do manually? I can give the standard 1st option
-- ] has 2 options; the more common is normal, the other is without accent
-- special words not accented, but pronounced WITH accent
-- ] cf options for δια
elseif mw.ustring.find(text, "") then
-- NOTE(review): `punctuation` is not defined; the locals above are `punctuation_gr` / `punctuation_ipa`.
-- When the condition is false this branch returns nothing.
if following_gr == ' ' or following_gr == punctuation then return 'ʝá'
end
--?? todo
-- ============== SPECIAL COMBINATIONS
-- for AncGr marked prosodies on accented letters = ignore the prosody
--?? how? todo
-- SUFFIXES with or without synizesis
-- κ- -γ- γκ- γγ- χ- + any e or i sound
--?? must I make first the e sounds and the i sounds?
-- kappa: 'c' when the next IPA symbol matches (both comparison strings are empty here), else 'k'
elseif mw.ustring.find(text, "") then
if following_ipa == '' or following_ipa == '' then return 'c'
else return 'k'
end
-- gamma & gammakappa
elseif mw.ustring.find(text, "") then
if following_gr == "" then
-- NOTE(review): no return on this path when the inner condition is false.
if following_ipa == '' or following_ipa == '' then return 'ɟ' end
else
if following_ipa == '' or following_ipa == '' then return 'ʝ'
else return 'ɣ'
end
end
-- chi: 'ç' when the next IPA symbol matches (both comparison strings are empty here), else 'x'
elseif mw.ustring.find(text, "") then
if following_ipa == '' or following_ipa == '' then return 'ç'
else return 'x'
end
-- DIGRAPHS ====================
-- sound e with alpha iota
elseif mw.ustring.find(text, "") then return 'e' -- alpha + iota psile, daseia
elseif mw.ustring.find(text, "") then return 'é' -- +iota tonos/okseia, bareia, perispomene & spirits
-- other alpha digraphs are ok as isolated
-- elseif mw.ustring.find(text, "") then return 'ái' -- alpha+accent, iota
-- elseif mw.ustring.find(text, "") then return 'ai' -- alphas + iota with dialytics
-- elseif mw.ustring.find(text, "") then return 'aí' -- alphas + iota with dialytics+accent
-- sound i: epsilon iota, omicron iota, upsilon iota
elseif mw.ustring.find(text, "") then return 'i' -- +iota psile, daseia
elseif mw.ustring.find(text, "") then return 'í' -- +iota tonos/okseia, bareia, perispomene & spirits
-- alpha/epsilon/eta + upsilon αυ (ευ ηυ too)
elseif mw.ustring.find(text, "") then return 'af'
elseif mw.ustring.find(text, "") then return 'av'
-- elseif mw.ustring.find(text, "") then return 'af' -- alpha ypsilon space ] punctuation
-- elseif mw.ustring.find(text, "") then return 'af' .. ""
elseif mw.ustring.find(text, "") then
--return 'af' .. ts("")
-- function (alpha, upsilon, position)
local alpha = "" -- do aei αΑεΕηΗ  (NOTE(review): never read below)
-- Find next character that is not whitespace or punctuation.
-- NOTE(review): `position` is not defined in this function (it was a callback
-- parameter in the commented-out gsub near the end of the function).
-- The local `next` shadows Lua's built-in `next` inside the loop.
local following = ""
while true do
local next = mw.ustring.sub(text, position, position)
if next == "" then -- reached end of string
break
-- empty pattern here; presumably a whitespace/punctuation class was intended (see the comment above the loop)
elseif next:find "" then
position = position + 1
else
following = next
break
end
end
-- cf. the commented-out gsub near the end of the function, which picks "f" at end of
-- text or before one of θκξπσςτφχψ, and "v" otherwise.
if following == "" or following == "" then
return "af"
elseif following == "" then
return "av" -- for vowel or γδζλμνρ
end
-- άυ άϋ as normal
-- same with accents:
elseif mw.ustring.find(text, "") then return 'áf'
elseif mw.ustring.find(text, "") then return 'áv'
elseif mw.ustring.find(text, "") then
-- Same scan as in the unaccented αυ branch above, returning the accented forms.
-- NOTE(review): duplicated code; `alpha` unused and `position` undefined here as well.
local alpha = "" -- do aei αΑεΕηΗ
-- Find next character that is not whitespace or punctuation.
local following = ""
while true do
local next = mw.ustring.sub(text, position, position)
if next == "" then -- reached end of string
break
elseif next:find "" then
position = position + 1
else
following = next
break
end
end
if following == "" or following == "" then
return "áf"
elseif following == "" then
return "áv" -- for vowel or γδζλμνρ
end
-- do the same with ευ
--?? todo
-- do the same with ηυ
--?? todo
-- ου ού
elseif mw.ustring.find(text, "") then return 'u'
elseif mw.ustring.find(text, "") then return 'ú'
-- μπ arctic (presumably word-initial) = b, median (presumably word-medial) is mostly mb, and override needed
-- elseif mw.ustring.find(text, "") then
--?? todo
-- ντ arctic = d, median is mostly nd, and override needed
--?? todo
-- γκ arctic = g, median is mostly ŋɡ (+ e or i sounds is ŋɟ NOT the mistake ɲɟ see Macridge and notes)
-- ] is aŋɡóla
-- ] is áŋɟelos
-- and override needed e.g. ] is NOT aléŋɡɾo but aléɡɾo
--?? todo
-- γχ (always median) is ŋx or (+ e or i sound is ŋç NOT the mistake ɲç, the same as above with γκ)
--?? todo
-- γξ (always median) is ŋks
elseif mw.ustring.find(text, "") then return 'ŋks'
-- μβ (always median) is ɱv
elseif mw.ustring.find(text, "") then return 'ɱv'
-- μφ (always median) is ɱf
elseif mw.ustring.find(text, "") then return 'ɱf'
-- σμ usually is zm, and override needed
elseif mw.ustring.find(text, "") then return 'zm'
-- τσ
elseif mw.ustring.find(text, "") then return "t͡s"
-- τζ
elseif mw.ustring.find(text, "") then return "d͡z"
-- =================== ISOLATED LETTERS =========== --
-- special characters, for quotations only
-- Incorrect characters: see ].
-- These are tracked by ].
elseif mw.ustring.find(text, "ϐ") then return "v" -- calligraphic beta -- AncGr as 'b'
elseif mw.ustring.find(text, "ϑ") then return "θ" -- calligraphic theta
elseif mw.ustring.find(text, "ϰ") then return "k" -- calligraphic kappa
elseif mw.ustring.find(text, "ϱ") then return "r" -- calligraphic rho
elseif mw.ustring.find(text, "ϕ") then return "f" -- calligraphic phi
-- add calligraphic epsilon
-- ligatures?? for και = ce
-- Archaic letters (AncGr): digamma, san, koppa, sampi and the dated stigma, lunate sigma etc
elseif mw.ustring.find(text, "") then return "w" -- this should always be the capital Ϝ
elseif mw.ustring.find(text, "ϻ") then return "s"
elseif mw.ustring.find(text, "") then return "k" -- this should always be the capital Ϙ
elseif mw.ustring.find(text, "") then return "st" -- stigma -- stigma capital? rare
elseif mw.ustring.find(text, "") then return "s" -- lunate sigma
-- elseif mw.ustring.find(text, "ϡ") then return "ϡ", -- do nothing??
-- elseif mw.ustring.find(text, "ͷ") then return "ͷ", -- do nothing? transliterate to "v" number
-- Vowels (all unaccented, all accented, both monotonic and polytonic); macron, brachy: do as grc? todo
elseif mw.ustring.find(text, "") then return "a" --?? should ᾱ be double 'aa'
elseif mw.ustring.find(text, "") then return "á"
-- DO MORE
elseif mw.ustring.find(text, "") then return "e" -- always brachy
elseif mw.ustring.find(text, "") then return "é"
elseif mw.ustring.find(text, "") then return "i" -- always macron
elseif mw.ustring.find(text, "") then return "í"
elseif mw.ustring.find(text, "") then return "i" --?? should ῑ be double 'ii'
elseif mw.ustring.find(text, "") then return "í"
elseif mw.ustring.find(text, "") then return "o" -- always brachy
elseif mw.ustring.find(text, "") then return "ó"
elseif mw.ustring.find(text, "") then return "i" -- has no psile --?? should ῡ be double 'ii'
elseif mw.ustring.find(text, "") then return "í"
elseif mw.ustring.find(text, "") then return "o" -- always macron
elseif mw.ustring.find(text, "") then return "ó"
-- Consonants (single letter -> single IPA string)
elseif mw.ustring.find(text, "β") then return "v"
-- gamma, see above
elseif mw.ustring.find(text, "δ") then return "ð"
elseif mw.ustring.find(text, "ζ") then return "z"
elseif mw.ustring.find(text, "θ") then return "θ"
-- kappa, see above
elseif mw.ustring.find(text, "λ") then return "l"
elseif mw.ustring.find(text, "μ") then return "m"
elseif mw.ustring.find(text, "ν") then return "n"
elseif mw.ustring.find(text, "ξ") then return "ks"
elseif mw.ustring.find(text, "π") then return "p"
elseif mw.ustring.find(text, "ρ") then return "r"
elseif mw.ustring.find(text, "σ") then return "s"
elseif mw.ustring.find(text, "τ") then return "t"
elseif mw.ustring.find(text, "φ") then return "f"
-- chi, see above
elseif mw.ustring.find(text, "ψ") then return "ps"
end -- CLOSE ALL ifs
-- NOTE(review): everything from here to the end of the function is reached only when
-- no branch above returned, and none of the locals declared below is used by live code
-- in this function (only by the commented-out gsub block further down).
-- .. spirit .. accent, but I need to SEE them too
local m_data = require('Module:grc-utilities/data')
-- Break Greek text into units of a single consonant or monophthong letter, or diphthong, with any diacritics
local tokenize = require('Module:grc-utilities').tokenize
--local ufind = mw.ustring.find --
--local ugsub = mw.ustring.gsub --
--local U = mw.ustring.char --
--local ulower = mw.ustring.lower --
--local uupper = mw.ustring.upper --
-- This means: ??
--local UTF8char = '*'
-- Diacritics from Module:grc-utilities/data
local diacritics = m_data.named
-- Greek
local acute = diacritics.acute -- this is okseia ´ and the overall tonos
local grave = diacritics.grave -- this is bareia `
local circumflex = diacritics.circum -- this is perispomeni ῀
local diaeresis = diacritics.diaeresis -- this is the dialytics ¨
local smooth = diacritics.smooth -- this is psile ᾿
local rough = diacritics.rough -- this is daseia ῾
local macron = diacritics.macron -- this is macron ˉ , normally not needed, needed exceptionally in quotations
local breve = diacritics.breve -- this is brachy ˘ , normally not needed
local subscript = diacritics.subscript -- this is hypogegrammene
--?? (adscript prosgegrammene is written out with i??) see below, a_subscript
-- Latin
local hat = diacritics.Latin_circum
-- local macron_diaeresis = macron .. diaeresis .. "?" .. hat -- ??what is this
local a_subscript = '^.*' .. subscript .. '$'
-- NOTE(review): the next line closes a long comment that is never opened in this
-- function; as it stands it is a syntax error. Either remove it or restore the opener.
]=]--
--[[#semicolonalphanumberic
Replace semicolon or Greek question mark with regular question mark,
except after an ASCII alphanumeric character (to avoid converting
semicolons in HTML entities).
]]
-- text = mw.ustring.gsub(text, "()", "%1?")
-- Handle the middle dot = semicolon. In AncGr it is equivalent to semicolon or colon, but semicolon is probably more common.
-- text = text:gsub("·", ";")
-- CONDITIONS for modern transliteration at ]
-- check ] for αυ, ευ, ηυ, μπ initial,
-- add ντ initial, γκ initial (we have delta = dh, and gamma = gh)
--[=[
text = gsub(text, "()()()",
function (vowel, upsilon, position)
-- Find next character that is not whitespace or punctuation.
local following = ""
while true do
local next = mw.ustring.sub(text, position, position)
if next == "" then -- reached end of string
break
elseif next:find "" then
position = position + 1
else
following = next
break
end
end
return tt
.. (upsilon == "ύ" and acute or "")
.. ((following == "" or ("θκξπσςτφχψ"):find(following, 1, true)) and "f" or "v")
end)
text = gsub(text, "()()",
function (vowel, ita)
if ita == "ή" then
return tt .. "i" .. diaeresis .. acute
else
return tt .. "i" .. diaeresis
end
end)
text = gsub(text, "(.?)()π",
function (before, mi)
if before == "" or before == " " or before == "-" then
if mi == "Μ" then
return before .. "B"
else
return before .. "b"
end
end
end)
]=]--
end
-- Expose the module table (with export.ts) to Scribunto.
return export