-- This module generates pronunciations for {{grc-IPA}}, which is used in Ancient Greek entries.
-- Use the sandbox module to try things out without causing problems in entries.
-- Most of the data is contained in the data module, [[Module:grc-pronunciation/data]].
local export = {}
local m_a = require("Module:accent qualifier")
local m_data = mw.loadData("Module:grc-pronunciation/data")
local m_grc_accent = require("Module:grc-accent")
local m_grc_utils = require("Module:grc-utilities")
local m_grc_utils_data = require("Module:grc-utilities/data")
local m_IPA = require("Module:IPA")
local m_str_utils = require("Module:string utilities")
-- [[Module:grc-utilities]] converts sequences of diacritics to the order required by this module,
-- then replaces combining macrons and breves with spacing ones.
local diacritic = m_grc_utils_data.diacritic
local diacritics = m_grc_utils_data.diacritics
local canonicalize = m_grc_utils.canonicalize
local concat = table.concat
local find_ambig = m_grc_accent.find_ambig
local floor = math.floor
local format_categories = require("Module:utilities").format_categories
local full_link = m_grc_utils.link
local gsplit = m_str_utils.gsplit
local insert = table.insert
local is_preview = require("Module:pages").is_preview
local list_to_text = mw.text.listToText
local mark_implied_length = m_grc_accent.mark_implied_length
local max = math.max
local split = m_str_utils.split
local rearrangeDiacritics = m_grc_utils.pronunciationOrder
local rfind = m_str_utils.find
local usub = m_str_utils.sub
local rmatch = m_str_utils.match
local rsubn = m_str_utils.gsub
local strip_accent = m_grc_accent.strip_accent
local toNFD = mw.ustring.toNFD
local ulen = m_str_utils.len
local ulower = m_str_utils.lower
local U = m_str_utils.char
local pagename = mw.loadData("Module:headword/data").pagename
local lang = require("Module:languages").getByCode("grc")
-- Period codes, in the order convert_term walks them: output starts at the
-- requested period and continues through every later entry in this list.
-- Each code is also used as a data key and, prefixed with "grc-", as a qualifier.
local periods = {'cla', 'koi1', 'koi2', 'byz1', 'byz2'}
-- Subset of the period codes shown in the collapsed one-line view built by make_table.
local inlinePeriods = {'cla', 'koi2', 'byz2'}
--- Return the i-th UTF-8 character of a string.
-- Written as a byte-level scan because single characters are fetched so often.
-- @param s  UTF-8 encoded string to read from.
-- @param i  1-based character position; a number or a numeric string.
-- @return   The character at position i, or '' when i is out of bounds
--           (zero, negative, or past the end of the string).
-- Raises an error when i cannot be converted to a number.
local function fetch(s, i)
	i = tonumber(i)

	if type(i) ~= "number" then
		error("fetch requires a number or a string equivalent to a number as its second argument.")
	end

	-- Positions below 1 can never match; skip the scan.
	if i < 1 then
		return ""
	end

	local n = 0
	-- One leading byte followed by any UTF-8 continuation bytes (0x80-0xBF)
	-- is exactly one character. A bare ".*" would yield the whole string.
	for ch in s:gmatch(".[\128-\191]*") do
		n = n + 1
		if n == i then
			return ch
		end
	end

	return ""
end
--Combining diacritics are tricky.
-- Named constants for the marks that the syllabification and environment
-- helpers in this module compare individual characters against.
local tie = U(0x35C) -- tie bar
local nonsyllabic = U(0x32F) -- combining inverted breve below
local voiceless = U(0x325) -- combining ring below
local aspirated = 'ʰ'
-- Spacing (non-combining) length marks; tested by env_functions.hasMacronBreve.
local macron = '¯'
local breve = '˘'
--- Test whether a piece of text belongs to a named character category.
-- @param text  The text to test (normally a single character).
-- @param X     Category name; must be a key of m_data.chars.
-- @return      A falsy value when text or X is missing or text does not match;
--              otherwise whatever rfind returns for the match.
-- Raises an error (attributed to the caller) when m_data.chars has no entry for X.
local function is(text, X)
	if not text or not X then
		return false
	end

	local pattern = m_data.chars[X] or error('No data for "' .. X .. '".', 2)
	if X == "frontDiphth" or X == "Greekdiacritic" then
		-- These two entries are used as complete patterns already.
		pattern = "^" .. pattern .. "$"
	else
		-- Every other entry is a list of characters: wrap it in a character
		-- class so that the text must be exactly one of them.
		pattern = "^[" .. pattern .. "]$"
	end

	return rfind(text, pattern)
end
-- Environment tests that decode() dispatches to for "~" conditions.
-- Each takes the term and a character position, and inspects the
-- character(s) immediately AFTER that position.
local env_functions = {
	-- True when the following character is in the "frontVowel" category, or the
	-- following two characters form a "frontDiphth" whose second character is
	-- not in the "iDiaer" category.
	preFront = function(term, index)
		local letter1, letter2 = fetch(term, index + 1), fetch(term, index + 2)
		return is(strip_accent(letter1), "frontVowel") or (is(strip_accent(letter1 .. letter2), "frontDiphth") and not is(letter2, "iDiaer"))
	end,
	-- True when the following character is an iota (ignoring accents) whose
	-- own data entry is not flagged `diaer`.
	isIDiphth = function(term, index)
		local letter = fetch(term, index + 1)
		local data = m_data[letter]
		-- A letter without a data entry cannot carry the flag.
		return strip_accent(letter) == 'ι' and not (data and data.diaer)
	end,
	-- Same test as isIDiphth, for upsilon.
	isUDiphth = function(term, index)
		local letter = fetch(term, index + 1)
		local data = m_data[letter]
		return strip_accent(letter) == 'υ' and not (data and data.diaer)
	end,
	-- True when the following character is a spacing macron or breve.
	hasMacronBreve = function(term, index)
		local mark = fetch(term, index + 1)
		return mark == macron or mark == breve
	end,
}
local function decode(condition, x, term)
	--[==[
		Evaluates a condition string from the data module against the
		character at position x of term. Returns a truthy value when the
		condition holds, a falsy value otherwise.

		"If" and "and" statements.
		Note that we're finding the last operator first,
		which means that the first will ultimately get decided first.
		If + ("and") or / ("or") is found, the function is called again,
		until if-statements are found.
		In if-statements:
		* A number represents the character under consideration:
			-1 is the previous character, 0 is the current, and 1 is the next.
		* Equals sign (=) checks to see if the character under consideration
			is equal to a character.
		* Period (.) plus a word sends the module to the corresponding entry
			in the letter's data table.
		* Tilde (~) calls a function on the character under consideration,
			if the function exists.
	]==]
	if condition:find("/", nil, true) then -- logical or
		for or_condition in gsplit(condition, "/", true, true) do
			if decode(or_condition, x, term) then
				return true
			end
		end
		return false
	elseif condition:find("+", nil, true) then -- logical and
		for and_condition in gsplit(condition, "+", true, true) do
			if not decode(and_condition, x, term) then
				return false
			end
		end
		return true
	elseif condition:find("=", nil, true) then
		local offset, ch = condition:match("^(.-)=(.*)$")
		return ch == fetch(term, x + offset) -- out of bounds fetch gives ''
	elseif condition:find(".", nil, true) then
		local offset, quality = condition:match("^(.-)%.(.*)$")
		local character = fetch(term, x + offset)
		-- Look the named quality up in that character's own data entry.
		local data = m_data[character]
		return data and data[quality] or false
	elseif condition:find("~", nil, true) then
		local offset, func = condition:match("^(.-)~(.*)$")
		-- Unknown function names simply evaluate to false.
		local env_function = env_functions[func]
		return env_function and env_function(term, x + offset) or false
	end
end
--- Resolve a (possibly conditional) data value for position x of term.
-- @param p     Either a plain value, returned unchanged, or a sequence whose
--              entries are plain values or {condition, result} pairs.
-- @param x     Character position in term that conditions are evaluated at.
-- @param term  The term being converted.
-- @return      The first plain entry reached, or the resolved result of the
--              first pair whose condition holds; nothing when no entry applies.
local function check(p, x, term)
	if type(p) ~= "table" then
		return p
	end
	--This table is sequential, with a variable number of entries.
	for _, possP in ipairs(p) do
		if type(possP) ~= "table" then
			return possP
		end
		--This table is paired, with two values: a condition and a result.
		if decode(possP[1], x, term) then
			-- The result may itself be conditional, so resolve it recursively.
			return check(possP[2], x, term)
		end
	end
end
--- Convert a prepared term into one unsyllabified IPA string per period.
-- @param term         The term to convert; must not be nil.
-- @param periodstart  Period code to start from; nil or "" selects every period.
-- @return IPAs        Table keyed by period code, each value { IPA = string }.
-- @return outPeriods  Sequence of the period codes that were generated, in order.
local function convert_term(term, periodstart)
	if not term then error('The variable "term" in the function "convert_term" is nil.') end

	local IPAs = {}
	local outPeriods = {}

	-- Without a starting period, output begins with the first period.
	local start = not (periodstart and periodstart ~= "")
	for _, period in ipairs(periods) do
		if period == periodstart then
			start = true
		end
		if start then
			IPAs[period] = {}
			insert(outPeriods, period)
		end
	end

	local length, x = ulen(term), 1
	while x <= length do
		local letter = fetch(term, x)
		local data = m_data[letter]
		-- Characters without a data entry are skipped.
		if data then
			-- check to see if a multicharacter search is warranted
			local advance = data.pre and check(data.pre, x, term) or 0
			local p = data.p
			if advance ~= 0 then
				-- Use the entry for the whole multicharacter sequence instead.
				p = m_data[usub(term, x, x + advance)].p
			end
			for _, period in ipairs(outPeriods) do
				-- check() returns nothing when no rule applies; passing that
				-- straight to insert() would raise an argument-count error.
				local sound = check(p[period], x, term)
				if sound ~= nil then
					insert(IPAs[period], sound)
				end
			end
			x = x + advance
		end
		x = x + 1
	end

	--Concatenate the IPAs
	for _, period in ipairs(outPeriods) do
		IPAs[period] = { IPA = concat(IPAs[period], '') }
	end

	return IPAs, outPeriods
end
--- Locate the last character position of the current syllable.
-- @param word     The remaining (IPA) word being syllabified; must not be nil.
-- @param nVowel   Position of the next vowel or stress mark; unused when wordEnd is set.
-- @param wordEnd  Truthy when no further vowel exists in the word.
-- @return         Position in word after which the syllable break falls.
local function find_syllable_break(word, nVowel, wordEnd)
	if not word then error('The variable "word" in the function "find_syllable_break" is nil.') end

	-- No later vowel: the syllable runs to the end of the word.
	if wordEnd then
		return ulen(word)
	end

	local before = fetch(word, nVowel - 1)

	if is(before, "liquid") then
		local twoBefore = fetch(word, nVowel - 2)
		if is(twoBefore, "obst") then
			return nVowel - 3
		end
		if twoBefore == aspirated and is(fetch(word, nVowel - 3), "obst") then
			return nVowel - 4
		end
		return nVowel - 2
	end

	if is(before, "cons") then
		return nVowel - 2
	end

	if before == aspirated and is(fetch(word, nVowel - 2), "obst") then
		return nVowel - 3
	end

	if before == voiceless and fetch(word, nVowel - 2) == 'r' then
		return nVowel - 3
	end

	return nVowel - 1
end
--- Split one IPA word into syllables.
-- @param word  A single IPA word (no spaces); stress is marked with 'ˈ'.
-- @return      The word with '.' between syllables and any stress mark moved
--              to the start of its syllable, or nil when word is empty.
local function syllabify_word(word)
	local syllables = {}
	--[[	cVowel means "current vowel", nVowel "next vowel",
			sBreak "syllable break".	]]--
	local cVowel, nVowel, sBreak, stress, wordEnd, searching
	while word ~= '' do
		cVowel, nVowel, sBreak, stress = false, false, false, false

		--First thing is to find the first vowel.
		searching = 1
		local cVowelFound = false
		while not cVowel do
			local letter = fetch(word, searching)
			local nextLetter = fetch(word, searching + 1)
			if cVowelFound then
				if (is(letter, "vowel") and nextLetter ~= nonsyllabic) or is(letter, "cons") or letter == '' or letter == 'ˈ' then
					cVowel = searching - 1
				elseif is(letter, "diacritic") then
					searching = searching + 1
				elseif letter == tie then
					-- A tie bar joins this vowel to the next: keep looking.
					cVowelFound = false
					searching = searching + 1
				else
					searching = searching + 1
				end
			else
				if is(letter, "vowel") then
					cVowelFound = true
				elseif letter == 'ˈ' then
					stress = true
				elseif letter == '' then
					-- Ran off the end of the word without finding a vowel.
					-- Treat the remainder as one final syllable; without this
					-- the search would never terminate.
					cVowel = searching - 1
				end
				searching = searching + 1
			end
		end

		--Next we try and find the next vowel or the end.
		searching = cVowel + 1
		while (not nVowel) and (not wordEnd) do
			local letter = fetch(word, searching)
			if is(letter, "vowel") or letter == 'ˈ' then
				nVowel = searching
			elseif letter == '' then
				wordEnd = true
			else
				searching = searching + 1
			end
		end

		--Finally we find the syllable break point.
		sBreak = find_syllable_break(word, nVowel, wordEnd)

		--Pull everything up to and including the syllable Break.
		local syllable = usub(word, 1, sBreak)

		--If there is a stress accent, then we need to move it to the
		--beginning of the syllable, unless it is a monosyllabic word,
		--in which case we remove it altogether.
		if stress then
			if next(syllables) or syllable ~= word then
				syllable = 'ˈ' .. rsubn(syllable, 'ˈ', '')
			else
				syllable = rsubn(syllable, 'ˈ', '')
			end
			stress = false
		end
		insert(syllables, syllable)
		word = usub(word, sBreak + 1)
	end

	local out = nil
	if #syllables > 0 then
		out = concat(syllables, '.')
		-- The stress mark already marks a syllable boundary.
		out = rsubn(out, '%.ˈ', 'ˈ')
	end
	return out
end
--- Syllabify the IPA string of every generated period, in place.
-- @param IPAs     Table keyed by period code, each value { IPA = string }.
-- @param periods  Sequence of the period codes present in IPAs.
-- @return         The same IPAs table, with each IPA string syllabified.
local function syllabify(IPAs, periods)
	for _, period in ipairs(periods) do
		local ipa = {}
		-- Words are syllabified independently and rejoined with spaces.
		for _, word in ipairs(split(IPAs[period].IPA, ' ')) do
			local word_ipa = syllabify_word(word)
			-- syllabify_word returns nil for an empty word; drop those.
			if word_ipa then
				insert(ipa, word_ipa)
			end
		end
		IPAs[period].IPA = concat(ipa, ' ')
	end
	return IPAs
end
--- Build the note and tracking category for vowels of unmarked length.
-- @param ambig  Sequence of the ambiguous vowels found in the term, or nil.
-- @return       '' when there are none; otherwise the formatted category,
--               preceded in preview mode by a message asking the editor to
--               mark each vowel's length.
local function make_ambig_note(ambig)
	-- The table ambig is filled with all the ambiguous vowels that have been found in the term.
	local ambig_note = ""
	if ambig and #ambig > 0 then
		-- Generate warning text in preview mode.
		if is_preview() then
			local s, pronoun
			if #ambig > 1 then
				s, pronoun = "s", "each one"
			else
				s, pronoun = "", "it"
			end
			ambig_note = ("\n<p>Mark the vowel length of the ambiguous vowel%s %s by adding a macron after %s if it is long, or a breve if it is short. By default, [[Module:grc-pronunciation]] assumes it is short if unmarked." ..
				"<br/><small>[This message shows only in preview mode.]</small></p>\n"):format(s, list_to_text(ambig), pronoun)
		end
		-- The category is added whether or not the page is being previewed.
		ambig_note = ambig_note .. format_categories({"Ancient Greek terms with incomplete pronunciation"}, lang)
	end
	return ambig_note
end
--- Render the collapsible pronunciation box.
-- @param IPAs     Table keyed by period code, each value { IPA = string }.
-- @param ambig    Sequence of ambiguous vowels (or nil), passed to make_ambig_note.
-- @param periods  Sequence of the period codes to list.
-- @return         Wikitext/HTML for a "vsSwitcher" block containing a one-line
--                 inline view and a full per-period list.
local function make_table(IPAs, ambig, periods)
	--Final format
	local inlineProns = {}
	local listOfProns = {}
	local fullProns = {}
	-- Set of the periods actually generated, for filtering inlinePeriods.
	local periods2 = {}

	for _, period in ipairs(periods) do
		insert(fullProns, '* ' .. m_a.format_qualifiers(lang, {"grc-" .. period}) .. ' ' ..
			m_IPA.format_IPA_full {
				lang = lang,
				items = {{pron = '/' .. IPAs[period].IPA .. '/'}},
			})
		periods2[period] = true
	end

	for _, period in ipairs(inlinePeriods) do
		if periods2[period] then
			local pron = '/' .. IPAs[period].IPA .. '/'
			insert(inlineProns, {pron = pron})
			insert(listOfProns, pron)
		end
	end

	-- Box width in em: the longer of the inline line and the byz2 line of the
	-- full list, scaled by 0.68 per character. byz2 is the last entry of the
	-- module-level period list, so any valid starting period includes it.
	local inlineIPAlength = floor(max(ulen("IPA(key): " .. concat(listOfProns, ' → ')) * 0.68, ulen("(15th AD Constantinopolitan) IPA(key): /" .. IPAs.byz2.IPA .. "/") * 0.68))

	local inline = '\n<div class="vsShow" style="display:none">\n* ' .. m_IPA.format_IPA_full {
		lang = lang,
		items = inlineProns,
		separator = " → ",
	} .. "</div>"

	local full = '\n<div class="vsHide">\n' .. concat(fullProns, '\n') .. make_ambig_note(ambig) .. '</div>'

	return '<div class="vsSwitcher" data-toggle-category="pronunciations" style="width: ' .. inlineIPAlength .. 'em; max-width:100%;"><span class="vsToggleElement" style="float: right;"> </span>' .. inline .. full .. '\n</div>'
end
--- Produce the formatted pronunciation box for a term.
-- @param term    The Ancient Greek term as entered.
-- @param period  Period code to start from (see convert_term).
-- @return        The wikitext/HTML built by make_table.
-- Raises an error when a macron or breve follows ε, ο, η or ω.
local function get_IPA(term, period)
	-- Ambiguous vowel lengths are only reported when starting from "cla".
	local ambig
	if period == "cla" then
		ambig = find_ambig(term)
	end

	term = canonicalize(ulower(term))
	for alias, letter in pairs(m_data.aliases) do
		-- gsub also returns a count; the assignment keeps only the string.
		term = term:gsub(alias, letter)
	end
	term = mark_implied_length(term)

	-- Reject a length mark (spacing or combining macron/breve) on ε, ο, η, ω,
	-- allowing other diacritics in between. The pattern must require the
	-- letter and the mark: `diacritic*` alone matches every string.
	-- NOTE(review): assumes `diacritic` is a character-class pattern - confirm.
	local decomposed = toNFD(term)
	local length_marks = macron .. breve .. U(0x304) .. U(0x306)
	if rfind(decomposed, "[εοηω]" .. diacritic .. "*[" .. length_marks .. "]") then
		error("Macrons and breves cannot be placed after the letters ε, ο, η, or ω.")
	end

	term = rearrangeDiacritics(term)

	local IPAs, out_periods = convert_term(term, period)
	IPAs = syllabify(IPAs, out_periods)

	return make_table(IPAs, ambig, out_periods)
end
--- Template entry point: generate the pronunciation box for one term.
-- Reads the parent frame's arguments: positional parameter 1 is the term
-- (defaulting to the page name) and `period` is the starting period
-- (defaulting to "cla").
-- @param frame  The Scribunto frame object.
-- @return       The wikitext/HTML produced by get_IPA.
function export.create(frame)
	local args = require("Module:parameters").process(frame:getParent().args, {
		[1] = {default = pagename},
		["period"] = {default = "cla"},
	})
	-- Pass the term itself, not the whole argument table.
	return get_IPA(args[1], args.period)
end
--- Template entry point: build a wikitable of example pronunciations.
-- Positional parameter 1 is a comma-separated list of terms; each term may be
-- followed by " (period=CODE)" to choose its starting period (default "cla").
-- @param frame  The Scribunto frame object.
-- @return       Wikitext for a table with one row (link, pronunciation) per term.
function export.example(frame)
	local args = require("Module:parameters").process(frame:getParent().args, {
		[1] = true
	})
	local output = { '{| class="wikitable"' }

	local terms = mw.text.split(args[1], ",%s+")

	-- ipairs keeps the rows in the order the terms were written.
	for _, term in ipairs(terms) do
		local period = rmatch(term, "%(period ?= ?([^%)]+)%)") or "cla"
		-- Everything before " (" is the term; with no parenthesis, the whole item is.
		local entry = rmatch(term, "([^%(]+) %(") or term
		local link = full_link(entry)
		local IPA = get_IPA(entry, period)
		insert(output, "\n|-\n| " .. link .. " || " .. IPA)
	end

	insert(output, "\n|}")

	return concat(output)
end
return export
--Things we still need:
--Voicing of sigma around (after?) voiced stops.
--Proper alerts for editors, especially on ambiguous vowels.