--[[
This module implements the template {{gd-IPA}}, featuring multiple dialectal pronunciations.
Author: AmazingJus
Sources:
General:
- Bauer, Michael (2011). Blas na Gàidhlig: The Practical Guide to Scottish Gaelic Pronunciation.
- Nance, Claire; Ó Maolalaigh, Roibeard (2021), "Scottish Gaelic". Journal of the International Phonetic Association.
- Ó Maolalaigh, Roibeard; MacAonghuis, Iain (1997). Scottish Gaelic in Three Months.
- Ternes, Elmar (1973). The Phonemic Analysis of Scottish Gaelic: based on the dialect of Applecross, Ross-shire.
- Ó Dochartaigh, Cathair (1997). Survey of the Gaelic Dialects of Scotland I-V.
Dialect-specific:
Outer Hebrides:
- Borgstrøm, Carl H.J. (1937). The Dialect of Barra.
- Borgstrøm, Carl H.J. (1940). "The Dialects of the Outer Hebrides". A Linguistic Survey of the Gaelic Dialects of Scotland.
- Mac Gill-Fhinnein, Gordon (1966). Gàidhlig Uidhist a Deas.
- MacPherson, John (1945). The Gaelic dialect of North Uist.
- Oftedal, Magne. (1956). "The Gaelic of Leurbost, Isle of Lewis". A Linguistic Survey of the Gaelic Dialects of Scotland.
- Scouller, Alastair (2017). The Gaelic Dialect of Colonsay.
Inner Hebrides:
- Borgstrøm, Carl H.J. (1941) "The Dialects of Skye and Ross-shire". A Linguistic Survey of the Gaelic Dialects of Scotland.
Mainland Highlands:
- Dorian, Nancy C. (1978), East Sutherland Gaelic: The Dialect of the Brora, Golspie, and Embo Fishing Communities
Argyll:
- Grannd, Seumas (2000). "The Gaelic of Islay: A Comparative Study". Scottish Gaelic Studies Monograph Series 2.
- Holmer, Nils (1938). Studies on Argyllshire Gaelic.
- Holmer, Nils (1957). The Gaelic of Arran.
- Holmer, Nils (1962). The Gaelic of Kintyre.
--]]
local export = {}
local lang = require("Module:languages").getByCode("gd")
local m_IPA = require("Module:IPA")
local tbl = require("Module:table")
local comp = mw.ustring.toNFC
local decomp = mw.ustring.toNFD
local find = mw.ustring.find
local gsub = mw.ustring.gsub
local len = mw.ustring.len
local lower = mw.ustring.lower
local match = mw.ustring.match
local split = mw.text.split
local sub = mw.ustring.sub
local u = require("Module:string/char")
local upper = mw.ustring.upper
--[[
various lists of character category sets and aliases
]]--
-- Character sets and marker symbols shared by the rules and functions below.
-- They are declared `local` on purpose: a bare assignment would create globals
-- in the environment shared with every other module loaded in the same invocation.
-- letter sets
local CONS = "bBcCdDfFgGhjklLmMnNŋpPqrRsStTvwxyz" -- consonants
local FRNT = "eɛièéìEƐI" -- front vowels
local BACK = "aoɔɤuɯàáòóùAⱯOƆUƜ" -- back vowels (note Ɐ = /ɤː/)
local UNST = "əɪ" -- unstressed vowels
local VOWL = FRNT .. BACK .. UNST -- all vowels (front + back + unstressed vowels)
local LONG = "àáèéìòóùAⱯEƐIOƆUƜ" -- all long vowels
-- NOTE(review): SYLL is an empty string although it is described as "not a vowel";
-- the intended character class appears to be missing -- confirm against the upstream revision.
local SYLL = "" -- syllable boundary (i.e. not a vowel)
-- special characters
local PRIM = "ˈ" -- primary stress
local SCND = "ˌ" -- secondary stress
local STRS = PRIM .. SCND -- all stress marks
local HIAT = "‧" -- hiatus marker
local JOIN = "‿" -- liaison mark
local BROD = "ˠ" -- broad mark
local SLND = "ʲ" -- slender mark
local ASPR = "ʰ" -- aspiration mark
local BRSL = BROD .. SLND -- broad and slender marks
local HINT = "_=%+" -- lenited hint marks (`+` is pre-escaped with `%`)
-- combining marks
local DENT = u(0x032A) -- dental mark
local ALVE = u(0x0331) -- alveolar mark
local GRAV = u(0x0300) -- combining grave
local ACUT = u(0x0301) -- combining acute
local BREV = u(0x0306) -- combining breve
local MACR = u(0x0304) -- combining macron
--[[
various helper functions
]]--
-- Keep applying the same substitution until the string reaches a fixed point.
-- `foo` is the pattern and `bar` the replacement, both passed straight to gsub().
-- Returns the final string once a pass leaves it unchanged.
local function gsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		-- only the first return value (the string) of gsub() is kept
		term = gsub(previous, foo, bar)
	until term == previous
	return term
end
-- Return the single character found at position `index` of `str`
-- (an empty string when the position lies outside the string).
local function get_char_at_pos(str, index)
	local char = sub(str, index, index)
	return char
end
-- Turn a keyed table into a list of {key, value} pairs ordered so that longer
-- keys come first, which lets callers try the longest pattern before its prefixes.
-- Keys are expected to be strings. Length is measured in bytes, which is enough
-- for prefix priority: a key that extends another key always has more bytes.
-- Keys of equal length are ordered lexicographically so the result does not
-- depend on the (unspecified) iteration order of pairs().
local function longest_match_table(t)
	local sorted = {}
	for k, v in pairs(t) do
		sorted[#sorted + 1] = { k, v }
	end
	table.sort(sorted, function(a, b)
		-- compare the keys, not the two-element pairs themselves
		local key_a, key_b = a[1], b[1]
		if #key_a ~= #key_b then
			return #key_a > #key_b
		end
		return key_a < key_b
	end)
	return sorted
end
-- Decompose all accents except for grave and acute accents.
-- The text is first fully decomposed; every base character that is directly
-- followed by a combining grave (GRAV) or acute (ACUT) is then recomposed, so
-- only the other diacritics remain as separate combining marks.
-- Returns the values of gsub(): the resulting string and the substitution count.
local function decomp_selected(text)
	-- first decompose all composed characters in string
	text = decomp(text)
	-- the second capture must be the accent itself: an empty `()` would capture a
	-- position number and glue that number onto every character
	return gsub(text, "(.)([" .. GRAV .. ACUT .. "])", function(base, accent)
		return comp(base .. accent)
	end)
end
--[[
various lists of letter sequences
]]--
-- unstressed words, including particles (before parse)
-- Each entry is a pattern; is_unstressed() anchors it with ^...$ and tests it
-- against a word with its # boundary marks removed.
-- NOTE(review): entries such as "n?a?" and "'?'?" look truncated (character
-- classes may be missing) -- confirm against the upstream revision.
local unstressed = tbl.listToSet({
"n?a?",
"o",
"r",
"is",
"le",
"'?'?"
})
-- consonant clusters broken by an epenthetic vowel (before parse)
-- split_consonants() iterates this table as key/value pairs (left part, right part).
-- NOTE(review): both entries are missing their key (they start with a bare `=`),
-- which is not valid Lua; the keys have to be restored before this module loads.
local split_combos = {
= "bBCgGmM", -- combinations starting with alveolar consonant
= "Clrs" -- combinations starting with m
}
--[[
various lists of pronunciation rules
]]--
-- lenited consonants (during parse)
-- Each rule is a triple { before condition, after condition, hint mark }; the
-- trailing comments describe what each hint mark stands for. handle_lenition()
-- is the consumer of this table.
-- NOTE(review): every entry is missing its key (bare `=`) and most conditions are
-- empty strings, so the table neither compiles nor encodes the conditions its
-- comments describe; restore the lost parts from the upstream revision.
local lenition_rules = {
-- bh
= {
{ "", "", "_" }, -- pronounced /∅/ between a long vowel and a consonant
{ "", "", "=" }, -- pronounced /v/ between a long vowel and a vowel
{ "", "", "=" }, -- pronounced /v/ between a diphthong and a vowel
{ false, "", "+" }, -- otherwise pronounced /u/ before a consonant
{ false, false, "=" } -- otherwise pronounced /v/
},
-- ch
= {
{ false, false, "=" } -- pronounced /x/ in the majority of cases
},
-- dh
= {
{ "", "", "_" }, -- pronounced /∅/ between a short vowel and a non-long phoneme
{ "", "#", "_" }, -- pronounced /∅/ after a long vowel word-finally
{ "ai?", "#", "_" }, -- pronounced /∅/ after ai/ao/aoi word-finally
{ false, false, "=" } -- otherwise pronounced /ɣ/
},
-- fh
= {
{ false, false, "_" } -- pronounced /∅/ in the majority of cases
},
-- gh
= {
{ "", "", "_" }, -- pronounced /∅/ between a short vowel and a non-long phoneme
{ "", "#", "_" }, -- pronounced /∅/ after u/ù word-finally
{ false, false, "=" } -- otherwise pronounced /ɣ/
},
-- mh
= {
{ false, "", "_" }, -- pronounced /∅/ before a consonant
{ "io", false, "=" }, -- pronounced /v/ after io
{ "", "", "_" }, -- pronounced /∅/ between certain back vowels and other vowels
{ false, false, "=" } -- otherwise pronounced /v/
},
-- ph
= {
{ false, false, "=" } -- pronounced /f/ in the majority of cases
},
-- sh
= {
{ false, false, "=" } -- pronounced /h/ in the majority of cases
},
-- th
= {
{ false, "", "_" }, -- pronounced /∅/ before a consonant
{ "", false, "_" }, -- pronounced /∅/ after a long vowel
{ "aoi?", false, "_" }, -- pronounced /∅/ after ao/aoi
{ false, false, "=" } -- otherwise pronounced /h/
}
}
-- stressed vowel sequences with varying pronunciations depending on surrounding letters (during parse)
-- NOTE: capital letters stand for long vowels
-- A value is either a plain string ("all cases") or a list of rules of the form
-- { before pattern, after pattern, result }; parse() takes the first rule whose
-- patterns match the text around the vowel sequence.
-- NOTE(review): every entry is missing its key (bare `=`) and many "after"
-- patterns are empty strings or a lone `?`, so the table neither compiles nor
-- encodes the contexts named in the comments; restore the lost parts from the
-- upstream revision.
local vowels = {
-- a
= {
{ false, "" .. SYLL, "ɯi" }, -- before syllable-final bh/mh
{ false, false, "Ɯ" } -- otherwise
},
= {
{ false, "D?", "ai" }, -- before dh + another consonant
{ false, "G?", "ɤ" }, -- before gh + another consonant
{ false, "" .. SYLL, "ai" }, -- before syllable-final ll/m/nn
{ false, "ŋ", "ai" }, -- as well as before ng
{ false, "", "ɛ" }, -- before a voiceless consonant
{ false, ".", "ɛ" },-- as well as before a voiceless fricative or epenthetic vowel
{ false, "R" .. SYLL, "A" }, -- before syllable-final r
{ false, "r", "A" }, -- as well as before rd/rn
{ false, false, "a" } -- otherwise
},
= "Ɯ", -- all cases
= {
{ false, "?", "au" }, -- before bh/mh + another consonant
{ false, "?", "Ɐ" }, -- before dh/gh + another consonant
{ false, "", "au" }, -- before dh/gh elsewhere
{ false, "" .. SYLL, "au" }, -- before syllable-final ll/m/nn
{ false, "R" .. SYLL, "A" }, -- before syllable-final rr
{ false, "r", "A" }, -- before rd/rn
{ false, false, "a" } -- otherwise
},
-- e
= {
{ false, "G?", "Ɐ" }, -- before gh + another consonant
{ false, "M?", "ɛu" }, -- before mh + another consonant
{ false, "?#", "ɤ" }, -- before word-final dh/gh
{ false, "", "e" }, -- before d/g/s
{ false, "" .. SYLL, "au" }, -- before syllable-final ll/nn
{ false, "R" .. SYLL, "A" }, -- before syllable-final rr
{ false, "rn", "A" }, -- before rn
{ "", "", "ja" }, -- before ll/nn/rr word-initially or after a lenited consonant
{ false, "", "a" }, -- before ll/nn/rr elsewhere
{ false, false, "ɛ" } -- otherwise
},
= {
{ false, "" .. SYLL, "ei" }, -- before syllable-final ll/m/nn
{ false, false, "e" } -- otherwise
},
= "ɔ", -- all cases
= {
{ false, "", "E" }, -- before m or in literary words
{ false, false, "ia" } -- most cases
},
= "e", -- all cases
-- i
= {
{ false, "%^", "ia" }, -- in some words
{ false, false, "iə" } -- most cases
},
= {
{ "", "", "jU" }, -- before ll/nn word-initially or after a lenited consonant
{ false, "?", "iə" }, -- before dh/gh + another consonant
{ false, "" .. SYLL, "U" }, -- before syllable-final ll/nn
{ false, "", "u" }, -- before ll/nn elsewhere
{ false, false, "i" } -- otherwise
},
= {
{ "", false, "ju" }, -- word-initially or after a lenited consonant
{ false, "R", "U" }, -- before rr
{ false, "rs", "U" }, -- as well as rs
{ false, false, "u" } -- otherwise
},
= {
{ false, "" .. SYLL, "I" }, -- before syllable-final ll/m/nn
{ false, false, "i" } -- otherwise
},
-- o
= {
{ false, "?", "ɤi" }, -- before bh/gh + another consonant
{ false, "M?", "ɔi" }, -- before mh + another consonant
{ false, "" .. SYLL, "əi" }, -- before syllable-final ll/m/nn
{ false, "", "ɤ" }, -- before ll/nn elsewhere, bh/d/dh/gh or l/r in certain words
{ false, ".%*", "ɤ" }, -- as well as before epenthetic vowel
{ false, false, "ɔ" } -- otherwise
},
= {
{ false, "G?", "O" }, -- before gh + another consonant
{ false, "", "o" }, -- before b/bh/g/gh/mh
{ false, "" .. SYLL, "ɔu" }, -- before syllable-final ll/m/nn
{ false, "m", "o" }, -- before m
{ false, "R" .. SYLL, "Ɔ" }, -- before syllable-final rr
{ false, "r", "Ɔ" }, -- before syllable-final rd/rn
{ false, false, "ɔ" } -- otherwise
},
-- u
= {
{ false, "", "ua" }, -- before m/n
{ false, false, "uə" } -- otherwise
},
= {
{ false, "", "ua" }, -- before m/n
{ false, false, "uə" } -- otherwise
},
= {
{ false, "%$", "u" }, -- in some words
{ false, "%^", "ɯ" }, -- in some words
{ "", false, "ɯ" }, -- word-initially or after an alveolar consonant
{ false, "D", "ɯ" }, -- before dh
{ false, "M?", "ui" }, -- before mh + another consonant
{ false, "" .. SYLL, "ɯi" }, -- before syllable-final ll/m/nn
{ false, "", "ɯ" }, -- before ll/m/nn elsewhere
{ false, false, "u" } -- otherwise
},
= {
{ false, "" .. SYLL, "U" }, -- before syllable-final ll/m/nn
{ false, "R", "U" }, -- before rr
{ false, "rn", "U" }, -- as well as rn
{ false, false, "u" } -- otherwise
},
-- à/á
= "a", -- all cases
= "a", -- all cases
= {
{ false, "%^", "Ɛ" }, -- in some words
{ false, false, "A" } -- otherwise
},
= "A", -- all cases
-- è/é
= "ia", -- all cases
= "Ɛ", -- all cases
= "Ɛ", -- all cases
= "E", -- all cases
= "E", -- all cases
-- ì
= "iə", -- all cases
= "I", -- all cases
-- ò/ó
= {
{ "", false, "jƆ" }, -- word-initially or after a lenited consonant
{ false, false, "Ɔ" }, -- otherwise
},
= {
{ "", false, "jƆ" }, -- word-initially or after a lenited consonant
{ false, false, "Ɔ" }, -- otherwise
},
= "Ɔ", -- all cases
= "Ɔ", -- all cases
= "O", -- all cases
= "O", -- all cases
-- ù
= "U", -- all cases
= "U", -- all cases
= "U", -- all cases
= "U" -- all cases
}
-- unique unstressed forms (during parse)
-- Ordered list that parse() walks from top to bottom when a vowel is not stressed.
-- Each entry is { pattern, value } with an optional third field that the
-- comments tie to plural forms ("pl").
-- NOTE(review): the final entry uses `false` as its pattern; parse() skips
-- entries whose pattern is `false`, so this fallback may never apply -- confirm.
local vowels_uns = {
-- plural suffixes (ending in -an)
{ "e?agan#", "a", "pl" }, -- in -(e)agan
{ "e?anan#", "a", "pl" }, -- in -(e)anan
{ "e?an#", "ə", "pl" }, -- word-final -(e)an
-- non-plural suffixes
{ "e?ag#", "a" }, -- word-final -(e)ag
{ "e?an#", "a" }, -- word-final -(e)an
{ "ail#", "a" }, -- word-final -ail
{ "o#", "o" }, -- word-final o
{ "u#", "u" }, -- word-final u
-- general cases
{ "ai", "ɪ" }, -- ai before a palatalised slender consonant
{ "i", "ɪ" }, -- i in all cases
{ false, "ə" } -- otherwise
}
-- echo vowel forms (after parse)
-- Each entry is { vowel condition, consonant condition, resulting echo vowel };
-- `false` means "no condition". process_split() is the consumer of this list.
local echo_vowel = {
{ "ɛ", false, "a" }, -- echo vowel is /a/ if current vowel is /ɛ/
{ false, "s", "ə" } -- echo vowel is /ə/ after s as in Glaschu /ˈkl̪ˠas̪əxu/
}
-- consonant mutations, including mutated forms (after parse)
-- `voiced` maps a consonant to its voiced counterpart, `unvoiced` to its
-- voiceless counterpart.
-- NOTE(review): every live entry is missing its key (bare `=`), which is not
-- valid Lua; the keys have to be restored from the upstream revision.
local alt = {
voiced = {
-- = "B",
-- = "D",
-- = "F",
-- = "G",
= "g",
-- = "M",
= "b",
= "d"
},
unvoiced = {
= "p",
= "t",
= "k"
}
}
--[[
functions before parse
]]--
-- Handle the general replacement respelling spec.
-- `text` is the spec: a leading `$` followed by comma-separated `from:to`
-- pairs. Each pair is applied in order to `pagename`; a `from` that starts with
-- `^` only matches whole words. Returns the respelt page name.
-- Raises an error when a pair is malformed or when a substitution changes nothing.
-- NOTE(review): the `$` marker is what the caller tests for; if the intended
-- syntax is a different delimiter, adjust the extraction pattern and the
-- caller together.
local function respell_spec(text, pagename)
	local pattern_escape = require("Module:string utilities").pattern_escape
	-- take everything after the marker; matching the marker alone would leave
	-- "$" as the only (and always malformed) substitution
	local spec = match(text, "^%$(.*)$")
	if not spec then
		error("Bad substitution spec " .. tostring(text) .. " in {{gd-IPA}}")
	end
	local respelt = pagename
	for _, s in ipairs(split(spec, ",")) do
		local fromto = split(s, ":")
		if #fromto ~= 2 then
			error("Bad substitution spec " .. s .. " in {{gd-IPA}}")
		end
		local from, to = fromto[1], fromto[2]
		local newtext
		if find(from, "^%^") then
			-- whole-word match: a frontier needs an explicit set after %f
			from = match(from, "^%^(.*)$")
			newtext = gsub(respelt, "%f[%a]" .. pattern_escape(from) .. "%f[%A]", to)
		else
			newtext = gsub(respelt, pattern_escape(from), to)
		end
		if newtext == respelt then
			error("Substitution spec " .. s .. " didn't match respelling '" .. respelt .. "'")
		end
		respelt = newtext
	end
	return respelt
end
-- create the full respelt spec for lenited consonant combinations
-- Looks for a consonant combination in `text` (falling back to `pagename`) and a
-- hint symbol in `text`, then appends the symbol to that combination in
-- `pagename`. Exactly one occurrence must be replaced, otherwise an error is raised.
-- NOTE(review): as written the patterns are just "h" and "" -- the character
-- classes for the consonant and for the hint symbol appear to be missing, so
-- `symb` is always an empty string; restore them from the upstream revision.
local function lenited_spec(text, pagename)
local cons = match(text, "h") or match(pagename, "h")
local symb = match(text, "")
local newtext, count = gsub(pagename, cons, cons .. symb)
if count == 1 then
return newtext
elseif count == 0 then
error("No lenited consonant combination has been found.")
else
error(count .. " consonant combinations have been found. Please specify that combination before the symbol.")
end
end
-- create the full substitution spec for consonants split by an epenthetic vowel
-- Inserts `*` between two captured parts when both satisfy the checks below and
-- leaves the text unchanged otherwise.
-- NOTE(review): the patterns only contain `h?`; the consonant classes that
-- should precede it appear to be missing, so the captures can only ever be ""
-- or "h" -- restore them from the upstream revision.
local function split_cons_spec(text, pagename)
-- extract left and right parts
return gsub(text, "(h?)(h?)", function(a, b)
if match(a, "^h?$") and (match(b, "^h?$") or b == "") then
return a .. "*" .. b
end
-- return original text otherwise
return a .. b
end)
end
-- Consonants which are split by an epenthetic vowel by default are marked with *.
-- For every entry of `split_combos` the key lists the possible left-hand
-- consonants and the value the possible right-hand consonants; a `*` is
-- inserted between any such pair found in `term`.
-- NOTE(review): this assumes both key and value are plain character lists that
-- are safe inside a [...] set -- confirm once the table keys are restored.
local function split_consonants(term)
	-- loop over each combination
	for left, right in pairs(split_combos) do
		-- the sets must be built from the combination itself: empty `()` captures
		-- would yield position numbers instead of the two consonants
		term = gsub(term, "([" .. left .. "])([" .. right .. "])", "%1*%2")
	end
	return term
end
-- stress over each word
-- Adds a primary stress mark (PRIM) to a word that has none and then moves the
-- mark in front of the onset of the stressed syllable.
-- NOTE(review): every pattern below consists only of empty captures such as
-- "()" and "(s??)"; the character classes (stress marks, long vowels,
-- consonants) appear to be missing, so the patterns no longer express what the
-- comments describe -- restore them from the upstream revision.
local function stress_word(word)
-- add stress if there is no primary stressed syllable mark
if not match(word, "()") then
-- add stress to the first long vowel
if match(word, "()") then
word = gsub(word, "()", PRIM .. "%1", 1)
else
-- otherwise add stress to the first syllable
return PRIM .. word
end
end
-- shift stress before any possible slender e/i + ò/ù
word = gsub(word, "()()()", "%2%1%3")
-- then before s + consonant + l/r, consonant + l/r, otherwise the first consonant
word = gsub(word, "(s??)()", "%2%1")
-- and at the start for the first syllable
return gsub(word, "(#*)()", "%2%1")
end
-- perform further respellings
-- Normalises spelling before parsing: single-letter substitutions, a few fixed
-- clusters (chd -> chg, sr -> str), voicing after another consonant, and fortis
-- marking (upper case) of l/n/r.
-- NOTE(review): the replacement table in the first gsub() has lost its keys
-- (bare `=`, not valid Lua), `alt.voiced` is concatenated as a whole table
-- (an index appears to be missing), and several patterns are empty -- restore
-- them from the upstream revision.
local function respell_further(term)
-- single letters
term = gsub(term, "", { = "k", = "k", = "gs", = "j" }) -- substitute single letters with their respelt forms
-- multiple letters
term = gsub(term, "C(?)d", "C%1g") -- chd is pronounced like chg
term = gsub(term, "sr", "str") -- sr is pronounced like str
term = gsub(term, "(??)()", function(a, b) -- voiceless consonants are voiced after another consonant (k after l/r is an exception)
return match(a, "") and b == "k" and (a .. b) or (a .. alt.voiced)
end)
term = gsub(term, "(#?)()", function(a, b) return a .. upper(b) end) -- coronal l/n/r are fortis initially in a stressed syllable
return gsub(term, "n()", "N%1") -- as well as before d/t
end
-- Tell whether `word` is one of the unstressed words/particles.
-- The # boundary marks are stripped first; the bare word must then match one
-- of the patterns in `unstressed` in full. Returns true or false.
local function is_unstressed(word)
	local bare = gsub(word, "#", "")
	for particle in pairs(unstressed) do
		local anchored = "^" .. particle .. "$"
		if match(bare, anchored) then
			return true
		end
	end
	return false
end
-- generate normalised form for the generated term for easier parsing
-- Steps, in order: word boundaries (#), pauses, lower-casing, selective
-- decomposition, digraph folding into single capital letters, stress/liaison
-- marking per word, epenthesis marks, further respellings, and finally fusing
-- unstressed words (marked with JOIN) to the following word.
-- NOTE(review): several patterns below ("+", "()h", "()%1", "#+#") have lost
-- their character classes, so they no longer select what the comments
-- describe -- restore them from the upstream revision.
local function normalise(term)
-- add # at the start and end of each word
term = gsub(term, "+", "#%1#")
-- treat commas as a pause
term = gsub(term, ",#", "# |")
-- make all text lowercase
term = lower(term)
-- decompose accents except for grave and acute
term = decomp_selected(term)
-- respell consonant + h as a single capital letter
term = gsub(term, "()h", function(l) return upper(l) end)
-- likewise, respell double l, n and r as a single capital letter
term = gsub(term, "()%1", function(l) return upper(l) end)
-- and respell ng as ŋ
term = gsub(term, "ng", "ŋ")
-- go over each word and check if it should be unstressed
term = gsub(term, "#+#", function(word)
if is_unstressed(word) then
-- add a liaison marker for unstressed words including particles
word = gsub(word, "$", JOIN)
else
-- mark stress for other words
word = stress_word(word)
end
return word
end)
-- mark epenthetic vowels with a * between certain consonants
term = split_consonants(term)
-- perform further respellings
term = respell_further(term)
-- remove liaison markers and fuse word to the next
term = gsub(term, JOIN .. "$", "")
return gsub(term, JOIN .. "%s+", "")
end
--[[
functions during and after parse
]]--
-- Determine whether the following consonant is broad or slender.
-- Returns the slender mark (SLND) when `vowel` is one of the front vowels in
-- FRNT and an empty string (broad) otherwise. A missing or empty vowel counts
-- as broad: callers pass "" when no vowel is found, and an empty search string
-- would otherwise "match" and be reported as slender.
local function get_brsl(vowel)
	if vowel == nil or vowel == "" then
		return ""
	end
	-- plain search, so a character that is magic in patterns cannot misfire
	if find(FRNT, vowel, 1, true) then
		return SLND
	end
	return ""
end
-- evaluate the value of certain lenited consonants
-- Looks at the character at position `i` of `term`, compares the text before
-- and after it with the conditions in `lenition_rules`, and returns the
-- character followed by the hint mark of the first matching rule (or the bare
-- character when nothing applies). `pos` and `etyl` are accepted but not used here.
-- NOTE(review): three things look damaged and should be restored from the
-- upstream revision:
--   * the "already marked" guard ends in an optional SLND only, so it matches
--     every input and the function always returns early;
--   * the loop iterates `lenition_rules` with ipairs() although that table is
--     keyed per consonant (an index by `char` appears to be missing);
--   * `rule` is used whole for all three fields (indices appear to be missing).
local function handle_lenition(term, i, pos, etyl)
-- initialise some variables
local char = get_char_at_pos(term, i)
local before_match = sub(term, 1, i - 1) or ""
local after_match = sub(term, i + 1) or ""
-- ignore char if already marked
if match(after_match, "^" .. SLND .. "?") then
return char
end
-- otherwise go over each condition for given char
for _, rule in ipairs(lenition_rules) do
local before_cond, after_cond = rule or "", rule or ""
local replacement = rule
-- check if both conditions match
if (before_cond == false or match(before_match, before_cond .. "$")) and
(after_cond == false or match(after_match, "^" .. after_cond)) then
return char .. replacement
end
end
-- otherwise return char as-is if no condition found
return char
end
-- Change the length of the given vowel.
-- Long vowels are written as capital letters, short ones as lower-case letters.
-- `to_long` true returns the long form, false/nil the short form; a vowel that
-- already has the requested length is returned unchanged in length.
local function change_length(vowel, to_long)
	-- special case for /ɤ/, whose long form Ɐ is not its regular upper case
	if vowel == "Ɐ" or vowel == "ɤ" then
		return to_long and "Ɐ" or "ɤ"
	end
	if to_long then
		return upper(vowel)
	end
	-- shortening must never produce the upper-case (long) form, even when the
	-- vowel is already short
	return lower(vowel)
end
-- process the value for epenthetic vowels as marked by *
-- Repeatedly replaces "<vowel(s)><consonants>*" with the same text followed by
-- an echo vowel. No echo vowel is added when the captured vowel part is longer
-- than one character; otherwise the echo vowel defaults to the captured vowel
-- unless a rule in `echo_vowel` overrides it.
-- NOTE(review): the vowel class in "(+)", the long-vowel class in match(v, "")
-- and the indices on `pair` appear to be missing; as written match(v, "") is
-- always truthy, so `echo` always stays empty -- restore from the upstream revision.
local function process_split(term)
return gsub_repeatedly(term, "(+)(" .. SYLL .. "*)%*", function(v, c)
local echo = ""
-- no echo vowel if a long vowel or diphthong
if not (match(v, "") or len(v) > 1) then
-- loop over each combination
for _, pair in ipairs(echo_vowel) do
-- extract following vowels and consonants and their results
local vowel_cond, consonant_cond = pair, pair
local result = pair
-- echo vowel is identical to current vowel by default
echo = v
-- then check if both vowel and consonant conditions match
if (not vowel_cond or match(v, vowel_cond)) and (not consonant_cond or match(c, consonant_cond)) then
echo = result
break
end
end
end
return v .. c .. echo
end)
end
-- parse phonemes
-- Walks `term` character by character and builds the output sequence `seq`.
-- Each character is dispatched to one of the branches below: vowel sequence,
-- consonant, stress mark, diacritic, punctuation, or anything else (copied
-- through unchanged). `stressed` is set by a stress mark and cleared after the
-- next vowel sequence; `brsl` carries the broad/slender mark derived from the
-- last vowel and is appended to the consonants that follow.
-- `pos` and `etyl` default to "" and are passed on to the helpers.
-- NOTE(review): every match(char, "") test below has lost its character class
-- (an empty pattern always matches, so only the first branch can be reached),
-- and `pair`, `subpair` and `rule` are used whole where indexed fields are
-- expected -- restore these from the upstream revision.
local function parse(term, pos, etyl)
-- initialise some variables
pos, etyl = pos or "", etyl or ""
local seq, brsl = "", ""
local i = 1
local char = get_char_at_pos(term, 1)
local stressed = false
-- loop over every character until reached end of string
while i <= len(term) do
char = get_char_at_pos(term, i)
-- match vowel forms first
if match(char, "") then
-- NOTE(review): if no vowel pattern matches, `i` is never advanced in this branch
for _, pair in ipairs(longest_match_table(vowels)) do
local pattern, value = pair, pair
local pattern_matched = match(sub(term, i), "^" .. pattern)
if pattern_matched then
-- first check if vowel is unstressed and not followed by a breve or macron
if not stressed and not match(get_char_at_pos(term, i + len(pattern_matched)), "") then
for _, subpair in ipairs(vowels_uns) do
local subpattern, subvalue = subpair or "", subpair
local subpos = subpair or ""
if subpattern ~= false and match(sub(term, i), "^" .. subpattern) and match(pos, subpos) then
value = subvalue
break
end
end
-- otherwise look ahead for following char(s)
elseif type(value) == "table" then
for _, rule in ipairs(value) do
-- extract before and after subpattern table values and its result
local before, after = rule or "", rule or ""
local result = rule
-- then check if both subpattern structures match
local before_match = sub(term, 1, i - 1) or ""
local after_match = sub(term, i + len(pattern_matched)) or ""
if match(before_match, before .. "$") and match(after_match, "^" .. after) then
value = result
break
end
end
end
-- force default vowel value with breve or macron
-- NOTE(review): this inspects the character at `i` (the start of the vowel), not the one after the match -- confirm
if get_char_at_pos(term, i) == BREV then -- breve makes vowel short
value = change_length(value, false)
elseif get_char_at_pos(term, i) == MACR then -- and macron makes vowel long
value = change_length(value, true)
end
-- move by length of matched pattern
i = i + len(pattern_matched)
-- turn off stressed unless there's another stress mark
stressed = false
-- add appropriate pronunciation to string
seq = seq .. value .. (match(get_char_at_pos(term, i), "") and JOIN or "")
-- update broad/slender value
brsl = get_brsl(sub(pattern_matched, -1))
break
end
end
-- ...or a consonant
elseif match(char, "") then
local lenited_char = char
-- evaluate value of lenited consonant (i.e. if current/first char is B/C/D/F/G/M/P/S/T)
if match(char, "") then
lenited_char = handle_lenition(term, i, pos, etyl)
end
-- capture multiple consonants word-initially or single consonant otherwise
local pattern_matched = match(get_char_at_pos(term, i - 1), "#")
and match(sub(term, i), "+")
or match(sub(term, i), "?")
i = i + len(pattern_matched)
-- sub in lenited character into matched pattern
pattern_matched = gsub(pattern_matched, char, lenited_char)
-- add broad/slender mark to final consonant
pattern_matched = pattern_matched .. brsl
seq = seq .. pattern_matched
-- ...or a stress mark
elseif match(char, "") then
i = i + 1
stressed = true
seq = seq .. char
-- ...or certain diacritics
elseif match(char, "") then
-- don't add char, just move forward
i = i + 1
-- or certain punctuation marks
elseif match(char, "") then
i = i + 1
-- refresh broad/slender value if boundary # for next word
if char == "#" then brsl = get_brsl(match(sub(term, i), "") or "") end
seq = seq .. char
-- ...otherwise copy the character through (the error below is disabled)
else
i = i + 1
seq = seq .. char
-- error("Invalid character at position " .. i .. ": " .. char)
end
end
return seq
end
--[[
functions after parse
]]--
-- finalise parsed string
-- Turns the parsed sequence into its final form through an ordered series of
-- substitutions: epenthetic vowels, stress placement, lenition hint marks,
-- coronal and cluster rules, aspiration/voicing, broad/slender realisation,
-- dental marking, hiatus handling, and long-vowel conversion. The order of
-- the steps matters, since later steps match the output of earlier ones.
-- `pos` and `etyl` are accepted but not used here.
-- NOTE(review): many patterns below have lost their character classes ("()",
-- "", "(#?)", ...) and the replacement tables have lost their keys (bare `=`,
-- not valid Lua) -- restore them from the upstream revision.
local function finalise(term, pos, etyl)
-- replace * with its true value
term = process_split(term)
-- shift stress mark after #
term = gsub(term, "()#", "#%1")
-- substitute h early as h is never slender
term = gsub(term, "h" .. SLND, "h")
-- lenited consonant combination rules (mh will be substituted after handling nasalisation)
term = gsub(term, "_" .. SLND .. "?", HIAT) -- underscored consonants are silent, possibly as a hiatus
term = gsub(term, "()=", { = "v", = "x", = "ɣ", = "h", = "ɣ", = "f", = "h", = "h" }) -- otherwise they have these pronunciations
term = gsub(term, "B%+", "u") -- bh+ represents /u/
-- coronal consonant rules
term = gsub(term, "l()", "L%1") -- non-slender l is always fortis
term = gsub(term, "(ə?)n" .. SLND, "%1N" .. SLND) -- slender n is fortis after a back vowel or schwa
term = gsub(term, "R" .. SLND, "R") -- fortis r is always broad
-- consonant combination rules
term = gsub(term, "(" .. SLND .."?)n", "%1ɾ") -- cn/gn is pronounced like cr/gr respectively
term = gsub(term, "r" .. SLND .."?()" .. SLND .. "?", "rʃ%1") -- add ʃ in between rd/rt
-- aspiration and voicing rules
term = gsub(term, "(#?)", "%1" .. ASPR) -- voiceless consonants are post-aspirated initially
term = gsub(term, "()", ASPR .. "%1") -- and preaspirated otherwise
term = gsub(term, "", alt.unvoiced) -- substitute voiced plosives with their voiceless non-aspirated forms
term = gsub(term, "ŋ(" .. SLND .. "?)", "ŋ%1ɡ%1") -- ŋ is actually /ŋɡ/
-- broad and slender rules
term = gsub(term, "()" .. SLND, { = "ç", = "ʝ", = "ʎ", = "ɲ", = "ʃ" }) -- actual values for certain slender consonants (r is always broad when fortis)
term = gsub(term, "()()", "%1" .. BROD .. "%2") -- add broad marker for non-slender fortis consonants
term = gsub(term, "(*" .. ASPR .. "?)" .. SLND .. "(.)", function(l, c) return match(c, "") and (l .. "j" .. c) or (l .. c) end) -- slender labial consonants are palatalised before stressed back vowels
term = gsub(term, "()" .. SLND, "%1") -- remove excessive slender marks for labial consonants/l/n
-- dental marking of otherwise alveolar consonants
term = gsub_repeatedly(term, "()()", "%1" .. DENT .. "%2") -- broad alveolar consonants are actually dental
term = gsub_repeatedly(term, "()" .. DENT .. ASPR .. SLND, "%1" .. ASPR .. SLND) -- dental mark has to be removed this way as aspiration mark prevents proper substitution
term = gsub(term, "r", "ɾ") -- r is an alveolar tap
term = gsub(term, "()", function(c) return lower(c) end) -- make fortis consonants lowercase
-- removal of hiatus markers if next to any non-vowel
term = gsub(term, "()" .. HIAT, "%1") -- on the left
term = gsub(term, HIAT .. "()", "%1") -- and also on the right
-- convert uppercase characters to lowercase with length mark
term = gsub(term, "", function(c) return change_length(c, false) .. "ː" end)
-- hiatus mark's real value
term = gsub(term, HIAT, ".") -- hiatus is a syllable break
-- remove unnecessary symbols
return gsub(term, "", "")
-- return term
end
-- Evaluate the canonicalised form based on certain spelling hints and substitutions.
-- `term` is either a string of the form "text~pagename" or a table with an
-- `args` list whose first entry is that string. `pgn` is used as the page name
-- when the string carries none (presumably the caller-supplied page name --
-- confirm). Returns the values of the final gsub().
-- NOTE(review): the patterns "^h$", "^$" and "+" are kept exactly as found, but
-- they appear to have lost their character classes (consonant + h, hint
-- symbols, non-space runs) -- restore them from the upstream revision.
function export.canonicalise_pron(term, pgn)
	if type(term) == "table" then
		-- the string is the first positional argument, not the args table itself
		term = term.args[1]
	end
	-- temp solution for now
	local texts = split(term, "~")
	local text, pagename = texts[1], texts[2] or pgn
	-- if not text or text == "+" then
	-- text = pagename
	-- end
	-- apply general substitution rules
	if match(text, "^%$") then
		text = respell_spec(text, pagename)
	elseif match(text, "^h$") or match(text, "^$") then
		text = lenited_spec(text, pagename)
	end
	-- add # at the start and end of each word
	return gsub(text, "+", "#%1#")
	-- return text
end
--[[
syllabification function
]]--
-- Insert hiatus markers (HIAT) between the syllables of `term`.
-- The term is scanned left to right: a vowel sequence starts a new syllable
-- (preceded by HIAT unless it is the first one after the start or a space),
-- whitespace resets the "first syllable" state, and everything else is copied
-- as consonant material. The marker is then shifted leftwards over onset clusters.
-- NOTE(review): match(char, "") has lost its vowel class (an empty pattern
-- always matches), `pair` is used whole as the pattern, and the consonant
-- pattern "^" matches an empty string, which would leave `i` unchanged -- 
-- restore these from the upstream revision.
function export.syllabify(term)
-- initialise some variables
local seq = ""
local i = 1
local syll_init = true
-- loop over every character until reached end of string
while i <= len(term) do
local char = get_char_at_pos(term, i)
-- match vowels first
if match(char, "") then
local matched = false
-- loop over each vowel pattern
for _, pair in ipairs(longest_match_table(vowels)) do
local pattern = pair
local vowel_seq = match(sub(term, i), "^" .. pattern)
if vowel_seq then
-- add syllable marker if not at start
if not syll_init then
seq = seq .. HIAT
end
syll_init = false
seq = seq .. vowel_seq
i = i + len(vowel_seq)
matched = true
break
end
end
-- handle single vowels if no complex vowel matched
if not matched then
if not syll_init then
seq = seq .. HIAT
end
syll_init = false
seq = seq .. char
i = i + 1
end
-- then reset initial syllable if space found
elseif match(char, "%s") then
seq = seq .. char
syll_init = true
i = i + 1
-- otherwise handle everything else (consonants)
else
local cons_seq = match(sub(term, i), "^")
seq = seq .. cons_seq
i = i + len(cons_seq)
end
end
-- shift the syllable marker before s + consonant + l/r, consonant + l/r, otherwise the first consonant
seq = gsub(seq, "(s??)" .. HIAT, HIAT .. "%1")
-- then before lenited consonant digraphs
seq = gsub(seq, "()" .. HIAT .. "h", HIAT .. "%1h")
-- then before coronal consonant digraphs
return gsub(seq, "()" .. HIAT .. "%1", HIAT .. "%1%1")
end
--[[
main pronunciation function
]]--
-- Convert a term to its IPA transcription.
-- `term` is either the spelling as a string or a table with an `args` list
-- whose first entry is the spelling. `pos` and `etyl` are optional and are
-- passed on to parse() and finalise().
-- Returns the finalised transcription string.
function export.toIPA(term, pos, etyl)
	if type(term) == "table" then
		-- the spelling is the first positional argument; the args table itself
		-- cannot be processed as text
		term = term.args[1]
	end
	-- canonicalise term
	-- term = export.canonicalise_pron(term)
	-- syllabify term
	-- term = export.syllabify(term)
	-- normalise, parse and finalise the term in turn
	local normalised = normalise(term)
	local parsed = parse(normalised, pos, etyl)
	local finalised = finalise(parsed, pos, etyl)
	return finalised
end
-- main display function
-- NOTE(review): the body is empty, so calling this returns nothing; the display
-- logic still has to be written.
function export.show(text)
end
return export