-- This module is primarily maintained at:
-- https://en.wiktionary.org/wiki/Module:mh-pronunc
-- Please direct all technical queries and contributions there.
-- The version of this script on Wikipedia is only a mirror.
-- Module table. The public entry points are attached to it near the end of
-- the file and it is the value the file returns.
local export = {}
-- Combining diacritic characters kept as named string constants.
-- NOTE(review): in the code visible here only ASYLLABIC, CENTRAL and
-- LESSROUND2 are referenced; confirm whether the others are still needed.
local ASYLLABIC = "̯"
local BREVE = "̆"
local BREVE2 = "͝"
local CEDILLA = "̧"
local CENTRAL = "̈"
local DEVOICE = "̥"
local DEVOICE2 = "̊"
local LESSROUND = "̜"
local LESSROUND2 = "͑"
local MACRON = "̄"
local MOREROUND = "̹"
local MOREROUND2 = "͗"
local SYLLABIC = "̩"
local TIE = "͡"
local TIE2 = "͜"
-- Cluster type codes. needClusterTypes() stores them in the clusterTypes
-- lookup table and toPhoneticRemainder() compares against them when it
-- decides how to treat a consonant cluster.
local EPENTH_CLUSTER = 0
local ASSIM_CLUSTER = 1
local STABLE_CLUSTER = 2
-- Forward-declare functions.
-- Each name is declared as a local here and assigned a function value later,
-- so the function bodies can call one another regardless of definition order
-- (for example, lerpF2 calls needVowelCharts, which is assigned after it).
local addUnique
local assign
local fastTrim
local lerpF2
local mergedMidVowelsMap
local needClusterTypes
local needPhoneticMap
local needVowelCharts
local parse
local parseBoolean
local reverseString
local splitTokens
local string_gsub2
local string_gsubx
local toBender
local toMOD
local toPhonemic
local toPhonetic
local toPhoneticDialect
local toPhoneticRemainder
-- Forward-declare lookup tables.
-- These start out nil and are filled in on first use: the need*() functions
-- return early once their table is set, and the remaining maps are created
-- with the `name = name or { ... }` idiom inside the functions that use them.
local benderMaps
local clusterTypes
local fromF1
local fromF2
local fromF2Conson
local parseC_CH_CWmap
local parsePseudoConsonMap
local parseRemainingMap
local phonemicMap
local phoneticMap
local toF1
local toF2
local toMODmap
local voicedPrimaries
-- Adds elements to a sequence as if it's a set (retains unique elements only).
-- @param seq   table used as a sequence; modified in place
-- @param value element to append when no equal element is already present
-- Returns nothing.
addUnique = function(seq, value)
    for _, existing in pairs(seq) do
        if existing == value then
            return
        end
    end
    -- Write into the table itself. Assigning to the `seq` parameter would only
    -- rebind the local name and leave the caller's table untouched.
    seq[#seq + 1] = value
end
-- Intended to work the same as JavaScript's Object.assign() function.
-- Copies every key/value pair of each table argument into `target`, in
-- argument order, so later sources override earlier ones.
-- @param target table that receives the copied pairs; modified in place
-- @param ...    source values; anything that is not a table is skipped
-- @return the same `target` table
assign = function(target, ...)
    -- Walk the varargs by position so that sources are applied left to right
    -- and a nil argument does not cut the list short.
    for i = 1, select("#", ...) do
        local source = select(i, ...)
        if type(source) == "table" then
            for key, value in pairs(source) do
                -- Store under the key; assigning to `target` itself would
                -- replace the table with a single value.
                target[key] = value
            end
        end
    end
    return target
end
-- Returns `text` with leading and trailing whitespace removed, using plain
-- byte-oriented string patterns.
fastTrim = function(text)
    local trimmed = string.match(text, "^%s*(.-)%s*$")
    return trimmed
end
-- Combines the two secondary-articulation codes `secondaryL` and `secondaryR`
-- through the toF2 / fromF2 vowel chart tables, after making sure those
-- tables exist.
-- NOTE(review): the return expression has lost its bracketed index
-- expressions (unbalanced ")]" remains), so this does not parse as written.
-- It presumably indexes fromF2 by a sum of toF2 lookups for the two
-- arguments -- restore from the upstream module before relying on it.
lerpF2 = function(secondaryL, secondaryR)
needVowelCharts()
return fromF2 + toF2)]
end
-- Builds the clusterTypes lookup table the first time it is called; later
-- calls return immediately because clusterTypes is already set.
-- The table is assembled with assign() from a shared EMPTY default plus
-- per-entry overrides whose leaf values are EPENTH_CLUSTER, ASSIM_CLUSTER or
-- STABLE_CLUSTER.
-- NOTE(review): every table-constructor entry below has lost its key (each
-- entry begins with a bare "="), and the trailing "becomes" comments are cut
-- short, so this block does not parse as written. The keys must be restored
-- from the upstream module; they cannot be inferred from this file.
needClusterTypes = function()
if clusterTypes then
return
end
local EPENTH = {
= EPENTH_CLUSTER,
= EPENTH_CLUSTER,
= EPENTH_CLUSTER
}
local ASSIM = {
= ASSIM_CLUSTER,
= ASSIM_CLUSTER,
= ASSIM_CLUSTER
}
local STABLE = {
= STABLE_CLUSTER,
= STABLE_CLUSTER,
= STABLE_CLUSTER
}
-- Default row: every entry maps to the EPENTH table.
local EMPTY = {
= EPENTH, = EPENTH, = EPENTH,
= EPENTH, = EPENTH, = EPENTH,
= EPENTH, = EPENTH, = EPENTH, = EPENTH
}
-- Each entry starts from a copy of EMPTY and overrides selected pairs.
clusterTypes = {
= assign({}, EMPTY, {
= STABLE, -- /pp/
= ASSIM -- /pm/ becomes
}),
= assign({}, EMPTY, {
= STABLE -- /tt/
}),
= assign({}, EMPTY, {
= STABLE, -- /kk/
= ASSIM -- /kŋ/ becomes
}),
= assign({}, EMPTY, {
= STABLE, -- /mp/
= STABLE -- /mm/
}),
= assign({}, EMPTY, {
= STABLE, -- /nt/
= STABLE, -- /nn/
= STABLE, -- /nr/
= STABLE -- /nl/
}),
= assign({}, EMPTY, {
= STABLE, -- /ŋk/
= STABLE -- /ŋŋ/
}),
= assign({}, EMPTY, {
= ASSIM, -- /rn/ becomes
= STABLE, -- /rr/
= STABLE -- /rl/
}),
= assign({}, EMPTY, {
= assign({}, STABLE, {
= EPENTH_CLUSTER, -- /ltʲ/
}),
= ASSIM, -- /ln/ becomes
= STABLE, -- /lr/
= STABLE -- /ll/
}),
= EMPTY
}
end
-- Builds phoneticMap the first time it is called; later calls return
-- immediately. The map is used as the replacement table of the final
-- string.gsub in toPhoneticRemainder, which converts the internal
-- pseudo-X-SAMPA codes to IPA strings.
-- The function first fills a base table, then derives further entries for
-- consonant + secondary-articulation pairs and for vowel / semivowel codes
-- using the fromF1 and fromF2 charts.
-- NOTE(review): all table keys and index expressions in this block have been
-- lost (entries begin with a bare "=", and statements such as
-- "map = map or map" no longer say which keys they read or write), so the
-- block does not parse as written. Restore it from the upstream module.
needPhoneticMap = function()
if phoneticMap then
return
end
needVowelCharts()
local map = {
= "p",
= "b",
= "t",
= "d",
= "k",
= "ɡ",
= "m",
= "n",
= "ŋ",
= "r",
= "l",
= "j",
= "ɰ",
= "w",
= "‿",
= "ʲ",
= "ˠ",
= "ʷ",
= "æ",
= "ɛ",
= "e",
= "i",
= "a",
= "ɜ",
= "ɘ",
= "ɨ",
= "ɑ",
= "ʌ",
= "ɤ",
= "ɯ",
= "ɒ",
= "ɔ",
= "o",
= "u",
= ASYLLABIC,
= "",
= "(",
= ")",
= "ː",
= "",
= "ˈ",
= "ˌ",
= "",
= ""
}
-- The `false and A or B` form always selects the second table; the first
-- table is kept in the source as a disabled alternative.
assign(map, false and {
= "b̥",
= "d̥",
= "ɡ̊"
} or {
= map,
= map,
= map
})
-- Disabled branch (condition is the literal false).
if false then
for primary in mw.text.gsplit("kKgN", "") do
map = map
end
end
map = map or map or (map..map)
map = map or map
map = map or map
map = map or map
-- Always-on branch (condition is the literal true).
if true then
assign(map, {
= "ɰ",
= "ʁ",
= "ʁ",
= "ʕ"
})
end
if true then
for f1 in mw.text.gsplit("aEei", "") do
local key = f1.."5^"
map = map or map
end
end
-- Every primary consonant code combined with every secondary code.
for primary in mw.text.gsplit("pPbtTdkKgmnNrl_ \t\n", "") do
for secondary in mw.text.gsplit("jGw", "") do
local key = primary..secondary
map = map or ((map or primary)..map)
end
end
-- Vowel codes: one pass per F1 value (1-4) and per F2 value (1-5), with
-- "=", "@" and "^" suffixed variants built for each vowel.
for f1 = 1, 4 do
local vowelF1 = fromF1
local vowel = vowelF1.."2"
map = map or (map..CENTRAL)
vowel = vowelF1.."4"
map = map or (map..LESSROUND2)
for f2 = 1, 5 do
vowel = vowelF1..fromF2
local semi = vowel.."="
map = map or (map..map)
semi = vowel.."@"
map = map or (map..map)
semi = vowel.."^"
map = map or (map..map)
end
end
phoneticMap = map
end
-- Creates the vowel chart lookup tables toF1, fromF1, toF2, fromF2 and
-- fromF2Conson the first time it is called; later calls return immediately
-- because toF1 is already set.
-- Judging by the values, the to* tables map codes to small integers (1-4 for
-- F1, 1-5 for F2) and the from* tables map back to code characters.
-- NOTE(review): every key in these constructors has been lost (entries begin
-- with a bare "="), so the block does not parse as written and the key sets
-- cannot be recovered from this file. Restore from the upstream module.
needVowelCharts = function()
if toF1 then
return
end
toF1 = {
= 1, = 2, = 3, = 4,
= 1, = 2, = 3, = 4
}
fromF1 = {
= "a", = "E", = "e", = "i",
= "a", = "E", = "e", = "i"
}
toF2 = {
= 1, = 3, = 5,
= 1, = 2, = 3, = 4, = 5,
= 1, = 2, = 3, = 4, = 5
}
fromF2 = {
= "1", = "2", = "3", = "4", = "5",
= "1", = "2", = "3", = "4", = "5",
= "1", = "3", = "5"
}
fromF2Conson = {
= "j", = "G", = "w",
= "j", = "G", = "w",
= "j", = "G", = "w"
}
end
-- Parses a template argument into a sequence of internal pseudo-X-SAMPA
-- strings.
-- `code` is whitespace-normalised, lower-cased and split into pieces; each
-- non-empty piece is validated, rewritten through several substitution maps,
-- checked against phonotactic rules and added to the result with addUnique.
-- @param code string given by the caller
-- @return sequence of unique converted strings
-- Raises an error (via error()) when a piece contains unsupported characters,
-- misplaced underscores, a stray "g", or violates the phonotactic checks.
-- NOTE(review): the character classes in several patterns and all keys of
-- the three substitution maps have been lost (empty patterns such as "" and
-- "+", and map entries beginning with a bare "="), so this block does not
-- parse or behave as intended as written. Restore from the upstream module.
parse = function(code)
local outSeq = {}
code = mw.ustring.gsub(code, "%s+", " ")
code = string.lower(code)
for text in mw.text.gsplit(code, " *,*") do
text = fastTrim(text)
if text ~= "" then
-- NOTE(review): with the empty pattern shown here the substitution leaves
-- `text` unchanged, so the check below would reject every non-empty input.
local temp = string.gsub(text, "", "")
if temp ~= "" then
error("'"..code.."' contains unsupported characters: "..temp)
end
-- Recognize "y_", "h_", "w_", "_y", "_h", "_w" as pseudoconsonants.
parsePseudoConsonMap = parsePseudoConsonMap or {
= "0",
= "0h",
= "0w"
}
text = string.gsub(text, "_*()_+", parsePseudoConsonMap)
text = string.gsub(text, "_+()", parsePseudoConsonMap)
if string.find(text, "_") then
error("contains misplaced underscores: "..code)
end
-- a plain {i} protected from dialect-specific reflexes
text = string.gsub(text, "'i", "I")
-- "yi'y" and "'yiy" sequences
-- When neither apostrophe is present the callback returns nothing, which
-- makes gsub keep the original match.
text = string.gsub(text, "('?)yi('*)y", function(aposA, aposB)
if aposA ~= "" then
-- "dwelling upon" i
return "Z"
elseif aposB ~= "" then
-- "passing over lightly" i
return "z"
end
end)
-- Convert multigraphs to pseudo-X-SAMPA format.
parseC_CH_CWmap = parseC_CH_CWmap or {
= "kG",
= "kGh", -- N\A
= "kW",
= "lJ",
= "lG",
= "lW",
= "mJ",
= "mG",
= "mJw", -- N\A
= "nJ",
= "nG",
= "nW",
= "NG",
= "NGh", -- N\A
= "NW",
= "rG",
= "rGh", -- N\A
= "rW",
= "_J",
= "_G",
= "_W"
}
text = string.gsub(text, "g??", parseC_CH_CWmap)
if string.find(text, "g") then
error("contains g that is not part of ng: "..code)
end
-- Convert remaining sequences to pseudo-X-SAMPA format.
parseRemainingMap = parseRemainingMap or {
= "pG",
= "rj",
= "E",
= "e",
= "hG",
= "tj",
= "j",
= "pj",
= "tG",
= "hw",
= "w",
= "hj",
= "yj",
= "Yj",
= ""
}
text = string.gsub(text, ".", parseRemainingMap)
-- Enforce CVC, CVCVC, CVCCVC, etc. phonotactics,
-- but allow VC, CV at affix boundaries
-- where a vowel may link to another morpheme's consonant.
temp = string.gsub(text, "+", "")
if string.find(temp, "_..") or
string.find(temp, "._.")
then
error("pseudoconsonants may not neighbor a consonant")
end
if string.find(temp, "_.") then
-- NOTE(review): unlike the other messages, this one has no ": " separator
-- before the appended code.
error(
"pseudoconsonants may only be at the beginning or end"..code
)
end
if string.find(temp, "") then
error("vowels must be separated by a consonant: "..code)
end
if string.find(temp, "..$") then
error("may not end with a consonant cluster: "..code)
end
-- gsub is used here only to run the callback; its result is discarded.
string.gsub(" "..temp, "(.)(.*)",
function(consonX, consonY)
if consonX ~= consonY then
error(
"may not begin with a consonant cluster "..
"unless it is a geminate: "..code
)
end
end
)
if text ~= "" then
addUnique(outSeq, text)
end
end
end
return outSeq
end
-- Interprets a template argument as a boolean flag.
-- @param text value to interpret; anything that is not a string counts as
--             false
-- @return true when, ignoring whitespace, `text` is non-empty, is not "0"
--         and is not "false" in any letter case; false otherwise
parseBoolean = function(text)
    if type(text) ~= "string" then
        return false
    end
    -- Remove whitespace before comparing, so values such as " false " or a
    -- blank argument are not mistaken for true. An empty pattern here would
    -- leave the text unchanged. The parentheses keep only gsub's first
    -- result (the string), dropping the match count.
    local stripped = (string.gsub(text, "%s+", ""))
    if stripped == "" or stripped == "0" then
        return false
    end
    return string.lower(stripped) ~= "false"
end
-- Returns `text` with its tokens in reverse order. The text is split with
-- splitTokens() using that function's default pattern, the resulting
-- sequence is reversed in place, and the pieces are joined back together.
-- @param text string to reverse
-- @return the reversed string
reverseString = function(text)
    local chars = splitTokens(text)
    local i = 1
    local j = #chars
    -- Swap the outermost pair and move inwards until the indices meet.
    while i < j do
        chars[i], chars[j] = chars[j], chars[i]
        i = i + 1
        j = j - 1
    end
    return table.concat(chars, "")
end
-- Splits `text` into tokens and stores them in a sequence.
-- @param text    string to split
-- @param pattern optional Lua pattern; each match becomes one token.
--                Defaults to one UTF-8 encoded character per token (a
--                non-continuation byte followed by its continuation bytes).
--                NOTE(review): this default is a reconstruction; confirm it
--                against the upstream module.
-- @param chars   optional table to reuse; tokens overwrite its elements
--                from index 1
-- @param shorten when a reused table is longer than the token count: if
--                truthy, remove the single element just past the last token
--                with table.remove; otherwise clear every leftover element
-- @return the table holding the tokens (`chars`, or a new table)
splitTokens = function(text, pattern, chars, shorten)
    chars = chars or {}
    -- Measure the reused table before writing to it, so the clean-up below
    -- does not depend on the length of a table with holes in it.
    local oldLength = #chars
    local index = 1
    for ch in string.gmatch(
        text, pattern or "[^\128-\191][\128-\191]*"
    ) do
        chars[index] = ch
        index = index + 1
    end
    if index <= oldLength then
        if shorten then
            table.remove(chars, index)
        else
            -- Clear from the end downwards so the table stays a proper
            -- sequence at every step.
            for stale = oldLength, index, -1 do
                chars[stale] = nil
            end
        end
    end
    return chars
end
-- Applies string.gsub(text, pattern, subst) and, when that pass changed the
-- text, applies it exactly once more. Returns only the resulting string.
string_gsub2 = function(text, pattern, subst)
    local firstPass = string.gsub(text, pattern, subst)
    -- A pass that changes nothing cannot change anything the second time.
    if firstPass == text then
        return firstPass
    end
    local secondPass = string.gsub(firstPass, pattern, subst)
    return secondPass
end
-- Applies string.gsub(text, pattern, subst) repeatedly until a pass leaves
-- the text unchanged, then returns that final string.
string_gsubx = function(text, pattern, subst)
    while true do
        local replaced = string.gsub(text, pattern, subst)
        if replaced == text then
            return replaced
        end
        text = replaced
    end
end
-- Converts each parsed string in `inSeq` to an orthographic rendering by
-- substituting through one of several lookup maps, and returns the unique
-- results as a sequence.
-- @param inSeq sequence of strings (as produced by parse)
-- @param args  optional table; args.version selects the map, compared
--              case-insensitively when it is a string
-- @return sequence of unique converted strings
-- The maps are built once and cached in benderMaps; mapMED, mapMOD and
-- mapDefault are each derived from the previous map with assign().
-- NOTE(review): all map keys, the benderMaps keys, the fallback index after
-- "or benderMaps" and part of the gsub pattern have been lost, so this block
-- does not parse as written. Restore from the upstream module.
toBender = function(inSeq, args)
-- "1968" is from "Marshallese Phonology" (1968 by Byron W. Bender).
-- "med" is from the Marshallese-English Dictionary (1976).
-- "mod" is from the Marshallese-English Online Dictionary.
-- "default" is the same as "mod" but with cedillas.
local version = args and args.version
if not benderMaps then
local map1968 = {
= "p", = "b",
= "j", = "t",
= "k", = "q",
= "m", = "ṁ",
= "n", = "ṅ", = "n̈",
= "g", = "g̈",
= "d", = "r", = "r̈",
= "l", = "ł", = "l̈",
= "yi'y",
= "'yiy",
= "y", = "h", = "w",
= "", = "", = "",
= "a",
= "e",
= "&",
= "i",
= "i"
}
local mapMED = assign({}, map1968, {
= "m̧",
= "ņ",
= "ņ°",
= "g°",
= "r°",
= "ļ",
= "ļ°",
= "ȩ"
})
local mapMOD = assign({}, mapMED, {
= "kʷ",
= "ṃ",
= "ṇ",
= "ṇʷ",
= "gʷ",
= "rʷ",
= "ḷ",
= "ḷʷ",
= "ẹ"
})
local mapDefault = assign({}, mapMOD, {
= "m̧",
= "ņ",
= "ņʷ",
= "ļ",
= "ļʷ",
= "ȩ"
})
benderMaps = {
= map1968,
= mapMED,
= mapMOD,
= mapDefault
}
end
-- A non-string version is looked up as "" and so falls through to the
-- fallback on the right of `or`.
local map = benderMaps[
type(version) == "string" and string.lower(version) or ""
] or benderMaps
local outSeq = {}
for _, text in pairs(inSeq) do
text = string.gsub(text, ".?", map)
addUnique(outSeq, text)
end
return outSeq
end
-- Rewrites characters of `text` through the toMODmap substitution table
-- (created on first use) with mw.ustring.gsub and returns the new string.
-- The replacement values are dotted-below letters plus "Ñ"/"ñ".
-- NOTE(review): the map keys and part of the pattern have been lost, so the
-- block does not parse as written. Restore from the upstream module.
toMOD = function(text)
toMODmap = toMODmap or {
= "Ẹ", = "ẹ",
= "Ḷ", = "ḷ",
= "Ṃ", = "ṃ",
= "Ṇ", = "ṇ",
= "Ñ", = "ñ",
= "Ọ", = "ọ"
}
text = mw.ustring.gsub(text, ".?", toMODmap)
return text
end
-- Converts each parsed string in `inSeq` to a phonemic IPA string by
-- substituting through phonemicMap, and returns the unique results.
-- @param inSeq sequence of strings (as produced by parse)
-- @return sequence of unique converted strings
-- phonemicMap is built on the first call and reused afterwards.
-- NOTE(review): the map keys, the targets of the two "map = map.hj.." lines
-- and part of the gsub pattern have been lost, so the block does not parse
-- as written. Restore from the upstream module.
toPhonemic = function(inSeq)
local outSeq = {}
if not phonemicMap then
local map = {
= "pʲ", = "pˠ",
= "tʲ", = "tˠ",
= "kˠ", = "kʷ",
= "mʲ", = "mˠ",
= "nʲ", = "nˠ", = "nʷ",
= "ŋˠ", = "ŋʷ",
= "rʲ", = "rˠ", = "rʷ",
= "lʲ", = "lˠ", = "lʷ",
= "j", = "ɰ", = "w",
= "", = "", = "",
= "æ",
= "ɛ",
= "e",
= "i",
= "i"
}
phonemicMap = map
-- Disabled alternative vowel set (condition is the literal false).
if false then
assign(map, {
= "ɐ",
= "ə",
= "ɘ",
= "ɨ",
= "ɨ"
})
end
-- Composite values built from the existing hj and i entries.
map = map.hj..map.i..ASYLLABIC..map.hj
map = map.hj..map.i.."ː"..map.hj
end
for _, text in pairs(inSeq) do
text = string.gsub(text, ".?", phonemicMap)
addUnique(outSeq, text)
end
return outSeq
end
-- Converts each parsed string in `inSeq` to phonetic output and returns the
-- unique results as a sequence.
-- @param inSeq sequence of strings (as produced by parse)
-- @param args  optional table of options read here: dialect, enunciate,
--              liaison, nohints and voice
-- @return sequence of unique pronunciations; it is filled in by
--         toPhoneticRemainder through the shared config table
-- For the "ralik" and "ratak" dialects one reflex is produced per input;
-- otherwise both are computed and the second is emitted only if it differs.
-- NOTE(review): the keys of the config table and the character class of the
-- first gsub pattern in the loop have been lost, so the block does not parse
-- as written. toPhoneticRemainder reads config.enunciate, config.liaison,
-- config.noHints and config.outSeq, which suggests four of the key names.
toPhonetic = function(inSeq, args)
-- Recognize "ralik" for Rālik Chain (western dialect).
-- Recognize "ratak" for Ratak Chain (eastern dialect).
-- For other values, list both possible dialect reflexes where applicable.
local dialect = args and args.dialect and
mw.ustring.lower(mw.text.trim(args.dialect)) or ""
if dialect == "rālik" then
dialect = "ralik"
end
-- If enabled, break words at consonant cluster boundaries
-- and enunciate the word fragments individually.
-- This mode does not assimilate clusters or produce epenthetic vowels.
local enunciate = not not (args and parseBoolean(args.enunciate))
-- If enabled, display liaison joiners to mark
-- spaces or hyphens in the input code that are not consonant clusters.
local liaison = not not (args and parseBoolean(args.liaison))
-- If enabled, do not display pseudoconsonant hints at all.
local noHints = not not (args and parseBoolean(args.nohints))
-- "false" will display all obstruent allophones as voiceless.
-- "true" will display all obstruent allophones as voiced.
-- Empty string or absent by default will display
-- only medial obstruent allophones as semi-voiced.
-- After this step `voice` is either "" (unset) or a boolean.
local voice = args and args.voice or ""
if voice ~= "" then
voice = parseBoolean(voice)
end
local outSeq = {}
local config = {
= outSeq,
= enunciate,
= liaison,
= noHints,
= voice
}
for _, text in pairs(inSeq) do
text = string.gsub(text, "+", " ")
text = fastTrim(text)
local isRalik = dialect == "ralik"
if isRalik or dialect == "ratak" then
text = toPhoneticDialect(text, config, isRalik)
toPhoneticRemainder(text, config)
else
local ralik = toPhoneticDialect(text, config, true)
local ratak = toPhoneticDialect(text, config, false)
-- If both dialect reflexes are the same, display only one of them.
toPhoneticRemainder(ralik, config)
if ralik ~= ratak then
toPhoneticRemainder(ratak, config)
end
end
end
return outSeq
end
-- Applies the dialect-specific rewrites to one parsed string and returns the
-- rewritten string.
-- @param text    parsed string to rewrite
-- @param config  options table; not read in this function
-- @param isRalik true to apply the Rālik reflexes, false for the other set
-- @return the rewritten string, with protected "I" turned back into "i"
-- A tab is prepended while the rewrites run and removed again at the end.
-- NOTE(review): the character classes in the patterns below have been lost
-- (for example the trailing empty capture "()" and the leading "( *)"
-- groups), so the patterns no longer match what the comments describe.
-- Restore from the upstream module.
toPhoneticDialect = function(text, config, isRalik)
-- To streamline morpheme-initial regular expressions.
text = "\t"..text
-- Morphemes can begin with geminated consonants, but spoken words cannot.
text = string.gsub(text, "( *)(.)( *)%2( *)()",
function(prefix, conson, _, __, vowel)
-- The copied vowel defaults to the following vowel, with two exceptions.
local copyVowel = vowel
if vowel == "I" then
copyVowel = "i"
elseif
vowel == "a" and
conson ~= "hG"
then
copyVowel = "E"
end
if isRalik then
return prefix.."hj"..copyVowel..conson.._..conson..__..vowel
elseif conson == "hw" then
return prefix..conson..copyVowel..conson.._..conson..__..vowel
else
return prefix..conson..copyVowel.._..conson..__..vowel
end
end
)
-- Initial {yiyV-, yiwV-, wiwV-} sequences have special behavior.
-- To block this in the template argument, use "'i" instead of "i".
if isRalik then
-- Rālik {wiwV-} becomes {yiwV-}.
text = string.gsub(text, "( *h)w( *i *hw *)", "%1j%2")
end
-- {iwV-} becomes {iwwV-} in both dialects.
text = string.gsub(text, "( *h *i *hw)( *)", "%1hw%2")
-- {yiyV-} sequences
text = string.gsub(text,
"( *)hj( *)i( *)hj( *)",
isRalik and "%1Yj%2%3%4" or "%1yj%2%3%4"
)
-- No longer need initial "\t".
-- `text.sub` resolves to string.sub through the string metatable.
text = text.sub(text, 2)
-- Don't need to protect {i} anymore.
text = string.gsub(text, "I", "i")
return text
end
-- Turns one dialect-resolved code string into phonetic IPA output.
-- @param code   string returned by toPhoneticDialect
-- @param config options table; config.enunciate, config.liaison,
--               config.noHints and config.outSeq are read here
-- Returns nothing: each resulting pronunciation is trimmed and appended to
-- config.outSeq with addUnique in the final step.
-- The text is wrapped in "\n" and "\t" bookends and then passed through a
-- long, order-dependent series of string.gsub rewrites (edge vowels, stress
-- marking, consonant clusters, vowel F2 assignment, semiconsonant surfacing,
-- voicing, liaison) before the final lookup through phoneticMap.
-- NOTE(review): the character classes in most patterns and every bracketed
-- index expression (for example after clusterTypes, toF1, toF2, fromF2,
-- fromF2Conson and voicedPrimaries) have been lost, and some lines are left
-- with unbalanced "]". The block does not parse as written and must be
-- restored from the upstream module.
toPhoneticRemainder = function(code, config)
-- "\n" bookends pronunciations of full terms.
-- "\t" bookends prosodic breaks within pronunciations.
local text = "\n\t"..code.."\t\n"
local oldText
-- Handle pseudoconsonants and phrases that begin or end with bare vowels.
local hasLeftVowel = string.find(code, "^_")
if not hasLeftVowel then
hasLeftVowel = string.find(code, "^")
if hasLeftVowel then
-- The replacement emits three bookended variants, one for each of the
-- "_j", "_G" and "_w" prefixes.
text = string.gsub(
text,
"\n\t".."(*)".."\t\n",
"\n\t".."_j%1".."\t\n"..
"\n\t".."_G%1".."\t\n"..
"\n\t".."_w%1".."\t\n"
)
end
end
local hasRightVowel = string.find(code, "_.$")
if not hasRightVowel then
hasRightVowel = string.find(code, "$")
if hasRightVowel then
-- Same three variants, added as suffixes.
text = string.gsub(
text,
"\n\t".."(-)".."\t\n",
"\n\t".."%1_j".."\t\n"..
"\n\t".."%1_G".."\t\n"..
"\n\t".."%1_w".."\t\n"
)
end
end
local hasEdgeVowel = hasLeftVowel or hasRightVowel
if hasEdgeVowel then
text = string.gsub(text, "/", "\t\t")
end
local enunciate = config.enunciate
local liaison = config.liaison
local noHints = config.noHints
local outSeq = config.outSeq
-- Use liaison if we're enunciating.
liaison = liaison or enunciate
if enunciate then
-- Create a prosodic break at consonant clusters.
text = string.gsub(text, "() *(.)", "%1".."\t\t".."%2")
end
-- Per the Marshallese Reference Grammar.
-- The condition is the literal false, so only the else branch runs.
if false then
-- Non-phrase-initial {yi'y-} vocalizes to true {yiy}.
text = string.gsub(text, "( *)yj", "%1hjihj")
-- Experimental, to fix the iọkiọkwe problem.
else
-- Non-phrase-initial {yi'y-}
-- vocalizes to true {yiy} at the beginning of a word,
-- but not in a non-initial position within a word.
text = string.gsub(text, " yj", " hjihj")
end
-- {'yiy} vocalizes contextually.
do
-- To {iyy} after a consonant.
if not enunciate then
text = string.gsub(text, "( *)Yj", "%1ihjhj")
end
-- To {yiyy} everywhere else.
text = string.gsub(
text, "Yj", enunciate and ("hjihj".."\t\t".."hj") or "hjihjhj"
)
end
-- Mid-vowel harmony assimilation across semiconsonants.
do
-- Always {e-a}, never {ẹ-a}.
text = string.gsub(text, "e(*a)", "E%1")
-- Always {ẹ-i}, never {e-i}.
text = string.gsub(text, "E(*i)", "e%1")
-- Always {e-e} and {ẹ-ẹ}, never {e-ẹ} or {ẹ-e}.
text = string.gsub(text, "**",
function(match)
-- NOTE(review): these two lookups search `text` (the whole string), not
-- `match`; confirm that is intended.
local index = string.find(text, "*$")
local vowel = string.sub(text, index, index)
match = string.gsub(match, "", vowel)
return match
end
)
end
-- Detect and mark stressed syllables, but not if this term is an affix.
if not hasEdgeVowel then
-- Temporarily mark the end of the term's bookend as stressed.
text = string.gsub(text, "(\t)", "\"%1")
-- Temporarily mark all natural syllables as unstressed.
text = string.gsub(text, "(. *)", ",%1")
-- Recursively place stress before each CVC, CVCV and CVCCV sequence.
text = string_gsubx(
text,
",("..
". * *?? *"..
",?"..
". *? *"..
"\"*\t"..
")",
"\"%1"
)
-- Remove dangling syllable markers from the term's bookends.
text = string.gsub(text, " *\"? *\t *,? *", "\t")
-- Remove all unstressed syllable markers.
text = string.gsub(text, ",", "")
if not enunciate then
-- Restore unstressed syllable markers
-- only within consonant clusters that are not already stressed.
-- These will be removed again later anyway.
text = string.gsub(text, "( *)(.)", "%1,%2")
end
-- If there is more than one stressed syllable,
-- then mark the penultimate stressed syllable as primarily stressed,
-- and the others as secondarily stressed.
if string.find(text, "\"*\"*\t") then
text = string.gsub(text, "\"", "%%")
text = string.gsub(text, "%%(*%%*\t)", "\"%1")
end
end
-- Mark full vowels as syllabic.
text = string.gsub(text, "()", "%1=")
if not enunciate then
-- Tag consonant clusters for the next operation.
oldText = text
text = string.gsub(text, "(.)( *?.)", "%1/%2")
needClusterTypes()
-- Process unstable and assimilating consonant clusters.
-- Skipped entirely when the tagging step above changed nothing.
if oldText ~= text then
text = string_gsub2(
text,
"()(= *?)(.)()/"..
"( *?)(.)()( *)()",
function(
vowelL, _, primaryL, secondaryL,
__, primaryR, secondaryR, ___, vowelR
)
local vowelE = ""
local markE = ""
local cluster = clusterTypes
if cluster == EPENTH_CLUSTER then
-- An epenthetic vowel will be inserted.
if primaryL == "h" then
-- If the first consonant is a semiconsonant,
-- then copy the vowel on the left.
vowelE = vowelL
elseif primaryR == "h" then
-- If the first consonant is a full consonant
-- but the second consonant is a semicomsonant,
-- then copy the vowel on the right.
vowelE = vowelR
elseif primaryR == "y" then
-- If the first consonant is a full consonant
-- but the second consonant is {yi'y},
-- then the epenthetic vowel is {i},
-- and the second consonant becomes plain {y}.
vowelE = "i"
primaryR = "h"
else
-- If neither consonant is a semiconsonant,
-- then the epenthetic vowel has an F1
-- that is the maximum of
-- the two neighboring vowels and {e}.
vowelE = fromF1[math.max(
toF1,
toF1,
toF1
)]
end
markE = "@"
else
-- No epenthetic vowel.
if cluster == ASSIM_CLUSTER then
-- Regressive primary assimilation.
primaryL = primaryR
end
if secondaryL == "w" and
primaryR ~= "t"
then
-- Progressive secondary assimilation.
-- But there is no {tʷ} in Marshallese.
secondaryR = secondaryL
else
-- Regressive secondary assimilation.
secondaryL = secondaryR
end
end
return (
vowelL.._..primaryL..secondaryL..vowelE..markE..
__..primaryR..secondaryR..___..vowelR
)
end
)
end
end
needVowelCharts()
-- Give a default F2 to vowels,
-- averaging the F2 of their two neighboring consonants.
-- This can also create transitional vowels whose F2
-- have no direct counterparts with consonant secondary articulation.
text = string_gsub2(text, "()( *.)( *?.)()",
function(secondaryL, _, __, secondaryR)
return secondaryL.._..lerpF2(secondaryL, secondaryR)..__..secondaryR
end
)
-- Unconditionally surface semiconsonants in complete isolation.
oldText = text
text = string.gsub(text, "\t *h(.) *\t", "\tH%1\t")
-- If the term contains any other semiconsonants...
if oldText == text and
string.find(text, "h")
then
-- Flags set by the gsub callbacks below whenever they fire; later steps
-- are skipped when the corresponding rewrite never happened.
local hasVG = false
local hasGV = false
local hasVGV = false
-- Give unsurfaced semiconsonants a surface F1
-- matching the vowels on their left.
text = string.gsub(text, "()(. *?)h(.)",
function(vowelF1, _, secondary)
hasVG = true
return vowelF1.._..vowelF1..fromF2].."^"
end
)
-- Adjust the F1 of surfaced semiconsonants
-- according to the vowels on their right.
-- To the maximum of the vowel if {y} or {w}.
-- To the minimum of the vowel if {h}.
if hasVG then
text = string.gsub(text, "(.)(.)(%^ *)()",
function(semiF1, semiF2, _, vowelF1)
hasGV = true
hasVGV = true
local fn = semiF2 == "3" and math.min or math.max
return fromF1[fn(
toF1, toF1
)]..semiF2.._..vowelF1
end
)
end
-- Give remaining unsurfaced semiconsonants a surface F1
-- matching the vowels on their right.
text = string.gsub(text, "h(.)( *)()",
function(secondary, _, vowelF1)
hasGV = true
return vowelF1..fromF2].."^".._..vowelF1
end
)
-- NOTE(review): startsGV and endsVG are not read again in the code visible
-- here.
local startsGV = hasGV and not not string.find(text, "\t *?..%^")
local endsVG = hasVG and not not string.find(text, "%^ *\t")
if not enunciate then
-- If a vowel comes before a semiconsonant of the same F1,
-- then change the vowel's F2 to match the the semiconsonant.
if hasVG then
text = string.gsub(
text, "(.).( *?)%1(.)", "%1%3%2%1%3"
)
end
-- If a non-open vowel comes after {y} of the same F1
-- and before a velarized full consonant,
-- then change the vowel's F2 to match the {y}.
if hasGV then
text = string.gsub(
text,
"()(1)(%^ *)%1.( *?G)",
"%1%2%3%1%2%4"
)
end
-- If a non-open vowel comes after {y} of the same F1
-- and before a syllable stress boundary,
-- then change the vowel's F2 to match the {y}.
if hasGV then
text = string.gsub(
text, "()(1)(%^ *)%1.( *)", "%1%2%3%1%2%4"
)
end
-- If {a} comes after {y} of the same F1 after a stressed vowel,
-- then change the vowel's F2 to match the {y}.
if hasVGV then
text = string.gsub(text, "(= *a)(1)(%^ *a).", "%1%2%3%2")
end
-- If a vowel comes after {w} of the same F1 after a stressed vowel,
-- then change the vowel's F2 to match the {w}.
if hasVGV then
text = string_gsub2(
text, "(= *)(.)(5)(%^ *)%2.", "%1%2%3%4%2%3"
)
end
-- If a vowel comes after {h}...
if hasGV then
text = string.gsub(
text, "(.)(3)(%^ *)(.).( *?.)()",
function(semiF1, semiF2, _, vowelF1, __, secondary)
local vowelF2
if semiF1 == vowelF1 then
-- If they have the same F2,
-- then change the vowel's F2 to match the {h}.
vowelF2 = semiF2
else
-- If they do not have the same F2,
-- then reset the vowel's F2.
vowelF2 = lerpF2(semiF2, secondary)
end
return (
semiF1..semiF2.._..vowelF1..vowelF2..__..secondary
)
end
)
end
-- If a vowel comes after {y} or {w}
-- at the beginning of a prosodic unit
-- and before a stress boundary
-- before a semiconsonant and another vowel
-- that have the same F2 as each other
-- and both have the same F1 as the first vowel,
-- then change the first vowel's F2 to match.
if hasVGV then
text = string.gsub(
text,
"\t *(.%^ *)(.).( *)%2(.)(%^ *)%2%4",
"\t%1%2%4%3%2%4%5%2%4"
)
end
end
-- Unsurface {h} everywhere.
text = string.gsub(text, ".3%^", "hG")
-- Unsurface semiconsonants that can coalesce
-- with either of their neighboring vowels,
-- but not crossing syllable stress boundaries.
if hasGV then
text = string.gsub(text, "(.)(.)%^( *)%1%2",
function(vowelF1, vowelF2, _)
return "h"..fromF2Conson].._..vowelF1..vowelF2
end
)
end
if hasVG then
text = string.gsub(text, "(.)(.)(= *)%1%2%^",
function(vowelF1, vowelF2, _)
return vowelF1..vowelF2.._.."h"..fromF2Conson]
end
)
end
-- Adjust the F1 of remaining surfaced {y} and {w}.
text = string.gsub(text, "(.)()%^", function(semiF1, semiF2)
if semiF2 == "1" then
if semiF1 == "a" then
semiF1 = "E"
end
else -- semiF1 == "5"
semiF1 = "i"
end
return semiF1..semiF2.."^"
end)
-- Delete remaining unsurfaced semiconsonants altogether.
text = string.gsub(text, "h.", "")
if hasVGV and not enunciate then
-- Indicate certain long monophthongs as geminated.
text = string.gsub(text, "(.)( *)%1", "%1=%2:")
text = string.gsub(
text, "(.)( *)%1()", "%1=%2:%3"
)
end
-- If a weakened semiconsonant falls on a stressed syllable
-- before a vowel with the same F2,
-- then shift forward the stress marker.
-- When the condition fails the callback returns nothing and gsub keeps the
-- original match.
text = string.gsub(text, "()0(.)( *)(.)",
function(stress, semiF2, _, vowelF2)
if toF2 == toF2 then
return "0"..semiF2..stress.._..vowelF2
end
end
)
end
-- Neutralize the difference between full and epenthetic vowels.
text = string.gsub(text, "", "")
-- Simplify secondary articulation of consonant clusters.
text = string.gsub(text, "()( *?.)%1", "%2%1")
-- Partially voice obstruents before vowels at the beginning of a phrase or
-- in consonant clusters after other obstruents or laterals.
text = string.gsub(text, "( *?)()(. *)",
function(_, primary, __)
return _..string.upper(primary)..__
end
)
voicedPrimaries = voicedPrimaries or {
= "b", = "d", = "g"
}
-- Voice remaining obstruents before vowels.
text = string.gsub(text, "()(. *%(?)", function(primary, _)
return voicedPrimaries.._
end)
if hasEdgeVowel then
if noHints then
-- Strip pseudoglides.
text = string.gsub(text, "_.", "")
elseif hasLeftVowel then
-- Reverse text of left pseudoglide.
text = string.gsub(text, "\t *_(.)", "\t%1_")
end
end
if liaison then
-- Remove whitespace from bookends.
text = string.gsub(text, " *\t *", "\t")
-- Prepare liaisons.
text = string.gsub(text, "+", "_")
else
-- Strip liaisons.
text = string.gsub(text, " ", "")
end
if enunciate then
-- Convert bookends to spaces.
text = string.gsub(text, "\t+", " ")
end
needPhoneticMap()
-- Convert pseudo-X-SAMPA to phonetic IPA.
text = string.gsub(text, ".?%^?", phoneticMap)
-- Output unique pronunciations.
-- gsub is used only to visit each match; its return value is discarded.
string.gsub(text, "\n*\n", function(result)
addUnique(outSeq, fastTrim(result))
return ""
end)
end
-- Expose the internal conversion functions under underscore-prefixed names.
-- These take plain Lua values (strings, sequences, option tables) instead of
-- the frame object expected by the entry points defined below.
export._parse = parse
export._toBender = toBender
export._toMOD = toMOD
export._toPhonemic = toPhonemic
export._toPhonetic = toPhonetic
-- Entry point: parses the first unnamed argument and returns its renderings
-- from toBender, joined with ", ".
-- @param frame frame object; frame.args[1] is the code to convert and
--              frame.args.version (read by toBender) selects the variant
-- @return string of comma-separated results
function export.bender(frame)
    local args = frame.args
    -- parse() needs the code string, not the whole argument table, and the
    -- argument table has to reach toBender for `version` to take effect.
    return table.concat(toBender(parse(args[1]), args), ", ")
end
-- Entry point: passes the first unnamed argument through toMOD and returns
-- the converted text.
-- @param frame frame object; frame.args[1] is the text to convert
-- @return the converted string
function export.MOD(frame)
    -- toMOD() performs a string substitution, so it needs the argument text
    -- itself rather than the whole argument table.
    return toMOD(frame.args[1])
end
-- Entry point: parses the first unnamed argument and returns the parsed
-- strings joined with ", ".
-- @param frame frame object; frame.args[1] is the code to parse
-- @return string of comma-separated parsed forms
function export.parse(frame)
    -- parse() lower-cases and splits its input, so it needs the code string
    -- rather than the whole argument table.
    return table.concat(parse(frame.args[1]), ", ")
end
-- Entry point: parses the first unnamed argument and returns its phonemic
-- transcriptions from toPhonemic, joined with ", ".
-- @param frame frame object; frame.args[1] is the code to convert
-- @return string of comma-separated transcriptions
function export.phonemic(frame)
    -- parse() needs the code string, not the whole argument table.
    return table.concat(toPhonemic(parse(frame.args[1])), ", ")
end
-- Entry point: parses the first unnamed argument and returns its phonetic
-- transcriptions from toPhonetic, joined with ", ".
-- @param frame frame object; frame.args[1] is the code to convert, and the
--              whole argument table is handed to toPhonetic, which reads
--              dialect, enunciate, liaison, nohints and voice from it
-- @return string of comma-separated transcriptions
function export.phonetic(frame)
    local args = frame.args
    -- parse() needs the code string; toPhonetic() needs the option table.
    return table.concat(toPhonetic(parse(args[1]), args), ", ")
end
-- Hand the module table, with all entry points attached, back to the loader.
return export