local export = {}
local m_str_utils = require("Module:string utilities")
local m_table = require("Module:table")
local concat = table.concat
local extend = m_table.extend
local insert = table.insert
local list_to_set = m_table.listToSet
local remove = table.remove
local ugmatch = mw.ustring.gmatch
local ugsub = m_str_utils.gsub
local ulen = m_str_utils.len
local umatch = mw.ustring.match
local usub = m_str_utils.sub
-- Vowel rewrite lists. Each inner table presumably holds a pattern followed by
-- its replacement; convert_word iterates `vowel_patterns` with ipairs and feeds
-- each rule to ugsub.
-- NOTE(review): as written, every `vowel_patterns = {...}` statement replaces
-- the whole table, so only the final list survives, and each
-- `vowel_patterns = vowel_patterns` line is a no-op. This looks like a series
-- of keyed assignments whose subscripts were lost in this copy -- TODO restore
-- the keys from the upstream module before relying on this block.
local vowel_patterns = {}
vowel_patterns = {
{"ā", "a"},
{"ae", "ę"},
{"áé", "ę́"},
{"e", "ę"},
{"o", "ǫ"},
}
vowel_patterns = {
{"ē", "ẹ"},
{"i", "ẹ"},
{"ī", "i"},
{"ō", "ọ"},
{"u", "ọ"},
{"ū", "u"},
}
vowel_patterns = vowel_patterns
vowel_patterns = vowel_patterns
vowel_patterns = {
{"ē", "ẹ"},
{"i", "ẹ"},
{"ī", "i"},
{"ō", "o"},
{"ū", "u"},
}
vowel_patterns = vowel_patterns
vowel_patterns = vowel_patterns
vowel_patterns = {
{"ē", "e"},
{"ẹ", "e"},
{"ī", "i"},
{"ō", "o"},
{"ọ", "o"},
{"ū", "u"},
}
vowel_patterns = vowel_patterns
-- Lookup table used by split_syllables to tokenise a word: its keys are
-- compared against the start of the remaining text and the longest matching
-- key wins.
-- NOTE(review): the key of every entry is missing in this copy (each field
-- starts with a bare `=`), so this constructor does not parse. TODO restore the
-- keys from the upstream module.
local dictionary = {
= "a", = "e", = "i", = "o", = "u",
= "ā", = "ē", = "ī", = "ō", = "ū",
= "ae", = "ē", = "aị", = "ėị", = "aụ", = "ėụ",
= "b", = "d", = "f",
= "c", = "g", = "v", = "x",
= "qŭ",
= "'"
}
-- Set of phonemes that the syllable helpers below recognise as vowels.
-- The hyphen is a member of this set as well.
local vowels = list_to_set({
	-- plain vowel letters
	"a", "e", "i", "o", "u",
	-- vowel letters carrying a macron
	"ā", "ē", "ī", "ō", "ū",
	-- two-letter vowel units
	"ae", "oe",
	"aị", "ėị",
	"aụ", "ėụ",
	-- hyphen
	"-",
})
-- Set of consonant sequences accepted at the start of a syllable.
-- split_syllables moves consonants to the preceding syllable until the
-- remaining onset is empty or found here, and raises an error otherwise.
local onsets = list_to_set({
	-- single consonants (plus cu / qŭ)
	"b", "p", "d", "t", "g", "c", "cu", "qŭ",
	"f", "s", "z", "l", "m", "n", "r", "j", "v", "w",
	-- consonant + l / r, and ps
	"bl", "pl", "br", "pr", "ps", "dr", "tr",
	"gl", "cl", "gr", "cr", "fl", "fr",
	-- clusters beginning with s
	"sp", "st", "sc", "scu",
	"sl", "sm", "sn", "su",
	"spr", "str", "scr", "spl", "scl",
})
-- Set of consonant sequences accepted at the end of a syllable.
-- split_syllables raises "coda error:" when a syllable's coda is neither
-- empty nor found here.
local codas = list_to_set({
	-- single consonants
	"b", "p", "d", "t", "g", "c", "f", "s", "z",
	"l", "m", "n", "r", "j",
	-- s + stop
	"sp", "st", "sc",
	-- l + consonant
	"lp", "lt", "lc", "lb", "ld", "lg", "lf",
	-- r + consonant
	"rp", "rt", "rc", "rb", "rd", "rg", "rf",
	-- nasal + stop
	"mp", "nt", "nc", "mb", "nd", "ng",
	-- liquid / nasal combinations
	"lm", "rl", "rm", "rn",
	-- clusters ending in s (and x)
	"ps", "ts", "cs", "x", "ls", "ns", "rs",
	"lcs", "ncs", "rcs",
	"lms", "rls", "rms", "rns",
})
-- Consonant mapping referenced by the function rules in word_rules_start;
-- `devoicing` is the same mapping inverted with m_table.invert.
-- NOTE(review): the key of every entry is missing in this copy, so the
-- constructor does not parse -- TODO restore from the upstream module.
-- NOTE(review): the third value appears to be U+0261 (script g) rather than
-- ASCII "g", whereas onsets/codas use ASCII "g" -- verify which is intended.
local voicing = {
= "b",
= "d",
= "ɡ",
}
local devoicing = m_table.invert(voicing)
-- These phonetic rules apply to the whole word, not just a syllable.
-- convert_word runs them in list order before the word is split into
-- syllables. Each entry pairs a pattern with a replacement, which is either a
-- string or a function.
-- NOTE(review): several patterns look truncated in this copy: `%f` is not
-- followed by a character set and some captures are empty. The two function
-- rules also use `devoicing` / `voicing` without a subscript. TODO restore the
-- missing pieces from the upstream module.
-- NOTE(review): the optional stress mark is ASCII ' in some rules and a
-- different apostrophe-like character (ˈ) in others; confirm the mix is
-- intentional.
local word_rules_start = {
{"h", ""},
{"k", "c"},
{"y", "i"},
{"ȳ", "ī"},
{"x('?)", "s%1"},
{"('?)b%f", "%1v"},
{"()(ˈ?)%f", function (consonant, stress)
return (devoicing or consonant) .. stress
end},
{"()(ˈ?)%f", function (consonant, stress)
return (voicing or consonant) .. stress
end},
{"m(ˈ?)", "n%1"},
{"n(ˈ?)", "m%1"},
{"um$", "u"}
}
-- Replacement table used by convert_word: for every key k, occurrences of k
-- immediately followed by ' are replaced with the mapped value, which carries
-- the ' at an earlier position.
-- NOTE(review): convert_word walks this table with pairs(), so the order in
-- which the rules are applied is unspecified.
-- NOTE(review): the key of every entry is missing in this copy, so the
-- constructor does not parse -- TODO restore from the upstream module.
local stress_shift_rules = {
= "'qu", = "n'gu", = "'gu", = "'v",
= "'bl", = "'pl", = "'br", = "'pr",
= "'dr", = "'tr",
= "'gl", = "'cl", = "'gr", = "'cr",
= "'fl", = "'fr", = "c't", = "p't", = "g'd",
= "s'l", = "s'm", = "s'n", = "s'u",
= "s't", = "x't",
= "s'pr", = "s'tr", = "s'cr",
= "s'pl", = "s'cl",
= "nc't"
}
-- Whole-word rules that convert_word applies, in list order, after the
-- processed syllables have been joined back into one string.
-- The commented-out entries are disabled rules kept for reference.
-- NOTE(review): the last rule has an empty first capture, which suggests a
-- character set was lost from the pattern in this copy -- TODO confirm.
local word_rules_end = {
{"^ĭ", "j"},
-- {"g()", "j%1"},
-- {"dĭ", "j"},
-- {"gĭ", "j"},
-- {"z", "j"},
{"ė", "e"},
{"ị", "i"},
{"ụ", "u"},
{"ĭ", "i"},
{"ŭ", "u"},
{"ei", "i"},
{"ii", "i"},
{"ee$", "ie"},
{"()(́?)e$", "%1%2i"},
}
-- Rules consumed by nasalize_vowels; each replacement is a tilde-marked vowel
-- followed by the captured optional acute accent.
-- NOTE(review): as written the same four-entry list is assigned to `nasalized`
-- three times, and every pattern consists only of the optional-accent capture.
-- This looks like keyed assignments and leading character sets that were lost
-- in this copy -- TODO restore from the upstream module.
local nasalized = {}
nasalized = {
{"(́?)", "ẽ%1"},
{"(́?)", "ĩ%1"},
{"(́?)", "ũ%1"},
{"(́?)", "õ%1"},
}
nasalized = {
{"(́?)", "ẽ%1"},
{"(́?)", "ĩ%1"},
{"(́?)", "ũ%1"},
{"(́?)", "õ%1"},
}
nasalized = {
{"(́?)", "ẽ%1"},
{"(́?)", "ĩ%1"},
{"(́?)", "ũ%1"},
{"(́?)", "õ%1"},
}
-- Rewrites vowels as tilde-marked (nasal) vowels in two positions: at the very
-- end of the word and directly before an "s". An optional acute accent is
-- captured and re-emitted after the new vowel.
-- @param word   string to rewrite
-- @param family passed by convert_word (its vowel_pattern argument); not read
--               anywhere in the visible body
-- @return the rewritten string
-- NOTE(review): the patterns appear to have lost their leading character sets,
-- and `rule` (a table) is concatenated and passed to ugsub without an index;
-- presumably the pattern and replacement elements were meant -- TODO confirm
-- against the upstream module.
local function nasalize_vowels(word, family)
word = ugsub(word, "(́?)$", "ã%1")
word = ugsub(word, "(́?)s", "ã%1s")
for _, rule in ipairs(nasalized) do
word = ugsub(word, rule .. "$", rule)
word = ugsub(word, rule .. "(́?)s", rule .. "%2s")
end
return word
end
-- Returns the onset of a syllable as a single string: walks `syll` from the
-- front, stops at the first vowel, skips the "'" marker, and concatenates what
-- was collected.
-- @param syll syllable, indexed from 1 to #syll (a list of phonemes as built
--             by split_syllables)
-- @return concatenated leading consonants ("" when there are none)
-- NOTE(review): the per-element indexing has been lost in this copy (the
-- `vowels]` test does not parse and `syll` itself is compared and inserted);
-- TODO restore from the upstream module.
local function get_onset(syll)
local consonants = {}
for i = 1, #syll do
if vowels] then
break
end
if syll ~= "'" then
insert(consonants, syll)
end
end
return concat(consonants)
end
-- Returns the coda of a syllable as a single string: walks `syll` from the
-- back, stops at the first vowel found, and concatenates what was collected in
-- its original left-to-right order (each item is inserted at position 1).
-- @param syll syllable, indexed from 1 to #syll
-- @return concatenated trailing consonants ("" when there are none)
-- NOTE(review): the per-element indexing has been lost in this copy (the
-- `vowels]` test does not parse and `syll` itself is inserted); TODO restore
-- from the upstream module.
local function get_coda(syll)
local consonants = {}
for i = #syll, 1, -1 do
if vowels] then
break
end
insert(consonants, 1, syll)
end
return concat(consonants)
end
-- Scans `syll` from the front and returns on the first vowel found; falls off
-- the end (returning nil) when the syllable contains no vowel.
-- split_syllables only uses the result as a truthiness test.
-- NOTE(review): the indexing has been lost in this copy (the `vowels]` test
-- does not parse and the whole `syll` is returned); presumably the matching
-- element was meant -- TODO confirm against the upstream module.
local function get_vowel(syll)
for i = 1,#syll do
if vowels] then return syll end
end
end
-- Splits a word into syllables, each syllable being a list of phonemes.
-- Steps, in order:
--   1. Tokenise `word` into phonemes by longest match against the keys of
--      `dictionary`; a character with no match is taken as-is.
--   2. Group phonemes into provisional syllables: a "'" starts a new syllable
--      and a vowel closes the current one.
--   3. Move consonants between neighbouring syllables until onsets are valid.
--   4. Validate every onset and coda against `onsets` / `codas`.
-- @param word string to syllabify
-- @return list of syllables
-- Raises "onset error:" or "coda error:" when validation fails.
-- NOTE(review): many subscripts have been lost in this copy (for example
-- `vowels then`, `syllables` used where an element is expected, and the
-- unparsable `vowels])`), so the function does not parse as written. The
-- comments below describe the visible structure only -- TODO restore from the
-- upstream module.
local function split_syllables(word)
local phonemes = {}
-- Consume the word from the left, one phoneme at a time.
while ulen(word) > 0 do
local longestmatch = ""
-- Find the longest dictionary key that prefixes the remaining word.
for letter in pairs(dictionary) do
if ulen(letter) > ulen(longestmatch) and usub(word, 1, ulen(letter)) == letter then
longestmatch = letter
end
end
if ulen(longestmatch) > 0 then
insert(phonemes, dictionary)
word = usub(word, ulen(longestmatch) + 1)
else
-- No key matched: pass the single character through unchanged.
insert(phonemes, usub(word, 1, 1))
word = usub(word, 2)
end
end
local syllables, syll = {}, {}
while #phonemes > 0 do
local phoneme = remove(phonemes, 1)
if phoneme == "'" then
-- The "'" marker flushes any pending phonemes and opens a new syllable.
if #syll > 0 then
insert(syllables, syll)
end
syll = {"'"}
elseif vowels then
-- A vowel completes the current syllable.
insert(syll, phoneme)
insert(syllables, syll)
syll = {}
else
insert(syll, phoneme)
end
end
-- If there are phonemes left, then the word ends in a consonant
-- Add them to the last syllable
-- NOTE(review): as written this extends the `syllables` list itself rather
-- than its last element -- TODO confirm.
extend(syllables, syll)
-- Split consonant clusters between syllables
for i, current in ipairs(syllables) do
if i > 1 then
local previous = syllables
local onset = get_onset(current)
-- Shift over consonants until the syllable onset is valid
while not (onset == "" or onsets) do
insert(previous, remove(current, 1))
onset = get_onset(current)
end
-- If the preceding syllable still ends with a vowel, and the current one begins with s + another consonant, or with gn, then shift it over
if get_coda(previous) == "" and ((current == "s" and not vowels]) or (current == "g" and current == "n")) then
insert(previous, remove(current, 1))
end
-- If there is no vowel at all in this syllable
if not get_vowel(current) then
for _ = 1, #current do
insert(syllables, remove(current, 1))
end
remove(syllables,i)
end
end
end
-- Final validation pass over every syllable.
for _, s in ipairs(syllables) do
local onset = get_onset(s)
if not (onset == "" or onsets) then
error("onset error:")
end
local coda = get_coda(s)
if not (coda == "" or codas) then
error("coda error:")
end
end
return syllables
end
-- Returns the 1-based index of the syllable that carries the accent.
-- A "'" marker found in the syllables takes precedence: it is removed and the
-- outer loop index is returned. Otherwise the position depends on the
-- syllable count:
--   more than two syllables: the second-to-last or third-to-last syllable,
--                            decided by the penult / ultima checks below;
--   exactly two syllables:   the first syllable;
--   fewer:                   #syllables.
-- @param syllables list of syllables; modified in place when a "'" marker is
--                  removed
-- @return accent index
-- NOTE(review): subscripts and pattern contents have been lost in this copy
-- (`syllables == "'"`, `penult:match("^$")`, `match("")`), so the exact tests
-- cannot be read from here -- TODO restore from the upstream module.
local function detect_accent(syllables)
-- Manual override
for i = 1, #syllables do
for j = 1, #syllables do
if syllables == "'" then
remove(syllables, j)
return i
end
end
end
if #syllables > 2 then
-- Does the penultimate syllable end in a single vowel?
local penult = syllables
if penult:match("^$") then
local ult = syllables
if ult and (ult .. ult):match("") then
return #syllables - 1
end
return #syllables - 2
else
return #syllables - 1
end
elseif #syllables == 2 then
return #syllables - 1
end
return #syllables
end
-- Marks a syllable as accented by inserting a combining acute accent.
-- The special case (an "i" followed by a captured element) is tried first; the
-- general rule runs only if that substitution left the string unchanged.
-- @param syllable syllable as a string
-- @return the syllable with the accent inserted
-- NOTE(review): both patterns contain an empty capture, which suggests their
-- character sets were lost in this copy -- TODO restore from the upstream
-- module.
local function place_accent(syllable)
-- Special case: i before a or o
local new_syllable = ugsub(syllable, "i()", "i%1́")
if syllable == new_syllable then
new_syllable = ugsub(syllable, "()", "%1́")
end
return new_syllable
end
-- Converts a single word. Leading asterisks are split off first and prepended
-- again to the result. Steps, in order:
--   1. apply word_rules_start;
--   2. for vowel_pattern "It-W", prefix an "i" to a word starting with "s";
--   3. apply stress_shift_rules and the doubled-consonant stress shift;
--   4. split into syllables and locate the accent;
--   5. apply the vowel rules to each syllable and accent the stressed one;
--   6. join the syllables, apply word_rules_end, then nasalize_vowels.
-- @param raw_word      word to convert, optionally prefixed with asterisks
-- @param vowel_pattern name of the vowel pattern; compared against "It-W" and
--                      forwarded to nasalize_vowels
-- @return converted word
-- NOTE(review): many subscripts have been lost in this copy (`ugsub(word,
-- rule, rule)`, `local penult = syllables`, `if syllables then`, ...), and the
-- block comment that presumably opened near the "Check antepenult" note is no
-- longer opened although its closing `end]]--` remains, so the function does
-- not parse as written -- TODO restore from the upstream module.
local function convert_word(raw_word, vowel_pattern)
local asterisks, word = raw_word:match("^(%**)(.*)")
-- do starting word-based rules
for _, rule in ipairs(word_rules_start) do
word = ugsub(word, rule, rule)
end
-- Prothetic i before s + consonant
if vowel_pattern == "It-W" then
word = word:gsub("^s+", "i%0")
end
-- NOTE(review): pairs() order is unspecified, so these rules run in no fixed order.
for k, v in pairs(stress_shift_rules) do
word = ugsub(word, k .. "'", v)
end
-- Double consonant stress shifts
word = word:gsub("()%1'", "%1'%1")
local syllables = split_syllables(word)
local accent = detect_accent(syllables)
-- Check antepenult for e, i > j (written i)
--
local penult = syllables
if antepenult and penult then
if syllables == antepenult and umatch(antepenult, "^$") and umatch(penult, "^$") then
syllables = "ị"
accent = accent + 1
end
end]]--
for i, syll in ipairs(syllables) do
if syllables then
if umatch(syll, "^$") and umatch(syllables, "^$") then
syll = "ĭ"
if syllables == syll then
accent = accent + 1
end
end
end
end
-- Flatten each syllable to a string, apply the vowel rules, and accent the stressed one.
for i, syll in ipairs(syllables) do
syll = concat(syll)
for _, rule in ipairs(vowel_patterns) do
syll = ugsub(syll, rule, rule)
end
for _, rule in ipairs(vowel_patterns) do
syll = ugsub(syll, rule, rule)
end
--[[if i ~= accent then
syll = syll:gsub("ẹ", "e")
syll = syll:gsub("ọ", "o")
end]]
syllables = (i == accent and place_accent(syll) or syll)
end
word = concat(syllables)
for _, rule in ipairs(word_rules_end) do
word = ugsub(word, rule, rule)
end
word = nasalize_vowels(word, vowel_pattern)
return asterisks .. word
end
--- Converts every whitespace-separated word of a string.
-- Each run of non-space characters in `words` is passed to convert_word
-- together with `vowel_pattern`; the results are joined with single spaces.
-- @param words         string containing one or more words
-- @param vowel_pattern forwarded unchanged to convert_word
-- @return the converted words joined by " "
function export.convert_words(words, vowel_pattern)
	local converted = {}
	for token in ugmatch(words, "%S+") do
		converted[#converted + 1] = convert_word(token, vowel_pattern)
	end
	return concat(converted, " ")
end
return export