-- Module table plus the pattern fragments shared by every sandhi routine below.
-- Text is written in a one-letter-per-sound ASCII transliteration; an accent
-- mark ("/" or "\"), when present, directly follows the vowel it belongs to.
local export = {}

-- Every letter treated as a consonant.
export.consonant_list = "kKgGNcCjJYwWqQRtTdDnpPbBmyrlLvSzsh"
-- Character class matching exactly one consonant. It must be a bracketed class
-- (not an empty string) because callers append "*", "+" and "$" to it.
export.consonant = "[" .. export.consonant_list .. "]"
-- Character class matching one accent mark. Backslash is not special in Lua
-- patterns, so the class holds the two literal characters "/" and "\".
export.accent = "[/\\]"
-- Every letter treated as a vowel.
export.vowel_list = "aAiIuUfFxXeEoO"
-- Character class matching exactly one vowel.
export.vowel = "[" .. export.vowel_list .. "]"
-- One vowel optionally followed by its accent mark.
export.vowel_with_accent = export.vowel .. export.accent .. "?"
-- Local aliases for the mw.ustring string functions used by this module, so
-- that matching, substitution and case conversion go through that library.
-- NOTE(review): `U` is not referenced in the part of the file visible here;
-- confirm it is used elsewhere before removing it.
local U = mw.ustring.char
local match = mw.ustring.match
local gsub = mw.ustring.gsub
local sub = mw.ustring.sub
local lower = mw.ustring.lower
local upper = mw.ustring.upper
-- Matches `pattern` against the tail of `text`.
-- `pattern` is a pattern fragment (not a plain string); it is anchored by
-- appending "$". Returns whatever `match` returns, which callers in this file
-- use only as a truthy/falsy test.
local function ends_with(text, pattern)
	local anchored_at_end = pattern .. "$"
	return match(text, anchored_at_end)
end
-- Matches `pattern` against the head of `text`.
-- `pattern` is a pattern fragment (not a plain string); it is anchored by
-- prefixing "^". Returns whatever `match` returns, which callers in this file
-- use only as a truthy/falsy test.
local function starts_with(text, pattern)
	local anchored_at_start = "^" .. pattern
	return match(text, anchored_at_start)
end
-- Lookup table referenced by internal_sandhi in its "gunation and
-- vrddhization" branch. Values are a vowel (or "ar"), optionally carrying an
-- accent mark ("/" or "\") after the vowel.
-- NOTE(review): every entry in this copy has lost its key (`= 'A'` is not valid
-- Lua), so the table does not parse as written. The keys must be restored from
-- the upstream source; they cannot be inferred reliably from the values alone.
export.up_one_grade = {
= 'A', = 'A', = 'A/', = 'A/', = 'A\\', = 'A\\',
= 'e', = 'e', = 'e/', = 'e/', = 'e\\', = 'e\\',
= 'o', = 'o', = 'o/', = 'o/', = 'o\\', = 'o\\',
= 'E', = 'E', = 'E/', = 'E/', = 'E\\', = 'E\\',
= 'O', = 'O', = 'O/', = 'O/', = 'O\\', = 'O\\',
= 'ar', = 'ar', = 'a/r', = 'a/r', = 'a\\r', = 'a\\r',
}
-- Lookup table whose values are the lower-case vowels a, i, u, f, each with an
-- optional accent mark ("/" or "\"). Presumably maps a vowel to its short
-- counterpart -- TODO confirm against the restored keys.
-- It is exported but not used in the part of the file visible here.
-- NOTE(review): every entry in this copy has lost its key (`= 'a'` is not valid
-- Lua); restore the keys from the upstream source.
export.shorten = {
= 'a', = 'a', = 'a/', = 'a/', = 'a\\', = 'a\\',
= 'i', = 'i', = 'i/', = 'i/', = 'i\\', = 'i\\',
= 'u', = 'u', = 'u/', = 'u/', = 'u\\', = 'u\\',
= 'f', = 'f', = 'f/', = 'f/', = 'f\\', = 'f\\',
}
-- Lookup table whose values are the upper-case vowels A, I, U, F, each with an
-- optional accent mark ("/" or "\"). Presumably maps a vowel to its long
-- counterpart -- TODO confirm against the restored keys.
-- Referenced by internal_sandhi.
-- NOTE(review): every entry in this copy has lost its key (`= 'A'` is not valid
-- Lua); restore the keys from the upstream source.
export.lengthen = {
= 'A', = 'A', = 'A/', = 'A/', = 'A\\', = 'A\\',
= 'I', = 'I', = 'I/', = 'I/', = 'I\\', = 'I\\',
= 'U', = 'U', = 'U/', = 'U/', = 'U\\', = 'U\\',
= 'F', = 'F', = 'F/', = 'F/', = 'F\\', = 'F\\',
}
-- Lookup table referenced by internal_sandhi in its "guna and vrddhi
-- splitting" branch. Each value is "a" or "A" (optionally accented) followed by
-- the consonant "y" or "v".
-- NOTE(review): every entry in this copy has lost its key (`= 'ay'` is not
-- valid Lua); restore the keys from the upstream source.
export.split_diphthong = {
= 'ay', = 'a/y', = 'a\\y',
= 'Ay', = 'A/y', = 'A\\y',
= 'av', = 'a/v', = 'a\\v',
= 'Av', = 'A/v', = 'A\\v',
}
-- Lookup table whose values are the consonants y, v, r, l. internal_sandhi
-- appends an entry from it to the stem when a stem-final vowel meets a
-- vowel-initial ending.
-- NOTE(review): every entry in this copy has lost its key (`= 'y'` is not valid
-- Lua); restore the keys from the upstream source.
export.semivowel_to_cons = {
= 'y', = 'y',
= 'v', = 'v',
= 'r', = 'r',
= 'l', = 'l',
}
-- Lookup table whose values are "i" + "y" or "u" + "v", with an optional accent
-- mark after the vowel. internal_sandhi uses it for monosyllabic stems
-- (input_table.mono) whose final vowel meets a vowel-initial ending.
-- NOTE(review): every entry in this copy has lost its key (`= 'iy'` is not
-- valid Lua); restore the keys from the upstream source.
local insert_glide = {
= 'iy', = 'iy', = 'i/y', = 'i/y', = 'i\\y', = 'i\\y',
= 'uv', = 'uv', = 'u/v', = 'u/v', = 'u\\v', = 'u\\v',
}
-- Replacement table passed to gsub for the last character of a word or stem
-- (see absolute_final and the `input_table.final` branch of internal_sandhi).
-- Values are k, w, t, p and N.
-- NOTE(review): every entry in this copy has lost its key (`= 'k'` is not valid
-- Lua); restore the keys from the upstream source.
local to_final = {
= 'k', = 'k', = 'k', = 'k',
= 'w', = 'w', = 'w', = 'w',
= 't', = 't', = 't', = 't',
= 'p', = 'p', = 'p', = 'p',
= 'N',
}
-- Replacement table applied character by character in export.retroflexion to
-- the leading run of an ending when the stem ends in "z".
-- Values are w, W, q, Q, R.
-- NOTE(review): every entry in this copy has lost its key (`= 'w'` is not valid
-- Lua); restore the keys from the upstream source.
local dental_to_retroflex = {
= 'w', = 'W', = 'q', = 'Q', = 'R',
}
-- Replacement table passed to gsub for the last character of a stem in
-- internal_sandhi (the branch that also rewrites the start of the ending to
-- "D"). Values are the lower-case stops k, g, c, j, w, q, t, d, p, b.
-- NOTE(review): every entry in this copy has lost its key (`= 'k'` is not valid
-- Lua); restore the keys from the upstream source.
local deaspirate = {
= 'k', = 'g',
= 'c', = 'j',
= 'w', = 'q',
= 't', = 'd',
= 'p', = 'b',
= 'g',
}
-- Tests whether `text` consists of any number of consonants, exactly one
-- vowel with an optional accent mark, and any number of consonants.
-- Returns the result of `match` on the whole string (falsy when the shape does
-- not fit).
function export.is_monosyllabic(text)
	local consonant_run = export.consonant .. "*"
	local nucleus = export.vowel .. export.accent .. "?"
	local shape = "^" .. consonant_run .. nucleus .. consonant_run .. "$"
	return match(text, shape)
end
-- Rewrites the end of `text` for word-final position and returns the result.
--   text       : the word to adjust.
--   ambig_hint : replacement handed to gsub for the last character in the final
--                branch below (a caller-supplied choice for the ambiguous case).
-- Step 1 reduces a trailing run of two or more consonants to its first
-- character. Step 2 rewrites the last character according to which class it
-- falls in: "M" becomes "m", other classes become "H", a to_final lookup, or
-- `ambig_hint`.
-- NOTE(review): the character classes in the patterns below are missing in this
-- copy (several branches test the empty pattern ""), so the branch conditions
-- cannot be read off the code as written; restore them from the upstream source.
local function absolute_final(text, ambig_hint)
if ends_with(text, export.consonant .. export.consonant) then -- at least 2 consonants
-- take the first of the cluster
text = gsub(text, "(" .. export.consonant .. "+)$",
function(cluster) return sub(cluster, 1, 1) end)
end
-- L, v, and y are not handled as they should not appear finally
if ends_with(text, "?") then
-- do nothing
elseif ends_with(text, "M") then -- just in case
text = gsub(text, ".$", "m")
elseif ends_with(text, "") then
text = gsub(text, ".$", "H")
elseif ends_with(text, "") then
text = gsub(text, ".$", to_final)
elseif ends_with(text, "") then
text = gsub(text, ".$", ambig_hint)
end
return text
end
-- Applies retroflexion across a stem/ending boundary and returns the adjusted
-- pair `stem, ending`.
-- The steps run in order, each seeing the result of the previous one:
--   1. an ending-initial "s" becomes "z" after a triggering stem end;
--   2. a stem-final "s" becomes "z" before a triggering ending start;
--   3. after a stem ending in "z", the leading run of the ending is mapped
--      character by character through dental_to_retroflex;
--   4. an "n" in the ending becomes "R" after a triggering stem end;
--   5. a stem-final "n" becomes "R" before a triggering ending start;
--   6. the same n -> R rewrite is applied within the ending on its own.
-- NOTE(review): the character classes that define the triggering contexts are
-- missing from the patterns in this copy (e.g. "??", "^s()", "(*)n()"), so the
-- exact conditions cannot be stated from the code as written; restore them from
-- the upstream source.
function export.retroflexion(stem, ending)
if ends_with(stem, "??") then
ending = gsub(ending, "^s()", "z%1")
end
if ends_with(stem, "??s") and starts_with(ending, "") then
stem = gsub(stem, "s$", "z")
end
if ends_with(stem, "z") then
ending = gsub(ending, "^*", function(dentals) return gsub(dentals, ".", dental_to_retroflex) end)
end
if ends_with(stem, "*") then
ending = gsub(ending,
"^(*)n()",
function(pre, post)
return pre .. "R" .. post
end)
end
if ends_with(stem, "*n") and starts_with(ending, "") then
stem = gsub(stem, "n$", "R")
end
-- for safety
ending = gsub(ending,
"(*)n()",
function(pre, post)
return pre .. "R" .. post
end)
return stem, ending
end
-- Joins `stem` and `ending`, first resolving which of them keeps its accent.
--   has_accent      : when falsy the two parts are concatenated untouched.
--   recessive       : strip every accent from the joined word, then put "/"
--                     after the first vowel matched by the pattern below.
--   accent_override : strip the accent from the stem.
--   mono            : if the ending carries an accent, strip the stem's.
-- Otherwise, when both parts carry an accent, the ending loses its accent.
-- Returns the joined string.
-- NOTE(review): the "(-)" capture in the recessive pattern looks like it has
-- lost a character class in this copy; it is reproduced unchanged here.
local function combine_accent(stem, ending, has_accent, accent_override, mono, recessive)
	if not has_accent then
		return stem .. ending
	end

	local accent_class = export.accent

	if recessive then
		-- Drop all accents, then accent the first vowel.
		local joined = gsub(stem .. ending, accent_class, "")
		joined = gsub(joined, "^(-)(" .. export.vowel .. ")", "%1%2/")
		return joined
	end

	if accent_override or (mono and match(ending, accent_class)) then
		stem = gsub(stem, accent_class, "")
	elseif match(stem, accent_class) and match(ending, accent_class) then
		ending = gsub(ending, accent_class, "")
	end

	return stem .. ending
end
-- Joins a stem and an ending, applying the sound changes at their boundary,
-- and returns the combined string.
-- Fields read from `input_table`:
--   stem, ending    : the two strings to join.
--   ambig_hint      : replacement passed to gsub for an ambiguous final
--                     consonant (here and in absolute_final).
--   mono            : enables the branches specific to monosyllabic stems.
--   j_to_z          : enables the stem-final "j" -> "z" branch.
--   final           : treat the stem end as word-final before consonant sandhi.
--   has_accent, accent_override, recessive : forwarded to combine_accent.
-- Flow: an empty ending returns absolute_final(stem) immediately. Otherwise one
-- branch of the elseif chain below adjusts stem and/or ending, after which
-- retroflexion, combine_accent and absolute_final are applied in that order.
-- NOTE(review): this copy has lost every bracketed span. Character classes in
-- patterns are missing (many conditions test ""), table constructors have no
-- keys, and table lookups have no index (e.g. `stem .. insert_glide`
-- concatenates a table). The function cannot run as written; restore these
-- spans from the upstream source before changing any logic here.
function export.internal_sandhi(input_table)
local stem, ending = input_table.stem, input_table.ending
local last, acc, first, combined
-- explicitly ignored are CV, C + semivowel, or C + nasal
if ending == "" then
return absolute_final(stem, input_table.ambig_hint)
elseif starts_with(ending, export.vowel) then -- ending starts with vowel
if ends_with(stem, export.vowel_with_accent) then -- stem ends with vowel
-- strip last vowel and accent off stem
stem, last, acc = match(stem, "^(.*)(" .. export.vowel .. ")(" .. export.accent .. "?)$")
-- strip first vowel off ending
first, ending = match(ending, "^(.)(.*)$")
-- NOTE(review): the lookups in this chain are missing their index expressions.
if match(last, '') and input_table.mono then
stem = stem .. insert_glide
ending = first .. ending
elseif lower(last) == lower(first) then -- homorganic
ending = upper(first) .. acc .. ending
elseif lower(last) == "a" then -- gunation and vrddhization
ending = export.up_one_grade .. ending
-- NOTE(review): as written this condition tests the table itself, which is
-- always truthy, so the split_diphthong branch below is unreachable.
elseif export.semivowel_to_cons then
stem = stem .. export.semivowel_to_cons
-- an acute ("/") on the removed stem vowel becomes "\" after the ending's first vowel
ending = first .. (acc == "/" and "\\" or "") .. ending
elseif export.split_diphthong then -- guna and vrddhi splitting
stem = stem .. export.split_diphthong
ending = first .. ending
end
end
-- all consonants unchanged
elseif ends_with(stem, "?") and input_table.mono then
stem = gsub(stem, "(?)()$", function(vow, glide) return export.lengthen .. glide end)
elseif ends_with(stem, "") and starts_with(ending, "n") then
ending = gsub(ending, "^.", "Y")
elseif ends_with(stem, "S") and starts_with(ending, "s") then
stem = gsub(stem, ".$", "k")
elseif ends_with(stem, "s") and starts_with(ending, "s") and input_table.mono then
stem = gsub(stem, ".$", "t")
elseif ends_with(stem, "s") and starts_with(ending, "B") and input_table.mono then
stem = gsub(stem, ".$", "d")
elseif ends_with(stem, "j") and starts_with(ending, "") and input_table.j_to_z then
stem = gsub(stem, ".$", "z")
elseif ends_with(stem, "h") and starts_with(ending, "") then
stem = gsub(stem, "(?)(?)h$", function(vow, acc) return (export.lengthen or "") .. acc end)
ending = gsub(ending, "", "Q")
elseif ends_with(stem, "") and starts_with(ending, "") then
stem = gsub(stem, ".$", deaspirate)
ending = gsub(ending, "^.", "D")
elseif ends_with(stem, export.consonant) and starts_with(ending, export.consonant) then
-- consonant + consonant: optionally reduce the stem end as if word-final first
if input_table.final then
if ends_with(stem, export.consonant .. export.consonant) then -- at least 2 consonants
-- take the first of the cluster
stem = gsub(stem, "(" .. export.consonant .. "+)$",
function(cluster) return sub(cluster, 1, 1) end)
end
if ends_with(stem, "") then
stem = gsub(stem, ".$", to_final)
elseif ends_with(stem, "") then
stem = gsub(stem, ".$", input_table.ambig_hint)
end
end
-- then adjust the stem-final consonant to the ending's first sound
if ends_with(stem, "") then
if starts_with(ending, "") then
stem = gsub(stem, ".$", { = 'g', = 'q', = 'b'})
elseif starts_with(ending, "h") then
stem = gsub(stem, ".$", { = 'g', = 'q', = 'b'})
ending = gsub(ending, "^.", gsub(stem, ".$", { = 'G', = 'Q', = 'B'}))
end
elseif ends_with(stem, "t") then
if starts_with(ending, "") then
stem = gsub(stem, ".$", gsub(ending, "^.",
{
= 'c', = 'c', = 'j', = 'J',
= 'w', = 'w', = 'q', = 'q',
}))
elseif starts_with(ending, "S") then
stem = gsub(stem, ".$", "c")
ending = gsub(ending, "^.", "C")
elseif starts_with(ending, "") then
stem = gsub(stem, ".$", "d")
elseif starts_with(ending, "") then
stem = gsub(stem, ".$", "n")
elseif starts_with(ending, "h") then
stem = gsub(stem, ".$", "d")
ending = gsub(ending, "^.", "D")
end
elseif ends_with(stem, "m") then
if starts_with(ending, "") then
stem = gsub(stem, ".$", "M")
elseif starts_with(ending, "") then
stem = gsub(stem, ".$", "n")
end
elseif ends_with(stem, "n") then
if starts_with(ending, "") then
stem = gsub(stem, ".$", "M")
elseif starts_with(ending, "") then
stem = gsub(stem, ".$",
{
= 'N', = 'N', = 'N', = 'N',
= 'Y', = 'Y', = 'Y', = 'Y',
= 'R', = 'R', = 'R', = 'R',
= 'm', = 'm', = 'm', = 'm',
= 'M', -- or 'l~'
})
end
elseif ends_with(stem, "?") and starts_with(ending, "") then
stem = gsub(stem, "()(?)$",
function(vow, acc) return (vow == "a" and "o" or "A") .. acc end)
elseif ends_with(stem, "") then
if starts_with(ending, "") then
stem = gsub(stem, ".$", "H")
elseif starts_with(ending, "") then
stem = gsub(stem, ".$",
{
= 'S', = 'S',
= 'z', = 'Z',
= 's', = 's'
})
elseif starts_with(ending, "r") then
stem = gsub(stem, "(" .. export.vowel .. ")$", function(vow) return export.lengthen or vow end)
elseif starts_with(ending, "") then
stem = gsub(stem, ".$", "r")
end
end
end
-- boundary-independent post-processing, applied to every non-empty ending
stem, ending = export.retroflexion(stem, ending)
combined = combine_accent(stem, ending, input_table.has_accent, input_table.accent_override, input_table.mono, input_table.recessive)
return absolute_final(combined, input_table.ambig_hint)
end
return export