-- This module powers {{fi-hyphenation}}.
-- Table of everything this module exposes; returned at the end of the file.
local export = {}
-- Language code handed to Module:languages when export.hyphenation builds its link.
local lang = "fi"
-- Script code. NOTE(review): not referenced anywhere in the visible part of this file.
local sc = "Latn"
-- String helpers from Module:string utilities, cached as locals.
-- NOTE(review): presumably Unicode-aware counterparts of the string library
-- (positions counted in characters, not bytes) — confirm against that module.
local m_str_utils = require("Module:string utilities")
local gsub = m_str_utils.gsub -- NOTE(review): unused in the visible part of this file
local find = m_str_utils.find
local len = m_str_utils.len
local lower = m_str_utils.lower
local match = m_str_utils.match
local sub = m_str_utils.sub
-- Letters treated as vowels, and a Lua pattern character class matching
-- exactly one of them. The class must be non-empty: it is concatenated into
-- anchored patterns such as "^" .. consonant .. vowel, and an empty string
-- there would make every such pattern match at every position.
local vowels = "aeiouyåäö"
local vowel = "[" .. vowels .. "]"
-- Letters (and the extra symbols ʔ and *) treated as consonants, and the
-- corresponding one-character class. "*" is literal inside a character class.
local consonants = "bcdfghjklmnpqrstvwxzšžʔ*"
local consonant = "[" .. consonants .. "]"
-- orthographic symbols that signify separation of syllables
-- NOTE(review): presumably meant to be interpolated into the separator pattern
-- in generate_hyphenation; the pattern visible there does not reference it — confirm.
local sep_symbols = "'’./ +\"-"
-- these signify that the next syllable is an "initial" syllable in a word
-- all symbols from here should also be in sep_symbols
-- NOTE(review): not referenced by any code visible in this file — confirm it is
-- still used where initial_syllable is recomputed after a separator.
local stressed_symbols = "/ +-"
-- exposed so that other modules can read the separator set
export.sep_symbols = sep_symbols
-- Diphthongs and long vowels, as Lua patterns that each match exactly two
-- vowel letters. generate_hyphenation anchors them with "^" at a vowel and,
-- on a match, consumes two characters, so every entry must describe a pair.
-- NOTE(review): the i-/u-/y-final entries were bare "i", "u", "y" (matching a
-- single vowel); the first-vowel classes below are restored from the standard
-- Finnish diphthong inventory (ai ei oi ui yi äi öi / au eu iu ou / ey iy äy öy)
-- — confirm against the upstream revision.

-- in initial syllables
local vowel_sequences_initial = {
	"[aeouyäö]i",
	"[aeio]u",
	"[eiäö]y",
	"uo",
	"ie",
	"yö",
	"aa", "ee", "ii", "oo", "uu", "yy", "ää", "öö"
}
-- in non-initial syllables
-- further, diphthongs ending _u or _y are diphthongs only
-- in non-initial syllables if the syllable is open
local vowel_sequences_noninitial = {
	"[aeouyäö]i",
	"aa", "ee", "ii", "oo", "uu", "yy", "ää", "öö"
}
-- in non-initial *open* syllables, in addition to above
local vowel_sequences_noninitial_open = {
	"[aeio]u",
	"[eiäö]y"
}
-- sonority by consonant (higher number is more sonorous)
-- Consulted by split_by_sonority to decide where a consonant cluster is divided.
-- NOTE(review): every entry below has lost its key (`= -1` is not valid Lua).
-- The table presumably mapped single consonant letters to a sonority rank,
-- i.e. entries of the form ["<letter>"] = <rank>, with six ranks from -1 to -6.
-- The letters cannot be recovered from this file — restore them from the
-- upstream revision.
local sonorities = {
= -1, = -1, = -1, = -1, = -1,
= -2, = -2, = -2,
= -3, = -3, = -3,
= -4, = -4, = -4,
= -5, = -5, = -5,
= -6, = -6, = -6
}
--- Decide where a run of consonants between two vowels is divided.
-- @param cluster the consonant run (the caller passes it lower-cased)
-- @return the number of leading consonants of `cluster` that stay with the
--   preceding syllable; the remainder opens the next syllable. For clusters
--   of at most two consonants this is len - 1 (hence -1 for an empty string,
--   which the "ng" recursion below relies on).
local function split_by_sonority(cluster)
	local i = len(cluster)
	-- always split two-consonant clusters evenly
	if i <= 2 then
		return i - 1
	end
	-- always split 'native' three-consonant clusters to AB.C
	if i == 3 then
		-- Must be local: as an implicit global it kept its value between
		-- calls, so one 'native' cluster made every later three-consonant
		-- cluster that matched none of the endings below look native too.
		local native
		-- NOTE(review): the three "^" patterns match the empty string at the
		-- start of any cluster, so `native` is truthy whenever the ending
		-- matches. They look truncated (probably a character class for the
		-- leading consonants was lost) — confirm against upstream.
		if match(cluster, "t$") or match(cluster, "s$") then
			native = match(cluster, "^")
		elseif match(cluster, "kk$") then
			native = match(cluster, "^")
		elseif match(cluster, "pp$") then
			native = match(cluster, "^")
		end
		if native then return 2 end
	end
	local ng = find(cluster, "ng")
	-- never split ng
	if ng then
		-- keep everything up to and including "ng" together, then split
		-- whatever follows it on its own
		return ng + 1 + split_by_sonority(sub(cluster, ng + 2))
	end
	-- Walk leftwards from the last consonant for as long as sonority keeps
	-- strictly falling; the consonants walked over form the next onset.
	local max_sonority = 0
	while i > 1 do
		-- look up the consonant at position i (comparing the whole table
		-- with a number, as before, raises an error)
		local sonority = sonorities[sub(cluster, i, i)]
		-- unknown consonant: fall back to splitting before the last one
		if sonority == nil then return len(cluster) - 1 end
		if sonority >= max_sonority then break end
		max_sonority = sonority
		i = i - 1
	end
	return i
end
--- Split a word into syllables.
-- @param word the word to hyphenate; may contain separator symbols that
--   force a syllable boundary
-- @param keep_sep_symbols `true` to keep every separator symbol in the
--   output, a string listing the symbols to keep, or a falsy value to drop
--   them all. A kept symbol is prepended to the syllable that follows it.
-- @param mode may be one of the following:
--   default: naive hyphenation using Finnish hyphenation rules
--   "dots": use default rules, but add dots between syllable boundaries if
--     allowed by keep_sep_symbols
--   "sonority": split consonant clusters according to the sonority
--     sequencing principle; add dots for new splits (i.e. not those incurred
--     by symbols) followed by multiple consonants if keep_sep_symbols allows it
--   "allow_diphthongs_everywhere": only for backwards compatibility with
--     {{fi-hyphenation}}
-- @return array of syllable strings, in order
function export.generate_hyphenation(word, keep_sep_symbols, mode)
	local res = {}
	local syllable = ""
	local pos = 1
	local found_vowel = false
	local initial_syllable = true
	local lower_word = lower(word)
	-- `pos` is a character index, so bound the loop by character count
	-- rather than by the byte length `#word`
	local word_len = len(word)

	-- Plain (non-pattern) search: as a pattern, "." matches any character,
	-- which allowed dots for every non-empty keep_sep_symbols string.
	local dots_allowed = keep_sep_symbols == true
		or (type(keep_sep_symbols) == "string" and find(keep_sep_symbols, ".", 1, true) ~= nil)
	local dots = mode == "dots" and dots_allowed
	local sonority = mode == "sonority"
	local allow_diphthongs_everywhere = mode == "allow_diphthongs_everywhere"

	-- Append the syllable collected so far, if any; reports whether it did.
	local function push_syllable()
		if #syllable > 0 then
			res[#res + 1] = syllable
			return true
		end
		return false
	end

	while pos <= word_len do
		if match(lower_word, "^" .. consonant .. vowel, pos) then
			-- CV: end current syllable if we have found a vowel
			if found_vowel then
				if push_syllable() then
					initial_syllable = false
				end
				found_vowel = false
				syllable = dots and "." or ""
			end
			syllable = syllable .. sub(word, pos, pos)
			pos = pos + 1
		elseif sonority and found_vowel and #syllable > 0 and match(lower_word, "^" .. consonant .. "+" .. vowel, pos) then
			-- (V)C+V: split by sonority
			local cluster = match(lower_word, "^" .. consonant .. "+", pos)
			local len_cluster = len(cluster)
			-- fall back to "before the last consonant" so that `pos` always advances
			local split_point = split_by_sonority(cluster) or (len_cluster - 1)
			-- copy from `word`, not from the lower-cased cluster, so the
			-- original letter case is kept as in every other branch
			syllable = syllable .. sub(word, pos, pos + split_point - 1)
			res[#res + 1] = syllable
			initial_syllable = false
			-- add a dot if dots are allowed, and if the split point is not before the final consonant
			local add_dot = dots_allowed and split_point ~= len_cluster - 1
			syllable = (add_dot and "." or "") .. sub(word, pos + split_point, pos + len_cluster - 1)
			pos = pos + len_cluster
			found_vowel = false
		elseif match(lower_word, "^" .. consonant, pos) then
			-- C: continue
			syllable = syllable .. sub(word, pos, pos)
			pos = pos + 1
		elseif match(lower_word, "^" .. vowel, pos) then
			if found_vowel then
				-- already found a vowel, end current syllable
				if push_syllable() then
					initial_syllable = false
				end
				syllable = dots and "." or ""
			end
			found_vowel = true
			-- check for diphthongs or long vowels
			local vowel_sequences = (allow_diphthongs_everywhere or initial_syllable) and vowel_sequences_initial or vowel_sequences_noninitial
			local seq_ok = false
			for _, v in ipairs(vowel_sequences) do
				if match(lower_word, "^" .. v, pos) then
					seq_ok = true
					break
				end
			end
			if not seq_ok and not initial_syllable then
				-- _u/_y sequences count only when the syllable stays open:
				-- either no consonant follows, or a single consonant that
				-- starts the next syllable (consonant + vowel).
				-- NOTE(review): the two tests previously here were identical
				-- and had lost their trailing patterns; this follows the rule
				-- documented on vowel_sequences_noninitial_open — confirm
				-- against upstream.
				for _, v in ipairs(vowel_sequences_noninitial_open) do
					if match(lower_word, "^" .. v, pos)
						and (match(lower_word, "^" .. v .. consonant .. vowel, pos)
							or not match(lower_word, "^" .. v .. consonant, pos)) then
						seq_ok = true
						break
					end
				end
			end
			if seq_ok then
				syllable = syllable .. sub(word, pos, pos + 1)
				pos = pos + 2
			else
				syllable = syllable .. sub(word, pos, pos)
				pos = pos + 1
			end
		else
			-- A separator symbol, optionally wrapped in parentheses. The
			-- class is mandatory: without it the pattern matches the empty
			-- string and `pos` would never advance.
			-- NOTE(review): class restored from sep_symbols — confirm.
			local sepchar = match(lower_word, "^%(?[" .. sep_symbols .. "]%)?", pos)
			if sepchar then
				-- separates syllables
				push_syllable()
				-- NOTE(review): class restored from stressed_symbols — confirm.
				initial_syllable = match(sepchar, "^%(?[" .. stressed_symbols .. "]%)?") ~= nil
				-- plain search: the character is data, not a pattern
				-- ("(" raises an error and "." matches anything as a pattern)
				local keep = keep_sep_symbols == true
					or (type(keep_sep_symbols) == "string" and find(keep_sep_symbols, sub(word, pos, pos), 1, true) ~= nil)
				syllable = keep and sepchar or ""
				pos = pos + len(sepchar)
				found_vowel = false
			else
				-- ?: continue
				syllable = syllable .. sub(word, pos, pos)
				pos = pos + 1
			end
		end
	end
	push_syllable()
	return res
end
-- Template entry point: returns the text "Hyphenation: " followed by a link
-- whose display text is the syllables joined with "‧".
-- frame: when this is a table, template arguments are read from
-- frame:getParent().args; otherwise the current page title is hyphenated.
-- NOTE(review): this block is not valid Lua as it stands — the keys of the
-- `params` entries and the indexing of `args` appear to have been lost.
-- Restore them from the upstream revision.
function export.hyphenation(frame)
-- default: hyphenate the title of the page being rendered
local title = mw.title.getCurrentTitle().text
if type(frame) == "table" then
-- NOTE(review): three parameter specs with missing keys; presumably one list
-- parameter holding explicit syllables plus two named parameters that
-- override the title — confirm.
local params = {
= {list = true, default = nil},
= {},
= {},
}
local args = require("Module:parameters").process(frame:getParent().args, params)
-- NOTE(review): `hyphenation` is never declared local, so this assigns a
-- global; it probably should be a local declared before the `if`.
hyphenation = args
-- NOTE(review): as written this always yields `args`; the two lookups were
-- presumably indexed by the two named parameters above.
title = args or (args or title)
end
-- no explicit syllables supplied: generate them from the title
if not hyphenation or #hyphenation < 1 then
-- NOTE(review): the mode passed here is "diphthongs_everywhere", but
-- generate_hyphenation only recognises "allow_diphthongs_everywhere", so
-- this call runs with the default rules — confirm which is intended.
hyphenation = export.generate_hyphenation(title, false, "diphthongs_everywhere")
end
-- NOTE(review): tr = "-" presumably suppresses transliteration in Module:links — confirm.
local text = require("Module:links").full_link({lang = require("Module:languages").getByCode(lang), alt = table.concat(hyphenation, "‧"), tr = "-"})
return "Hyphenation: " .. text
end
return export