Syllabifies a Toki Pona word.
For example, on the page sitelen it produces: Syllabification: si‧te‧len.
Basic usage: {{tok-hyph}}.
Optionally, you can pass a word to be syllabified instead of the page title: {{tok-hyph|sitelen}}.
All tests passed. (refresh)
Text | Expected | Actual | |
---|---|---|---|
a | a | a | |
akesi | a‧ke‧si | a‧ke‧si | |
ala | a‧la | a‧la | |
alasa | a‧la‧sa | a‧la‧sa | |
ale | a‧le | a‧le | |
ali | a‧li | a‧li | |
anpa | an‧pa | an‧pa | |
ante | an‧te | an‧te | |
anu | a‧nu | a‧nu | |
awen | a‧wen | a‧wen | |
e | e | e | |
en | en | en | |
epiku | e‧pi‧ku | e‧pi‧ku | |
esun | e‧sun | e‧sun | |
ijo | i‧jo | i‧jo | |
ike | i‧ke | i‧ke | |
ilo | i‧lo | i‧lo | |
insa | in‧sa | in‧sa | |
jaki | ja‧ki | ja‧ki | |
jan | jan | jan | |
jasima | ja‧si‧ma | ja‧si‧ma | |
jelo | je‧lo | je‧lo | |
jo | jo | jo | |
kala | ka‧la | ka‧la | |
kalama | ka‧la‧ma | ka‧la‧ma | |
kama | ka‧ma | ka‧ma | |
kasi | ka‧si | ka‧si | |
ken | ken | ken | |
kepeken | ke‧pe‧ken | ke‧pe‧ken | |
kijetesantakalu | ki‧je‧te‧san‧ta‧ka‧lu | ki‧je‧te‧san‧ta‧ka‧lu | |
kili | ki‧li | ki‧li | |
kin | kin | kin | |
kipisi | ki‧pi‧si | ki‧pi‧si | |
kiwen | ki‧wen | ki‧wen | |
ko | ko | ko | |
kokosila | ko‧ko‧si‧la | ko‧ko‧si‧la | |
kon | kon | kon | |
ku | ku | ku | |
kule | ku‧le | ku‧le | |
kulupu | ku‧lu‧pu | ku‧lu‧pu | |
kute | ku‧te | ku‧te | |
la | la | la | |
lanpan | lan‧pan | lan‧pan | |
lape | la‧pe | la‧pe | |
laso | la‧so | la‧so | |
lawa | la‧wa | la‧wa | |
leko | le‧ko | le‧ko | |
len | len | len | |
lete | le‧te | le‧te | |
li | li | li | |
lili | li‧li | li‧li | |
linja | lin‧ja | lin‧ja | |
lipu | li‧pu | li‧pu | |
loje | lo‧je | lo‧je | |
lon | lon | lon | |
luka | lu‧ka | lu‧ka | |
lukin | lu‧kin | lu‧kin | |
lupa | lu‧pa | lu‧pa | |
ma | ma | ma | |
mama | ma‧ma | ma‧ma | |
mani | ma‧ni | ma‧ni | |
meli | me‧li | me‧li | |
meso | me‧so | me‧so | |
mi | mi | mi | |
mije | mi‧je | mi‧je | |
misikeke | mi‧si‧ke‧ke | mi‧si‧ke‧ke | |
moku | mo‧ku | mo‧ku | |
moli | mo‧li | mo‧li | |
monsi | mon‧si | mon‧si | |
monsuta | mon‧su‧ta | mon‧su‧ta | |
mu | mu | mu | |
mun | mun | mun | |
musi | mu‧si | mu‧si | |
mute | mu‧te | mu‧te | |
n | n | n | |
namako | na‧ma‧ko | na‧ma‧ko | |
nanpa | nan‧pa | nan‧pa | |
nasa | na‧sa | na‧sa | |
nasin | na‧sin | na‧sin | |
nena | ne‧na | ne‧na | |
ni | ni | ni | |
nimi | ni‧mi | ni‧mi | |
noka | no‧ka | no‧ka | |
o | o | o | |
oko | o‧ko | o‧ko | |
olin | o‧lin | o‧lin | |
ona | o‧na | o‧na | |
open | o‧pen | o‧pen | |
pakala | pa‧ka‧la | pa‧ka‧la | |
pali | pa‧li | pa‧li | |
palisa | pa‧li‧sa | pa‧li‧sa | |
pan | pan | pan | |
pana | pa‧na | pa‧na | |
pi | pi | pi | |
pilin | pi‧lin | pi‧lin | |
pimeja | pi‧me‧ja | pi‧me‧ja | |
pini | pi‧ni | pi‧ni | |
pipi | pi‧pi | pi‧pi | |
poka | po‧ka | po‧ka | |
poki | po‧ki | po‧ki | |
pona | po‧na | po‧na | |
pu | pu | pu | |
sama | sa‧ma | sa‧ma | |
seli | se‧li | se‧li | |
selo | se‧lo | se‧lo | |
seme | se‧me | se‧me | |
sewi | se‧wi | se‧wi | |
sijelo | si‧je‧lo | si‧je‧lo | |
sike | si‧ke | si‧ke | |
sin | sin | sin | |
sina | si‧na | si‧na | |
sinpin | sin‧pin | sin‧pin | |
sitelen | si‧te‧len | si‧te‧len | |
soko | so‧ko | so‧ko | |
sona | so‧na | so‧na | |
soweli | so‧we‧li | so‧we‧li | |
suli | su‧li | su‧li | |
suno | su‧no | su‧no | |
supa | su‧pa | su‧pa | |
suwi | su‧wi | su‧wi | |
tan | tan | tan | |
taso | ta‧so | ta‧so | |
tawa | ta‧wa | ta‧wa | |
telo | te‧lo | te‧lo | |
tenpo | ten‧po | ten‧po | |
toki | to‧ki | to‧ki | |
tomo | to‧mo | to‧mo | |
tonsi | ton‧si | ton‧si | |
tu | tu | tu | |
unpa | un‧pa | un‧pa | |
uta | u‧ta | u‧ta | |
utala | u‧ta‧la | u‧ta‧la | |
walo | wa‧lo | wa‧lo | |
wan | wan | wan | |
waso | wa‧so | wa‧so | |
wawa | wa‧wa | wa‧wa | |
weka | we‧ka | we‧ka | |
wile | wi‧le | wi‧le | |
-- Primary module authorship: Chernorizets (original Bulgarian syllabification code)
-- Port to Lua: Kiril Kovachev
-- Adaptation to Toki Pona: Kiril Kovachev
-- 17 April 2024.
local export = {}
local substring = mw.ustring.sub
local rsubn = mw.ustring.gsub
local rsplit = mw.text.split
local U = mw.ustring.char
local lang = require("Module:languages").getByCode("tok")
local script = require("Module:scripts").getByCode("Latn")
-- Pattern matching a single vowel, used by count_vowels(). It must be a
-- character class: with an empty pattern, gsub matches the empty string at
-- every position and the "vowel count" becomes the word length plus one.
-- Both cases are listed because syllabify_poly() lowercases each character
-- before testing it, so vowel detection is case-insensitive there too.
local hvowels_c = "[aeiouAEIOU]"
-- Separator placed between syllables: U+2027.
local HYPH = U(0x2027)
-- NOTE(review): BREAK_MARKER is not referenced by any code visible in this
-- module; presumably a leftover from the module this one was adapted from.
local BREAK_MARKER = "."
-- Single-result wrapper around rsubn(): performs the substitution and returns
-- only the resulting string, dropping the replacement count.
local function rsub(term, foo, bar)
	-- The parentheses truncate rsubn()'s results to the first value.
	return (rsubn(term, foo, bar))
end
-- Returns the single character of `str` located at position `index`
-- (a one-character substring taken with the Unicode-aware substring helper).
local function char_at(str, index)
	local first, last = index, index
	return substring(str, first, last)
end
-- Returns how many times the module-level pattern `hvowels_c` matches in
-- `word`. The count is the second result of mw.ustring.gsub; the substituted
-- string itself is discarded.
local function count_vowels(word)
	-- select(2, ...) skips the substituted string; the outer parentheses
	-- guarantee exactly one return value.
	return (select(2, mw.ustring.gsub(word, hvowels_c, "")))
end
-- Tells whether `ch` is exactly one of the lowercase vowels a, e, i, o, u.
-- The comparison is case-sensitive; callers lowercase the character first.
-- Returns true or false.
local function is_vowel(ch)
	return ch == "a" or ch == "e" or ch == "i" or ch == "o" or ch == "u"
end
---- Main syllabification code
-- Given the positions of two consecutive vowels, returns the position at
-- which the syllable containing the right-hand vowel begins.
-- word: the word being scanned (not inspected here)
-- left_vowel, right_vowel: integer positions of the two vowels
local function find_next_syllable_onset(word, left_vowel, right_vowel)
	-- Number of characters strictly between the two vowels.
	local gap = right_vowel - left_vowel - 1
	if gap == 0 then
		-- Adjacent vowels: the new syllable starts on the right vowel itself.
		return right_vowel
	elseif gap == 1 then
		-- A lone consonant between vowels opens the next syllable.
		return left_vowel + 1
	end
	-- Anything else is treated as a two-consonant cluster. Toki Pona
	-- phonotactics only permit a syllable-final nasal followed by the next
	-- syllable's initial consonant, so the break falls between the two and
	-- the onset is the second consonant.
	return left_vowel + 2
end
-- Splits `word` into syllables by walking it character by character and
-- cutting between each pair of consecutive vowels.
-- Returns a list (table) of syllable strings, in order.
local function syllabify_poly(word)
	local pieces = {}
	local last_vowel = -1 -- position of the most recent vowel; -1 until one is seen
	local onset = 1 -- position where the syllable currently being built starts

	local length = mw.ustring.len(word)
	for pos = 1, length do
		local ch = mw.ustring.lower(char_at(word, pos))
		if is_vowel(ch) then
			if last_vowel == -1 then
				-- First vowel of the word: nothing to cut yet, just remember it.
				last_vowel = pos
			else
				-- Somewhere between the previous vowel and this one lies a
				-- syllable boundary; everything before the new onset is a
				-- finished syllable.
				local next_onset = find_next_syllable_onset(word, last_vowel, pos)
				pieces[#pieces + 1] = substring(word, onset, next_onset - 1)
				last_vowel = pos
				onset = next_onset
			end
		end
	end

	-- Whatever remains from the last onset to the end is the final syllable.
	pieces[#pieces + 1] = substring(word, onset)
	return pieces
end
-- Syllabifies a single word.
-- word: the word to split (a string).
-- Returns a string: the word's syllables joined by HYPH. An empty word
-- yields the empty string.
function export.syllabify_word(word)
	-- Return "" rather than a table for the empty word: every other path
	-- returns a string, and export.syllabify() feeds the result straight into
	-- table.concat(), which raises an error on a table element (this happens
	-- e.g. when a term contains two consecutive spaces).
	if mw.ustring.len(word) == 0 then
		return ""
	end

	local n_vowels = count_vowels(word)
	-- With at most one vowel the whole word is a single syllable.
	local syllables = n_vowels <= 1 and {word} or syllabify_poly(word)
	return table.concat(syllables, HYPH)
end
-- Syllabifies every word of a term.
-- term: a string of one or more words separated by single spaces.
-- Returns a string in which each word has been replaced by the result of
-- export.syllabify_word(), with the words again separated by single spaces.
function export.syllabify(term)
	local words = rsplit(term, " ")
	local out = {}
	-- ipairs, not pairs: the output must keep the words in their original
	-- order, and pairs() does not promise any traversal order.
	for _, word in ipairs(words) do
		table.insert(out, export.syllabify_word(word))
	end
	return table.concat(out, " ")
end
-- Template entry point: renders the "Syllabification" line for a term.
-- frame: the Scribunto frame; arguments are read from the parent frame
-- (i.e. from the template invocation).
-- The term is taken from the first positional template argument if given;
-- otherwise "sitelen" is used as a demo value in the Template namespace,
-- and the current page title everywhere else.
-- Returns whatever Module:hyphenation's format_hyphenations() produces.
function export.show_syllabification(frame)
	-- Accept a single optional positional parameter: the word to syllabify.
	local params = {
		[1] = {},
	}
	local title = mw.title.getCurrentTitle()
	local args = require("Module:parameters").process(frame:getParent().args, params)

	-- Index into the processed arguments: `args` itself is a table and thus
	-- always truthy, so testing it directly would never reach the fallbacks.
	local term = args[1] or (title.nsText == "Template" and "sitelen") or title.text

	local syllabification = export.syllabify(term)
	local syllables = rsplit(syllabification, HYPH)
	return require("Module:hyphenation").format_hyphenations(
		{
			lang = lang,
			hyphs = { { hyph = syllables } },
			sc = script,
			caption = "Syllabification",
		}
	)
end
return export