--[[
This module implements the template {{af-IPA}}.
Author: AmazingJus
Sources:
- Donaldson, Bruce C. (1993). A Grammar of Afrikaans.
- Wissing, Daan (2016). "Afrikaans phonology". Taalportaal.
--]]
local export = {}
local lang = require("Module:languages").getByCode("af")
local sc = require("Module:scripts").getByCode("Latn")
local hyph = require("Module:hyphenation")
local tbl = require("Module:table")
-- Forwards `text` and `face` to tag_text in Module:script utilities, supplying
-- this module's language object (`lang`) and script object (`sc`).
function export.tag_text(text, face)
	local script_utilities = require("Module:script utilities")
	return script_utilities.tag_text(text, lang, sc, face)
end
-- Forwards to full_link in Module:links with a term-data table built from
-- `term` and this module's language and script objects; `face` is passed on
-- unchanged.
function export.link(term, face)
	local term_data = { term = term, lang = lang, sc = sc }
	return require("Module:links").full_link(term_data, face)
end
-- `u` is called below with code points, e.g. u(0x0300).
-- NOTE(review): presumably returns the character(s) for the given code
-- point(s); confirm against Module:string/char.
local u = require("Module:string/char")
-- Short local aliases for the Scribunto string helpers (mw.ustring / mw.text)
-- used throughout this module.
local decomp = mw.ustring.toNFD
local recomp = mw.ustring.toNFC
local lower = mw.ustring.lower
local find = mw.ustring.find
local len = mw.ustring.len
local match = mw.ustring.match
local split = mw.text.split
local gsplit = mw.text.gsplit
local sub = mw.ustring.sub
-- gsub returns two values (result and substitution count); see rsub() below
local rsubn = mw.ustring.gsub
local rmatch = mw.ustring.gmatch
-- rsubn() wrapper returning only the substituted string (the substitution
-- count is dropped by the surrounding parentheses)
local function rsub(term, foo, bar)
	return (rsubn(term, foo, bar))
end
-- keep applying rsub() until a pass leaves the string unchanged, then return it
local function rsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		term = rsub(previous, foo, bar)
	until term == previous
	return term
end
-- list of constants (each diacritic is produced by calling u() with a code point)
local grave = u(0x0300) -- grave
local acute = u(0x0301) -- acute
local circ = u(0x0302) -- circumflex
local dia = u(0x0308) -- diaeresis
local syll = "‧" -- syllable dot
-- for automatically generated stress
-- NOTE(review): U+FFF0 / U+FFF1 look like internal placeholder code points
-- that are not expected in user input; confirm they never reach the output.
local auto_grave = u(0xFFF0) -- automatic grave
local auto_acute = u(0xFFF1) -- automatic acute
-- list of char classes (plain character lists)
-- every accent handled by the module, including the automatic stress marks
local accent = grave .. acute .. auto_grave .. auto_acute .. circ .. dia
-- only the accents that mark stress
local stress_accent = grave .. acute .. auto_grave .. auto_acute
-- vowel letters in both cases ("y" is counted as a vowel)
local vowel = "aeiouyAEIOUY"
-- consonant letters in both cases; the uppercase half mirrors the lowercase
-- half letter for letter (including "R")
local cons = "bcdfghjklmnpqrstvwxzBCDFGHJKLMNPQRSTVWXZ"
-- characters that mark a boundary: the syllable dot and the word marker "#"
local bound = syll .. "#"
-- put them into classes
-- Each class is a bracketed pattern set built from the character lists above,
-- so it behaves as a single pattern item: it can take a quantifier (C .. "?")
-- and be captured ("(" .. V .. ")"). An empty string here would turn such
-- captures into position captures and leave quantifiers dangling.
local A = "[" .. accent .. "]" -- all accents
local V = "[" .. vowel .. "]" -- all vowels
local non_V = "[^" .. vowel .. "]" -- all non-vowels
local C = "[" .. cons .. "]" -- all consonants
local non_C = "[^" .. cons .. "]" -- all non-consonants
local CV = "[" .. cons .. vowel .. "]" -- all consonants and vowels
local S = "[" .. bound .. "]" -- any syllable boundary
-- list of valid trigraphs and digraphs, including diphthongs and long vowels
-- Maps a spelling (key) to its IPA value; the keys are iterated to build
-- graphemes_sorted below.
-- NOTE(review): every entry below starts with "=" and has no key, so the table
-- does not parse as shown. The keys (presumably bracketed grapheme strings)
-- appear to have been lost; restore them from the original page.
local graphemes = {
= "ɑːɪ̯",
= "iʊ̯",
= "iʊ̯",
= "uɪ̯",
= "oːɪ̯",
= "ɑː",
= "ɑː",
= "aɪ̯",
= "œʊ̯",
= "ɪə̯",
= "əɪ̯",
= "iʊ̯",
= "į", -- temporary value
= "ů", -- temporary value
= "ɔɪ̯",
= "ʊə̯",
= "œʊ̯",
= "uɪ̯",
= "ü" -- temporary value
}
-- sort trigraphs and digraphs in descending order of length
-- pairs() yields keys in an unspecified order and table.sort is not stable, so
-- graphemes of equal length are additionally ordered alphabetically to keep
-- the result identical from one run to the next.
local graphemes_sorted = {}
for grapheme in pairs(graphemes) do
	graphemes_sorted[#graphemes_sorted + 1] = grapheme
end
table.sort(graphemes_sorted, function(a, b)
	local len_a, len_b = len(a), len(b)
	if len_a ~= len_b then
		return len_a > len_b
	end
	return a < b
end)
-- list of various grapheme sets
-- Two groups of value pairs: vowel qualities (first group) and
-- voiced/voiceless consonant pairs (second group).
-- NOTE(review): the group keys and the per-vowel keys are missing (each line
-- starts with "="), so the table does not parse as shown. The commented-out
-- code in toIPA refers to `sets.vowel_length`, which is presumably the first
-- group's key; the second group's key is not visible anywhere in this file.
local sets = {
= { -- long-short vowels
= {"a", "ɑː"},
= {"ɛ", "ɪə̯"},
= {"ə", "i"},
= {"ɔ", "ʊə̯"},
= {"œ", "y"}
},
= { -- voiced/voiceless consonants
{"b", "p"},
{"d", "t"},
{"ʤ", "ʧ"},
{"ɡ", "k"},
{"v", "f"},
{"z", "s"},
{"ʒ", "ʃ"},
}
}
-- list of defined affixes
-- Each entry is a table whose first element is the affix text. Optional
-- fields read by is_valid_affix:
--   pos         pattern the part-of-speech string must match
--   restriction pattern the remainder of the word must match (prefixes only)
-- The two lists are read as affixes.pre and affixes.suf by apply_affixes.
local affixes = {
	-- prefixes
	pre = {
		{"aan"},
		{"agter"},
		-- NOTE(review): "^" on its own matches every string, so this
		-- restriction never rejects anything; the intended pattern may be
		-- incomplete (same for "ge" below).
		{"be", restriction = "^"},
		{"deur"},
		{"er"},
		{"ge", restriction = "^"},
		{"her"},
		{"om"},
		{"ont"},
		{"onder"},
		{"van", pos = "d"},
		{"ver"},
		{"voor"}
	},
	-- suffixes
	suf = {
		{"agtig"},
		{"baar"},
		{"dom"},
		{"end"},
		{"heid"},
		{"lik"},
		{"loos"},
		{"nis"},
		{"sel"},
		{"skap"}
	}
}
-- list of unstressed words
-- NOTE(review): not read by any code visible in this file; presumably meant
-- for the unfinished assign_stress().
local unstressed = {
"die",
"dit",
"is",
"nie",
"'n"
}
-- list of stressed endings (mostly in loanwords)
-- Entries are either a plain string or a table whose first element is the
-- ending and which carries an extra field (`orig` or `stress`). Several
-- strings contain "?", so they look like patterns rather than literal text.
-- NOTE(review): not read by any code visible in this file; presumably meant
-- for the unfinished assign_stress().
-- NOTE(review): "ie" is listed twice, and some single-letter entries ("a",
-- "e", "u") may have lost part of their pattern; confirm against the original.
local stressed_endings = {
"aa",
"aans?",
"aard?",
"ant",
"asie",
"a",
"ee?",
"ein",
{"el", orig = "loan"}, -- only in loanwords
"ent",
"eu",
"e",
"ieel",
"ie",
"ine",
"ie",
{"o", orig = "fr"}, -- only in french loanwords
"oen",
"on",
"oo",
{"sie", stress = "pre"},
"teek",
"teit",
"uu",
"u",
"y?",
}
-- list of respelling substitutions
-- Each entry is {pattern, replacement, etymology code}. The code "-" marks a
-- default rule; any other code limits the rule to terms with that etymology
-- (see generate_subs). For a given pattern the first applicable entry wins, so
-- etymology-specific rules are listed before the default rule they override.
local subs = {
	-- 'N
	{"#'n#", "#ə(n)#", "-"}, -- pronounced /ə(n)/ as the article 'n
	{"'n#", "ən#", "-"}, -- pronounced /ən/ otherwise
	-- CH
	{"ch", "ʃ", "fr"}, -- pronounced /ʃ/ in french loans
	{"sch", "sk", "-"}, -- pronounced /sk/ in the sequence "sch"
	-- NOTE(review): as written "(?)" matches a literal question mark; the
	-- character classes described in the comment appear to be missing and
	-- could not be reconstructed with confidence, so the pattern is left as is.
	{"ch(?)", "χ%1", "-"}, -- pronounced /χ/ before optional consonant cluster and "e" or "i"
	{"ch", "k", "-"}, -- otherwise /k/
	-- NG
	{"ng", "ŋ", "-"}, -- pronounced /ŋ/
	-- SH/SJ
	{"s[hj]", "ʃ", "-"}, -- pronounced /ʃ/ (only the digraphs "sh" and "sj", not every "s")
	-- DJ/TJ
	{"[dt]jie", "kį", "-"}, -- pronounced /-ci/ in the suffix "-djie"/"-tjie"
	{"dj", "ʤ", "-"}, -- "dj" is otherwise /d͡ʒ/
	{"tj", "ʧ", "-"}, -- "tj" is otherwise /t͡ʃ/
	-- C
	{"c([ei])", "s%1", "-"}, -- pronounced /s/ before "e" or "i" (the vowel is captured and kept)
	{"c", "k", "-"}, -- otherwise /k/
	-- GH
	{"gh", "ɡ", "-"}, -- pronounced /ɡ/
	-- G
	{"g", "ɡ", "en"}, -- pronounced /ɡ/ in english loans
	{"r‧ge", "r‧ɡe", "-"}, -- pronounced /ɡ/ between /r/ and /ə/
	{"g", "χ", "-"}, -- otherwise /χ/
	{"n(‧?[ɡk])", "ŋ%1", "-"}, -- /ŋ/ is an allophone of /n/ before /ɡ/ and /k/
	-- V
	{"v", "f", "af"}, -- pronounced /f/ in native words
	-- W
	{"w", "w", "en"}, -- pronounced /w/ in english loans
	{"w", "v", "-"}, -- otherwise /v/
	-- EAU
	{"eaux?", "OU", "fr"}, -- pronounced /œʊ̯/ in french loans
	-- OI
	{"oi", "wA", "fr"}, -- pronounced /wa/ in french loans
	-- IJ
	{"ij(" .. non_V .. ")", "EI%1", "-"}, -- pronounced /əɪ̯/ in dutch-based names
	-- X
	{"#x", "#s", "-"}, -- pronounced /s/ word-initially
	{"x", "ks", "-"}, -- otherwise /ks/
	-- H
	{"(" .. CV .. ")h", "%1", "-"}, -- silent if part of consonant digraph or syllable-final
	{"h", "ɦ", "-"}, -- otherwise /ɦ/
	-- O
	{"o(" .. S .. ")", "OU%1", "en"}, -- pronounced /œʊ̯/ in open syllables in english loans
	{"o#", "ů#", "-"}, -- otherwise /u/ in word-final position
	-- U
	{"u(" .. C .. ")", "A%1", "en"}, -- pronounced /a/ in closed syllables in english loans
	{"u", "jů", "en"}, -- otherwise /ju/ in english loans
	-- Y
	{"y", "j", "en"}, -- pronounced /j/ in english loans
	{"y", "EI", "-"}, -- otherwise /əɪ̯/
	-- circumflex accent
	{circ, "ː", "-"} -- lengthens a vowel with its short quality
}
-- canonicalisation function
-- Normalises raw input and returns it as an array of space-separated words;
-- each comma becomes a separate "|" word marking a pause.
local function canonicalise(text)
	-- decomposed, lowercase form
	local cleaned = lower(decomp(text))
	-- squeeze every whitespace run to one space, then drop a leading and a
	-- trailing space
	cleaned = rsub(cleaned, "%s+", " ")
	cleaned = rsub(rsub(cleaned, "^ ", ""), " $", "")
	-- a comma (with any surrounding whitespace) becomes a standalone "|"
	cleaned = rsub_repeatedly(cleaned, "%s*,%s*", " | ")
	return split(cleaned, " ")
end
-- syllabification function
-- Inserts the syllable dot (`syll`) into `word` and returns the result.
-- `etyl` and `pos` are accepted but not used in the body.
-- The steps are order-dependent: graphemes are wrapped in braces first so the
-- later steps can treat them as units, and the final step collapses any run of
-- syllable dots into one.
-- NOTE(review): several patterns below ("{*}", "()‧l", "()‧r", "()‧j",
-- "()‧h" and the empty pattern near the end) look truncated, as if their
-- character classes were lost; confirm against the original before relying
-- on this function.
local function syllabify(word, etyl, pos)
-- remove diaeresis and split syllable (note: diaeresis shouldn't be displayed in its hyphenation form)
word = rsub(word, "(" .. V .. ")" .. dia, syll .. "%1")
-- mark trigraphs and digraphs with curly braces
for _, graph in ipairs(graphemes_sorted) do
word = rsub(word, graph, "{" .. graph .. "}")
end
-- add dot before consonant + vowel
word = rsub(word, "(" .. C .. "?{?" .. V .. A .. "?)", syll .. "%1")
-- remove any dots inside brackets
word = rsub(word, "{*}", function(a) return rsub(a, syll, "") end)
-- shift dot before certain consonant clusters and digraphs
word = rsub(word, "()‧l", syll .. "%1l") -- clusters with l
word = rsub(word, "()‧r", syll .. "%1r") -- clusters with r
word = rsub(word, "()‧j", syll .. "%1j") -- digraphs with j
word = rsub(word, "()‧h", syll .. "%1h") -- digraphs with h
word = rsub(word, "n‧g", "ng‧") -- ng is syllable-final
-- remove leading dots and brackets
-- word = rsub(word, "^(" .. non_V .. "*)" .. syll, "%1")
-- turn literal full stops into syllable dots
word = rsub(word, "%.", syll)
-- NOTE(review): presumably meant to strip the curly braces added above
word = rsub(word, "", "") -- comment out to debug
-- collapse consecutive syllable dots into a single one
return rsub(word, syll .. "+", syll)
end
-- hyphen depth check function
-- Returns the pattern for a literal hyphen ("%-") at depth 1 and an empty
-- string at every other depth.
local function is_hyphen_depth(depth)
	if depth == 1 then
		return "%-"
	end
	return ""
end
-- onset validation function
-- Returns true when `string` starts with the syllable dot, or with "s"
-- immediately followed by the syllable dot; false otherwise.
local function is_valid_onset(string)
	local matched = find(string, "^" .. syll) or find(string, "^s" .. syll .. "")
	return matched and true or false
end
-- rest of string function
-- Returns `string` with as many characters as `affix` is long cut off: from
-- the start when `affix_type` is "pre", from the end for any other type.
-- The function does not check that `string` actually contains `affix`.
local function get_rest_string(string, affix, affix_type)
	local affix_length = len(affix)
	if affix_type ~= "pre" then
		return sub(string, 1, -affix_length - 1)
	end
	return sub(string, affix_length + 1)
end
-- affix validation function
-- Decides whether `affix` may be split off `string`.
--   string     the word being analysed
--   affix      an entry of the affixes table: affix[1] is the affix text,
--              with optional `pos` and `restriction` pattern fields
--              (a bare string is also accepted)
--   affix_type "pre" for prefixes, anything else for suffixes
--   pos        part-of-speech string matched against affix.pos
--   depth      nesting depth; a hyphen is required next to the affix at depth 1
-- Returns true or false.
local function is_valid_affix(string, affix, affix_type, pos, depth)
	-- entries are tables, so the text must be taken from the first element
	-- before it can be measured or concatenated into a pattern
	local form = type(affix) == "table" and affix[1] or affix
	local is_prefix = affix_type == "pre"
	-- get rest of string
	local rest = get_rest_string(string, form, affix_type)
	-- reject on an unmet part-of-speech restriction
	if affix.pos and not find(pos, affix.pos) then
		return false
	end
	if is_prefix then
		-- reject on an unmet explicit restriction on the remainder
		if affix.restriction and not find(rest, affix.restriction) then
			return false
		end
		-- reject when the remainder does not begin with a valid onset
		if not is_valid_onset(syllabify(rest)) then
			return false
		end
		-- reject on an explicit word boundary
		if find(rest, "^%+") then
			return false
		end
		-- reject when the remainder has no vowel
		if not find(rest, V) then
			return false
		end
	end
	-- reject when only one or two characters would remain
	if find(rest, "^..?$") then
		return false
	end
	-- match hyphen at appropriate depth
	local hyphen = is_hyphen_depth(depth)
	-- prefixes are anchored at the start, suffixes at the end
	local pattern = is_prefix and "^" .. form .. hyphen or hyphen .. form .. "$"
	return find(string, pattern) ~= nil
end
-- affix application function
-- Marks at most one prefix (with a trailing ">") and at most one suffix (with
-- a leading "<") in `string`, replacing the hyphen that is required at depth 1.
-- The first matching entry of each list wins. Returns the marked string.
local function apply_affixes(string, depth, pos)
	-- match hyphen at appropriate depth
	local hyphen = is_hyphen_depth(depth)
	-- process prefixes
	for _, affix in ipairs(affixes.pre) do
		if is_valid_affix(string, affix, "pre", pos, depth) then
			-- add prefix marker >
			string = rsub(string, "^" .. affix[1] .. hyphen, affix[1] .. ">")
			break
		end
	end
	-- process suffixes
	for _, affix in ipairs(affixes.suf) do
		if is_valid_affix(string, affix, "suf", pos, depth) then
			-- add suffix marker <
			string = rsub(string, hyphen .. affix[1] .. "$", "<" .. affix[1])
			break
		end
	end
	return string
end
-- stress assignment function (FIXME)
-- Not implemented yet: the body contains only the planned steps, so the
-- function currently returns nothing.
local function assign_stress(string, etyl, pos)
-- first check for stressed endings list and stress on that syllable
-- then go to the FIRST syllable after the > sign or just the first syllable otherwise
-- however, an immediate < should force the stress just before
end
-- components parsing function
-- Recursively breaks `word` into components and returns the annotated string.
--   depth 0: split on double hyphens, rejoin with "--"
--   depth 1: mark hyphenated affixes, split on single hyphens, rejoin with "-"
--   depth 2: mark non-hyphenated affixes and syllabify
-- Any other depth returns `word` unchanged. `depth` defaults to 0 and `pos`
-- to ".*".
local function split_components(word, depth, etyl, pos)
	depth = depth or 0
	pos = pos or ".*"

	-- innermost level: nothing left to split
	if depth == 2 then
		return syllabify(apply_affixes(word, depth, pos), etyl, pos)
	end
	if depth ~= 0 and depth ~= 1 then
		return word
	end

	local separator, joiner
	if depth == 0 then
		separator, joiner = "%-%-", "--"
	else
		-- explicitly mark hyphenated prefixes and suffixes with > and <
		-- before splitting on the remaining hyphens
		word = apply_affixes(word, depth, pos)
		separator, joiner = "%-", "-"
	end

	-- a word without the separator yields a single piece, which is simply
	-- processed one level deeper
	local pieces = split(word, separator)
	for index, piece in ipairs(pieces) do
		pieces[index] = split_components(piece, depth + 1, etyl, pos)
	end
	return table.concat(pieces, joiner)
end
-- component generation function
-- Runs split_components on each word, wraps every result in "#" word markers
-- and joins the results with single spaces.
local function to_components(words, etyl, pos)
	local marked = {}
	for _, word in ipairs(words) do
		marked[#marked + 1] = "#" .. split_components(word, 0, etyl, pos) .. "#"
	end
	return table.concat(marked, " ")
end
-- hyphenation function
-- Returns the syllabified, NFC-recomposed form of `term`.
--   term  either the term itself or a table with an `args` field (such as a
--         frame), in which case the first positional argument is used
--   etyl, pos  accepted but not used in the body
function export.hyphenation(term, etyl, pos)
	-- get user input from table: the string functions below need the
	-- argument itself, not the whole args table
	if type(term) == "table" then
		term = term.args[1]
	end
	-- mark all word borders
	-- NOTE(review): as written "(+)" only matches a literal "+"; the pattern
	-- looks truncated. Confirm the intended word pattern.
	term = rsub(term, "(+)", "#%1#")
	-- format hyphenation
	-- local data = { lang = lang, sc = sc, hyphs = {{hyph = rsub(syllabify(term), "<>]", ""), "%.")}} }
	-- return hyphen.format_hyphenations(data)
	-- NOTE(review): "<>]" matches only that literal three-character sequence;
	-- presumably a character class was intended. Confirm.
	return rsub(recomp(syllabify(term)), "<>]", "")
end
-- generate substitutions function
-- Selects the rules from `subs` that apply to the given etymology.
--   term, pos  accepted but not used in the body
--   etyl       etymology code; rules tagged with it take part alongside the
--              default rules (tagged "-")
-- Returns an array of {pattern, replacement} pairs in list order. Only the
-- first applicable rule for a pattern is kept, so an etymology-specific rule
-- listed before the default rule for the same pattern overrides it.
local function generate_subs(term, etyl, pos)
	local to_sub = {}
	local seen_patterns = {}
	for _, s in ipairs(subs) do
		-- each rule is a {pattern, replacement, etymology code} triple
		local s_patt, s_repl, s_etyl = s[1], s[2], s[3]
		-- only add if pattern wasn't added already, and only default rules or
		-- rules for this etymology
		if not seen_patterns[s_patt] and (s_etyl == "-" or s_etyl == etyl) then
			to_sub[#to_sub + 1] = {s_patt, s_repl}
			seen_patterns[s_patt] = true
		end
	end
	return to_sub
end
-- pronunciation function
-- Currently only canonicalises `text`, marks its components with
-- to_components and applies one final substitution; stress assignment and the
-- grapheme-to-phoneme steps are still commented out below.
-- `etyl` and `pos` are passed through to to_components.
local function toIPA(text, etyl, pos)
-- canonicalise term as array of words
local words = canonicalise(text)
-- mark text with appropriate components
local term = to_components(words, etyl, pos)
-- add stress to term
-- term = stress(term, etyl, pos)
-- shift stress rightwards to a syllable boundary
-- term = rsub(term, "(*)ˈ", "ˈ%1")
--[[
-- prepare table to substitute the appropriate phonemes based on etymology and part of speech
local to_sub = generate_subs(term, etyl, pos)
-- go over substitution table
for _, s in ipairs(to_sub) do
local k, v = s, s
rsub(term, k, v)
end
-- make text lowercase again
term = lower(term)
-- substitute graphemes
for graph, phoneme in pairs(graphemes) do
term = rsub(term, graph, phoneme)
end
-- substitute single-letter vowels
term = rsub(term, "()()", function(a, b)
if match("", b) then
return sets.vowel_length .. b -- for open syllables
else
return sets.vowel_length .. b -- for closed syllables
end
end)
-- replace į, ů, ü with their actual phonetic values
term = rsub(term, "", { = "i", = "u", = "y"})
-- remove double consonants
term = rsub(term, "(.)(‧?)%1", "%2%1")
]]--
-- final adjustments
-- term = rsub(term, "‧", ".")
-- NOTE(review): the pattern "]" looks truncated (presumably a character
-- class of marker characters to strip); confirm against the original.
return rsub(term, "]", "")
end
-- main export function
-- Returns the result of toIPA for `term`.
--   term  either the term itself or a table with an `args` field (such as a
--         frame), in which case the first positional argument is used
--   etyl, pos  passed through to toIPA unchanged
function export.show(term, etyl, pos)
	-- get user input from table: toIPA works on a string, not on the whole
	-- args table
	if type(term) == "table" then
		term = term.args[1]
	end
	return toIPA(term, etyl, pos)
end
return export