-- Module:User:Thadh/nl-IPA
--
-- This module sandbox lacks a documentation subpage. Please create it.
-- Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox
-- Table of functions exported by this module (returned at the end of the file).
local export={}

-- Helper modules: IPA formatting, accent/dialect qualifiers, and the language
-- object for Dutch ("nl").
local m_IPA = require("Module:IPA")
local m_a = require("Module:accent qualifier")
local lang = require("Module:languages").getByCode("nl")
-- Short aliases for the Unicode-aware string functions used throughout.
local rsub = mw.ustring.gsub
local rlower = mw.ustring.lower

-- Pattern fragments that are concatenated into the substitution patterns below.
-- V / C are used on text that is still (partly) orthographic, V_IPA / C_IPA on
-- text after the letter-to-phoneme mapping.
-- NOTE(review): all of these are empty strings in this copy; they look like
-- character classes (vowels / consonants) whose contents were lost. With empty
-- values the patterns built from them do not mean what they appear to intend —
-- restore from the canonical module before relying on this file.
local V = ""
local V_IPA = ""
local C = ""
local C_IPA = ""
-- T is not referenced anywhere in the visible code.
local T = ""
-- U+0308 (combining diaeresis); appended to vowels in `diac` and consumed in phonemic().
local t = mw.ustring.char(0x0308)
-- U+002E (full stop, "."); inserted by phonemic() in front of / after vowels,
-- presumably as a syllable-break marker — TODO confirm.
local p = mw.ustring.char(0x002E)

-- Lookup table used by phonemic() as the replacement argument of
-- rsub(text, ".", diac): each single character of the text is looked up as a
-- key, and the values are a plain vowel followed by the combining diaeresis `t`.
-- NOTE(review): the keys are missing in this copy (each entry starts with `=`),
-- so the table is not valid Lua as shown; presumably the keys were the
-- precomposed diaeresis vowels — restore and confirm.
local diac = {
	="a" .. t,	="e" .. t,	="i" .. t,	="o" .. t,	="u" .. t
}

-- Lookup table used by phonemic() as the replacement argument of
-- rsub(text, ".", phon): each single character is looked up as a key and
-- replaced by the IPA symbol stored as the value (characters without an entry
-- are left unchanged by gsub).
-- NOTE(review): the keys are missing in this copy (each entry starts with `=`),
-- so the table is not valid Lua as shown — restore from the canonical module.
local phon = {
	="b",	="d",	="f",	="ɣ",	="ɦ",
	="j",	="k",	="l",	="m",	="n",
	="p",	="r",	="s",	="t",	="v",
	="ʋ",	="z",
	="ɑ",	="ə",	="ɪ",	="ɔ",	="ʏ",
	="ɛː",	="ɔː",
	="a",	="e",	="i",	="o",	="u"
}

-- Lookup table used by phonemic() as the replacement argument of
-- rsub(text, "..", digr): each two-character window of the text is looked up
-- as a key and replaced by the stored value when an entry exists.
-- NOTE(review): the keys are missing in this copy (each entry starts with `=`),
-- so the table is not valid Lua as shown; presumably the keys were Dutch
-- digraphs — restore from the canonical module and confirm.
local digr = {
	="x",		="ʃ",		="ʒ",
	="ii",	="øː",	="ú",	
	="ɛí",	="ɛí",	="ɑú",	
	="ɑú",	="œy",
}

-- Converts `text` (Dutch spelling, optionally with stress marks) into the
-- string used as the phonemic transcription.
--
-- The function is a fixed sequence of substitutions; later rules operate on
-- the output of earlier ones, so the order of statements matters. Stages, in
-- order: lowercase; default stress marks; whole-word and suffix special cases;
-- digraphs (via `digr`); vowel length marking; <e> handling; letter-to-IPA
-- mapping (via `phon`); final clean-up that moves a captured mark in front of
-- the preceding consonant.
--
-- NOTE(review): several patterns contain `(?)` or `()` and the fragments
-- V / C / V_IPA / C_IPA are empty in this copy; it looks like character
-- classes were lost, so those rules cannot be taken at face value.
--
-- @param text  string to transcribe
-- @return      the transformed string
local function phonemic(text)
	text = rlower(text)
	-- stress
	-- If no primary stress mark is present, put one at the very start.
	if mw.ustring.find(text, "ˈ") == nil then
		text = "ˈ" .. text
	end
	-- If no secondary stress mark is present, every "-" is turned into one.
	if mw.ustring.find(text, "ˌ") == nil then
		text = mw.ustring.gsub(text, "-", "ˌ")
	end
	-- special cases
	-- These compare the whole input (with the stress mark added above), so they
	-- only apply to single-word input; most results carry no stress mark.
	if text == "ˈeen" then text = "ən" end
	if text == "ˈéén" then text = "ˈeen" end
	if text == "ˈde" then text = "də" end
	if text == "ˈge" then text = "gə" end
	if text == "ˈje" then text = "jə" end
	if text == "ˈme" then text = "mə" end
	if text == "ˈwe" then text = "wə" end
	if text == "ˈze" then text = "zə" end
	-- NOTE(review): the first two rules already rewrite every "iken" at the end
	-- of the text or before a space (the consonant is optional), so the two
	-- "əken" rules that follow can no longer find "iken" — confirm intent.
	text = rsub(text, "(" .. C .. "?)iken$", "%1ɪken")
	text = rsub(text, "(" .. C .. "?)iken ", "%1ɪken ")
	text = rsub(text, "(" .. C .. ")iken$", "%1əken")
	text = rsub(text, "(" .. C .. ")iken ", "%1əken ")
	text = rsub(text, "oeuvre$", "œːvre")
	-- NOTE(review): this is a plain string comparison against a pattern-like
	-- string, and "%1" would be assigned literally; it looks like an rsub call
	-- was intended (to exempt some "lijk" words from the rule below) — confirm.
	if text == "(" .. C .. "?" .. C .. "?)lijk" then text = "%1lɛík" end
	text = rsub(text, "lijk", "lək")
	-- NOTE(review): same issue as the "lijk" comparison above.
	if text == "(" .. C .. "?" .. C .. "?)ig" then text = "%1ɪg" end
	-- NOTE(review): as written this prefixes "ə" to every "g" in the text.
	text = rsub(text, "g", "əg")
	text = rsub(text, "é", "éː")
	text = rsub(text, "ü", "yː")
	text = rsub(text, "auw", "áːú")
	text = rsub(text, "eeuw", "éːú")
	text = rsub(text, "ieuw", "íːú")
	text = rsub(text, "ouw", "áːú")
	-- digraphs
	text = rsub(text, "ng", "ŋ")
	text = rsub(text, "êŋ", "ɛ̃ː")
	text = rsub(text, "ôŋ", "ɔ̃ː")
	text = rsub(text, "âŋ", "ɑ̃ː")
	-- Six passes (index 0..5). Each pass looks up two-character windows in
	-- `digr` twice: once as-is, and once with "@" prefixed so the windows are
	-- shifted by one character; the "@" is removed again afterwards.
	local index = 0
	while index <= 5 do
		text = rsub(text, "..", digr)
		text = rsub("@" .. text, "..", digr)
		text = rsub(text, "@", "")
		index = index + 1
	end
	-- long vowels
	-- Split diaeresis vowels into vowel + `t`, then replace that pair by `p`
	-- followed by the vowel.
	text = rsub(text, ".", diac)
	text = rsub(text, "(" .. V .. ")" .. t, p .. "%1")
	-- Vowels are doubled in the contexts below; the doubled spellings are then
	-- rewritten to the intermediate symbols that follow.
	text = rsub(text, "(" .. V .. ")(" .. C .. "?" .. V .. ")", "%1%1%2")
	text = rsub(text, "(" .. V .. ")(" .. C .. V .. ")", "%1%1%2")
	text = rsub(text, "(" .. C .. "*)e(" .. C .. V .. ")", "%1ee" .. p .. "%2")
	text = rsub(text, "(" .. C .. ")(" .. V .. ")$", "%1%2%2")
	text = rsub(text, "(" .. C .. ")(" .. V .. ")%s", "%1%2%2 ")
	text = rsub(text, "()(" .. V .. ")$", "%1%2%2")
	text = rsub(text, "()(" .. V .. ")%s", "%1%2%2 ")
	text = rsub(text, "aa", "áː")
	text = rsub(text, "ee", "eː")
	text = rsub(text, "ii", "í")
	text = rsub(text, "oo", "óː")
	text = rsub(text, "uu", "y")
	text = rsub(text, "əə", "ə")
	-- schwa
	-- Original note on both lines: "any unstressed <e> becomes a schwa".
	-- NOTE(review): the replacements written here are "éː" and "ɛ", not "ə";
	-- the pattern also seems to lack the stress-mark context — confirm.
	text = rsub(text, "(" .. C .. "?" .. C .. "?" .. C .. "?)eː", "%1éː") -- any unstressed <e> becomes a schwa
	text = rsub(text, "(" .. C .. "?" .. C .. "?" .. C .. "?)e", "%1ɛ") -- any unstressed <e> becomes a schwa
	text = rsub(text, "eː", "e")
	-- general phonology
	-- Map every remaining single character through `phon`.
	text = rsub(text, ".", phon)
	-- Move the captured middle element in front of the preceding consonant
	-- (at text start and after a space).
	text = rsub(text, "^(" .. C_IPA .. ")(?)(" .. C_IPA .. ")", "%2%1%3")
	text = rsub(text, " (" .. C_IPA .. ")(?)(" .. C_IPA .. ")", " %2%1%3")
	-- Drop "ɦ" at the end of the text or before whitespace.
	text = rsub(text, "ɦ$", "")
	text = rsub(text, "ɦ%s", " ")
	-- Collapse a doubled consonant into one.
	text = rsub(text, "(" .. C_IPA .. ")%1", "%1")
	text = rsub(text, "(" .. C_IPA .. ")(?)(" .. V_IPA .. ")", "%2%1%3")
	return text
end

-- Builds a narrower transcription on top of phonemic(text) by applying a
-- further ordered list of substitutions (final devoicing, tj/dj/sj, coda
-- liquids, long vowels, phonetic values, nasal assimilation, voicing
-- assimilation, /s z/ and /t/ details). export.IPA pairs this variety with the
-- qualifiers "Holland" and "Utrecht".
--
-- Rules come in pairs where relevant: one anchored at the end of the text
-- ("$") and one before whitespace ("%s"), so they apply at the end of every
-- word.
--
-- NOTE(review): many patterns below contain `(?)` or an empty `()`; it looks
-- like character classes were lost in this copy. As written, `()` is a
-- position capture that matches everywhere, so these rules must be restored
-- from the canonical module before use.
--
-- @param text  string to transcribe (same input as phonemic)
-- @return      the transformed string
local function NL(text)
	text = phonemic(text)
	-- final devoicing
	text = rsub(text, "b$", "p")
	text = rsub(text, "b%s", "p ")
	text = rsub(text, "d$", "t")
	text = rsub(text, "d%s", "t ")
	text = rsub(text, "v$", "f")
	text = rsub(text, "v%s", "f ")
	text = rsub(text, "z$", "s")
	text = rsub(text, "z%s", "s ")
	text = rsub(text, "ʒ$", "ʃ")
	text = rsub(text, "ʒ%s", "ʃ ")
	-- tj/dj/sj
	text = rsub(text, "t(?)j", "t͡%1ʃj")
	text = rsub(text, "d(?)j", "t͡%1ʃj")
	text = rsub(text, "s(?)j", "ʃ%1j")
	-- Across a word boundary the space is replaced by a tie ("‿").
	text = rsub(text, "t (?)j", "t͡ʃ‿%1j")
	text = rsub(text, "s (?)j", "ʃ‿%1j")
	text = rsub(text, "st͡ʃ‿(?)j", "ʃ‿%1j")
	-- final liquids
	-- /r/ and /l/ get a different symbol at word end and before a consonant.
	text = rsub(text, "r$", "ɹ")
	text = rsub(text, "r%s", "ɹ ")
	text = rsub(text, "r(?)(" .. C_IPA .. ")", "ɹ%1%2")
	text = rsub(text, "l$", "ɰ")
	text = rsub(text, "l%s", "ɰ ")
	text = rsub(text, "l(?)(" .. C_IPA .. ")", "ɰ%1%2")
	-- long vowels
	-- Long mid vowels are first written as diphthongs, then rewritten back to
	-- long vowels in the context captured by the rules that follow.
	-- NOTE(review): that context is an empty `()` in this copy.
	text = rsub(text, "øː", "øʏ")
	text = rsub(text, "eː", "eɪ")
	text = rsub(text, "oː", "oʊ")
	text = rsub(text, "øʏ()", "øː%1")
	text = rsub(text, "eɪ()", "eː%1")
	text = rsub(text, "oʊ()", "oː%1")
	text = rsub(text, "œy()", "ɛː%1")
	text = rsub(text, "ɛi()", "ɛː%1")
	-- Insert "ʋ" after a long vowel + "u", then remove it again at word end.
	text = rsub(text, "()ːu", "%1ːuʋ")
	text = rsub(text, "uʋ$", "u")
	text = rsub(text, "uʋ%s", "u ")
	-- phonetic values
	text = rsub(text, "x", "χ")
	text = rsub(text, "r", "ʀ")
	text = rsub(text, "ɣ", "χ")
	text = rsub(text, "ɑu", "aʊ")
	text = rsub(text, "ɛi", "ɛɪ")
	text = rsub(text, "(" .. C_IPA .. "?)()ː", "%1%2ːː")
	text = rsub(text, "()ː", "%1")
	-- Mark the "n" of "ən" as optional: before a consonant-initial next word,
	-- at the end of the text, and before "t".
	-- NOTE(review): the first rule uses C rather than C_IPA although the text
	-- is already in IPA at this point — confirm.
	text = rsub(text, "ən%s(" .. C .. ")", "ə(n) %1")
	text = rsub(text, "ən$", "ə(n)")
	text = rsub(text, "ən(?)t", "ə(n)%1t")
	-- NOTE(review): in a pattern "(n)" is a capture, so this matches "ən" at the
	-- start (not the literal "ə(n)") and replaces it with itself; "%(n%)" was
	-- presumably intended — confirm.
	text = rsub(text, "^ə(n)", "ən")
	text = rsub(text, "%sən", " ən")
	-- Add U+032F (combining inverted breve below) after a vowel + optional
	-- length mark + vowel sequence.
	text = rsub(text, "(" .. V_IPA .. "ː?" .. V_IPA .. ")", "%1" .. mw.ustring.char(0x032F))
	text = rsub(text, "u̯ʋ", "w")
	text = rsub(text, "ɪ̯%.ə", "ɪ̯ə")
	-- nasal assimilation
	-- NOTE(review): the "n%s(?)" pattern occurs twice here with identical text
	-- but different replacements ("m" and "ŋ"), as does "n()", and two patterns
	-- have no "n" at all; the following-consonant classes appear to be missing.
	text = rsub(text, "n%s(?)", "m %1")
	text = rsub(text, "n()", "m%1")
	text = rsub(text, "%s(?)", "ɱ %1")
	text = rsub(text, "()", "ɱ%1")
	text = rsub(text, "n%s(?)", "ŋ %1")
	text = rsub(text, "n()", "ŋ%1")
	text = rsub(text, "n%s(?χ)", "ɴ %1")
	text = rsub(text, "nχ", "ɴχ")
	-- devoice fricative after voiceless obstruent
	text = rsub(text, "( ?)v", "%1f")
	text = rsub(text, "( ?)z", "%1s")
	text = rsub(text, "dz", "ts")
	text = rsub(text, "bz", "ps")
	text = rsub(text, "vz", "fs")
	-- voiceless obstruents become voiced before voiced stop
	text = rsub(text, "p()", "b%1")
	text = rsub(text, "t()", "d%1")
	text = rsub(text, "k()", "ɡ%1")
	text = rsub(text, "f()", "v%1")
	text = rsub(text, "s()", "z%1")
	-- retracted /s/, /z/
	-- Appends U+0320 (combining minus sign below) to the captured segment.
	text = rsub(text, "()", "%1" .. mw.ustring.char(0x0320))
	-- /t/
	-- Every "t" gets a superscript "ˢ"; the second rule removes it again where
	-- the "t" is followed by the tie bar of an affricate.
	text = rsub(text, "t", "tˢ")
	text = rsub(text, "tˢ͡", "t͡")

	return text
end

-- Builds a narrower transcription on top of phonemic(text) by applying a
-- further ordered list of substitutions (final devoicing, voicing
-- assimilation, tj/dj/sj, coda /r/ and /l/, vowel length and quality, optional
-- "n" after schwa, nasal assimilation). export.IPA pairs this variety with the
-- qualifiers "Antwerp" and "Vlaams Brabant".
--
-- Rules come in pairs where relevant: one anchored at the end of the text
-- ("$") and one before whitespace ("%s"), so they apply at the end of every
-- word.
--
-- NOTE(review): many patterns below contain `(?)` or an empty `()`; it looks
-- like character classes were lost in this copy. As written, `()` is a
-- position capture that matches everywhere, so these rules must be restored
-- from the canonical module before use.
--
-- @param text  string to transcribe (same input as phonemic)
-- @return      the transformed string
local function BE_brab(text)
	text = phonemic(text)
	-- final devoicing (indicate as phonemic?)
	text = rsub(text, "b$", "p")
	text = rsub(text, "b%s", "p ")
	text = rsub(text, "d$", "t")
	text = rsub(text, "d%s", "t ")
	text = rsub(text, "ɣ$", "x")
	text = rsub(text, "ɣ%s", "x ")
	text = rsub(text, "v$", "f")
	text = rsub(text, "v%s", "f ")
	text = rsub(text, "z$", "s")
	text = rsub(text, "z%s", "s ")
	text = rsub(text, "ʒ$", "ʃ")
	text = rsub(text, "ʒ%s", "ʃ ")
	-- voiceless obstruents become voiced before voiced stop (BUT /td/ → "tt", see the "td" rule)
	text = rsub(text, "p()", "b%1")
	text = rsub(text, "tb", "db")
    text = rsub(text, "td", "tt") -- ! (for now as two t's as degemination isn't implemented yet)
	text = rsub(text, "k()", "ɡ%1")
	text = rsub(text, "f()", "v%1")
	text = rsub(text, "s()", "z%1")
	-- devoice fricative after voiceless obstruent
	text = rsub(text, "( ?)v", "%1f")
	text = rsub(text, "( ?)z", "%1s")
	text = rsub(text, "dz", "ts")
	text = rsub(text, "bz", "ps")
	text = rsub(text, "vz", "fs")
	-- TO DO: degemination (see discussion in 'Wat is de beste transcriptie voor het Nederlands' p.345)
	-- tj/dj/sj
	-- Unlike NL(), the "j" is dropped from the result here.
	text = rsub(text, "t(?)j", "t͡%1ʃ")
	text = rsub(text, "d(?)j", "t͡%1ʃ") -- but: djembé
	text = rsub(text, "s(?)j", "ʃ%1")
--	text = rsub(text, "t (?)j", "t͡ʃ‿%1")
--	text = rsub(text, "s (?)j", "ʃ‿%1j")
	-- voiceless final /r/ in coda  (most often with frication, not indicated)
	-- see 'The Sociophonetics and Phonology of Dutch r', p.88-89 (Antwerp)
	-- U+0325 is the combining ring below; it is added to "ɾ" at word end and
	-- before a consonant. Every remaining "r" becomes a plain "ɾ".
	text = rsub(text, "r$", "ɾ" .. mw.ustring.char(0x0325))
	text = rsub(text, "r%s", "ɾ" .. mw.ustring.char(0x0325) .. " ") -- but what with following vowel in next word?
	text = rsub(text, "r(?)(" .. C_IPA .. ")", "ɾ" .. mw.ustring.char(0x0325) .. "%1%2")
	text = rsub(text, "r", "ɾ")
	-- final /l/
	text = rsub(text, "l$", "ɫ")
	text = rsub(text, "l%s", "ɫ ")
	text = rsub(text, "l(?)(" .. C_IPA .. ")", "ɫ%1%2")
	-- vowels
	-- Order matters: "i" is lengthened before "ɪ" is rewritten to "i", and
	-- likewise "y" before "ʏ", so the new short vowels are not lengthened.
	text = rsub(text, "ɛi", "ɛː")
	text = rsub(text, "œy", "œː")
	text = rsub(text, "i", "iː")  -- but: exceptions like  (or is this regular with two consonants in coda?)
	text = rsub(text, "ɪ", "i")
	text = rsub(text, "y", "yː")
	text = rsub(text, "ʏ", "y")
	-- "ɑu" is parked as the placeholder "Å" so the "u" and "ɑ" rules that
	-- follow do not touch it; it is restored afterwards.
	text = rsub(text, "ɑu", "Å") -- temporary hack
	text = rsub(text, "("..C_IPA..")u", "%1uː")  -- but in many words short: moet/moed-split
	text = rsub(text, "^u", "uː")
	text = rsub(text, "ɑ", "a")
	text = rsub(text, "Å", "ɑu") -- convert back
	-- Collapse a doubled length mark produced by the lengthening rules.
	text = rsub(text, "ːː", "ː")
	-- TO DO: short vowels in unstressed syllables (also for NL): ; /i/ short when unstressed ?
	
	-- Add U+032F (combining inverted breve below) after a vowel + optional
	-- length mark + vowel sequence.
	text = rsub(text, "(" .. V_IPA .. "ː?" .. V_IPA .. ")", "%1" .. mw.ustring.char(0x032F))
	-- "ʋ" is written as "β" plus U+031E (combining down tack below).
	text = rsub(text, "ʋ", "β" .. mw.ustring.char(0x031E))
	-- Mark the "n" of "ən" as optional: before a consonant-initial next word,
	-- at the end of the text, and before "t".
	text = rsub(text, "ən%s(" .. C .. ")", "ə(n) %1")
	text = rsub(text, "ən$", "ə(n)")
	text = rsub(text, "ən(?)t", "ə(n)%1t")
	-- NOTE(review): in a pattern "(n)" is a capture, so this matches "ən" at the
	-- start (not the literal "ə(n)") and replaces it with itself; "%(n%)" was
	-- presumably intended — confirm.
	text = rsub(text, "^ə(n)", "ən")
	text = rsub(text, "%sən", " ən")
	-- TO DO: glide in 'zeeën'
--	text = rsub(text, "(" .. V_IPA .. "ː?" .. V_IPA .. ")", "%1" .. mw.ustring.char(0x032F))
    -- nasal assimilation
	-- NOTE(review): the "n%s(?)" pattern occurs twice here with identical text
	-- but different replacements ("m" and "ŋ"), as does "n()", and two patterns
	-- have no "n" at all; the following-consonant classes appear to be missing.
	text = rsub(text, "n%s(?)", "m %1")
	text = rsub(text, "n()", "m%1")
	text = rsub(text, "%s(?)", "ɱ %1")
	text = rsub(text, "()", "ɱ%1")
	text = rsub(text, "n%s(?)", "ŋ %1")
	text = rsub(text, "n()", "ŋ%1")
	
	return text
end
	

-- Template entry point. Reads the positional parameters of the calling
-- template (the parent frame), transcribes each one, and returns wikitext:
-- a bullet with the phonemic transcriptions and, inside a collapsible
-- "vsSwitcher" box, one sub-bullet per regional variety.
--
-- @param frame  Scribunto frame object of the #invoke call
-- @return       wikitext string
-- Raises an error when no positional parameter was supplied.
function export.IPA(frame)
	local args = frame:getParent().args
	local words = {}

	for _, word in ipairs(args) do
		table.insert(words, word)
	end

	if #words == 0 then
		error("Please add a first parameter for the pronunciation module!!")
	end

	local results_phon = {}
	local results_nl = {}
	local results_be_brab = {}

	for _, word in ipairs(words) do
		table.insert(results_phon, { pron = "/" .. phonemic(word) .. "/" })
		-- NOTE(review): both conditions compare the argument table itself with
		-- "-", which is always true. This looks like a per-variety opt-out
		-- parameter whose name is missing (e.g. args[<name>] ~= "-"); the name
		-- cannot be recovered from this file, so the comparison is kept as is.
		-- The regional rows previously carried an empty string and NL/BE_brab
		-- were never called; they now carry the phonetic transcription in
		-- square brackets.
		if args ~= "-" then
			table.insert(results_nl, { pron = "[" .. NL(word) .. "]" })
		end
		if args ~= "-" then
			table.insert(results_be_brab, { pron = "[" .. BE_brab(word) .. "]" })
		end
	end

	-- The collapsible wrapper is only needed when at least one regional row
	-- exists. Computing the flag once keeps every opening tag paired with its
	-- closing tag (the old closing check `not #results_nl == 0` parsed as
	-- `(not #results_nl) == 0`, which is always false).
	local has_regional = #results_nl ~= 0 or #results_be_brab ~= 0

	local show = ""

	if has_regional then
		show = show .. '\n<div class="vsSwitcher" data-toggle-category="pronunciations"><span class="vsToggleElement"></span>'
	end
	show = show .. "\n*" ..  m_IPA.format_IPA_full { lang = lang, items = results_phon }
	if has_regional then
		show = show .. '<div class="vsHide">'
	end
	if #results_nl ~= 0 then
		show = show .. '\n**' .. m_a.format_qualifiers(lang, {"Holland", "Utrecht"}) ..
				" " .. m_IPA.format_IPA_full { lang = lang, items = results_nl }
	end
	if #results_be_brab ~= 0 then
		show = show .. '\n**' .. m_a.format_qualifiers(lang, {"Antwerp", "Vlaams Brabant"}) ..
				" " .. m_IPA.format_IPA_full { lang = lang, items = results_be_brab }
	end
	if has_regional then
		-- Close "vsHide", then "vsSwitcher".
		show = show .. '</div>' .. '</div>'
	end

	return show
end

return export
-- Module:User:Thadh/nl-IPA
--
-- Wikious
--
-- Boobota
--
-- Sagapedia