Note: This module is unfinished and should NOT be used in entries.
This module generates IPA from Icelandic orthography, using the rules given at Icelandic orthography.
All tests passed. (refresh)
| | Text | Expected | Actual |
---|---|---|---|
![]() | þorn | ˈθɔrtn̥ | ˈθɔrtn̥ |
![]() | himinn | ˈhɪːmɪnː | ˈhɪːmɪnː |
![]() | brúnn | ˈprutn̥ | ˈprutn̥ |
![]() | steinn | ˈstɛi̯tn̥ | ˈstɛi̯tn̥ |
![]() | geimsteinn (respelled geim-steinn) | ˈcɛi̯mstɛi̯tn̥ | ˈcɛi̯mstɛi̯tn̥ |
![]() | loftsteinn (respelled loft-steinn) | ˈlɔftstɛi̯tn̥ | ˈlɔftstɛi̯tn̥ |
![]() | karl | ˈkʰartl̥ | ˈkʰartl̥ |
![]() | rusl | ˈrʏstl̥ | ˈrʏstl̥ |
![]() | bysna | ˈpɪstn̥a | ˈpɪstn̥a |
![]() | ráps (respelled ráp.s) | ˈrau̯ːps | ˈrau̯ːps |
![]() | taka | ˈtʰaːka | ˈtʰaːka |
![]() | þökk | ˈθœhk | ˈθœhk |
![]() | vopn | ˈvɔhpn̥ | ˈvɔhpn̥ |
![]() | brotna | ˈprɔhtn̥a | ˈprɔhtn̥a |
![]() | sakna | ˈsahkn̥a | ˈsahkn̥a |
![]() | kembt | ˈcʰɛm̥t | ˈcʰɛm̥t |
![]() | þið | ˈθɪːð | ˈθɪːð |
![]() | guð | ˈkvʏːð | ˈkvʏːð |
![]() | byggja | ˈpɪcːa | ˈpɪcːa |
![]() | syngja | ˈsinca | ˈsinca |
![]() | munkur | ˈmuŋkʏr | ˈmuŋkʏr |
![]() | öngull | ˈœy̯ŋkʏtl̥ | ˈœy̯ŋkʏtl̥ |
![]() | drengur | ˈtrɛi̯ŋkʏr | ˈtrɛi̯ŋkʏr |
![]() | svangur | ˈsvau̯ŋkʏr | ˈsvau̯ŋkʏr |
![]() | England | ˈɛi̯ŋlant | ˈɛi̯ŋlant |
![]() | segja | ˈsɛi̯ːja | ˈsɛi̯ːja |
![]() | fluga | ˈflʏːɣa | ˈflʏːɣa |
![]() | fljúga | ˈfljuːa | ˈfljuːa |
![]() | bógur | ˈpou̯ːʏr | ˈpou̯ːʏr |
![]() | lágur | ˈlau̯ːʏr | ˈlau̯ːʏr |
![]() | prófa | ˈpʰrou̯ːa | ˈpʰrou̯ːa |
![]() | dags | ˈtaxs | ˈtaxs |
![]() | dragt | ˈtraxt | ˈtraxt |
![]() | guðspjall (respelled guð-spjall) | ˈkvʏðspjatl̥ | ˈkvʏðspjatl̥ |
![]() | september | ˈsɛftɛmpɛr | ˈsɛftɛmpɛr |
![]() | október | ˈɔxtou̯pɛr | ˈɔxtou̯pɛr |
![]() | gjalda | ˈcalta | ˈcalta |
![]() | geta | ˈcɛːta | ˈcɛːta |
![]() | kjósa | ˈcʰou̯ːsa | ˈcʰou̯ːsa |
![]() | keyra | ˈcʰɛi̯ːra | ˈcʰɛi̯ːra |
![]() | kirkja | ˈcʰɪrca | ˈcʰɪrca |
![]() | hlýr | ˈl̥iːr | ˈl̥iːr |
![]() | hratt | ˈr̥aht | ˈr̥aht |
![]() | spara | ˈspaːra | ˈspaːra |
![]() | þykja | ˈθɪːca | ˈθɪːca |
![]() | lofa | ˈlɔːva | ˈlɔːva |
![]() | rós | ˈrou̯ːs | ˈrou̯ːs |
![]() | vaxa | ˈvaxsa | ˈvaxsa |
![]() | mylla, special=true | ˈmɪlːa | ˈmɪlːa |
![]() | nudda | ˈnʏtːa | ˈnʏtːa |
![]() | kaþólikki | ˈkʰaːθou̯lɪhcɪ | ˈkʰaːθou̯lɪhcɪ |
local export = {}
local lang = require("Module:languages").getByCode("is")
local sc = require("Module:scripts").getByCode("Latn")
local m_ipa = require("Module:IPA")
-- Delegates to the tag_text function of Module:script utilities, supplying
-- this module's `lang` and `sc` objects. `text` and `face` are forwarded
-- unchanged, and whatever the helper returns is returned to the caller.
function export.tag_text(text, face)
	local script_utilities = require("Module:script utilities")
	return script_utilities.tag_text(text, lang, sc, face)
end
-- Delegates to full_link of Module:links. The link data table carries
-- `term` together with this module's `lang` and `sc` objects; `face` is
-- forwarded unchanged as the second argument.
function export.link(term, face)
	local m_links = require("Module:links")
	local link_data = { term = term, lang = lang, sc = sc }
	return m_links.full_link(link_data, face)
end
local sub = mw.ustring.sub
local find = mw.ustring.find
local format = mw.ustring.format
local gmatch = mw.ustring.gmatch
local gsub = mw.ustring.gsub
local len = mw.ustring.len
local lower = mw.ustring.lower
local split = mw.text.split
local U = require("Module:string/char")
-- IPA marks used when assembling transcriptions
local nonsyllabic = U(0x32F) -- inverted breve below
local voiceless = U(0x325) -- combining ring below
local long = U(0x2D0) -- triangular colon
local primary_stress = "ˈ"
local secondary_stress = "ˌ"
-- Pattern fragments. `consonants` and `vowels` list the members of the
-- character classes; `consonant`, `vowel` and `stress` are the classes
-- themselves, ready to be concatenated into mw.ustring patterns.
-- NOTE(review): the bracketed classes were missing from this copy of the
-- source (consonant and stress were empty strings, vowel began with a bare
-- "+"); they are restored here from the otherwise unused member lists.
-- Confirm against the canonical module.
local consonants = "bdðfghjklmnprstvxþ"
local consonant = "[" .. consonants .. "]"
local vowels = "aɛɪiʏyœɔou"
-- one or more vowel symbols, optionally followed by the non-syllabic mark
-- and then by the length mark
local vowel = "[" .. vowels .. "]+" .. nonsyllabic .. "?" .. long .. "?"
-- either stress mark
local stress = "[" .. primary_stress .. secondary_stress .. "]"
-- pronunciation data
-- Lookup table mapping spellings to IPA fragments.
-- NOTE(review): the keys on the left of every `=` below are missing in this
-- copy of the source, so the table does not parse as written. Code later in
-- the file reads data.trigraphs, data.digraphs, data.single and data.vowels,
-- which presumably name the four sub-tables in the order they appear here;
-- the per-entry spelling keys must be restored from the canonical module.
local data = {
-- consonants: initial, internal/word-final in arrays
-- (a plain string value is used regardless of position; a two-element array
-- holds the word/compound-initial form first, then the form used elsewhere)
-- trigraphs
= {
= "mt",
= "m" .. voiceless .. "t",
= "mt",
= "mk",
= "ms",
= "m" .. voiceless .. "t"
},
-- digraphs
= {
= "tl" .. voiceless,
= "ff",
= { "c", "gj" },
= { "cʰ", "c" },
= "rtl" .. voiceless,
= "rtn" .. voiceless,
= "stl" .. voiceless,
= "stn" .. voiceless,
= "kʰv",
= "kʰv",
= "l" .. voiceless,
= "n" .. voiceless,
= "r" .. voiceless,
= "ç"
},
-- single chars
= {
= "p",
= "t",
= { "k", "g" },
= { "pʰ", "p" },
= { "tʰ", "t" },
= { "kʰ", "k" },
= { "kʰ", "k" },
= { "s", "xs"},
= { "f", "v" },
= "θ"
},
-- vowels: regular, before gi, before ng/nk
-- (each entry is a three-element array in exactly that order)
= {
= {
"œy" .. nonsyllabic,
"œy" .. nonsyllabic,
"œy" .. nonsyllabic
},
= {
"ɛi" .. nonsyllabic,
"ɛi" .. nonsyllabic,
"ɛi" .. nonsyllabic
},
= {
"a",
"ai" .. nonsyllabic,
"au" .. nonsyllabic
},
= {
"au" .. nonsyllabic,
"au" .. nonsyllabic,
"au" .. nonsyllabic
},
= {
"ɛ",
"ei" .. nonsyllabic,
"ɛi" .. nonsyllabic
},
= {
"jɛ",
"jɛ",
"jɛ"
},
= {
"ɪ",
"i",
"i"
},
= {
"i",
"i",
"i"
},
= {
"ɔ",
"ɔi" .. nonsyllabic,
"ɔi" .. nonsyllabic
},
= {
"ou" .. nonsyllabic,
"ou" .. nonsyllabic,
"ou" .. nonsyllabic
},
= {
"ʏ",
"ʏi" .. nonsyllabic,
"u"
},
= {
"u",
"u",
"u"
},
= {
"ai" .. nonsyllabic,
"ai" .. nonsyllabic,
"ai" .. nonsyllabic
},
= {
"œ",
"œy" .. nonsyllabic,
"œy" .. nonsyllabic
}
}
}
-- add data for preaspirated stop clusters
--   doubled stop (pp, tt, kk)            -> "h" + stop
--   stop + l/m/n (pl, pm, pn, tl, ... kn) -> "h" + stop + voiceless sonorant
-- Each cluster is registered under its own spelling in data.digraphs.
-- NOTE(review): the index expressions were missing in this copy of the
-- source, which made every iteration overwrite data.digraphs itself with a
-- string. The keys are reconstructed from the values being built and from
-- the module's test cases (e.g. kk -> hk, tt -> ht, pn -> hpn̥, tn -> htn̥,
-- kn -> hkn̥); confirm against the canonical module.
for letter_a in gmatch("ptk", ".") do
	data.digraphs[letter_a .. letter_a] = "h" .. letter_a
	for letter_b in gmatch("lmn", ".") do
		data.digraphs[letter_a .. letter_b] = "h" .. letter_a .. letter_b .. voiceless
	end
end
-- list pronunciation substitutions
-- Three ordered sets of substitutions applied to the assembled transcription
-- in export.toIPA. Each entry appears to be a { pattern, replacement } pair
-- that is handed to gsub.
-- NOTE(review): the key of each set (presumably 1, 2, 3, since the sets are
-- walked with ipairs) and several character classes inside the patterns are
-- missing in this copy of the source. Patterns that now end in an empty
-- "()" or contain an empty "" segment originally carried a bracketed class
-- there; restore them from the canonical module before use.
local rules = {
-- set 1
= {
{ "(" .. stress .. consonant .. "*" .. vowel .. ")nn", "%1tn" .. voiceless },
{ "(" .. vowel .. ")" .. "g" .. "()", "%1ɣ%2" },
{ "(" .. vowel .. ")" .. "g" .. "()", "%1j%2" },
{ "(" .. vowel .. ")" .. "" .. "()", "%1x%2" },
{ "(" .. vowel .. ")" .. "p" .. "()", "%1f%2" },
{ "v" .. "()", "f%1" }
},
= { -- set 2 only applies when special=false
{ "(u" .. nonsyllabic .. "?" .. long .. "?)", "%1" },
{ "kʏ(" .. long .. "?)ð", "kvʏ%1ð" }
},
-- set 3
= {
{ "ng()", "ŋ%1" },
{ "g", "k" },
{ "k(ʰ?)", "c%1" },
{ "k(ʰ?ai)", "c%1" },
{ "kj", "c" },
{ "(" .. long .. "?)jj", "i" .. nonsyllabic .. "%1j" },
{ "nk", "ŋk" },
{ "kc", "c" .. long },
-- any doubled symbol becomes that symbol followed by the length mark
{ "(.)%1", "%1" .. long }
}
}
-- function to track accents
-- Builds the list of stress positions for `term`, one entry per compound
-- part (parts are separated by hyphens in `term`).
--   term   : the (possibly hyphen-respelled) word
--   string : optional comma-separated stress positions, one per part; a "-"
--            stands for "no stress" (0) in a single-part word and for the
--            default initial stress (1) in a multi-part word.
--            (The parameter shadows the `string` library inside this
--            function; the library is not used here.)
-- Returns an array of strings, e.g. { "1", "1" } for a two-part compound.
-- Raises an error when the number of positions given does not match the
-- number of parts.
function export.markAccent(term, string)
	-- count the hyphens: n hyphens mean n + 1 compound parts
	-- NOTE(review): the pattern was empty in this copy of the source, which
	-- counted every character position instead; hyphen is used because it is
	-- the compound separator handled by export.toIPA.
	local _, term_count = gsub(term, "%-", "")
	-- build default stress positions if no accent string provided
	if not string then
		local array = {}
		for i = 1, term_count + 1 do
			-- store per part; assigning to `array` itself would replace the
			-- array with a plain string
			array[i] = "1"
		end
		return array
	end
	-- otherwise count number of commas in accent string
	local _, string_count = gsub(string, ",", "")
	-- ensure correct number of stress positions are present
	if term_count ~= string_count then
		error(format("Incorrect number of stress positions specified (%d). Specify %d stress positions.", string_count + 1, term_count + 1))
	end
	if term_count == 0 then
		-- dash represents no stress in single compound words
		string = gsub(string, "%-", "0")
	else
		-- otherwise dash represents default initial stress
		string = gsub(string, "%-", "1")
	end
	-- return stressed positions as comma-separated array
	return split(string, ",")
end
-- function to determine vowel length
-- Appends the length mark to the stressed vowel `v` when the spelling that
-- follows it calls for a long vowel.
--   v          : IPA vowel (monophthong or diphthong) without length mark
--   next_chars : the (up to two) letters of the spelling that follow the vowel
-- Returns `v` or `v .. long`.
-- NOTE(review): both patterns were empty in this copy of the source, which
-- made every vowel not followed by "x" long. They are rebuilt from the
-- comments and from the module's test cases: the vowel class lists spelling
-- letters because `next_chars` is taken from the spelling (kaþólikki has a
-- long first vowel before "þó"), and "." counts like a vowel (ráp.s is long)
-- whereas "-" does not (guð-spjall is short). Confirm against the canonical
-- module.
local function determineLength(v, next_chars)
	-- short if before x as it's treated like two consonants
	if find(next_chars, "x") then
		return v
	end
	local spelled_vowels = "aáeéiíoóuúyýæö"
	-- long if word-final or before one final consonant, before a single
	-- consonant followed by a vowel (or an explicit "." break), or before
	-- the consonant clusters b/d/g/k/p/s/t + j/r/v
	if len(next_chars) <= 1
		or find(next_chars, "^[" .. consonants .. "][" .. spelled_vowels .. "%.]")
		or find(next_chars, "^[bdgkpst][jrv]") then
		return v .. long
	end
	-- short otherwise
	return v
end
-- function to determine vowel type
-- Picks the IPA value for the spelled vowel `v` from data.vowels, whose
-- entries hold three forms: regular, before gi, before ng/nk.
--   v           : vowel letter or vowel digraph as spelled
--   term        : the whole word being transcribed
--   pos         : index in `term` of the last letter of the vowel
--   is_stressed : true when this vowel carries the stress
-- Returns the IPA string; length is only evaluated for stressed vowels in
-- the regular context.
-- NOTE(review): the `[v][n]` index expressions were missing in this copy of
-- the source (the whole data.vowels table was returned); they are restored
-- from the "regular, before gi, before ng/nk" ordering documented on the
-- data table. Confirm against the canonical module.
local function determineVowel(v, term, pos, is_stressed)
	local forms = data.vowels[v]
	-- check next two chars
	local next_chars = sub(term, pos + 1, pos + 2)
	if next_chars == "ng" or next_chars == "nk" then
		-- before ng/nk
		return forms[3]
	elseif next_chars == "gi" then
		-- before gi
		return forms[2]
	elseif is_stressed then
		-- determine vowel length if stressed
		return determineLength(forms[1], next_chars)
	end
	-- otherwise the plain regular form
	return forms[1]
end
-- function to count syllables
-- Scans `term` for every match of the module-level `vowel` pattern.
-- Returns two values: the number of matches, and an array holding the
-- matched substrings themselves in order of appearance (not their numeric
-- positions).
local function countSyllables(term)
	local poss = {}
	for nucleus in gmatch(term, vowel) do
		poss[#poss + 1] = nucleus
	end
	return #poss, poss
end
-- function to generate rhyme
-- Intended to return the tail of `term` starting at its last syllable (for
-- one-syllable terms) or at its second-last syllable (otherwise), using the
-- syllable data from countSyllables.
-- NOTE(review): the expressions that compute `start` are damaged in this
-- copy of the source (text is missing before the stray "]"), so the function
-- does not parse as written; restore both assignments from the canonical
-- module. The function is not called anywhere in the visible code.
local function getRhyme(term)
local count, poss = countSyllables(term)
local start = 0
-- mark start of rhyme
if count == 1 then
-- start at last syllable
start = "-" .. term]
else
-- start at second-last syllable
start = "-" .. term]
end
-- return rhymes
-- (`start` is built as a string beginning with "-", presumably so that it
-- acts as a negative index counting from the end of `term` — confirm)
return sub(term, start)
end
-- function to generate transcription
-- Converts the spelling `term` into an IPA string.
--   term    : word to transcribe (must be a string, otherwise an error is
--             raised); it is lower-cased first, and "-" marks a compound
--             boundary
--   accent  : stress information; export.show passes the value returned by
--             export.markAccent
--   special : when true, "ll" is emitted as "ll" instead of its data.digraphs
--             value, and rule set 2 is skipped
-- The word is walked left to right, the IPA fragment for each trigraph,
-- digraph, vowel or single letter is appended to a list, the list is
-- concatenated, and the `rules` substitutions are then applied in order.
-- Raises "Invalid stress position" when a compound part ends while the
-- stress counter is still positive.
-- NOTE(review): many bracketed index expressions, table keys and pattern
-- character classes are missing in this copy of the source (see the notes
-- below); the function does not parse as written and must be restored from
-- the canonical module.
function export.toIPA(term, accent, special)
if type(term) ~= "string" then
error('The function "toIPA" requires a string argument.')
end
-- initialise pronunciation
term = lower(term)
local IPA = {}
local pos = 1
local is_initial = true
local compound_index = 1
-- respell some letters that share pronunciations with other letters
-- NOTE(review): "()" is a position capture, so as written %1 inserts a
-- number; presumably a character class of following letters was lost here
term = gsub(term, "c()", "s%1")
-- NOTE(review): the pattern and the keys of the replacement table are
-- missing; only the replacement letters k, v, i, í, s survive
term = gsub(term, "", { = "k", = "v", = "i", = "í", = "s" })
-- get current accent value from array
-- NOTE(review): the comment speaks of an array but no index is applied;
-- presumably the index (e.g. by compound_index) was lost
local current_accent = tonumber(accent)
-- handle string
while pos <= len(term) do
-- mark stress when current accent is 1
-- (the counter is decremented at each vowel, so a value of N places the
-- stress mark immediately before the Nth vowel of the compound part)
if current_accent == 1 then
table.insert(IPA, compound_index == 1 and primary_stress or secondary_stress)
current_accent = current_accent - 1
end
-- handle consonant trigraphs
-- NOTE(review): in this and the following branches the lookups by the
-- letters at `pos` (and the [1]/[2] picks for initial vs. non-initial
-- forms) appear to have been lost; as written each test only checks that
-- the sub-table exists
if data.trigraphs then
-- NOTE(review): table.insert returns no value, so `trigraph` is nil as
-- written
local trigraph = table.insert(IPA, data.trigraphs)
table.insert(IPA, type(trigraph) == "table" and (is_initial and trigraph or trigraph) or trigraph)
pos = pos + 3
is_initial = false
-- handle consonant digraphs
elseif data.digraphs then
local digraph = data.digraphs
-- special case for ll
if sub(term, pos, pos + 1) == "ll" and special == true then
table.insert(IPA, "ll")
else
table.insert(IPA, type(digraph) == "table" and (is_initial and digraph or digraph) or digraph)
end
pos = pos + 2
is_initial = false
-- handle vowel digraphs (au, ei, ey)
-- ("ey" needs no test of its own: y has already been respelled as i above)
elseif sub(term, pos, pos + 1) == "au" or sub(term, pos, pos + 1) == "ei" then
-- pos + 1 is passed so that the look-ahead starts after the digraph
table.insert(IPA, determineVowel(sub(term, pos, pos + 1), term, pos + 1, current_accent == 0))
current_accent = current_accent - 1
pos = pos + 2
is_initial = false
-- handle single consonant letters
elseif data.single then
local single = data.single
table.insert(IPA, type(single) == "table" and (is_initial and single or single) or single)
pos = pos + 1
is_initial = false
-- handle single vowels
elseif data.vowels then
table.insert(IPA, determineVowel(sub(term, pos, pos), term, pos, current_accent == 0))
current_accent = current_accent - 1
pos = pos + 1
is_initial = false
-- handle compound stress
elseif sub(term, pos, pos) == "-" then
-- check error for invalid stress position
if current_accent > 0 then
error(format("Invalid stress position %s in compound %d", accent, compound_index))
end
-- increment compound index
compound_index = compound_index + 1
current_accent = tonumber(accent)
pos = pos + 1
-- the next letter starts a new compound part, so initial forms apply again
is_initial = true
-- otherwise
-- (any other character is copied through unchanged)
else
table.insert(IPA, sub(term, pos, pos))
pos = pos + 1
is_initial = false
end
end
-- check error for invalid stress position
if current_accent > 0 then
error(format("Invalid stress position %s in compound %d", accent, compound_index))
end
-- combine ipa symbols into single string
local pron = table.concat(IPA)
-- apply phonemic rules
for i, set_of_rules in ipairs(rules) do
-- only use set 2 if special=false
if not (special and i == 2) then
for _, rule in ipairs(set_of_rules) do
-- NOTE(review): both names are bound to the whole rule table as written;
-- presumably the [1] and [2] indices were lost
local regex, replacement = rule, rule
pron = gsub(pron, regex, replacement)
end
end
end
-- remove secondary stress if primary and secondary stress are both one syllable only
-- NOTE(review): the character classes inside both captures are missing
pron = gsub(pron, "(+)(" .. secondary_stress .. "+)", function(a, b)
local count_a, _ = countSyllables(a)
local count_b, _ = countSyllables(b)
return a .. (count_a == 1 and count_b == 1 and gsub(b, secondary_stress, "") or b)
end)
-- remove any unwanted characters (e.g. full stops and commas)
-- NOTE(review): the pattern is empty as written; presumably a class listing
-- the unwanted characters was lost
pron = gsub(pron, "", "")
return pron
end
-- main export function
-- Template entry point. Reads the parameters of the calling template
-- (frame:getParent().args), transcribes every non-empty positional parameter
-- with export.toIPA and hands the results, wrapped in slashes, to
-- format_IPA_full of Module:IPA.
--   positional parameters : words (respellings) to transcribe; when none is
--                           given the current page title is used
--   accent / special      : options for the first word
--   accentN / specialN    : options for the Nth word
-- NOTE(review): in this copy of the source the test and the option lookups
-- referred to the bare `args` table, which is always truthy: the page-title
-- fallback could never run and the whole table was passed on as the accent
-- string. The numbered parameter names are reconstructed from the
-- `i == 1 and args.accent` fallback; confirm them against the template
-- documentation.
function export.show(frame)
	local args = frame:getParent().args
	local p, results = {}, {}
	-- collect the non-empty positional parameters
	for _, v in ipairs(args) do
		if v ~= "" then
			p[#p + 1] = v
		end
	end
	-- fall back to the page title when no word was supplied
	if #p == 0 then
		p = { mw.title.getCurrentTitle().text }
	end
	for i, word in ipairs(p) do
		local accent_param = args["accent" .. i] or (i == 1 and args.accent)
		local special_param = args["special" .. i] or (i == 1 and args.special)
		-- an empty template parameter means "not specified"
		if accent_param == "" then
			accent_param = nil
		end
		local accent = export.markAccent(word, accent_param)
		local special = require("Module:yesno")(special_param)
		local ipa = export.toIPA(word, accent, special)
		table.insert(results, { pron = "/" .. ipa .. "/" })
	end
	return m_ipa.format_IPA_full { lang = lang, items = results }
end
return export