local export = {}

-- Shorthand: turn a Unicode code point into a UTF-8 encoded string.
-- (Previously `U` was used without ever being defined.)
local U = mw.ustring.char

-- Combining diacritics, appended to a base letter where no precomposed
-- character exists.
local circumflex = U(0x302) -- combining circumflex accent
local macron = U(0x304) -- combining macron

-- Precomposed lowercase vowels.
local ecircumflex = U(0xEA) -- latin small letter e with circumflex
local ocircumflex = U(0xF4) -- latin small letter o with circumflex
local amacron = U(0x101) -- latin small letter a with macron
local emacron = U(0x113) -- latin small letter e with macron
local imacron = U(0x12B) -- latin small letter i with macron
local omacron = U(0x14D) -- latin small letter o with macron
local umacron = U(0x16B) -- latin small letter u with macron
local aogonek = U(0x105) -- latin small letter a with ogonek
local iogonek = U(0x12F) -- latin small letter i with ogonek
local oogonek = U(0x1EB) -- latin small letter o with ogonek
local uogonek = U(0x173) -- latin small letter u with ogonek

-- Vowels that need a combining mark on top of a precomposed ogonek letter.
-- Each of these is two code points long.
local aogonekmacron = aogonek .. macron -- latin small letter a with ogonek and macron
local iogonekmacron = iogonek .. macron -- latin small letter i with ogonek and macron
local oogonekmacron = oogonek .. macron -- latin small letter o with ogonek and macron
local uogonekmacron = uogonek .. macron -- latin small letter u with ogonek and macron
local oogonekcircumflex = oogonek .. circumflex -- latin small letter o with ogonek and circumflex

-- Segment inventories used by the sound-change rules. These are plain
-- sequences of strings; some entries ("gw", "kw", "ng", "hw") are digraphs.
local consonant = {
    "b", "c", "d", "f", "gw", "g", "k", "kw", "l", "m", "ng", "n",
    "p", "r", "ʀ", "s", "t", "θ", "v", "w", "z", "j", "hw", "h",
}
local consonant_cluster = {
    "br", "bl", "dr", "fl", "fr", "gl", "gr",
    "kl", "kr", "pl", "pr", "sl", "sr", "tr",
}
local vowel = {
    "a", amacron, aogonek, aogonekmacron,
    "e", emacron,
    "i", imacron, iogonek, iogonekmacron,
    "o", omacron, oogonekmacron, oogonekcircumflex,
    "u", umacron, uogonek, uogonekmacron,
}
local diphthong = {"ai", "au", "eu", "ui"}
--- Insert syllable boundaries into a term.
--
-- The syllabification rules have not been ported from their PHP-regex form
-- yet, so this function currently returns `term` unchanged. The unported
-- rules are kept below for reference.
--
-- @param term string: the term to syllabify
-- @return string: the term (currently unmodified)
function syllablize(term)
    -- The previous version tried to merge the segment inventories with
    -- `consonant .. consonant_cluster` and `vowel .. diphthong`. Those are
    -- tables, so the concatenation raised a runtime error, and it would also
    -- have overwritten the shared inventories on every call.
    --
    -- TODO: port the rules below. In them, "consonant" is meant to cover both
    -- the single consonants and the consonant clusters, and "vowel" both the
    -- single vowels and the diphthongs. Lua patterns have no alternation
    -- ("|"), so the rules cannot be translated one-to-one.
    --term = preg_replace('/('.vowel.')('.consonant.')('.vowel.')/','$1.$2$3',term) -- before a single consonant
    --term = preg_replace('/('.vowel.')('.consonant.')('.consonant.')('.vowel.')/','$1$2.$3$4',term) -- between two consonants
    --term = preg_replace('/('.vowel.')('.consonant.')('.consonant.')('.consonant.')('.vowel.')/','$1$2$3.$4$5',term) -- between two consonants
    --term = preg_replace('/('.vowel.')('.consonant.')('.vowel.')\z/','$1.$2$3',term)
    --term = preg_replace('/(^)('.consonant.')('.vowel.')('.consonant.')\z/','$1.$2$3$4',term)
    --term = preg_replace('/^('.consonant.')('.vowel.')('.consonant.')(^)/','$1.$2$3$4',term)
    --term = preg_replace('/('.vowel.')('.consonant.')('.consonant.')('.vowel.')\z/','$1$2.$3$4',term)
    --term = preg_replace('/(\.)('.consonant.')('.vowel.')('.consonant.')('.vowel.')(\.)/','$1$2$3.$4$5$6',term)
    return term
end
--- Double (geminate) a consonant other than "r" before "j" / "ij".
--
-- The rule itself has not been ported from its PHP-regex form yet, so this
-- function currently returns `term` unchanged. The unported rule is kept
-- below for reference.
--
-- @param term string: the term to process
-- @return string: the term (currently unmodified)
function germinate(term)
    -- The previous version called the PHP function `str_replace`, which does
    -- not exist in Lua (runtime error), and stored its result in an
    -- accidental global that nothing read.
    --
    -- TODO: port the rule below. "consonant_except_r" stands for every entry
    -- of the consonant inventory except "r".
    --term = preg_replace('/('.vowel.')('.consonant_except_r.')(\.?)(ij|j)/','$1$2$2$3$4',term)
    return term
end
--- Replace `pattern` where it matches at the very end of `term`.
--
-- @param pattern string: an mw.ustring pattern fragment; an end-of-string
--   anchor is appended to it
-- @param replacement string: replacement text (gsub replacement syntax)
-- @param term string: the term to process
-- @return string: the term with the final match replaced, or unchanged if
--   the pattern does not match at the end
function replace_final(pattern, replacement, term)
    -- "$" anchors the match at the end of the string. The previous version
    -- appended a bare "%f", which is a malformed pattern ("%f" must be
    -- followed by a [set]) and made every call fail.
    local result = mw.ustring.gsub(term, pattern .. "$", replacement)
    -- Only the string is returned; gsub's substitution count is dropped so it
    -- cannot leak into a caller's argument list.
    return result
end
--- Apply an umlaut rule: change `pattern` to `replacement` when it is
-- followed by one or two consonants and then `before`.
--
-- The rule has not been ported from its PHP-regex form yet, so this function
-- currently returns `term` unchanged. The unported rules are kept below for
-- reference.
--
-- @param pattern string: the vowel to be changed
-- @param replacement string: the vowel it changes into
-- @param before string: the triggering vowel(s); callers currently pass
--   PHP-style alternations such as "a|o"
-- @param term string: the term to process
-- @return string: the term (currently unmodified)
function umlaut(pattern, replacement, before, term)
    -- The previous version called the PHP function `str_replace`, which does
    -- not exist in Lua (runtime error), and stored its result in an
    -- accidental global that nothing read.
    --
    -- TODO: port the rules below. Lua patterns have no alternation ("|"), so
    -- `before` and the consonant inventory need to be expanded by hand.
    --term = preg_replace('/()('.pattern.')(\.?)('.consonant.')(\.?)('.before.')/','$1'.replacement.'$3$4$5$6',term);
    --term = preg_replace('/()('.pattern.')(\.?)('.consonant.')(\.?)('.consonant.')(\.?)('.before.')/','$1'.replacement.'$3$4$5$6$7$8',term);
    return term
end
--- Build an mw.ustring character class from the single-character entries of
-- a list of strings. Multi-character entries (digraphs) are skipped.
-- Returns nil when the list has no single-character entry.
local function single_char_class(list)
    local members = {}
    for _, item in ipairs(list) do
        if mw.ustring.len(item) == 1 then
            -- Escape the few characters that are special inside a set.
            members[#members + 1] = (string.gsub(item, "[%^%]%%%-]", "%%%0"))
        end
    end
    if #members == 0 then
        return nil
    end
    return "[" .. table.concat(members) .. "]"
end

--- Derive a reconstructed form from `term` by applying a fixed sequence of
-- sound changes.
--
-- @param term string: the input term
-- @param pos currently unused
-- @param gender currently unused
-- @return string: the term after all sound changes
function export.reconstruct(term, pos, gender)
    term = syllablize(term)

    -- a-umlaut ("nonhigh umlaut"): */u/ and occasionally */i/ lower to */o/
    -- and */e/ before a consonant followed by /o/ or /ɑ/.
    term = umlaut("u", "o", "a|o", term)
    term = umlaut("i", "e", "a|o", term)

    -- Unported PHP rules, kept for reference.
    --term = preg_replace('/('.vowel.')('.vowel.')('.vowel.')(ˌ)/','.',term)
    --term = preg_replace('/('.vowel.')('.vowel.')(ˌ)/','.',term)
    --term = preg_replace('/('.vowel.')(ˌ)/','.',term)

    term = replace_final("z", "", term) -- *z > ∅ /_# : loss of word-final *z
    term = replace_final("a", "", term) -- *a > ∅ /_#
    term = replace_final(aogonek, "", term) -- *ą > ∅ /_#

    -- Delabialization of labiovelars: gw > g, kw > k, hw > h.
    -- NOTE(review): the rule was originally annotated as applying everywhere
    -- *except* word-initially, but these patterns are anchored with "^" and so
    -- only affect a word-initial labiovelar. Behaviour is left as it was;
    -- confirm which one is intended.
    term = mw.ustring.gsub(term, "^gw", "g")
    term = mw.ustring.gsub(term, "^kw", "k")
    term = mw.ustring.gsub(term, "^hw", "h")

    term = mw.ustring.gsub(term, emacron, amacron) -- ē > ā : lowering and retraction of *ē to *ā
    term = mw.ustring.gsub(term, "z", "ʀ") -- rhotacism of *z to *ʀ
    term = germinate(term) -- gemination of all consonants except *r before *j and *ij

    -- Unported PHP rule, kept for reference.
    --term = preg_replace('/(\.)('.str_replace("|j","",consonant).')\z/','$2',term)

    -- Word-final *j and *w after a consonant become *i and *u.
    -- `consonant` is a table, so it cannot be concatenated into a pattern
    -- directly (that raised a runtime error). Every digraph in the inventory
    -- ends in a letter that is also listed on its own, so testing the single
    -- character before the final glide is sufficient.
    local consonant_class = single_char_class(consonant)
    if consonant_class then
        term = mw.ustring.gsub(term, "(" .. consonant_class .. ")j$", "%1i")
        term = mw.ustring.gsub(term, "(" .. consonant_class .. ")w$", "%1u")
    end

    -- TODO: loss of word-final *i and *u in long-stem terms is not
    -- implemented. The statement that used to stand here matched two final
    -- consonants and replaced them with the PHP-style string "$1$2", which in
    -- Lua would have inserted that text literally instead of leaving the
    -- consonants in place, so it has been removed.

    return term
end