-- This is a private module sandbox of Zhnka, for his own experimentation. Items in this module may be added and removed at Zhnka's discretion; do not rely on this module's stability.
local export = {}
local lang = require("Module:languages").getByCode("hsb")
local m_links = require("Module:links")
local m_table = require("Module:table")
local m_string_utilities = require("Module:string utilities")
local u = mw.ustring.char
local rsplit = mw.text.split
local rfind = mw.ustring.find
local rmatch = mw.ustring.match
local rsubn = mw.ustring.gsub
local ulen = mw.ustring.len
local uupper = mw.ustring.upper
-- Wrapper around rsubn() that keeps only the substituted string and drops the
-- substitution count.
local function rsub(term, foo, bar)
	-- The surrounding parentheses truncate the call to its first return value.
	return (rsubn(term, foo, bar))
end
-- Wrapper around rsubn() that returns the substituted string followed by a
-- boolean: true when at least one substitution was made, false otherwise.
local function rsubb(term, foo, bar)
	local result, count = rsubn(term, foo, bar)
	local changed = count > 0
	return result, changed
end
-- Character inventories used by the pattern-based helpers in this module. For each inventory `export.X` (a plain
-- string of characters), `export.X_c` is the corresponding pattern character class "[...]". The classes must be
-- bracketed: an empty string contributes nothing to a pattern, which silently breaks every pattern built from it.
export.TEMP_CH = u(0xFFF0) -- used to substitute ch temporarily in the default-reducible code
export.TEMP_SOFT_LABIAL = u(0xFFF1)

local lc_vowel = "aeěioóuy"
local uc_vowel = uupper(lc_vowel)
export.vowel = lc_vowel .. uc_vowel
export.vowel_c = "[" .. export.vowel .. "]"
export.non_vowel_c = "[^" .. export.vowel .. "]"

-- Consonants that can never form a syllabic nucleus.
-- The digraphs appended here only repeat letters already listed, so inside a character class they are harmless
-- duplicates; a class always matches one character at a time.
local lc_non_syllabic_cons = "bcčćdfghjklłmnńpqrřsštvwxzž" .. "bj" .. "dź" .. "ch" .. "mj" .. "nj" .. "pj" .. "rj" .. "tř" .. "wj"
local uc_non_syllabic_cons = uupper(lc_non_syllabic_cons)
export.non_syllabic_cons = lc_non_syllabic_cons .. uc_non_syllabic_cons
export.non_syllabic_cons_c = "[" .. export.non_syllabic_cons .. "]"

local lc_syllabic_cons = "lrř"
local uc_syllabic_cons = uupper(lc_syllabic_cons)
local lc_cons = lc_non_syllabic_cons .. lc_syllabic_cons
local uc_cons = uupper(lc_cons)
export.cons = lc_cons .. uc_cons
export.cons_c = "[" .. export.cons .. "]"

export.lowercase = lc_vowel .. lc_cons
export.lowercase_c = "[" .. export.lowercase .. "]"
export.uppercase = uc_vowel .. uc_cons
export.uppercase_c = "[" .. export.uppercase .. "]"

local lc_velar = "kgh"
local uc_velar = uupper(lc_velar)
export.velar = lc_velar .. uc_velar
export.velar_c = "[" .. export.velar .. "]"

-- The plain-labial inventory is empty here, so `export.labial` consists only of the soft-labial marker.
local lc_plain_labial = ""
local lc_labial = lc_plain_labial .. export.TEMP_SOFT_LABIAL
local uc_plain_labial = uupper(lc_plain_labial)
local uc_labial = uupper(lc_labial)
export.plain_labial = lc_plain_labial .. uc_plain_labial
export.labial = lc_labial .. uc_labial
export.labial_c = "[" .. export.labial .. "]"

local lc_paired_palatal = "ndtl"
local uc_paired_palatal = uupper(lc_paired_palatal)
export.paired_palatal = lc_paired_palatal .. uc_paired_palatal
local lc_paired_plain = "ndtłrbpmw"
local uc_paired_plain = uupper(lc_paired_plain)
export.paired_plain = lc_paired_plain .. uc_paired_plain
-- Table whose values are the plain consonants of `lc_paired_plain` ("ndtłrbpmw") in lower and upper case;
-- judging by its name, each key is presumably the palatal counterpart of the value it maps to.
-- NOTE(review): every entry below is missing its key (bracketed string keys appear to have been lost), so this
-- constructor is not valid Lua as written. The palatal keys must be restored before the module can load — confirm
-- them against the original module text rather than guessing.
export.paired_palatal_to_plain = {
= "b",
= "B",
= "p",
= "P",
= "m",
= "M",
= "w",
= "W",
= "n",
= "N",
= "r",
= "R",
= "t",
= "T",
= "d",
= "D",
= "ł",
= "Ł",
}
-- Build the inverse of export.paired_palatal_to_plain: plain consonant -> palatal counterpart.
-- Each pair is stored under its plain value; assigning to the table variable itself would replace the whole
-- table with a single string on every iteration.
export.paired_plain_to_palatal = {}
for palatal, plain in pairs(export.paired_palatal_to_plain) do
	export.paired_plain_to_palatal[plain] = palatal
end
-- The soft-labial marker maps to the empty string, i.e. it is simply dropped when a stem is made plain. It is
-- added only after the inversion above so that "" never becomes a key of the inverse table.
export.paired_palatal_to_plain[export.TEMP_SOFT_LABIAL] = ""
-- Every plain labial is made palatal by appending the soft-labial marker to it.
for labial in mw.ustring.gmatch(export.plain_labial, ".") do
	export.paired_plain_to_palatal[labial] = labial .. export.TEMP_SOFT_LABIAL
end
-- Inventory of inherently soft consonants, in both cases, plus the matching pattern character class. The class
-- must be bracketed; an empty string would contribute nothing to any pattern built from it.
local lc_inherently_soft = "čćźńjlšž"
local uc_inherently_soft = uupper(lc_inherently_soft)
export.inherently_soft = lc_inherently_soft .. uc_inherently_soft
export.inherently_soft_c = "[" .. export.inherently_soft .. "]"
-- Return true if `word` contains at most one syllable nucleus.
-- The word is reduced to a skeleton in which 'e' stands for a vowel and 't' for a non-syllabic consonant;
-- whatever is left after deleting every 't' is counted as the nuclei.
function export.is_monosyllabic(word)
	-- Rewrites applied in order; each entry is {pattern, replacement}.
	local steps = {
		-- Convert all vowels to 'e'.
		{export.vowel_c, "e"},
		-- All consonants next to a vowel are non-syllabic; convert to 't'.
		{export.cons_c .. "e", "te"},
		{"e" .. export.cons_c, "et"},
		-- Convert all remaining non-syllabic consonants to 't'.
		{export.non_syllabic_cons_c, "t"},
	}
	local skeleton = word
	for _, step in ipairs(steps) do
		skeleton = rsub(skeleton, step[1], step[2])
	end
	-- What remains is 't', 'e', or a syllabic consonant; drop the 't's and count the rest.
	local nuclei = skeleton:gsub("t", "")
	return ulen(nuclei) <= 1
end
-- Apply the vowel alternation named by `alt` to the last vowel of `stem`.
--
-- Only alt == "quant" is implemented: a stem-final "ó" (followed by nothing but consonants, and preceded by at
-- least one character) becomes "o". Any other `alt` returns the stem untouched.
--
-- Returns two values: the (possibly modified) stem, and the original vowel that was matched, or nil when `alt`
-- is not "quant" or nothing matched.
function export.apply_vowel_alternation(alt, stem)
	if alt ~= "quant" then
		return stem, nil
	end
	local origvowel
	-- The vowel capture must name the alternating vowel explicitly: an empty capture "()" is a position capture
	-- that yields a number, so the comparison with "ó" below could never succeed. The consonant class is built
	-- from export.cons directly so this pattern does not depend on how export.cons_c is defined.
	-- NOTE(review): only "ó" is matched because it is the only vowel handled below; widen the class if other
	-- vowels should be reported through `origvowel`.
	local cons_class = "[" .. export.cons .. "]"
	local modstem = rsub(stem, "(.)([ó])(" .. cons_class .. "*)$",
		function(pre, vowel, post)
			origvowel = vowel
			if vowel == "ó" then
				return pre .. "o" .. post
			end
			-- Returning nothing keeps the matched text unchanged.
		end
	)
	return modstem, origvowel
end
-- Build a suffix-replacement helper bound to `word`.
-- The returned function takes (from, to): if `word` ends in `from`, it returns `word` with that ending replaced
-- by `to`; otherwise it returns nil. `from` is spliced into the pattern as-is (not escaped).
local function make_try(word)
	local function try(from, to)
		local stem = rmatch(word, "^(.*)" .. from .. "$")
		if not stem then
			return nil
		end
		return stem .. to
	end
	return try
end
-- Iotate the final consonant of `word` (original examples: nosy > noša, wozy > woža, dyrbi > dyrbja).
-- The first matching ending in the list below is replaced; if none matches, `word` is returned unchanged.
function export.iotate(word)
	local try = make_try(word)
	-- {ending, replacement}, tried in order.
	local replacements = {
		{"s", "š"},
		{"z", "ž"},
		{"b", "bj"},
		{"p", "pj"},
		-- not complete
	}
	for _, pair in ipairs(replacements) do
		local result = try(pair[1], pair[2])
		if result then
			return result
		end
	end
	return word
end
-- Palatalize the final consonant (or consonant group) of `word`.
-- Implemented replacements, tried in order: tr > tř, t > ć, d > dź, k > c, ch > š, h > z. If none of these
-- endings matches, `word` is returned unchanged. The `is_soft` parameter is accepted but not used.
-- NOTE(review): the original header cited "pjeku > pječeš, bjeru > bjerješ, sunu > sunješ, du > dźeš,
-- pletu > plećeš", but k maps to c here (not č) and r/n are not handled — confirm the intended mapping.
function export.apply_palatalization(word, is_soft)
	local try = make_try(word)
	-- {ending, replacement}; "ch" must be tried before "h".
	local replacements = {
		{"tr", "tř"},
		{"t", "ć"},
		{"d", "dź"},
		{"k", "c"},
		{"ch", "š"},
		{"h", "z"},
	}
	for _, pair in ipairs(replacements) do
		local result = try(pair[1], pair[2])
		if result then
			return result
		end
	end
	return word
end
-- Soften the final consonant of `word` by switching to its j-spelling:
-- m > mj, b > bj, p > pj, w > wj, ń > nj, r > rj. Words with any other ending are returned unchanged.
function export.addj(word)
	local try = make_try(word)
	-- {ending, replacement}, tried in order.
	local replacements = {
		{"m", "mj"},
		{"b", "bj"},
		{"p", "pj"},
		{"w", "wj"},
		{"ń", "nj"},
		{"r", "rj"},
	}
	for _, pair in ipairs(replacements) do
		local result = try(pair[1], pair[2])
		if result then
			return result
		end
	end
	return word
end
-- Strip the softening j from the end of `word`:
-- mj > m, bj > b, pj > p, wj > w, nj > n, rj > r. Words with any other ending are returned unchanged.
function export.removej(word)
	local try = make_try(word)
	-- {ending, replacement}, tried in order.
	local replacements = {
		{"mj", "m"},
		{"bj", "b"},
		{"pj", "p"},
		{"wj", "w"},
		{"nj", "n"},
		{"rj", "r"},
	}
	for _, pair in ipairs(replacements) do
		local result = try(pair[1], pair[2])
		if result then
			return result
		end
	end
	return word
end
-- Remove the fleeting vowel from the last syllable of `word`.
-- `word` must end in consonant + "e" + one or more consonants; the "e" is dropped and the rest is kept.
-- Returns the reduced word, or nil if `word` does not have that shape.
function export.reduce(word)
	-- The consonant and the vowel have to be matched explicitly: empty captures "()" are position captures that
	-- return numbers, which would splice a digit into the result instead of the consonant and would not require
	-- a vowel at all. The class is built from export.cons directly so it does not depend on export.cons_c.
	-- NOTE(review): only "e" is treated as the fleeting vowel, mirroring the "e" inserted by dereduce(); extend
	-- the pattern if other vowels should be reducible.
	local cons_class = "[" .. export.cons .. "]"
	local pre, letter, post = rmatch(word, "^(.*)(" .. cons_class .. ")e(" .. cons_class .. "+)$")
	if not pre then
		return nil
	end
	return pre .. letter .. post
end
-- Insert an epenthetic "e" between the last two consonants of `stem`.
-- Returns the expanded stem, or nil when `stem` does not end in two characters matching export.cons_c.
-- The `base` parameter is accepted but not used.
function export.dereduce(base, stem)
	local cons = export.cons_c
	local head, penult, final = rmatch(stem, "^(.*)(" .. cons .. ")(" .. cons .. ")$")
	if head then
		return head .. penult .. "e" .. final
	end
	return nil
end
-- Replace a stem-final plain consonant with its palatal counterpart.
-- `stem` is returned unchanged when its last character is not one of export.paired_plain or has no entry in
-- export.paired_plain_to_palatal.
function export.convert_paired_plain_to_palatal(stem, ending)
	-- NOTE(review): the pattern "^" matches every string, so this guard never fires when `ending` is given.
	-- Presumably it was meant to test whether `ending` starts with a softening vowel — confirm the intended
	-- character class and restore it.
	if ending and not rfind(ending, "^") then
		return stem
	end
	-- Capture the last character itself (an empty capture would only yield its position) and look up its
	-- counterpart in the mapping table, rather than concatenating the table.
	local stembegin, lastchar = rmatch(stem, "^(.*)([" .. export.paired_plain .. "])$")
	local palatal = lastchar and export.paired_plain_to_palatal[lastchar]
	if palatal then
		return stembegin .. palatal
	end
	return stem
end
-- Replace a stem-final palatal consonant with its plain counterpart and adjust the ending to match.
-- Stems that alternate between a plain and a palatal final consonant are kept in the palatal form and converted
-- to plain as needed; when that conversion happens, an ending-initial "e" becomes "ě".
-- Returns two values: the resulting stem and the resulting ending (nil if no ending was given).
function export.convert_paired_palatal_to_plain(stem, ending)
	-- NOTE(review): the pattern "^" matches every string, so this branch is never taken when `ending` is given.
	-- Presumably it was meant to test whether `ending` starts with one of a set of vowels — confirm the intended
	-- character class and restore it.
	if ending and not rfind(ending, "^") then
		stem = stem:gsub(export.TEMP_SOFT_LABIAL, "")
		return stem, ending
	end
	-- Capture the last character itself (an empty capture would only yield its position) and look it up in the
	-- mapping table. The soft-labial marker is included in the class because the other branch strips it too.
	-- NOTE(review): confirm that the marker belongs in this class.
	local stembegin, lastchar = rmatch(stem, "^(.*)([" .. export.paired_palatal .. export.TEMP_SOFT_LABIAL .. "])$")
	local plain = lastchar and export.paired_palatal_to_plain[lastchar]
	-- An empty-string mapping is still truthy in Lua, so a marker mapped to "" is handled here as well.
	if plain then
		ending = ending and rsub(ending, "^e", "ě") or nil
		stem = stembegin .. plain
	end
	-- 'E' has served its purpose of preventing the e -> ě conversion after a paired palatal (i.e. it
	-- depalatalizes paired palatals).
	ending = ending and rsub(ending, "^E", "e") or nil
	return stem, ending
end
-- Join `stem` and `ending` into a full form.
-- A stem of "?" yields "?" regardless of the ending. When base.all_uppercase is set, the stem is uppercased
-- before the ending is appended. The `slot` parameter is accepted but not used.
function export.combine_stem_ending(base, slot, stem, ending)
	if stem == "?" then
		return "?"
	end
	local prefix = base.all_uppercase and uupper(stem) or stem
	return prefix .. ending
end
return export