function export.is_valid(word, validation_opts)
Checks whether the provided word is valid according to Bulgarian orthographic rules.
word
: a string
representing a Bulgarian word.
nil
is invalid by default, but that can be overridden via an option.
validation_opts
: a table
of Boolean validation options
|nil_is_valid=
: treats nil
as valid input
If word
is not a string
, the function raises the error "Input must be a string!"
.
The function returns two values: result
and message
:
result
: true
if the word is orthographically valid, false
otherwise
message
: nil
if result == true
, otherwise identifies the first failing orthographic rule
local export = {}
-- Unicode-aware string helpers supplied by Scribunto.
local umatch = mw.ustring.match
local ufind = mw.ustring.find
local ulower = mw.ustring.lower

-- Letter inventories. Each exported `*_c` value below is a pattern character
-- class ("[...]") built from these strings, so it can be concatenated into a
-- larger pattern and followed directly by a quantifier such as "+".
-- None of the letters is a pattern magic character, so no escaping is needed.
local vowels_lower = "аъоуеиюяѝ"
-- NOTE(review): the uppercase list has no counterpart for "ѝ" ("Ѝ"), so it is
-- one letter shorter than the lowercase list -- confirm this is intentional.
local vowels_upper = "АЪОУЕИЮЯ"
export.vowels_lower_c = "[" .. vowels_lower .. "]"
export.vowels_upper_c = "[" .. vowels_upper .. "]"
export.vowels_c = "[" .. vowels_lower .. vowels_upper .. "]"

local consonants_lower = "бвгджзйклмнпрстфхцчшщь"
local consonants_upper = "БВГДЖЗЙКЛМНПРСТФХЦЧШЩЬ"
export.cons_lower_c = "[" .. consonants_lower .. "]"
export.cons_upper_c = "[" .. consonants_upper .. "]"
export.cons_c = "[" .. consonants_lower .. consonants_upper .. "]"

local alpha_lower = vowels_lower .. consonants_lower
local alpha_upper = vowels_upper .. consonants_upper
export.alpha_lower_c = "[" .. alpha_lower .. "]"
export.alpha_upper_c = "[" .. alpha_upper .. "]"
export.alphabet_c = "[" .. alpha_lower .. alpha_upper .. "]"
-- Complement class: any character that is not one of the letters listed above.
export.non_bulgarian_c = "[^" .. alpha_lower .. alpha_upper .. "]"
-- Looks up a single validation option.
-- validation_opts: the options value passed by the caller; may be nil or any
--                  non-table value, in which case no option is considered set.
-- key:             the option name to look up.
-- Returns validation_opts[key] when validation_opts is a table, otherwise nil.
local function get_opt(validation_opts, key)
	if type(validation_opts) == "table" then
		-- Return the requested entry, not the table itself: a table is always
		-- truthy, which would make every option look enabled.
		return validation_opts[key]
	end
	return nil
end
-- Registry of orthographic checks, keyed by rule name.
-- Each value is a predicate `function(word, opts)` that returns a truthy value
-- when the word satisfies the rule and a falsy value when it violates it.
-- Every rule must be stored under its own key: assigning to the variable
-- itself would replace the table and leave nothing to iterate over.
local orthographic_rules = {}

-- The word must be all uppercase, all lowercase, or capitalized
-- (one uppercase letter followed by lowercase letters only).
orthographic_rules["invalid capitalization"] = function(word, opts)
	return umatch(word, "^%u+$") or umatch(word, "^%l+$") or umatch(word, "^%u%l*$")
end

-- If the word contains the soft sign "ь" (in either case), it must contain the
-- sequence consonant + "ьо".
-- NOTE(review): this only requires one such sequence to exist; it does not
-- verify every "ь" in the word -- confirm that is the intended strictness.
orthographic_rules["misplaced soft sign"] = function(word, opts)
	local lowered = ulower(word)
	-- Plain (non-pattern) search on the lowered word covers both "ь" and "Ь".
	if not ufind(lowered, "ь", 1, true) then
		return true
	end
	return umatch(lowered, export.cons_lower_c .. "ьо") ~= nil
end

-- The word must consist entirely of characters in export.alphabet_c or
-- entirely of characters in export.non_bulgarian_c; a mixture is rejected.
orthographic_rules["mixed alphabets"] = function(word, opts)
	return umatch(word, "^" .. export.alphabet_c .. "+$") or
		umatch(word, "^" .. export.non_bulgarian_c .. "+$")
end
--[==[
Checks whether the provided word is valid according to Bulgarian orthographic rules.
===Parameters===
* word: a {string} representing a Bulgarian word.
*: The empty string is considered valid. {nil} is invalid by default, but that can be overridden via an option.
* validation_opts: a {table} of Boolean validation options
*: |nil_is_valid=: treats {nil} as valid input
===Errors===
If `word` is neither {nil} nor a {string} (this includes {false}), the function raises the error {"Input must be a string!"}.
===Return values===
The function returns two values: `result` and `message`:
* `result`: {true} if the word is orthographically valid, {false} otherwise
* `message`: {nil} if {result == true}; {"no input"} if `word` is {nil} and that is not allowed; otherwise the name of the first failing orthographic rule, with rules tried in sorted order of their names
]==]
function export.is_valid(word, validation_opts)
	-- Compare against nil explicitly: `false` is not "no input", it is a
	-- non-string value and must reach the type check below.
	if word == nil then
		if get_opt(validation_opts, "nil_is_valid") then
			return true, nil
		end
		return false, "no input"
	end
	if type(word) ~= "string" then error("Input must be a string!") end
	if word == "" then return true, nil end

	-- pairs() visits keys in an unspecified order, so collect and sort the rule
	-- names to make "the first failing rule" well defined and repeatable.
	local rule_names = {}
	for rule_name in pairs(orthographic_rules) do
		rule_names[#rule_names + 1] = rule_name
	end
	table.sort(rule_names)

	for _, rule_name in ipairs(rule_names) do
		if not orthographic_rules[rule_name](word, validation_opts) then
			return false, rule_name
		end
	end
	return true, nil
end
return export