-- This is a private module sandbox of Suzukaze-c, for their own experimentation. Items in this module may be added and removed at Suzukaze-c's discretion; do not rely on this module's stability.
-- Module table; the definitions below attach their members to it.
local export = {}
-- Open question: do numbers need special handling? e.g. the "," in 1,000,000
--
-- Marked-up replacement strings for punctuation, nested two tables deep.
-- NOTE(review): every entry below starts with a bare "=", i.e. the bracketed
-- keys (of the form ["..."] = ...) appear to be missing from this copy of the
-- table. Presumably the outer keys are language codes (langRegexRange builds
-- the list { lang, 'mul' }) and the inner keys are the punctuation characters
-- being converted -- TODO confirm against the canonical module text.
export.spacing_instructions = {
-- Notation used inside the replacement strings:
-- ◇: there would be a space here in normal text
-- ◆: there would not be a space here in normal text
-- i.e. "◆.◇" means "." never has a leading space and always has a trailing space
= {
= "◆;◇", = "◆…◆",
= "◆.◆", = "◆.◇",
= "◆,◇", = "◆,◇",
= "◆!◇", = "◆?◇",
= "◆:◇", = "◆;◇",
= "◇(◆", = "◆)◇",
= "◆|◆", = "◆—◆",
},
= {
= "◇“◆", = "◆”◇",
= "◇“◆", = "◆”◇",
= "◇‘◆", = "◆’◇",
= " ",
},
= {
-- (no entries; the comment that stood here survives only as a stray "]",
-- so its original content is unknown)
},
}
-- Pattern -> replacement pairs that export.main applies with mw.ustring.gsub
-- to turn the ◇/◆ markup into the final text.
-- NOTE(review): the pattern keys (of the form ["..."] = ...) appear to be
-- missing from this copy; only the replacement values and their comments
-- remain -- TODO confirm against the canonical module text.
-- NOTE(review): export.main walks this table with pairs(), so the order in
-- which the replacements run is not guaranteed; if the rules are meant to be
-- applied top to bottom, this needs to become an ordered list.
export.space_management_instructions = {
= "", -- 「◆!◇◆”◇」→「!”」
= "", -- 「◇‘◆◇(◆」→「‘(」
= "", -- remove spaces near ◆
= " ", -- keep spaces near ◇
}
-- Return all the characters that can be converted for a language, joined into
-- one string (suitable for use inside a character class).
--
-- lang: language code, or nil to use only the 'mul' entries.
--
-- Characters come from export.spacing_instructions[lang] followed by those
-- from export.spacing_instructions['mul']; a character listed in both is
-- emitted once. Within one table the order follows pairs() and is therefore
-- unspecified.
function export.langRegexRange(lang)
	-- Build the lookup list by hand: a literal { lang, 'mul' } has a hole at
	-- index 1 when lang is nil, which makes ipairs stop before reaching 'mul'.
	local codes = {}
	if lang ~= nil and lang ~= 'mul' then
		codes[#codes + 1] = lang
	end
	codes[#codes + 1] = 'mul'

	local chars, seen = {}, {}
	for _, code in ipairs(codes) do
		-- Index by the language code; a language without its own table is skipped.
		local instructions = export.spacing_instructions[code]
		if instructions then
			for punctuation in pairs(instructions) do
				if not seen[punctuation] then
					seen[punctuation] = true
					chars[#chars + 1] = punctuation
				end
			end
		end
	end
	return table.concat(chars, '')
end
-- Pad convertible punctuation with spaces, leaving the punctuation itself
-- unconverted.
--
-- text: string to process; it is walked one character at a time with
--       mw.ustring.gmatch.
-- lang: passed straight through to export.convChar.
--
-- Every character for which export.convChar returns a truthy value is emitted
-- with one space on each side; all other characters are copied unchanged.
function export.space(text, lang)
	local pieces = {}
	for ch in mw.ustring.gmatch(text, '.') do
		local piece = ch
		if export.convChar(ch, lang) then
			piece = ' ' .. ch .. ' '
		end
		pieces[#pieces + 1] = piece
	end
	return table.concat(pieces)
end
-- Convert a single punctuation character to its ugly marked-up form.
--
-- char: the character to look up.
-- lang: language code whose table is consulted first; may be nil.
--
-- Returns the marked-up string from export.spacing_instructions[lang] if that
-- table has an entry for char, otherwise the entry from the 'mul' table, and
-- false if neither has one.
function export.convChar(char, lang)
	local instructions = export.spacing_instructions

	-- Language-specific entry takes precedence. Guard against a nil lang,
	-- since indexing a table with nil yields nil anyway but reads as a bug.
	local by_lang = lang ~= nil and instructions[lang]
	if by_lang and by_lang[char] then
		return by_lang[char]
	end

	-- Fall back to the language-independent table, if there is one.
	local fallback = instructions['mul']
	return (fallback and fallback[char]) or false
end
-- Convert the punctuation in text to the ugly marked-up forms.
--
-- text: string to process; it is walked one character at a time with
--       mw.ustring.gmatch.
-- lang: passed straight through to export.convChar.
--
-- Each character is replaced by whatever export.convChar returns for it when
-- that value is truthy, and kept as-is otherwise.
function export.conv(text, lang)
	local pieces = {}
	for ch in mw.ustring.gmatch(text, '.') do
		local marked = export.convChar(ch, lang)
		if marked then
			pieces[#pieces + 1] = marked
		else
			pieces[#pieces + 1] = ch
		end
	end
	return table.concat(pieces)
end
-- Convert marked-up forms to the final form.
--
-- text: string containing the markup to resolve.
--
-- Runs mw.ustring.gsub once for every pattern/replacement pair in
-- export.space_management_instructions, then trims the result with
-- mw.text.trim.
-- NOTE(review): the pairs are visited with pairs(), so the order in which the
-- replacements are applied is unspecified.
function export.main(text)
	local rules = export.space_management_instructions
	local result = text
	for pat, repl in pairs(rules) do
		-- Only the first return value of gsub (the new string) is kept.
		result = mw.ustring.gsub(result, pat, repl)
	end
	return mw.text.trim(result)
end
-- Hand the module table back to whatever loads this module.
return export