local export = {}
local m_str_utils = require("Module:string utilities")
local find = m_str_utils.find
local gmatch = m_str_utils.gmatch
local gsub = m_str_utils.gsub
local lower = m_str_utils.lower
local match = m_str_utils.match
local reverse = m_str_utils.reverse
local upper = m_str_utils.upper
local lang = require("Module:languages").getByCode("za")
-- FIXME: needs rewrite
-- FIXME: 老壯文 seems to omit tone marks from new Mandarin borrowings (the example entry link that stood here was lost; only "]" remained)
-- https://en.wikipedia.org/wiki/Standard_Zhuang
-- https://baike.baidu.com/item/壮语/7703463
-- 在线学壮文 https://web.archive.org/web/0/http://www.gxmyw.com.cn/plus/list.php?tid=21
-- 基础壮文学习系列:壮文标点符号与书写规则 https://web.archive.org/web/0/http://www.gxmyw.com.cn/wsxzw/2013/1017/57.html
-- Maps a written Standard Zhuang initial to its IPA value.
-- The empty-string key covers syllables written without an initial consonant.
-- NOTE(review): the keys were absent from the reviewed copy (every entry began
-- with a bare "=", which does not parse). They are restored here from the
-- standard orthography; confirm against the upstream module.
local initialConv = {
	['b'] = 'p',
	['mb'] = 'ɓ',
	['m'] = 'm',
	['f'] = 'f',
	['v'] = 'β',
	['by'] = 'pʲ',
	['my'] = 'mʲ',
	['d'] = 't',
	['nd'] = 'ɗ',
	['n'] = 'n',
	['l'] = 'l',
	['s'] = 'θ',
	['ny'] = 'ɲ',
	['c'] = 'ɕ',
	['y'] = 'j',
	['g'] = 'k',
	['ng'] = 'ŋ',
	['r'] = 'ɣ',
	['gy'] = 'kʲ',
	['ngv'] = 'ŋʷ',
	['gv'] = 'kʷ',
	[''] = 'ʔ',
	['h'] = 'h',
}
-- ???
-- Maps a written vowel nucleus to its IPA value.
--   alone  : value used when the syllable has no final consonant
--   wfinal : value used when a final consonant follows
-- A value of `false` marks a combination that is not expected to occur.
-- NOTE(review): the keys were absent from the reviewed copy (syntax error).
-- They are restored to match the groupings listed in the comment that follows
-- this table (ai/ei/oi/ui/wi/au/aeu/eu/iu/ou/aw never take a final;
-- ie/oe/ue never stand alone); confirm against the upstream module.
local vowelConv = {
	['a'] = { alone = 'a', wfinal = 'aː' },
	['e'] = { alone = 'e', wfinal = 'eː' },
	['i'] = { alone = 'i', wfinal = 'i' },
	['o'] = { alone = 'o', wfinal = 'oː' },
	['u'] = { alone = 'u', wfinal = 'u' },
	['w'] = { alone = 'ɯ', wfinal = 'ɯ' },
	['ai'] = { alone = 'aːi', wfinal = false },
	['ei'] = { alone = 'ei', wfinal = false },
	['oi'] = { alone = 'oːi', wfinal = false },
	['ui'] = { alone = 'uːi', wfinal = false },
	['wi'] = { alone = 'ɯːi', wfinal = false },
	['ae'] = { alone = 'ai', wfinal = 'a' },
	['ie'] = { alone = false, wfinal = 'iː' },
	['oe'] = { alone = false, wfinal = 'o' },
	['ue'] = { alone = false, wfinal = 'uː' },
	['we'] = { alone = false, wfinal = 'ɯː' },
	['au'] = { alone = 'aːu', wfinal = false },
	['aeu'] = { alone = 'au', wfinal = false },
	['eu'] = { alone = 'eːu', wfinal = false },
	['iu'] = { alone = 'iu', wfinal = false },
	['ou'] = { alone = 'ou', wfinal = false },
	['aw'] = { alone = 'aɯ', wfinal = false },
}
-- ??
-- w/ final only: e?
-- cannot be w/ final: ai, ei, oi, ui, wi, au, aeu, eu, iu, ou, aw // e?
-- cannot be w/o final: ie, oe, ue // e
-- Maps a written final consonant to its IPA value; the empty string stands
-- for an open syllable. Each stop has two spellings that map to the same
-- IPA value.
-- NOTE(review): the keys were absent from the reviewed copy (syntax error) and
-- are restored here; confirm against the upstream module.
local finalConv = {
	[''] = '',
	['m'] = 'm',
	['n'] = 'n',
	['ng'] = 'ŋ',
	['p'] = 'p',
	['b'] = 'p',
	['t'] = 't',
	['d'] = 't',
	['k'] = 'k',
	['g'] = 'k',
}
-- ?g?
-- Maps a tone number (as a string) to Chao tone letters for IPA output.
-- '7:' is the variant of tone 7 that export.convert selects when the vowel
-- is long.
-- NOTE(review): the keys were absent from the reviewed copy (syntax error) and
-- are restored here from the tone values export.convert uses; confirm against
-- the upstream module.
local toneConv = {
	['1'] = '˨˦', --24
	['2'] = '˧˩', --31 z
	['3'] = '˥', --55 j
	['4'] = '˦˨', --42 x
	['5'] = '˧˥', --35 q
	['6'] = '˧', --33 h
	['7'] = '˥', --55
	['7:'] = '˧˥', --35
	['8'] = '˧', --33
}
-- Maps a written tone letter to its tone number (as a string).
-- NOTE(review): the keys were absent from the reviewed copy (syntax error).
-- They are restored as the inverse of toneConvFromNumbers for tones 1-6; the
-- empty-string key for tone 1 in particular should be confirmed against the
-- upstream module.
local toneConvToNumbers = {
	[''] = '1',
	['z'] = '2',
	['j'] = '3',
	['x'] = '4',
	['q'] = '5',
	['h'] = '6',
}
-- Maps a tone number (as a string) back to the written tone letter.
-- Tones 1, 7, 7: and 8 have no written letter, hence the empty strings.
-- NOTE(review): the keys were absent from the reviewed copy (syntax error) and
-- are restored here; confirm against the upstream module.
local toneConvFromNumbers = {
	['1'] = '',
	['2'] = 'z',
	['3'] = 'j',
	['4'] = 'x',
	['5'] = 'q',
	['6'] = 'h',
	['7'] = '',
	['7:'] = '',
	['8'] = '',
}
-- Consonant spellings that differ in the 1957 orthography (the 'old' scheme).
-- Consonants not listed here are left unchanged by export.convert.
-- NOTE(review): the keys were absent from the reviewed copy (syntax error) and
-- are restored here; confirm against the upstream module.
local consonantConv_1957 = {
	['mb'] = 'ƃ',
	['nd'] = 'ƌ',
	['ng'] = 'ŋ',
	['ngv'] = 'ŋv',
}
-- Vowel spellings that differ in the 1957 orthography (the 'old' scheme).
-- export.convert passes this table as the replacement argument to gsub, so
-- each key must equal the text matched by the corresponding pattern.
-- NOTE(review): the keys were absent from the reviewed copy (syntax error).
-- They are restored here as 'oe', 'ae' and 'w'; confirm against the upstream
-- module.
local vowelConv_1957 = {
	['oe'] = 'ɵ',
	['ae'] = 'ə',
	['w'] = 'ɯ',
}
-- Maps a tone number (as a string) to the tone letter of the 1957 orthography.
-- Tones 1, 7, 7: and 8 are unmarked, hence the empty strings.
-- NOTE(review): the keys were absent from the reviewed copy (syntax error) and
-- are restored here; confirm against the upstream module.
local toneConv_1957 = {
	['1'] = '',
	['2'] = 'ƨ',
	['3'] = 'з',
	['4'] = 'ч',
	['5'] = 'ƽ',
	['6'] = 'ƅ',
	['7'] = '',
	['7:'] = '',
	['8'] = '',
}
-- Normalises Zhuang text into syllables that each carry an explicit tone digit.
--
-- For every word matched in `text` this:
--   1. replaces tone letters with tone digits via toneConvToNumbers,
--   2. inserts "|" syllable boundaries (working on the reversed string),
--   3. repairs boundaries that produced an invalid initial,
--   4. appends a tone digit before each boundary that lacks one
--      ("h|" becomes "6|"; otherwise 7, 8 or 1 depending on the ending),
--   5. removes the "|" markers again.
-- The processed words are concatenated and returned as a single string.
-- Intermediate states are written to the debug log with mw.log.
--
-- NOTE(review): in the reviewed copy every bracketed character class has been
-- lost from the pattern strings below (e.g. '(+)|', '$'). They are kept exactly
-- as found because the original classes cannot be recovered from this file;
-- restore them from the upstream module before relying on this function.
local function fix(text)
	local output = {}
	for word in gmatch(text, '\'?+*') do
		local apostrophe, word, nonword = match(word, '(\'?)(+)(*)')
		word = gsub(word, '', toneConvToNumbers) -- excludes h which is ambiguously tone or consonant

		-- /CV-CV/...=<CVCV>...
		-- /CVC-V/...=<CVC'V>...
		-- regex (pattern?) wildcards are greedy from the beginning of the string
		-- so counteract this by reversing the string
		-- so if we look for "()" it will first match what was originally the last CVC sequence
		-- (or something)
		word = reverse(word)
		word = '|' .. gsub(word, '(g??)(??+)(???)', '%1%2%3|')
		-- "+" seems to be needed after ""
		-- correct: "daeuz"→"daeuz" wrong: "daeuz"→"da|euz"
		word = reverse(word)
		mw.log('za1>' .. word)

		-- fix bad initial consonant: "|hya"→"h|ya", "|ngya"→"n|gya"
		word = gsub(word, '(|)()()(?)()', function(x,a,b,c,d)
			-- Only move the boundary when the captured consonant cluster is
			-- not a known initial. The previous test, `not initialConv`, was
			-- always false because initialConv is a table, so this repair
			-- never ran.
			-- NOTE(review): the lookup key a..b..c is inferred from the two
			-- examples in the comment above ("hy" and "ngy" are not initials).
			if not initialConv[a .. b .. c] then
				return a..x..b..c..d
			end
		end)

		word = gsub(word, '(+)(g?)(|)', function(v,c,x)
			-- if there is a final consonant,
			if c ~= '' then
				-- and vowel sequence is not a sequence that only appears before finals,
				if not match(v, '^e?$') then
					-- detect valid ...VC sequence at end of string
					return reverse(gsub(reverse(v..c..x), '(|)(+)(e?)', '%1%2%3|'))
				end
			end
		end)
		word = gsub(word, '|gvu', 'g|vu')
		mw.log('za2>' .. word)

		-- A syllable-final "h" is the tone-6 letter.
		word = gsub(word, 'h|', '6|')
		-- Append a tone digit to every remaining syllable: 7, 8, or 1 as the default.
		word = gsub(word, '(+)|', function(a)
			if match(a, '$') then
				return a..'7|'
			elseif match(a, '$') and not match(a, 'ng$') then
				return a..'8|'
			else
				return a..'1|'
			end
		end)
		mw.log('za3>' .. word)

		table.insert(output, apostrophe .. gsub(word, '|', '') .. nonword)
	end
	return table.concat(output)
end
-- Converts Standard Zhuang text into the representation named by `scheme`.
--
-- Parameters:
--   text    : the text to convert, or a frame-like table whose `args` hold
--             the text, scheme and new_bor values.
--   scheme  : one of 'IPA', 'old', 'hyphenation', 'tone_numbers',
--             'raw_syllables'.
--   new_bor : when truthy, tone '1' is rewritten to tone '5' for the 'IPA' and
--             'tone_numbers' schemes.
-- Returns a string for 'IPA', 'old', 'hyphenation' and 'tone_numbers', and the
-- table of syllable strings for 'raw_syllables'.
-- Raises an error for any other scheme (once at least one syllable is found).
--
-- NOTE(review): in the reviewed copy every bracketed character class has been
-- lost from the pattern strings below (e.g. '^*', '+%d*', and the empty pattern
-- used to detect capitals, which as written always matches). They are kept
-- exactly as found because the original classes cannot be recovered from this
-- file; restore them from the upstream module.
function export.convert(text, scheme, new_bor)
	if type(text) == "table" then
		-- Called with a frame-like table: take the arguments from its `args`.
		-- Previously all three variables were assigned the whole args table.
		-- NOTE(review): positional indexes 1-3 are presumed; confirm that
		-- new_bor is not passed as a named argument upstream.
		text, scheme, new_bor = text.args[1], text.args[2], text.args[3]
	end

	local converted = {}
	local extra_pre = match(text, '^*')
	text = fix(text)
	mw.log('za4>' .. text)

	for syllable in gmatch(text, '+%d*') do
		local initial, vowel, final, tone, extra = match(syllable, '^(???)(??)(?g?)(%d)(*)$')
		local caps = false
		mw.log('za5>' .. initial, vowel, final, tone, extra)

		if find(initial .. vowel .. final, '') then
			caps = true
			initial, vowel, final = lower(initial), lower(vowel), lower(final)
		end

		if scheme == 'IPA' then
			-- Each part is looked up in its conversion table. Previously the
			-- tables themselves were assigned, which made the concatenation
			-- below raise an error.
			initial = initialConv[initial]
			-- When `alone` is false (a vowel that only occurs before a final),
			-- the and/or expression falls through to `wfinal`.
			vowel = final == '' and vowelConv[vowel].alone or vowelConv[vowel].wfinal
			final = finalConv[final]
			if tone == '7' and find(vowel, 'ː') then
				-- Tone 7 on a long vowel uses its own contour.
				tone = '7:'
			elseif new_bor and tone == '1' then
				tone = '5'
			end
			tone = toneConv[tone]
			syllable = initial .. vowel .. final .. tone
			table.insert(converted, syllable)
		elseif scheme == 'old' then
			-- Consonants without a 1957 counterpart are kept unchanged.
			initial = consonantConv_1957[initial] or initial
			vowel = gsub(vowel, 'e', vowelConv_1957)
			vowel = gsub(vowel, 'w', vowelConv_1957)
			final = consonantConv_1957[final] or final
			tone = toneConv_1957[tone]
			if vowel == 'ə' and final == '' then
				vowel = 'əi'
			elseif vowel == 'aɯ' and final == '' then
				vowel = 'əɯ'
			end
			syllable = initial .. vowel .. final .. tone .. extra
			if caps then syllable = gsub(syllable, '^(.)', upper) end
			table.insert(converted, syllable)
		elseif scheme == 'hyphenation' then
			tone = toneConvFromNumbers[tone]
			extra = gsub(extra, '\'', '')
			syllable = initial .. vowel .. final .. tone .. extra
			if caps then syllable = gsub(syllable, '^(.)', upper) end
			table.insert(converted, syllable)
		elseif scheme == 'tone_numbers' then
			if new_bor and tone == '1' then
				tone = '5'
			end
			extra = gsub(extra, '\'', '')
			syllable = initial .. vowel .. final .. '<sup>' .. tone .. '</sup>' .. extra
			if caps then syllable = gsub(syllable, '^(.)', upper) end
			table.insert(converted, syllable)
		elseif scheme == 'raw_syllables' then
			table.insert(converted, syllable)
		else
			error('Convert to what representation?')
		end
	end

	-- Join the per-syllable results in the way each scheme expects.
	if scheme == 'IPA' then
		converted = '/' .. table.concat(converted, ' ') .. '/'
	elseif scheme == 'old' then
		converted = extra_pre .. table.concat(converted, '')
		converted = mw.ustring.gsub(mw.ustring.gsub(converted, "()'", "%1"), "()'", "%1")
	elseif scheme == 'hyphenation' then
		converted = gsub(extra_pre .. table.concat(converted, '‧'), ' ', '')
	elseif scheme == 'tone_numbers' then
		converted = extra_pre .. table.concat(converted, '')
	elseif scheme == 'raw_syllables' then
		-- (pass) the table of syllables is returned as is
	end
	return converted
end
-- Template entry point: builds the pronunciation lines for a Zhuang entry.
-- Reads the parent frame's arguments through Module:parameters, falls back to
-- the current page title when no text is supplied, and returns three items
-- (IPA, tone numbers, hyphenation) joined with '\n* '.
-- NOTE(review): in the reviewed copy the bracketed keys of `params` and the
-- indexes on `args` are missing, so this block does not parse as shown. The
-- parameter names cannot be recovered from this file; restore them from the
-- upstream module.
function export.show(frame)
-- Parameter specification handed to Module:parameters (keys missing, see above).
local params = {
= { },
= { type = "boolean" },
}
local args = require("Module:parameters").process(frame:getParent().args, params)
-- NOTE(review): both variables receive the whole `args` table as written;
-- presumably each was indexed by one of the parameter names above.
local text, new_bor = args, args
-- Default to the title of the page being rendered.
if not text then text = mw.title.getCurrentTitle().text end
local ret = {}
-- First item: accent qualifier followed by the formatted IPA transcription.
table.insert(
ret,
require("Module:accent qualifier").format_qualifiers(lang, {"Standard Zhuang"}) ..
" " ..
require("Module:IPA").format_IPA_full {
lang = lang,
items = {{ pron = export.convert(text, "IPA", new_bor) }}
}
)
-- Second item: the spelling with superscript tone numbers.
table.insert(
ret,
'Tone numbers: ' ..
export.convert(text, 'tone_numbers', new_bor)
)
-- Third item: the syllable-separated spelling.
-- NOTE(review): the trailing ']' looks like the remainder of a stripped
-- wikilink or category link; confirm against the upstream module.
table.insert(
ret,
'Hyphenation: ' ..
export.convert(text, 'hyphenation', new_bor) ..
']'
)
return table.concat(ret, '\n* ')
end
-- Reports whether the supplied text is Latin-script: returns 'y' when the
-- second pattern matches, and '' otherwise.
-- NOTE(review): in the reviewed copy both patterns are empty strings and
-- `frame.args` is not indexed, so the bracketed character classes and the
-- argument index appear to have been lost. An empty pattern would match any
-- string, which would make the first branch always return ''. Restore the
-- patterns and the index from the upstream module.
function export.is_latin(frame)
local text = frame.args
-- Presumably rejects text containing characters outside the accepted set.
if find(text, '') then
return ''
-- Presumably accepts text containing Latin letters.
elseif find(text, '') then
return 'y'
else
return '' -- CJK is too much of a pain to detect
end
end
return export