This module will sort Zhuang language text.
The module should preferably not be called directly from templates or other modules.
To use it from a template, use {{sortkey}}.
Within a module, use Module:languages#Language:makeSortKey.
For testcases, see Module:za-sortkey/testcases.
makeSortKey(text, lang, sc)
Generates a sortkey for a given piece of text written in the script specified by the code sc, and language specified by the code lang. When the sort fails, returns nil.
a | ae | (ə) | b | by | c | d | e | f | g | gv | gy | h | i | k | l | m | mb | (ƃ) | my | n | nd | (ƌ) | ng | (ŋ) | ngv | (ŋv) | ny | o | oe | (ɵ) | p | r | s | t | u | v | w | (ɯ) | y |
A | A₂ | (A₂ₐ) | B | B₂ | C | D | E | F | G | G₂ | G₃ | H | I | K | L | M | M₂ | (M₂ₐ) | M₃ | N | N₂ | (N₂ₐ) | N₃ | (N₃ₐ) | N₄ | (N₄ₐ) | N₅ | O | O₂ | (O₂ₐ) | P | R | S | T | U | V | W | Wₐ | Y |
Note: letters from the old orthography (in brackets) are sorted immediately after their new equivalents.
z | (ƨ) | j | (з) | x | (ч) | q | (ƽ) | h | (ƅ) |
² | (²ᵃ) | ³ | (³ᵃ) | ⁴ | (⁴ᵃ) | ⁵ | (⁵ᵃ) | ⁶ | (⁶ᵃ) |
Note: "h" will sort as H if used as a consonant, or ⁶ if used as a tone letter.
If a syllable has no tone letter but ends with a consonant, then the following tone values are used:
m | n | ng | (ŋ) | k | p | t | b | d | g |
M¹ | N¹ | N₃¹ | (N₃ₐ¹) | K⁷ | P⁷ | T⁷ | B⁸ | D⁸ | G⁸ |
If new_bor=y or new_bor=1 is detected as a parameter of {{za-pron}} on the page, then tone 5 is substituted for tone 1 in the sortkey. If {{za-1957 spelling of}} or {{za-1957 orthography of}} is detected on the page, then the page for the new orthography is checked for new_bor=y or new_bor=1 as well.
N₂A₂³ DAN₃³ VUEN₃²DA₂⁵ SIEN₃¹ DAN₃³ SIEN¹
LWG⁸FWN₃²G₃AN₃¹
LAN₃⁶ BIT⁷ RO₂N₃² RA₂M⁴
FAN₃²CWN₃²GAN₃³ᵃ
GIEN²N₂AN₃¹CAN₃⁵
Tone 5 substitution:
GUN₃⁵CAN³DAN₃³
GUN₃⁵CAN³DAN₃³ᵃ
(new_bor=1 parameter on gungcanjdangj)
B₂A¹
B₂A²
B₂A²ᵃ
B₂A³
B₂A³ᵃ
B₂A⁴
B₂A⁴ᵃ
B₂A⁵
B₂A⁵ᵃ
B₂A⁶
B₂A⁶ᵃ
B₂AB⁸
B₂AD⁸
B₂AG⁸
B₂AK⁷
B₂AM¹
B₂AN¹
B₂AN₃¹
B₂AN₃¹!
B₂AP⁷
B₂AT⁷

A¹A¹
A¹BA¹
A¹BA¹
A¹DA¹
A¹DA¹
A¹GA¹
A¹GA¹
A¹HA¹
A¹HA¹
A¹KA¹
A¹MA¹
A¹MA¹
A¹NA¹
A¹NA¹
A¹N₃A¹
A¹PA¹
A¹TA¹
A²A¹
A²A¹!
A³A¹
A³A¹!
A⁴A¹
A⁴A¹!
A⁵A¹
A⁵A¹!
A⁶A¹
A⁶A¹!
AB²A¹
AB²A¹!
AB³A¹
AB³A¹!
AB⁴A¹
AB⁴A¹!
AB⁵A¹
AB⁵A¹!
AB⁶A¹
AB⁶A¹!
AB⁸A¹
AB⁸HA¹
AB⁸HA¹
AD²A¹
AD²A¹!
AD³A¹
AD³A¹!
AD⁴A¹
AD⁴A¹!
AD⁵A¹
AD⁵A¹!
AD⁶A¹
AD⁶A¹!
AD⁸A¹
AD⁸HA¹
AD⁸HA¹
AG²A¹
AG²A¹!
AG³A¹
AG³A¹!
AG⁴A¹
AG⁴A¹!
AG⁵A¹
AG⁵A¹!
AG⁶ᵃ
AG⁶A¹
AG⁸A¹
AG⁸HA¹
AG⁸HA¹
AK²A¹
AK²A¹!
AK³A¹
AK³A¹!
AK⁴A¹
AK⁴A¹!
AK⁵A¹
AK⁵A¹!
AK⁶A¹
AK⁶A¹!
AK⁷HA¹
AK⁷HA¹
AM¹A¹
AM¹HA¹
AM¹HA¹
AM²A¹
AM²A¹!
AM³A¹
AM³A¹!
AM⁴A¹
AM⁴A¹!
AM⁵A¹
AM⁵A¹!
AM⁶A¹
AM⁶A¹!
AN¹A¹
AN¹GA¹
AN¹HA¹
AN¹HA¹
AN²A¹
AN²A¹!
AN³A¹
AN³A¹!
AN⁴A¹
AN⁴A¹!
AN⁵A¹
AN⁵A¹!
AN⁶A¹
AN⁶A¹!
AN₃¹!
AN₃¹A¹
AN₃¹HA¹
AN₃¹HA¹
AN₃²A¹
AN₃²A¹!
AN₃³A¹
AN₃³A¹!
AN₃⁴A¹
AN₃⁴A¹!
AN₃⁵A¹
AN₃⁵A¹!
AN₃⁶A¹
AN₃⁶A¹!
AP²A¹
AP²A¹!
AP³A¹
AP³A¹!
AP⁴A¹
AP⁴A¹!
AP⁵A¹
AP⁵A¹!
AP⁶A¹
AP⁶A¹!
AP⁷HA¹
AP⁷HA¹
AT²A¹
AT²A¹!
AT³A¹
AT³A¹!
AT⁴A¹
AT⁴A¹!
AT⁵A¹
AT⁵A¹!
AT⁶A¹
AT⁶A¹!
AT⁷HA¹
AT⁷HA¹
local export = {}

local u = require("Module:string/char")

-- Lua pattern matching exactly one UTF-8 encoded character: one lead byte
-- (ASCII, NUL via %z for Lua 5.1, or a multi-byte lead 0xC2-0xF4) followed by
-- any continuation bytes. It is used with string.gsub and a lookup table so
-- that each whole character is looked up, never a fragment of one.
local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*"

-- Private-use code points. a-d are appended to a base letter when building the
-- final key (e.g. "a" .. a); the remaining ones stand in for a whole consonant
-- cluster so that the cluster can be treated as a single character.
local a, b, c, d = u(0xF000), u(0xF001), u(0xF002), u(0xF003)
local b2 = u(0xF100)
local g2, g3 = u(0xF200), u(0xF201)
local m2, m4 = u(0xF300), u(0xF301)
local n2, n4, n6, n7, n8 = u(0xF400), u(0xF401), u(0xF402), u(0xF403), u(0xF404)

-- Characters stripped from the key after NFD decomposition.
local remove_diacritics = "'" -- apostrophe
-- NOTE(review): the keys of the tables below were missing from the source
-- (each entry read `= value`, which does not parse). They have been restored
-- from the collation tables in this module's documentation, whose entries
-- line up one-to-one with the values; confirm against the module history.

-- Unconditional tone letters -> tone number. "!" marks an old-orthography letter.
local oneCharInit = {
	["z"] = "2", ["ƨ"] = "2!", ["j"] = "3", ["з"] = "3!", ["x"] = "4", ["ч"] = "4!", ["q"] = "5", ["ƽ"] = "5!", ["ƅ"] = "6!"
}
-- Two-letter consonant clusters -> single placeholder character.
local twoCharsInit = {
	["by"] = b2, ["gv"] = g2, ["gy"] = g3, ["mb"] = m2, ["my"] = m4, ["nd"] = n2, ["ng"] = n4, ["ŋv"] = n7, ["ny"] = n8
}
-- Three-letter consonant clusters -> single placeholder character.
local threeCharsInit = {
	["ngv"] = n6
}
-- Letters that are a tone letter only in some positions ("h" may also be a consonant).
local conditionalTones1 = {
	["h"] = "6"
}
-- Syllable-final consonants and the tone number they imply when no tone letter is written.
local conditionalTones2 = {
	["m"] = "m1", ["n"] = "n1", [n4] = n4 .. "1", ["ŋ"] = "ŋ1", ["k"] = "k7", ["p"] = "p7", ["t"] = "t7", ["b"] = "b8", ["d"] = "d8", ["g"] = "g8"
}
-- Placeholders and non-ASCII letters -> final sortable form (base letter plus rank suffix).
local oneCharFinal = {
	["ə"] = "a" .. a .. "!", [b2] = "b" .. a, [g2] = "g" .. a, [g3] = "g" .. b, [m2] = "m" .. a, ["ƃ"] = "m" .. a .. "!", [m4] = "m" .. b, [n2] = "n" .. a, ["ƌ"] = "n" .. a .. "!", [n4] = "n" .. b, ["ŋ"] = "n" .. b .. "!", [n6] = "n" .. c, [n7] = "n" .. c .. "!", [n8] = "n" .. d, ["ɵ"] = "o" .. a .. "!", ["ɯ"] = "w!"
}
-- Vowel digraphs -> final sortable form.
local twoCharsFinal = {
	["ae"] = "a" .. a, ["oe"] = "o" .. a
}
-- Reports whether `content` contains a {{za-pron}} call carrying new_bor=1 or new_bor=y.
local function hasNewBorFlag(content)
	return mw.ustring.match(content, "{{za%-pron|.*new_bor=1}}") ~= nil
		or mw.ustring.match(content, "{{za%-pron|.*new_bor=y}}") ~= nil
end

-- Fetches the wikitext of the page named `titleText`. Yields "" when the name
-- is empty, is not a valid title (mw.title.new gives nil), or has no content.
local function getPageContent(titleText)
	if type(titleText) ~= "string" or titleText == "" then
		return ""
	end
	local title = mw.title.new(titleText)
	return title and title:getContent() or ""
end

-- Builds the sort key for a piece of Zhuang text.
-- @param text  the term to sort; also used as the page title whose wikitext is
--              inspected for the tone-5 substitution
-- @param lang  language code (not used by this function)
-- @param sc    script code (not used by this function)
-- @return the upper-cased, NFC-normalised sort key
function export.makeSortKey(text, lang, sc)
	local origText = text
	text = mw.ustring.lower(text)
	-- convert any consonant clusters to single characters, which is necessary for later regexes, and unconditional tone letters to numbers
	for from, to in pairs(threeCharsInit) do
		text = text:gsub(from, to)
	end
	for from, to in pairs(twoCharsInit) do
		text = text:gsub(from, to)
	end
	text = text:gsub(UTF8_char, oneCharInit)
	-- conditionally convert any conditional tone letters to numbers (e.g. "h" can be a consonant or a tone letter)
	-- NOTE(review): in the five patterns below written with "()", the parentheses
	-- are empty, so they are position captures and "%1"/"%2" insert a number.
	-- A character class appears to have been lost from each; they are kept
	-- verbatim here and must be restored from the module history.
	for from, to in pairs(conditionalTones1) do
		text = text:gsub(from .. "$", to)
		text = mw.ustring.gsub(text, from .. "()", to .. "%1")
	end
	-- conditionally add a tone number to any syllable-final consonants which do not have them
	for from, to in pairs(conditionalTones2) do
		text = text:gsub(from .. "$", to)
		text = mw.ustring.gsub(text, from .. "()", to .. "%1")
	end
	-- conditionally add a tone number to any syllable-final vowels which do not have them
	text = mw.ustring.gsub(text, "()$", "%11")
	text = mw.ustring.gsub(text, "()1$", "%1")
	text = mw.ustring.gsub(text, "()()", "%11%2")
	-- convert clusters and non-ASCII characters to final form, to achieve correct order
	for from, to in pairs(twoCharsFinal) do
		text = text:gsub(from, to)
	end
	text = text:gsub(UTF8_char, oneCharFinal)
	-- move "!" to the end and remove any duplicates, to ensure old orthography terms are sorted immediately after their new equivalents
	for _ in text:gmatch("!") do
		text = text:gsub("(!)(.+)", "%2%1")
	end
	text = text:gsub("!+", "!")
	-- if tone 5 is substituted for tone 1 in pronunciation, also substitute in sortkey (i.e. as though "q" were written)
	local page = getPageContent(origText)
	if hasNewBorFlag(page) then
		text = mw.ustring.gsub(text, "1", "5")
	else
		-- if the page has an old orthography template, check the modern orthography page
		-- and substitute if the flag is present there (i.e. as though "ƽ" were written);
		-- "spelling of" takes precedence over "orthography of" when both occur
		local parentTitle = mw.ustring.match(page, "{{za%-1957 spelling of|(.-)}}")
			or mw.ustring.match(page, "{{za%-1957 orthography of|(.-)}}")
		if parentTitle and hasNewBorFlag(getPageContent(parentTitle)) then
			text = mw.ustring.gsub(text, "1", "5" .. a)
		end
	end
	-- decompose, remove appropriate diacritics, then recompose again
	local decomposed = mw.ustring.toNFD(text)
	local stripped = mw.ustring.gsub(decomposed, "[" .. remove_diacritics .. "]", "")
	return mw.ustring.upper(mw.ustring.toNFC(stripped))
end
-- Language object obtained from Module:languages for the code "za".
local za = require("Module:languages").getByCode("za")

-- Passes `text` and the `za` language object to tag_text from
-- Module:script utilities and returns whatever that call returns.
local function tag(text)
	local scriptUtilities = require("Module:script utilities")
	return scriptUtilities.tag_text(text, za)
end
-- NOTE(review): the keys of both tables were missing from the source (each
-- entry read `= value`, which does not parse). They have been restored from
-- the sort-key alphabet in this module's documentation and from the values
-- that makeSortKey emits (upper-cased); confirm against the module history.

-- Tone digits in a sort key -> superscript digits for display.
local showsubst1 = {
	["0"] = "⁰", ["1"] = "¹", ["2"] = "²", ["3"] = "³", ["4"] = "⁴", ["5"] = "⁵", ["6"] = "⁶", ["7"] = "⁷", ["8"] = "⁸"
}
-- Internal multi-character sort-key sequences -> readable display form.
-- Applied before showsubst1, so tone keys still contain plain digits here.
local showsubst2 = {
	["2!"] = "²ᵃ", ["3!"] = "³ᵃ", ["4!"] = "⁴ᵃ", ["5!"] = "⁵ᵃ", ["6!"] = "⁶ᵃ", ["A" .. a] = "A₂", ["A" .. a .. "!"] = "A₂ₐ", ["B" .. a] = "B₂", ["G" .. a] = "G₂", ["G" .. b] = "G₃", ["M" .. a] = "M₂", ["M" .. a .. "!"] = "M₂ₐ", ["M" .. b] = "M₃", ["N" .. a] = "N₂", ["N" .. a .. "!"] = "N₂ₐ", ["N" .. b] = "N₃", ["N" .. b .. "!"] = "N₃ₐ", ["N" .. c] = "N₄", ["N" .. c .. "!"] = "N₄ₐ", ["N" .. d] = "N₅", ["O" .. a] = "O₂", ["O" .. a .. "!"] = "O₂ₐ", ["W!"] = "Wₐ"
}
-- Renders, for every positional argument of `frame`, a list item holding the
-- display form of its sort key followed by the tagged term itself.
-- @param frame  Scribunto frame; frame.args supplies the terms
-- @return the concatenated wikitext for all terms
function export.showSortkey(frame)
	-- Rewrites a raw sort key into its display form: multi-character
	-- sequences first (showsubst2), then the individual digits (showsubst1).
	local function toDisplay(key)
		for pattern, replacement in pairs(showsubst2) do
			key = mw.ustring.gsub(key, pattern, replacement)
		end
		for pattern, replacement in pairs(showsubst1) do
			key = mw.ustring.gsub(key, pattern, replacement)
		end
		return key
	end

	local lines = {}
	for _, word in ipairs(frame.args) do
		local scriptCode = za:findBestScript(word):getCode()
		local shown = toDisplay(export.makeSortKey(word, "za", scriptCode))
		lines[#lines + 1] = "\n* <code>" .. shown .. "</code>\n: " .. tag(word)
	end
	return table.concat(lines)
end
-- Sorts the positional arguments of `frame` by their sort keys and renders
-- them as a list, each term followed by the display form of its key.
-- @param frame  Scribunto frame; frame.args supplies the terms
-- @return the concatenated wikitext for the sorted terms
function export.showSorting(frame)
	local terms = {}
	for _, term in ipairs(frame.args) do
		table.insert(terms, term)
	end
	-- memoise so each term's key is computed once during the sort
	local makeSortKey = require("Module:memoize")(export.makeSortKey)
	local function comp(term1, term2)
		return makeSortKey(term1) < makeSortKey(term2)
	end
	table.sort(terms, comp)
	-- Build the rendered lines in a separate list, indexed in sorted order.
	-- Assigning the rendered string to `terms` itself would replace the table
	-- and make table.concat fail.
	local output = {}
	for i, term in ipairs(terms) do
		local sc = za:findBestScript(term):getCode()
		local sortkey = export.makeSortKey(term, "za", sc)
		for from, to in pairs(showsubst2) do
			sortkey = mw.ustring.gsub(sortkey, from, to)
		end
		for from, to in pairs(showsubst1) do
			sortkey = mw.ustring.gsub(sortkey, from, to)
		end
		output[i] = "\n* " .. tag(term) .. " (<code>" .. sortkey .. "</code>)"
	end
	return table.concat(output)
end
return export