This module provides lexicographic tools for Vietnamese language text.
In a template:
{{#invoke:vi|toReformedTones|xóa}}
→ xoá
{{#invoke:vi|toTraditionalTones|xoá}}
→ xóa
{{#invoke:vi|removeDiacritics|thay đổi gần đây}}
→ thay doi gan day
{{#invoke:vi|removeDiacritics|thay đổi gần đây|tones=0}}
→ thay dỏi gàn day
{{#invoke:vi|removeDiacritics|thay đổi gần đây|accents=0}}
→ thay dôi gân dây
{{#invoke:vi|removeDiacritics|thay đổi gần đây|đ=0}}
→ thay đoi gan đay
In another module:
viet = require "Module:vi"
t = {"an ninh", "bóng rổ", "Ả Rập", "bóng đá", "ăn", "Á Châu"}
table.sort(t, viet.comp)
sorts t in place into Vietnamese dictionary order.
For best results, call toTraditionalTones() or toReformedTones() on each string before sorting them using comp().
---Lexicographic tools for Vietnamese language text.
local m_str_utils = require("Module:string utilities")
local find = m_str_utils.find
local format = string.format
local gmatch = m_str_utils.gmatch
local gsub = m_str_utils.gsub
local len = m_str_utils.len
local lower = m_str_utils.lower
local match = m_str_utils.match
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
local u = m_str_utils.char
local upper = m_str_utils.upper
local lang = require("Module:languages").getByCode("vi")
local export = {}
---Converts the given text to traditional tone marks.
-- Only the last two letters of each word are inspected: a reformed
-- "oa"/"oe"/"uy" ending that carries its tone mark on the second vowel
-- ("xoá") is respelled with the mark on the first vowel ("xóa").
-- @param text The text to convert, or a frame object whose first positional
--   argument holds the text (template invocation).
-- @return The converted text.
function export.toTraditionalTones(text)
	if type(text) == "table" then
		text = text.args[1]
	end
	return (gsub(text, "%a+", function (word)
		-- In "quý", "quỳ", … the "u" belongs to the initial "qu", so the
		-- tone mark has to stay on the "y". The capitalised form is guarded
		-- too, otherwise "Quý" would be respelled as "Qúy".
		if match(word, "^[Qq]u[ýỳỷỹỵ]$") then return word end
		return (gsub(word, "%a%a$", {
			["oá"] = "óa", ["oà"] = "òa", ["oả"] = "ỏa", ["oã"] = "õa", ["oạ"] = "ọa",
			["oé"] = "óe", ["oè"] = "òe", ["oẻ"] = "ỏe", ["oẽ"] = "õe", ["oẹ"] = "ọe",
			["uý"] = "úy", ["uỳ"] = "ùy", ["uỷ"] = "ủy", ["uỹ"] = "ũy", ["uỵ"] = "ụy"
		}))
	end))
end
---Converts the given text to reformed tone marks.
-- Only the last two letters of each word are inspected: a traditional
-- "oa"/"oe"/"uy" ending that carries its tone mark on the first vowel
-- ("xóa") is respelled with the mark on the second vowel ("xoá").
-- @param text The text to convert, or a frame object whose first positional
--   argument holds the text (template invocation).
-- @return The converted text.
function export.toReformedTones(text)
	if type(text) == "table" then
		text = text.args[1]
	end
	return (gsub(text, "%a+", function (word)
		return (gsub(word, "%a%a$", {
			["óa"] = "oá", ["òa"] = "oà", ["ỏa"] = "oả", ["õa"] = "oã", ["ọa"] = "oạ",
			["óe"] = "oé", ["òe"] = "oè", ["ỏe"] = "oẻ", ["õe"] = "oẽ", ["ọe"] = "oẹ",
			["úy"] = "uý", ["ùy"] = "uỳ", ["ủy"] = "uỷ", ["ũy"] = "uỹ", ["ụy"] = "uỵ"
		}))
	end))
end
---Generate alternative orthographies.
-- Runs the spelling through both tone-placement conventions and keeps each
-- distinct result once, traditional form first.
-- @param main_spelling The spelling to expand, or a frame object whose first
--   positional argument is the spelling and whose `link` argument plays the
--   role of `makeLinks`.
-- @param makeLinks When truthy, each spelling is replaced by the result of
--   Module:links' full_link() for Vietnamese.
-- @return When called with a frame, the spellings joined with "/";
--   otherwise the list of spellings.
function export.allSpellings(main_spelling, makeLinks)
	local frame = nil
	if type(main_spelling) == "table" then
		frame = main_spelling
		main_spelling, makeLinks = frame.args[1], frame.args.link
	end

	local xformers = {
		export.toTraditionalTones, export.toReformedTones,
	}
	local spellings = {}
	-- Spellings already collected; kept apart from the list so the returned
	-- table is a plain sequence.
	local seen = {}
	for _, xformer in ipairs(xformers) do
		local alt_spelling = xformer(main_spelling)
		if not seen[alt_spelling] then
			seen[alt_spelling] = true
			spellings[#spellings + 1] = alt_spelling
		end
	end

	if makeLinks then
		local m_links = require("Module:links")
		for k, term in ipairs(spellings) do
			spellings[k] = m_links.full_link({lang = lang, term = term})
		end
	end

	if frame then
		return table.concat(spellings, "/")
	end
	return spellings
end
---String holding the combining characters used for Vietnamese tone marks.
export.combiningToneMarks = u(
	0x0300, -- U+0300 COMBINING GRAVE ACCENT, as in à
	0x0301, -- U+0301 COMBINING ACUTE ACCENT, as in á
	0x0303, -- U+0303 COMBINING TILDE, as in ã
	0x0309, -- U+0309 COMBINING HOOK ABOVE, as in ả
	0x0323  -- U+0323 COMBINING DOT BELOW, as in ạ
)
---String holding the combining characters used for Vietnamese accent
-- (vowel-quality) marks.
export.combiningAccentMarks = u(
	0x0302, -- U+0302 COMBINING CIRCUMFLEX ACCENT, as in â
	0x0306, -- U+0306 COMBINING BREVE, as in ă
	0x031B  -- U+031B COMBINING HORN, as in ơ
)
---Strips Vietnamese diacritical marks from the given text.
-- The text is decomposed (NFD) so that each mark becomes a separate
-- combining character, the requested marks are deleted, and the result is
-- recomposed (NFC).
--
-- Template arguments (when `text` is a frame object):
-- @param tones Set to “0” to leave tone marks intact.
-- @param accents Set to “0” to leave accent marks intact.
-- @param đ Set to “0” to leave “Đ” and “đ” intact.
--
-- Lua arguments:
-- @param text The text to strip, or a frame object (see above).
-- @param toneMarks Truthy to remove tone marks.
-- @param accentMarks Truthy to remove accent marks (â, ă, ơ, …).
-- @param stroke Truthy to turn “Đ”/“đ” into “D”/“d”.
-- @return The stripped text in NFC.
function export.removeDiacritics(text, toneMarks, accentMarks, stroke)
	if type(text) == "table" then
		local args = text.args
		-- A missing argument means "remove"; otherwise only the value 1 does.
		text, toneMarks, accentMarks, stroke = args[1],
			not args.tones or tonumber(args.tones) == 1,
			not args.accents or tonumber(args.accents) == 1,
			not args["đ"] or tonumber(args["đ"]) == 1
	end
	text = toNFD(text)
	if toneMarks then
		text = gsub(text, "[" .. export.combiningToneMarks .. "]", "")
	end
	if accentMarks then
		text = gsub(text, "[" .. export.combiningAccentMarks .. "]", "")
	end
	if stroke then
		-- The stroked D has no canonical decomposition, so map it directly.
		text = gsub(text, "[Đđ]", {["Đ"] = "D", ["đ"] = "d"})
	end
	return toNFC(text)
end
---Vietnamese letters for use in comp().
-- compWord() ranks a character by its position in this string, so the order
-- of the characters here is the collation order. Each base letter is listed
-- as lowercase then uppercase, followed by its toned forms in the same
-- lowercase/uppercase pairing.
export.letters = "aAàÀảẢãÃáÁạẠăĂằẰẳẲẵẴắẮặẶâÂầẦẩẨẫẪấẤậẬbBcCdDđĐeEèÈẻẺẽẼéÉẹẸêÊềỀểỂễỄếẾệỆfFgGhHiIìÌỉỈĩĨíÍịỊjJkKlLmMnNoOòÒỏỎõÕóÓọỌôÔồỒổỔỗỖốỐộỘơƠờỜởỞỡỠớỚợỢpPqQrRsStTuUùÙủỦũŨúÚụỤưƯừỪửỬữỮứỨựỰvVwWxXyYỳỲỷỶỹỸýÝỵỴzZ"
---Compare two syllables according to Vietnamese dictionary sorting order.
-- Characters are ranked by their position in export.letters; characters
-- that do not occur there are skipped. If the ranked characters do not
-- decide the order, plain string comparison is used as a tie-breaker.
-- @param word1 First word.
-- @param word2 Second word.
-- @return true when word1 sorts strictly before word2, false otherwise.
function export.compWord(word1, word2)
	-- Prefix guards: a word never sorts before its own prefix (or itself),
	-- and always sorts before a longer word it is a prefix of. A plain find
	-- returns a 1-based index, so the prefix case is index 1.
	if find(word1, word2, 1, true) == 1 then return false end
	if find(word2, word1, 1, true) == 1 then return true end

	-- Character class that matches exactly the letters with a known rank.
	local letter_class = "[" .. export.letters .. "]"
	local func1, static1, var1 = gmatch(word1, letter_class)
	local func2, static2, var2 = gmatch(word2, letter_class)
	while true do
		local c1 = func1(static1, var1)
		local c2 = func2(static2, var2)
		if c1 == nil or c2 == nil then break end
		local idx1 = find(export.letters, c1, 1, true)
		local idx2 = find(export.letters, c2, 1, true)
		if idx1 and idx2 then
			if idx1 < idx2 then return true end
			if idx1 > idx2 then return false end
		end
	end
	return word1 < word2
end
---Compare two strings according to Vietnamese dictionary sorting order.
-- Suitable as the comparator for table.sort(). The texts are compared word
-- by word (runs of letters); the first pair of differing words decides:
--   1. by the lowercased words with tone marks removed,
--   2. if those are equal, by the lowercased words including tones,
--   3. if those are equal too, by the words as written (letter case).
-- A text that runs out of words first sorts first.
-- @param text1 First string.
-- @param text2 Second string.
-- @return true when text1 sorts strictly before text2, false otherwise.
function export.comp(text1, text2)
	if text1 == text2 then return false end

	local func1, static1, var1 = gmatch(text1, "%a+")
	local func2, static2, var2 = gmatch(text2, "%a+")
	while true do
		local word1 = func1(static1, var1)
		local word2 = func2(static2, var2)
		-- Same words throughout but different punctuation or spacing: leave
		-- the loop so the raw comparison below decides. Returning true here
		-- would make comp(a, b) and comp(b, a) both true, which table.sort
		-- rejects as an invalid order function.
		if word1 == nil and word2 == nil then break end
		if word1 == nil then return true end
		if word2 == nil then return false end

		if word1 ~= word2 then
			local lower1 = lower(word1)
			local lower2 = lower(word2)
			local noTones1 = export.removeDiacritics(lower1, true, false, false)
			local noTones2 = export.removeDiacritics(lower2, true, false, false)

			-- Compare base letters (tone marks ignored).
			if noTones1 ~= noTones2 then
				return export.compWord(noTones1, noTones2)
			end

			-- Compare letters case-insensitively, tones included.
			if lower1 ~= lower2 then
				return export.compWord(lower1, lower2)
			end

			-- Only letter case differs.
			return export.compWord(word1, word2)
		end
	end

	return text1 < text2
end
-- Rendering-mode flag shared between export.readings() and export.ruby().
-- readings() sets it to "link" or "nocolor" around its calls to ruby() and
-- resets it to an empty table afterwards; ruby() picks its markup from it.
local pruby = {}

---Maps reference abbreviations to the citation text they stand for.
-- Looked up by export.createRefTag() when expanding Han tu references.
export.refAbbreviations = {
	tvctdhv = "Trần (1999)",
	hvttd = "Nguyễn (1974)",
	vntd = "Văn Mới (1954)",
	tchvtd = "Thiều Chửu (1942)",
	tdcndg = "Nguyễn (2014)",
	tdcntd = "Nguyễn et al. (2009)",
	gdhn = "Trần (2004)",
	dtdcn = "Vũ (1998)",
	btcn = "Hồ (1976)",
	bonet = "Bonet (1899)",
	genibrel = "Génibrel (1898)",
	taberd = "Taberd & Pigneau de Béhaine (1838)",
}
---Creates a <ref> tag whose content is a {{vi-ref}} template call.
-- Expands abbreviations using export.refAbbreviations; a string that is not
-- a known abbreviation is passed through unchanged.
-- @param ref Reference abbreviation or literal reference text; also used as
--   the name of the ref tag.
-- @return The expanded extension tag.
function export.createRefTag(ref)
	local refFullName = export.refAbbreviations[ref] or ref
	return mw.getCurrentFrame():extensionTag("ref", format("{{vi-ref|%s.}}", refFullName), {name = ref})
end
---Builds the wikitext lines that list the Vietnamese readings of the
-- character on the current page, one line per non-empty reading style
-- (Hán Việt, Nôm, Hán Nôm).
-- NOTE(review): the code below appears to have lost every square-bracketed
-- segment during extraction (table indices such as an element index on
-- `readings` / `style.phienthiet`, and the wikilink/category markup inside
-- the format strings). It should be restored from the upstream module
-- before it is relied on.
-- @param hanviet Comma-separated Hán Việt readings, or a frame object; with a
--   frame, all values are read from the parent frame's arguments
--   (hanviet/hv, nom/n, rs/sort, phienthiet/phth/fanqie, reading/readings).
-- @param nom Comma-separated Nôm readings.
-- @param rs Sort key; the current page title is used when absent.
-- @param phienthiet Comma-separated phiên thiết entries (Hán Việt style only).
-- @param reading Comma-separated readings listed under "Hán Nôm".
-- @return The generated lines joined with newlines.
function export.readings(hanviet, nom, rs, phienthiet, reading)
local pagename = mw.title.getCurrentTitle().text
if type(hanviet) == "table" then
local args = hanviet:getParent().args
hanviet, nom, rs, phienthiet, reading =
args.hanviet or args.hv, args.nom or args.n, args.rs or args.sort,
args.phienthiet or args.phth or args.fanqie, args.reading or args.readings
end
local lines = {}
-- One entry per reading style: label link, tracking category, and the
-- split list of readings (nil when the argument was not supplied).
local styles = {
{
link = "Hán Việt",
cat = "Vietnamese Chữ Hán",
list = hanviet and mw.text.split(hanviet, "%s*,%s*"),
phienthiet = phienthiet and mw.text.split(phienthiet, "%s*,%s*")
},
{
link = "chữ Nôm|Nôm",
cat = "Vietnamese Nom",
list = nom and mw.text.split(nom, "%s*,%s*"),
},
{
link = "Hán Nôm",
cat = "Vietnamese Han characters with unconfirmed readings",
list = reading and mw.text.split(reading, "%s*,%s*")
},
}
for i, style in ipairs(styles) do
-- NOTE(review): the duplicated length test presumably checked the first
-- list element in the second clause — confirm against upstream.
if style.list and #style.list > 0 and #style.list > 0 then
local readings = style.list
-- table.sort(readings, export.comp)
-- The loop variable `reading` shadows the function parameter of the same name.
for j, reading in ipairs(readings) do
local ref
-- "reading - refs": anything after a hyphen is treated as references.
local a, b = match(reading, "(.-)%s*%-%s*(.+)")
if a then
reading, ref = a, b
end
local spellings = export.allSpellings(reading, true)
-- NOTE(review): as written this replaces the whole list while it is being
-- iterated; presumably only the current element was assigned.
readings = table.concat(spellings, "/")
-- Linking of "切" to "fanqie" for English explanation
if style.phienthiet and style.phienthiet then
-- pruby tells export.ruby() which markup variant to emit.
pruby = "link"
local ruby = export.ruby(match(mw.text.trim(style.phienthiet),
"(%a+) +(.+)"))
pruby = {}
if ruby then
pruby = "nocolor"
local suffix = export.ruby("切", "thiết")
pruby = {}
readings = format("%s (%s])",
readings, ruby, suffix)
end
end
-- References
if ref then
-- Several references may be given, separated by semicolons.
for ref in mw.text.gsplit(ref, "%s*;%s*") do
readings = readings .. export.createRefTag(ref)
end
end
end
if #readings > 0 then
local sortkey = rs or mw.title.getCurrentTitle().text
readings = table.concat(readings, ", ")
table.insert(lines, format("<span class='Hani' lang='vi' style='font-size: 135%%;'>%s</span>: ''']''' readings: %s] ] ]</br>",
pagename, style.link, readings, style.cat, sortkey))
end
end
end
return table.concat(lines, "\n")
end
---Wraps characters in HTML <ruby> markup annotated with their readings.
-- NOTE(review): the code below appears to have lost every square-bracketed
-- segment during extraction (argument indices, the element index on
-- `alts` / `readings`, the split pattern and the link markup of the "link"
-- format string). It should be restored from the upstream module before it
-- is relied on.
-- @param characters The text to annotate, or a frame object; with a frame,
--   all values are read from the parent frame's arguments.
-- @param readings The readings; when nil, `characters` is returned unchanged.
-- @param mark A character to highlight with <mark>; from a template it
--   defaults to the current page title.
-- @param alts Replacements substituted for "*" placeholders in `characters`;
--   from a template, the `alts` or `ids` argument split on whitespace.
-- @return The annotated text wrapped in a Vietnamese-language <span>, or
--   `characters` itself when no readings are given.
function export.ruby(characters, readings, mark, alts)
if type(characters) == "table" then
local args = characters:getParent().args
characters, readings, mark, alts =
args or "",
args or "",
args.mark or mw.title.getCurrentTitle().text,
((args.alts and mw.text.split(args.alts, "%s+")) or
(args.ids and mw.text.split(args.ids, "%s+")) or {})
end
if not readings then
return characters
end
readings = mw.text.split(readings, "+")
local result = {}
-- character_idx counts annotated characters; alt_idx counts consumed "*"
-- placeholders.
local character_idx = 1
local alt_idx = 1
for character in gmatch(characters, ".") do
local is_alt = false
if character == "*" and alts then
character = alts
is_alt = true
alt_idx = alt_idx + 1
end
-- Annotate placeholders, and characters that the module's `match` treats as
-- a letter but the byte-oriented string method does not treat as
-- alphanumeric (presumably non-ASCII letters such as Han characters —
-- TODO confirm).
if is_alt or (match(character, "^%a$") and not character:match("^%w$")) then
local reading = readings
if mark and character == mark then
character = format("<mark>%s</mark>", character)
reading = format("<mark>%s</mark>", reading)
end
-- The module-level `pruby` flag selects one of three markup variants.
if pruby == 'link' then
character = format(
"<ruby><rb><span class='Hani'; span style='font-size: 100%%'>]</span></rb><rp>(</rp><rt><span style='padding: 0 0.25em; font-size: 135%%;'>]</span></rt><rp>)</rp></ruby>",
character, character, reading, reading)
end
if pruby == 'nocolor' then
character = format(
"<ruby><rb><span class='Hani' style='color:#000000;'>%s</span></rb><rp>(</rp><rt><span style='padding: 0 0.25em; font-size: 125%%;'>%s</span></rt><rp>)</rp></ruby>",
character, reading)
end
if pruby ~= 'link' and pruby ~= 'nocolor' then
character = format(
"<ruby><rb><span class='Hani'>%s</span></rb><rp>(</rp><rt><span style='padding: 0 0.25em;'>%s</span></rt><rp>)</rp></ruby>",
character, reading)
end
character_idx = character_idx + 1
end
-- Characters that were not annotated are passed through as they are.
table.insert(result, character)
end
return format("<span lang='vi' style='font-size: 135%%;'>%s</span>", table.concat(result))
end
---Builds a right-floating HTML table with one cell per character of the
-- current page title.
-- NOTE(review): the filter pattern on the title and the link markup in the
-- header and cell strings appear to have been lost during extraction (the
-- gsub pattern is empty and `HaniChars` is never used as shown);
-- presumably the title was reduced to its Han characters — confirm against
-- upstream.
-- @return The table markup as a string.
function export.hantutab()
local HaniChars = require("Module:scripts").getByCode("Hani"):getCharacters()
local hantu = gsub(mw.title.getCurrentTitle().text, '', '')
-- The header cell spans as many columns as there are characters.
local table_head = '<table class="floatright wikitable" style="text-align:center; font-size:small;"><tr><th colspan="' ..
len(hantu) ..
'" style="font-weight:normal;">] in this term</th></tr><tr lang="vi" class="Hani" style="font-size:2em; background:white; line-height:1em;">'
return table_head ..
gsub(hantu, '(.)', '<td style="padding:0.5em;">]</td>') ..
'</tr></table>'
end
---Returns the categories indicated by the given wikitext.
-- Tokens found in the input are turned into one formatted string each and
-- concatenated; the tokens "l", "vi", "vi-l" and "Vietnamese" are skipped.
-- NOTE(review): the argument index on `frame.args`, both patterns and the
-- category text of the format string appear to have been lost during
-- extraction; presumably the first pattern stripped markup tags and the
-- second matched classifier words — confirm against upstream.
-- @param frame The frame object supplying the wikitext.
-- @return The concatenated category strings.
function export.classifierCategories(frame)
local src = frame.args
local classifiers = {}
for classifier in gmatch(gsub(src, "<->", ""), "+") do
-- Skip tokens that come from link templates rather than classifiers.
if classifier ~= "l" and classifier ~= "vi" and classifier ~= "vi-l" and
classifier ~= "Vietnamese" then
local cat = format("]",
classifier)
table.insert(classifiers, cat)
end
end
return table.concat(classifiers)
end
---Generates the wikitext skeleton of a new Vietnamese entry for the current
-- page: optional Wikipedia box and image, alternative forms, etymology,
-- pronunciation, up to three part-of-speech sections with definitions,
-- related-term sections and a category line.
-- NOTE(review): every bracketed argument key (`args[...]`) and every key of
-- the `pos_title` / `pos_head` tables appears to have been lost during
-- extraction, so the block is not valid Lua as shown. It should be restored
-- from the upstream module before it is relied on.
-- @param frame The frame object; values are read from the parent frame's
--   arguments.
-- @return The generated wikitext.
function export.new(frame)
local title = mw.title.getCurrentTitle().subpageText
local args = frame:getParent().args
local pos = args or ""
local def = args or "{{rfdef|vi}}"
local pos2 = args or (args and "" or false)
local def2 = args or "{{rfdef|vi}}"
local pos3 = args or (args and "" or false)
local def3 = args or "{{rfdef|vi}}"
local etym = args or false
local head = args or false
local cat = args or false
local reg = args or false
local cls = args or false
local rdp = args or false
local nom = args or false
local pic = args or false
local picc = args or false
-- Turn each character of `nom` into a comma-separated item, then drop the
-- trailing separator.
nom = nom and gsub(nom, "(.)", "], ") or false
nom = nom and gsub(nom, ", $", "") or false
if args then
etym = "{{vi-etym-sino|" .. args .. "}}."
end
-- Multi-word titles without an explicit etymology get a {{com}} template
-- listing the space-separated words.
if not etym and match(title, " ") then
etym = "{{com|vi"
for word in mw.text.gsplit(title, " ") do
etym = etym .. "|" .. word
end
etym = etym .. "}}."
end
-- "-" suppresses the etymology section; a leading "<" is expanded to "From".
if etym == "-" then etym = false end
if etym then etym = gsub(etym, "^%<", "From") end
local result = ""
-- Maps a part-of-speech code to its section heading.
local function genTitle(text)
local pos_title = {
= "Noun", = "Noun", = "Proper noun", = "Proper noun", = "Pronoun",
= "Verb", = "Verb", = "Adjective", = "Adjective", = "Adverb",
= "Preposition", = "Postposition", = "Conjunction",
= "Particle", = "Suffix",
= "Proverb", = "Idiom", = "Phrase", = "Interjection", = "Interjection",
= "Classifier", = "Classifier", = "Numeral", = "Abbreviation", = "Determiner"
};
-- NOTE(review): no `sub` is declared among the module's visible locals, so
-- this fallback would call an undefined global — confirm.
return pos_title or upper(sub(text, 1, 1)) .. sub(text, 2, -1)
end
-- Maps a part-of-speech code to the suffix of its {{vi-…}} headword template.
local function genHead(text)
local pos_head = {
= "noun", = "noun", = "proper noun", = "proper noun", = "verb", = "verb form", = "adj",
= "post", = "conj", = "particle", = "pronoun",
= "proverb", = "idiom", = "phrase", = "interj",
= "abbr", = "classifier", = "det"
};
return pos_head or text
end
-- Builds a section of related terms: a {{col3}} list for class "der", a
-- bulleted {{l}} list otherwise.
-- NOTE(review): `i` is assigned without `local`, so it leaks into the global
-- environment.
local function other(class, title, args)
local code = ""
if class == "der" and args then
code = code .. "\n\n===" .. title .. "===\n{{col3|vi|" .. args
i = 2
while args do
code = code .. "|" .. args
i = i + 1
end
code = code .. "}}"
elseif args then
code = code .. "\n\n===" .. title .. "===\n* {{l|vi|" .. args .. "}}"
i = 2
while args do
code = code .. "\n* {{l|vi|" .. args .. "}}"
i = i + 1
end
end
return code
end
result = result .. "==Vietnamese=="
if args then result = result .. "\n{{wikipedia|lang=vi" ..
(args == "y" and "" or "|" .. args) .. "}}" end
-- The image caption defaults to the capitalised title followed by a full stop.
if pic then result = result .. "\n[[File:" .. pic .. "|thumb|" ..
(picc or gsub(title, '^%l', upper) .. ".") .. "]]" end
result = result .. other("alt", "Alternative forms", args)
if etym then result = result .. "\n\n===Etymology===\n" .. etym end
result = result .. "\n\n===Pronunciation===\n{{vi-IPA}}"
-- First part-of-speech section: |cls= only for nouns, |rdp= only for
-- adjectives, verbs and adverbs.
result = result .. "\n\n===" .. genTitle(pos) .. "===\n{{vi-" .. genHead(pos) .. (head and ("|head=" .. head) or "") ..
((genHead(pos) == "noun" and cls) and "|cls=" .. cls or "") ..
(((genHead(pos) == "adj" or genHead(pos) == "verb" or genHead(pos) == "adv") and rdp) and "|rdp=" .. rdp or "") ..
(nom and "|" .. nom or "") ..
"}}\n\n# " .. def
-- The extra "=" in these titles yields level-4 headings.
result = result .. other("syn", "=Synonyms=", args)
result = result .. other("ant", "=Antonyms=", args)
result = result .. other("der", "=Derived terms=", args)
result = result .. other("also", "=See also=", args)
-- NOTE(review): in the two sections below the |cls= test still looks at
-- `pos` rather than `pos2` / `pos3`; this looks like a copy-paste slip —
-- confirm.
if pos2 then
result = result .. "\n\n===" .. genTitle(pos2) .. "===\n{{vi-" .. genHead(pos2) .. (head and ("|head=" .. head) or "") ..
((genHead(pos) == "noun" and cls) and "|cls=" .. cls or "") ..
(((genHead(pos2) == "adj" or genHead(pos2) == "verb" or genHead(pos2) == "adv") and rdp) and "|rdp=" .. rdp or "") ..
(nom and "|" .. nom or "") ..
"}}\n\n# " .. def2
end
if pos3 then
result = result .. "\n\n===" .. genTitle(pos3) .. "===\n{{vi-" .. genHead(pos3) .. (head and ("|head=" .. head) or "") ..
((genHead(pos) == "noun" and cls) and "|cls=" .. cls or "") ..
(((genHead(pos3) == "adj" or genHead(pos3) == "verb" or genHead(pos3) == "adv") and rdp) and "|rdp=" .. rdp or "") ..
(nom and "|" .. nom or "") ..
"}}\n\n# " .. def3
end
if cat then result = result .. "\n\n{{C|vi|" .. cat .. "}}" end
return result
end
---Generates a {{vi-der}} call listing terms derived from the current page
-- title: the terms passed as positional template arguments plus every
-- vocabulary-list entry that contains the title as whole syllable(s).
-- Duplicates are dropped and the result is sorted by Vietnamese sort key.
-- @param frame The frame object; terms are read from the parent frame's
--   positional arguments.
-- @return The wikitext of the {{vi-der}} call.
function export.new_der(frame)
	local title = mw.title.getCurrentTitle().subpageText
	local data_module = require("Module:vi/vocab-list")
	local args = frame:getParent().args

	local seen, res = {}, {}
	-- Appends a term unless it has been collected already.
	local function add(term)
		if not seen[term] then
			seen[term] = true
			res[#res + 1] = term
		end
	end

	for _, arg in ipairs(args) do
		add(arg)
	end

	-- Padding both strings with spaces makes a plain-text search match the
	-- title only where it is delimited by spaces or by the ends of the word,
	-- i.e. never in the middle of a longer syllable. Plain mode also keeps
	-- magic characters in the title from being read as a pattern.
	local padded_title = " " .. title .. " "
	for _, word in ipairs(data_module) do
		if word ~= title and find(" " .. word .. " ", padded_title, 1, true) then
			add(word)
		end
	end

	local vi_sort_module = require("Module:vi-sortkey")
	-- Memoised because table.sort calls the comparator many times per term.
	local makeSortKey = require("Module:fun").memoize(vi_sort_module.makeSortKey)
	table.sort(res, function(term1, term2) return makeSortKey(term1) < makeSortKey(term2) end)

	return "{{vi-der|" .. table.concat(res, "|") .. "}}"
end
---Renders the positional template arguments as a column table of linked
-- Vietnamese terms.
-- Each argument has the form "term" or "term:gloss"; a ":tl" marker in an
-- argument is removed and the `tu_lay_note` annotation is appended to that
-- term instead.
-- NOTE(review): the link text inside `tu_lay_note`, the argument keys read
-- into `unfold` / `title`, and the element indices on `word_parts` appear to
-- have been lost during extraction; restore them from the upstream module.
-- NOTE(review): `unfold`, `title`, `title_text`, `word`, `is_tu_lay` and
-- `tu_lay` are assigned without `local` and therefore leak into the global
-- environment.
-- @param frame The frame object; values are read from the parent frame's
--   arguments.
-- @return The result of Module:columns' create_table().
function export.derived(frame)
local tu_lay_note = "<span style=\"padding-left:4px; padding-right:4px\"> </span><span style=\"background:#ffffe0\">('']'')</span>"
local m_columns = require("Module:columns")
local lang = require("Module:languages").getByCode("vi")
local m_links = require("Module:links")
local args = frame:getParent().args
local pagename = mw.title.getCurrentTitle().text
local result = {}
-- Longest argument seen so far, in characters; decides the column count.
local length = 0
unfold = args and true or false
title = args or false
title_text = title or "Derived terms"
for i, word in ipairs(args) do
-- gsub's second return value is the number of ":tl" markers removed.
word, is_tu_lay = gsub(word, "%:tl", "")
tu_lay = is_tu_lay > 0 and tu_lay_note or ""
local word_parts = mw.text.split(gsub(word, "\n", "" ), ":")
table.insert(result, m_links.full_link({
lang = lang,
term = word_parts,
gloss = word_parts or nil }) ..
tu_lay)
length = math.max(len(word), length)
end
-- Two columns when any argument is longer than 15 characters, otherwise
-- three; the fifth argument is true only when `unfold` is unset and there
-- are at least 7 terms.
return
m_columns.create_table(
(length > 15 and 2 or 3),
result,
1,
"#F5F5FF",
((unfold or #result < 7) and false or true),
"Derived terms",
title_text,
nil,
nil,
lang
)
end
return export