This module transliterates Lao-language text according to the LC (Library of Congress) scheme, as described at WT:LO TR.
The module should preferably not be called directly from templates or other modules.
To use it from a template, use {{xlit}}.
Within a module, use Module:languages#Language:transliterate.
For testcases, see Module:lo-translit/testcases.
tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang. When the transliteration fails, returns nil.
-- Module table handed back to callers at the end of the file; the public
-- entry point export.tr is attached to it below.
local export = {}

-- Local aliases for the mw.ustring functions used throughout this module
-- (mw.ustring is supplied by the Scribunto host environment, not by stock Lua).
local gsub = mw.ustring.gsub
local len = mw.ustring.len
local match = mw.ustring.match
local sub = mw.ustring.sub
-- Romanization table for syllable initials: export.tr looks a chunk's initial
-- up here when assembling the Latin output.
-- NOTE(review): every entry below begins with a bare `=` — the ["…"] keys
-- (presumably Lao consonant letters and consonant clusters) appear to have
-- been stripped from this copy, so the constructor does not parse as Lua.
-- Restore the keys from the upstream module before using this file.
local initial_conv = {
= "k", = "kh", = "kh", = "ng",
= "ch", = "s", = "s", = "ny",
= "d", = "t", = "th", = "th", = "n",
= "b", = "p", = "ph", = "f", = "ph", = "f", = "m",
= "y", = "r", = "l", = "w",
= "h", = "ʼ", = "h",
= "ng",
= "ny",
= "n", = "n",
= "m", = "m",
= "r", = "r",
= "l", = "l",
= "w"
}
-- Romanization table for vowel spellings: export.tr looks the vowel part of a
-- chunk up here. Values without a macron are listed first, followed by the
-- macron-bearing ones.
-- NOTE(review): every entry below begins with a bare `=` — the ["…"] keys
-- (presumably Lao vowel-sign combinations) appear to have been stripped from
-- this copy, so the constructor does not parse as Lua. Restore the keys from
-- the upstream module before using this file.
local vowel_conv = {
= "a", = "a",
= "i",
= "ư", = "u",
= "e", = "e",
= "æ", = "æ",
= "o", = "o",
= "ǫ", = "ǫ",
= "œ",
= "ia", = "ia",
= "ưa",
= "ua", = "ua", = "ua",
= "ai", = "ai", = "ai",
= "ao",
= "uau",
= "am", = "am",
= "uam",
= "ā",
= "āo",
= "ī",
= "ư̄",
= "ū",
= "ē",
= "ǣ",
= "ō",
= "ōi", = "ōi",
= "ǭ", = "ǭ",
= "ǭi", = "ǭi",
= "œ̄",
= "œ̄i", = "œ̄i",
= "īa", = "īa", = "īa",
= "ư̄a",
= "ūa", = "ūa",
= "uāi", = "uāi", = "uīan",
= "āi", = "āi",
= "uā",
= "uāi", = "uāi",
= "ǣu", -- ແ_ວ can be both ǣu and uǣ. The first is more common.
= "īu", = "iu",
= "iāu",
= "uīu",
}
-- Romanization table for syllable-final consonants: export.tr looks the
-- trailing character of a chunk's vowel/coda part up here. The last entry maps
-- to the empty string, i.e. its key contributes nothing to the output.
-- NOTE(review): every entry below begins with a bare `=` — the ["…"] keys
-- (presumably Lao consonant letters) appear to have been stripped from this
-- copy, so the constructor does not parse as Lua. Restore the keys from the
-- upstream module before using this file.
local coda_conv = {
= "k", = "k", = "k",
= "ng",
= "t", = "t", = "t",
= "ny",
= "t", = "t", = "t", = "t",
= "n",
= "p", = "p", = "p", = "p", = "p", = "p",
= "m",
= "y",
= "n", = "n",
= "w",
= "",
}
-- Replacement table for special symbols and digits: two keys map to the ditto
-- mark "〃", one maps to the empty string, and ten map to the ASCII digits 0-9.
-- export.tr uses it both for 'number' characters and as the replacement table
-- of a final per-character gsub.
-- NOTE(review): every entry below begins with a bare `=` — the ["…"] keys
-- (presumably Lao symbols and Lao digits) appear to have been stripped from
-- this copy, so the constructor does not parse as Lua. Restore the keys from
-- the upstream module before using this file.
local sp_symbols = {
= "〃", = "〃",
= "",
= "0", = "1", = "2", = "3", = "4",
= "5", = "6", = "7", = "8", = "9"
}
-- Character classification table driving the syllable splitter in export.tr.
-- Classes that appear as values:
--   'cons'        consonant
--   'coda'        consonant that export.tr may also accept as a final
--   'ambig'       character that export.tr may treat as vowel part or initial
--   'pref_vowel'  vowel sign handled as the start of a chunk
--   'suf_vowel'   vowel sign appended to the current chunk
--   'vowel_let'   vowel letter appended to the current chunk
--   'tone'        tone mark
--   'iter_symbol' iteration/repetition symbol
--   'canc_symbol' cancellation symbol
--   'number'      digit
-- NOTE(review): every entry below begins with a bare `=` — the ["…"] keys
-- (presumably individual Lao characters) appear to have been stripped from
-- this copy, so the constructor does not parse as Lua. Restore the keys from
-- the upstream module before using this file.
local char_type = {
= 'coda', = 'coda', = 'coda', = 'coda',
= 'coda', = 'coda', = 'ambig',
= 'coda', = 'coda', = 'coda', = 'coda', = 'coda',
= 'coda', = 'coda', = 'coda', = 'coda', = 'coda', = 'coda', = 'coda',
= 'coda', = 'coda', = 'coda', = 'ambig',
= 'coda', = 'cons', = 'ambig', = 'cons',
= 'iter_symbol',
= 'vowel_let', = 'suf_vowel', = 'vowel_let', = 'suf_vowel',
= 'suf_vowel', = 'suf_vowel', = 'suf_vowel', = 'suf_vowel',
= 'suf_vowel', = 'suf_vowel', = 'suf_vowel',
= 'cons',
= 'vowel_let',
= 'pref_vowel', = 'pref_vowel',
= 'pref_vowel', = 'pref_vowel', = 'pref_vowel',
= 'iter_symbol',
= 'tone', = 'tone', = 'tone', = 'tone',
= 'canc_symbol', = 'suf_vowel',
= 'number', = 'number', = 'number', = 'number', = 'number',
= 'number', = 'number', = 'number', = 'number', = 'number',
= 'cons', = 'cons'
}
--- Transliterates the Lao portions of `text` into Latin script.
-- For each run matched by the gmatch pattern the function (1) classifies every
-- character via char_type, (2) groups the characters into syllable-like chunks
-- tracking initial / vowel / coda parts, (3) romanizes each chunk through
-- initial_conv, vowel_conv and coda_conv, and (4) substitutes the joined result
-- back into `text`.
-- NOTE(review): in this copy the bracketed parts of the code appear to have
-- been stripped — index expressions (e.g. `char_type] or`, `sp_symbols])`) and
-- `[...]` character classes inside pattern literals (e.g. `gsub(text, '', '')`,
-- `'+'`). The function therefore does not parse as written; the comments below
-- describe only the structure that is still visible and should be confirmed
-- against the upstream module.
-- @param text  the string to transliterate
-- @param lang  language code (not read anywhere in the visible body)
-- @param sc    script code (not read anywhere in the visible body)
-- @return `text` with each processed run replaced by its romanization
function export.tr(text, lang, sc)
-- Pre-clean the input (the pattern literal is empty here; presumably a stripped character class).
text = gsub(text, '', '')
-- Process one matched run at a time.
for lao_text in mw.ustring.gmatch(text, '+') do
-- word: finished chunks; c / chartype: per-character data; output: romanized chunks.
local word, c, chartype, output = {}, {}, {}, {}
-- Accumulators for the chunk currently being assembled.
local curr_word, curr_initial, curr_vowel, curr_coda = {}, {}, {}, {}
-- This `i` is shadowed by the loop variables of the numeric for-loops below.
local i = 1
-- Keep the untouched run so it can be located in `text` for the final substitution.
local original_text = lao_text
-- Pass 1: take the run apart character by character and classify each one.
for i = 1, len(lao_text) do
c = sub(lao_text, i, i)
chartype = char_type] or table.insert(word, c)
end
-- Pass 2: group characters into chunks. The extra iteration (#c + 1) acts as
-- an end-of-input sentinel that flushes the last chunk.
-- Characters classified 'tone' match no branch below and so are not copied
-- into curr_word.
for i = 1, #c + 1 do
-- A prefix vowel (or the end sentinel) closes any chunk in progress and starts a new one.
if chartype == 'pref_vowel' or i == #c + 1 then
if #curr_word ~= 0 then
table.insert(word, table.concat(curr_word))
curr_word, curr_initial, curr_vowel, curr_coda = {c}, {}, {c}, {}
else
table.insert(curr_vowel, c)
table.insert(curr_word, c)
end
-- Suffix vowels always extend the current chunk's vowel part.
elseif chartype == 'suf_vowel' then
table.insert(curr_vowel, c)
table.insert(curr_word, c)
-- Ambiguous characters: join the vowel part, join the initial, or open a new chunk.
elseif chartype == 'ambig' then
if #curr_initial ~= 0 and vowel_conv] and (chartype ~= 'suf_vowel' or match(c, '')) and #curr_coda == 0 then
table.insert(curr_vowel, c)
table.insert(curr_word, c)
elseif (#curr_initial == 0 and char_type == 'pref_vowel') or (#curr_initial ~= 0 and initial_conv]) then
table.insert(curr_initial, c)
table.insert(curr_word, c)
else
if #curr_word ~= 0 then
table.insert(word, table.concat(curr_word))
end
curr_word, curr_initial, curr_vowel, curr_coda = {c}, {c}, {}, {}
end
-- Vowel letters always extend the current chunk's vowel part.
elseif chartype == 'vowel_let' then
table.insert(curr_vowel, c)
table.insert(curr_word, c)
-- A coda is accepted only when the chunk has an initial and no coda yet; the
-- vowel sequence "ວີວ" is explicitly excluded from taking one.
elseif chartype == 'coda' and #curr_coda == 0 and #curr_initial ~= 0 and chartype ~= 'suf_vowel' and chartype ~= 'vowel_let' and not (chartype == 'ambig' and match(chartype or '', 'co')) and table.concat(curr_vowel) ~= "ວີວ" then
table.insert(curr_coda, c)
table.insert(curr_word, c)
-- Any other consonant either extends the initial or starts a new chunk.
elseif chartype == 'cons' or chartype == 'coda' then
if #curr_coda == 0 and initial_conv] and (#curr_vowel == 0 or char_type == 'pref_vowel') then
table.insert(curr_initial, c)
table.insert(curr_word, c)
else
-- NOTE(review): unlike the other flushes, this one is not guarded by
-- `#curr_word ~= 0`, so an empty string can be pushed onto `word` — confirm intended.
table.insert(word, table.concat(curr_word))
curr_word, curr_initial, curr_vowel, curr_coda = {c}, {c}, {}, {}
end
-- An iteration symbol closes the chunk in progress and becomes the start of the next one.
elseif chartype == 'iter_symbol' then
if #curr_word ~= 0 then
table.insert(word, table.concat(curr_word))
end
curr_word, curr_initial, curr_vowel, curr_coda = {c}, {}, {}, {}
-- A cancellation symbol is kept in the chunk text but in none of the part accumulators.
elseif chartype == 'canc_symbol' then
table.insert(curr_word, c)
-- Digits are appended already converted through sp_symbols.
elseif chartype == 'number' then
table.insert(curr_word, sp_symbols])
end
end
-- Pass 3: romanize each chunk.
for i = 1, #word do
-- The pattern splits a chunk into four captures handed to the callback as
-- a, b, c, e; e is an optional trailing '໌'.
word = gsub(word, '^(?)(ຫ?ຼ?)(*)(໌?)$', function(a, b, c, e)
-- d receives the last character of c when it matches the (stripped) class,
-- and is then looked up in coda_conv below.
local d = ""
if match(sub(c, -1, -1), '') then
d = sub(c, -1, -1)
c = sub(c, 1, -2)
end
-- With no vowel material at all, fall back to 'ະ' as the vowel.
if a..c == '' then
c = 'ະ'
end
-- Romanize vowel and coda, falling back to the raw text when a lookup misses.
local vowel = vowel_conv or (vowel_conv or a .. c) .. (coda_conv or d)
-- If the result still matches the (stripped) class, split it around that
-- character and romanize the pieces separately, separated by a space.
if match(vowel, '') then
vowel = gsub(vowel, '^(.*)()(.*)$', function(x, y, z)
return (vowel_conv or x) .. ' ' .. (initial_conv or y) .. (vowel_conv or z) end)
end
return (initial_conv or b) .. vowel .. e end)
-- An iteration-symbol chunk that is not the first chunk is rendered small and underlined.
if char_type] == 'iter_symbol' and i >= 2 then
word = '<small><u>' .. word .. '</u></small>'
end
-- The character preceding a cancellation mark '໌' is rendered small and struck through.
if match(word, '໌') and len(word) > 1 then
word = gsub(word, '(.)໌', '<small><del>%1</del></small>')
end
table.insert(output, word)
end
-- Join the romanized chunks with single spaces.
lao_text = table.concat(output, " ")
-- Map every remaining character that has an sp_symbols entry to its replacement.
lao_text = gsub(lao_text, '.', sp_symbols)
-- Put the romanized run back in place of the original run.
-- NOTE(review): original_text is passed as a pattern, not as plain text — confirm
-- no pattern-magic characters can occur in a matched run.
text = gsub(text, original_text, lao_text)
end
return text
end
-- Hand the module table (carrying export.tr) back to whoever loads this module.
return export