This module will transliterate Lao language text per the LC (Library of Congress) scheme per WT:LO TR.
The module should preferably not be called directly from templates or other modules.
To use it from a template, use {{xlit}}.
Within a module, use Module:languages#Language:transliterate.
For testcases, see Module:lo-translit/testcases.
tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang. When the transliteration fails, returns nil.
-- Table of functions exposed by this module; it is returned at the end of the file.
local export = {}
-- Local aliases for the string functions of Scribunto's mw.ustring library,
-- so the rest of the module can call them by their short names.
local find = mw.ustring.find
local len = mw.ustring.len
local match = mw.ustring.match
local gmatch = mw.ustring.gmatch
local gsub = mw.ustring.gsub
local sub = mw.ustring.sub
-- Mapping of initial consonants.
-- Looked up by export.tr to obtain the romanized onset of a syllable and by
-- export.split_syll to decide whether a consonant can extend the current onset.
-- Values such as 'kr' or 'khl' suggest that some keys are consonant clusters.
-- NOTE(review): the keys of every entry appear to be missing from this copy of
-- the file (each entry starts directly with `=`); restore them from the
-- upstream module before use.
local initial_conv = {
= 'k', = 'kh', = 'kh', = 'ng',
= 'ch', = 's', = 's', = 'ny',
= 'd', = 't', = 'th', = 'th', = 'n',
= 'b', = 'p', = 'ph', = 'f', = 'ph', = 'f', = 'm',
= 'y', = 'r', = 'l', = 'w',
= 'h', = 'ʼ', = 'h',
= 'ng',
= 'ny',
= 'n', = 'n',
= 'm', = 'm',
= 'r',
= 'l', = 'l',
= 'w',
= 'kr', = 'kl',
= 'khr', = 'khr', = 'khl', = 'khl',
= 'pr', = 'pl',
= 'phr', = 'fr', = 'phl', = 'fl',
= 'dr', = 'tr'
}
-- Mapping of glides.
-- Looked up by export.tr for the part of the syllable stored in `glide`.
-- NOTE(review): the key of the single entry appears to be missing from this
-- copy of the file; restore it from the upstream module before use.
local glide_conv = {
= 'r'
}
-- Mapping of vowel combinations.
-- Looked up by export.tr for the romanized vowel of a syllable; export.tr
-- overrides the result in a few context-dependent cases after the lookup.
-- NOTE(review): the keys of every entry appear to be missing from this copy of
-- the file (each entry starts directly with `=`); restore them from the
-- upstream module before use.
local vowel_conv = {
= 'a', = 'a',
= 'i',
= 'ư', = 'u', = 'ui',
= 'e', = 'e',
= 'æ', = 'æ',
= 'o', = 'o',
= 'ǫ', = 'ǫ',
= 'œ',
= 'ia', = 'ia',
= 'ưa',
= 'ua', = 'ua', = 'ua',
= 'ai', = 'ai', = 'ai',
= 'ao',
= 'uau',
= 'am', = 'am',
= 'uam',
= 'ā',
= 'āo',
= 'ī',
= 'ư̄',
= 'ū',
= 'ē',
= 'ǣ',
= 'ō',
= 'ōi', = 'ōi',
= 'ǭ', = 'ǭ',
= 'ǭi', = 'ǭi',
= 'œ̄',
= 'œ̄i', = 'œ̄i',
= 'īa', = 'īa', = 'īa',
= 'ư̄a', = 'ư̄ai',
= 'ūa', = 'ūa',
= 'uāi', = 'uāi',
= 'āi', = 'āi',
= 'uā',
= 'uāi', = 'uāi',
= 'ǣu', -- ແ_ວ can be either ǣu or uǣ; ǣu is the more common reading, so it is the default here.
= 'īu', = 'iu',
= 'iāu',
= 'uīu',
}
-- Mapping of coda consonants.
-- Looked up by export.tr for the romanized final consonant of a syllable.
-- The last entry maps to the empty string, i.e. that coda adds nothing to the output.
-- NOTE(review): the keys of every entry appear to be missing from this copy of
-- the file (each entry starts directly with `=`); restore them from the
-- upstream module before use.
local coda_conv = {
= 'k', = 'k', = 'k',
= 'ng',
= 't', = 't',
= 't', = 't', = 't', = 't',
= 's',
= 'n',
= 'p', = 'p', = 'p', = 'p',
= 'm',
= 'y',
= 'n', = 'n',
= 'w',
= '',
}
-- Special symbols.
-- Looked up by export.tr for every character of a syllable; characters without
-- an entry contribute nothing. Two entries produce the ditto mark '〃', one
-- produces the empty string, and the rest produce the ASCII digits 0-9.
-- NOTE(review): the keys of every entry appear to be missing from this copy of
-- the file (each entry starts directly with `=`); restore them from the
-- upstream module before use.
local sp_symbols = {
= '〃', = '〃',
= '',
= '0', = '1', = '2', = '3', = '4',
= '5', = '6', = '7', = '8', = '9'
}
-- List of character types.
-- export.split_syll classifies each character through this table and branches
-- on the resulting category:
--   'coda'        consonant that can end a syllable
--   'cons'        consonant that can only start a syllable
--   'ambig'       character that can both start or end a syllable
--   'glide'       glide consonant following the initial consonant
--   'pref_vowel'  prefix vowel, which starts a new syllable
--   'suf_vowel', 'vowel_let'  vowel signs/letters kept in the current syllable
--   'tone'        tone mark
--   'iter_symbol', 'canc_symbol'  iteration and cancel symbols
--   'number'      digit
-- NOTE(review): the keys of every entry appear to be missing from this copy of
-- the file (each entry starts directly with `=`); restore them from the
-- upstream module before use.
local char_type = {
= 'coda', = 'coda', = 'coda', = 'coda',
= 'coda', = 'coda', = 'ambig',
= 'coda', = 'coda', = 'coda', = 'coda', = 'coda',
= 'coda', = 'coda', = 'cons', = 'cons', = 'coda', = 'coda', = 'coda',
= 'coda', = 'coda', = 'coda', = 'ambig',
= 'coda', = 'cons', = 'ambig', = 'cons',
= 'cons', = 'cons',
= 'iter_symbol',
= 'vowel_let', = 'suf_vowel', = 'vowel_let', = 'suf_vowel',
= 'suf_vowel', = 'suf_vowel', = 'suf_vowel', = 'suf_vowel',
= 'suf_vowel', = 'suf_vowel', = 'suf_vowel',
= 'glide',
= 'vowel_let',
= 'pref_vowel', = 'pref_vowel',
= 'pref_vowel', = 'pref_vowel', = 'pref_vowel',
= 'iter_symbol',
= 'tone', = 'tone', = 'tone', = 'tone',
= 'canc_symbol', = 'suf_vowel',
= 'number', = 'number', = 'number', = 'number', = 'number',
= 'number', = 'number', = 'number', = 'number', = 'number'
}
-- List of consonant classes
-- Each entry assigns one of the classes 'mid', 'high' or 'low'.
-- This table is not referenced by any other code visible in this file.
-- NOTE(review): the keys of every entry appear to be missing from this copy of
-- the file (each entry starts directly with `=`); restore them from the
-- upstream module before use.
local cons_class = {
= 'mid', = 'high', = 'low', = 'low',
= 'mid', = 'high', = 'low', = 'low',
= 'mid', = 'mid', = 'high', = 'low', = 'low',
= 'mid', = 'mid', = 'high', = 'high', = 'low', = 'low', = 'low',
= 'mid', = 'low', = 'low', = 'low',
= 'high', = 'mid', = 'low'
}
-- Build a brand-new, empty syllable record.
-- The record has one empty list per component: `curr` (the full syllable),
-- `initial`, `glide`, `vowel`, `tone`, `coda` and `sp` (special symbols).
local function reset_syllable()
	local blank = {}
	for _, field in ipairs({ 'curr', 'initial', 'glide', 'vowel', 'tone', 'coda', 'sp' }) do
		blank[field] = {}
	end
	return blank
end
-- Append a snapshot of the syllable under construction to `syllables`, then
-- hand back a fresh, empty syllable record for the caller to continue with.
-- The snapshot is a new table, but it shares the component lists of `curr_syll`.
local function store_and_reset(syllables, curr_syll)
	local snapshot = {}
	for _, field in ipairs({ 'curr', 'initial', 'glide', 'vowel', 'tone', 'coda', 'sp' }) do
		snapshot[field] = curr_syll[field]
	end
	syllables[#syllables + 1] = snapshot
	return reset_syllable()
end
-- Split the entry into individual syllables.
-- Every run matched by the `gmatch` pattern is scanned character by character.
-- Each character is classified through `char_type` and appended to the
-- syllable under construction, which is flushed to the result list whenever a
-- character can only belong to a new syllable.
-- Parameters:
--   text:  the string to split.
--   debug: when truthy, return the `curr` strings of all syllables joined with '-'.
-- Returns: the hyphen-joined string in debug mode; otherwise a list of syllable
--   tables with the fields curr, initial, glide, vowel, tone, coda and sp.
-- NOTE(review): several bracketed expressions in this function (the character
-- class inside the `gmatch`/`match` patterns and the subscripts on `c`,
-- `c_types`, `char_type`, `initial_conv` and `vowel_conv`) appear to have been
-- lost from this copy of the file. They are left exactly as found because they
-- cannot be restored with certainty; confirm against the upstream module.
function export.split_syll(text, debug)
	-- Store the split syllables.
	local syllables = {}
	local debug_syllables = {}
	local curr_syll = reset_syllable()
	-- Iterate through Lao characters.
	for lao_text in gmatch(text, '+') do
		local c, c_types = {}, {}
		-- Classify each character in the syllable.
		for i = 1, len(lao_text) do
			c = sub(lao_text, i, i)
			c_types = char_type]
		end
		-- Parse the entry by identifying each character's type.
		-- The loop runs one step past the last character so that the final
		-- syllable is flushed by the first branch below.
		for i = 1, #c + 1 do
			local type_curr, type_next = c_types, c_types
			local curr_vowel_full = table.concat(curr_syll.vowel)
			-- Prefix vowels are always the start of a new syllable.
			if type_curr == 'pref_vowel' or i == #c + 1 then
				if #curr_syll.curr ~= 0 then
					curr_syll = store_and_reset(syllables, curr_syll)
				end
				table.insert(curr_syll.vowel, c)
				table.insert(curr_syll.curr, c)
			-- Glide consonants always follow the initial consonant.
			elseif type_curr == 'glide' then
				table.insert(curr_syll.glide, c)
				table.insert(curr_syll.curr, c)
			-- Suffix vowels and vowel letters are always part of the same syllable.
			elseif type_curr == 'suf_vowel' or type_curr == 'vowel_let' then
				table.insert(curr_syll.vowel, c)
				table.insert(curr_syll.curr, c)
			-- Same with tone marks.
			elseif type_curr == 'tone' then
				table.insert(curr_syll.tone, c)
				table.insert(curr_syll.curr, c)
			-- Some consonants can end a syllable.
			elseif type_curr == 'coda' then
				if #curr_syll.coda == 0 and initial_conv] and (#curr_syll.vowel == 0 or char_type == 'pref_vowel') then
					table.insert(curr_syll.initial, c)
					table.insert(curr_syll.curr, c)
				elseif #curr_syll.coda == 0 and #curr_syll.initial ~= 0 and (type_next ~= 'glide' and type_next ~= 'suf_vowel' and type_next ~= 'vowel_let' and type_next ~= 'tone')
				and not (type_next == 'ambig' and match(c_types, 'co'))
				and not ((c_types ~= 'tone' and c_types ~= 'suf_vowel' and c ~= 'ອ') and type_next == 'ambig' and match(c, '')) then
					table.insert(curr_syll.coda, c)
					table.insert(curr_syll.curr, c)
				else
					curr_syll = store_and_reset(syllables, curr_syll)
					table.insert(curr_syll.initial, c)
					table.insert(curr_syll.curr, c)
				end
			-- However, some consonants can only start a syllable.
			elseif type_curr == 'cons' then
				if #curr_syll.coda == 0 and initial_conv] and (#curr_syll.vowel == 0 or char_type == 'pref_vowel') then
					table.insert(curr_syll.initial, c)
					table.insert(curr_syll.curr, c)
				else
					curr_syll = store_and_reset(syllables, curr_syll)
					table.insert(curr_syll.initial, c)
					table.insert(curr_syll.curr, c)
				end
			-- Ambiguous characters can both start or end a syllable.
			elseif type_curr == 'ambig' then
				if #curr_syll.curr > 0 and c == 'ອ' and type_next == 'suf_vowel' then
					curr_syll = store_and_reset(syllables, curr_syll)
					table.insert(curr_syll.initial, c)
					table.insert(curr_syll.curr, c)
				elseif #curr_syll.initial == 0 or char_type == 'pref_vowel' then
					table.insert(curr_syll.initial, c)
					table.insert(curr_syll.curr, c)
				elseif c == 'ຍ' and c == 'າ' then -- quick hack (FIXME)
					table.insert(curr_syll.vowel, c)
					table.insert(curr_syll.curr, c)
				elseif c == 'ຍ' and c ~= 'ຫ' and #curr_vowel_full == 0 then
					curr_syll = store_and_reset(syllables, curr_syll)
					table.insert(curr_syll.initial, c)
					table.insert(curr_syll.curr, c)
				elseif #curr_syll.initial ~= 0 and (#curr_vowel_full == 0 or vowel_conv] and
				(type_next ~= 'glide' and type_next ~= 'suf_vowel' and type_next ~= 'vowel_let' and type_next ~= 'tone')) then
					table.insert(curr_syll.vowel, c)
					table.insert(curr_syll.curr, c)
				else
					curr_syll = store_and_reset(syllables, curr_syll)
					table.insert(curr_syll.initial, c)
					table.insert(curr_syll.curr, c)
				end
			-- Iteration and cancel symbols should be treated as part of the same syllable.
			-- The former test `type_curr == 'iter_symbol' or 'canc_symbol'` was always
			-- truthy, so it also swallowed digits. Testing for "anything but a number"
			-- keeps the old behaviour for the symbols and for unclassified characters
			-- while letting digits reach the branch below.
			elseif type_curr ~= 'number' then
				table.insert(curr_syll.curr, c)
				table.insert(curr_syll.sp, c)
			-- However, numbers should be treated in their own syllable.
			else
				-- Compare the lengths of the component lists: a table is never equal
				-- to 0, so the former `curr_syll.initial ~= 0` style checks were always true.
				if #curr_syll.initial ~= 0 or #curr_syll.glide ~= 0 or #curr_syll.vowel ~= 0 or #curr_syll.tone ~= 0 or #curr_syll.coda ~= 0 then
					curr_syll = store_and_reset(syllables, curr_syll)
				end
				table.insert(curr_syll.curr, c)
				table.insert(curr_syll.sp, c)
			end
		end
	end
	-- For debug mode, return concatenated `curr` values.
	if debug then
		for _, syll in ipairs(syllables) do
			table.insert(debug_syllables, table.concat(syll.curr))
		end
		return table.concat(debug_syllables, '-')
	-- Otherwise, return full syllable information.
	else
		return syllables
	end
end
-- Generate the transliteration of a Lao entry given the split syllables.
-- Parameters:
--   text: the string to transliterate; it is handed to export.split_syll.
--   lang, sc: accepted but not referenced anywhere in this function body.
-- Returns: the transliterations of all syllables joined with single spaces.
-- NOTE(review): several bracketed expressions in this function (subscripts on
-- `syllable.initial`, `syllable.vowel` and the *_conv/sp_symbols tables, and the
-- character classes inside some `match` patterns) appear to have been lost from
-- this copy of the file; the code is left exactly as found. Confirm against the
-- upstream module.
function export.tr(text, lang, sc)
-- Split the entry into syllables.
local syllables = export.split_syll(text, false)
-- Store the transliteration.
local translit = {}
-- Iterate through each syllable.
for _, syllable in ipairs(syllables) do
-- Handle various edge cases.
-- When the initial is ຫ and the glide is ຼ, move the glide into the initial.
if table.concat(syllable.initial) == 'ຫ' and table.concat(syllable.glide) == 'ຼ' then -- ຫຼ
syllable.initial = {'ຫ', 'ຼ'}
syllable.glide = {}
end
if table.concat(syllable.initial) ~= '' and table.concat(syllable.vowel) == '' then -- null vowel is pronounced like ະ given an initial consonant
syllable.vowel = {'ະ'}
end
-- Handle cases where ambiguous vowels are put in the initial consonant position when it really should be a vowel.
-- The last element of `initial` is removed and the letter is appended to `vowel`.
if #syllable.initial > 1 and syllable.initial == 'ວ' then
table.remove(syllable.initial)
table.insert(syllable.vowel, 'ວ')
end
if #syllable.initial > 1 and syllable.initial == 'ຍ' then
table.remove(syllable.initial)
table.insert(syllable.vowel, 'ຍ')
end
-- Handle cases where ຍ is in the vowel position but should be in the initial position with ຫ.
-- The first element of `vowel` is removed and ຍ is appended to `initial`.
if #syllable.vowel > 1 and syllable.vowel == 'ຍ' and syllable.initial == 'ຫ' then
table.remove(syllable.vowel, 1)
table.insert(syllable.initial, 'ຍ')
end
-- Map consonants, glides, vowels and codas to their transliterations.
-- Each component falls back to the empty string.
local initial = initial_conv or ''
local glide = glide_conv or ''
local vowel = vowel_conv or ''
local coda = coda_conv or ''
-- Special symbols can just be added directly.
-- Every character of the full syllable is visited and its mapped symbol, if any, is appended to `sp`.
local sp = ''
for c in gmatch(table.concat(syllable.curr), ".") do
sp = sp .. (sp_symbols or '')
end
-- ແ_ວ is uǣ with certain initial consonants (ກຂຄງຈສຊຖທລອຮ) plus a coda.
-- NOTE(review): the first pattern below is empty as written, so it matches any initial; presumably a character class was lost.
if match(table.concat(syllable.initial), '') and match(table.concat(syllable.vowel), 'ແວ') and coda ~= '' then
vowel = 'uǣ'
end
-- _ວຍ is ūai when the initial consonant is ຫ.
if match(table.concat(syllable.initial), 'ຫ') and match(table.concat(syllable.vowel), 'ວຍ') then
vowel = 'ūai'
end
-- _ວຽ is uīa when the coda is ນ.
if match(table.concat(syllable.coda), 'ນ') and match(table.concat(syllable.vowel), 'ວຽ') then
vowel = 'uīa'
end
-- Construct the transliterated syllable string.
local syll_string = initial .. glide .. vowel .. coda .. sp
-- Check if '໌' is present, which indicates a cancel symbol.
-- If so, the final character of the syllable string is wrapped in <small><del> markup.
if match(table.concat(syllable.sp), '໌') then
syll_string = gsub(syll_string, '.$', '<small><del>%0</del></small>')
end
-- Then check if ຯ or ໆ is present, which indicates an iteration symbol.
-- NOTE(review): the pattern below is empty as written, so it matches any string; presumably a character class was lost.
if match(table.concat(syllable.sp), '') and (initial ~= '' or glide ~= '' or vowel ~= '' or coda ~= '') then
-- Add the transliteration of the syllable to the list with another small underlined version.
-- The ditto mark is stripped first, so the repetition is shown by the second copy.
syll_string = gsub(syll_string, '〃', '')
table.insert(translit, syll_string)
table.insert(translit, '<small><u>' .. syll_string .. '</u></small>')
else
-- Add the transliteration of the syllable to the list only once.
table.insert(translit, syll_string)
end
end
-- Return the transliteration as a concatenated string.
return table.concat(translit, ' ')
end
-- Hand the table of exported functions (split_syll, tr) to whoever loads this module.
return export