This module will transliterate Hittite language text.
The module should preferably not be called directly from templates or other modules.
To use it from a template, use {{xlit}}.
Within a module, use Module:languages#Language:transliterate.
For testcases, see Module:hit-translit/testcases.
tr(text, lang, sc): Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang. When the transliteration fails, returns nil.
-- Table of functions exported by this module (returned at the end of the file).
local export = {}

-- Bitwise operations used for the onset/coda bit masks.
local bit32 = require('bit32')
-- Helper module whose functions tag the pieces of the transliterated output.
local m_tag = require('Module:hit-translit/tag')
-- Sign data, loaded through mw.loadData.
local sign_list = mw.loadData('Module:hit-translit/data')
-- Local shorthands for the ustring length and substring functions.
local ulen = mw.ustring.len
local usub = mw.ustring.sub
-- Lookup table consulted by find_seg to turn one character of a syllable into
-- its normalized segment (the values below).
-- NOTE(review): every entry is missing its key (each line starts with "="),
-- which is not valid Lua; the keys appear to have been lost from this copy.
-- The find_seg examples suggest "š" -> 's' and "ḫ" -> 'h', but the full key
-- set cannot be recovered from this file -- restore it from the authoritative
-- module source.
local segments = {
-- vowels
= 'a',
= 'a',
= 'a',
= 'e',
= 'e',
= 'e',
= 'i',
= 'i',
= 'i',
= 'u',
= 'u',
= 'u',
-- consonants with voicing alternates
= 'b',
= 'p',
= 'd',
= 't',
= 'g',
= 'k',
= 'q',
-- single consonants
= 'h',
= 'r',
= 'l',
= 'm',
= 'n',
= 's',
= 'z',
= 'y',
= 'w',
-- numbers
= '0',
= '1',
= '2',
= '3',
= '4',
= '5',
= '6',
= '7',
= '8',
= '9',
}
--[=[
-- Set up the bit array used for marking which onsets and codas are available
-- for ambiguous characters. Every normalized segment owns exactly one bit, so
-- a set of segments can be stored as a single number and combined or compared
-- with bitwise operations. The values also serve as the ranking used when
-- syllables are sorted by onset and coda (a smaller value sorts first).
]=]
local sort_order = {
	-- vowels
	['a'] = 2 ^ 0,
	['i'] = 2 ^ 1, -- I've chosen "i" over "e"
	['e'] = 2 ^ 2,
	['u'] = 2 ^ 3,
	-- consonants with voicing alternates
	['b'] = 2 ^ 4,
	['p'] = 2 ^ 5,
	['d'] = 2 ^ 6,
	['t'] = 2 ^ 7,
	['g'] = 2 ^ 8,
	['k'] = 2 ^ 9,
	['q'] = 2 ^ 10,
	-- single consonants
	['h'] = 2 ^ 11,
	['r'] = 2 ^ 12,
	['l'] = 2 ^ 13,
	['m'] = 2 ^ 14,
	['n'] = 2 ^ 15,
	['s'] = 2 ^ 16,
	['z'] = 2 ^ 17,
	['y'] = 2 ^ 18,
	['w'] = 2 ^ 19,
	-- numbers
	['0'] = 2 ^ 20,
	['1'] = 2 ^ 21,
	['2'] = 2 ^ 22,
	['3'] = 2 ^ 23,
	['4'] = 2 ^ 24,
	['5'] = 2 ^ 25,
	['6'] = 2 ^ 26,
	['7'] = 2 ^ 27,
	['8'] = 2 ^ 28,
	['9'] = 2 ^ 29,
}
-- Sorts a list of syllable entries in place: by onset first, and by coda when
-- the onsets are equal. The ranking of each segment comes from sort_order.
-- @param t  list of entries carrying an "o" (onset) and a "c" (coda) field
-- @return   the same table t, now sorted
local function inplace_multikey_sort(t)
	table.sort(t, function(a, b)
		if a.o ~= b.o then
			return sort_order[a.o] < sort_order[b.o]
		end
		-- Same onset: fall back to the coda as the second sort key.
		return sort_order[a.c] < sort_order[b.c]
	end)
	return t
end
-- Scans a syllable character by character, forwards or (when "rev" is set)
-- backwards, and returns the normalized form of the first character that has
-- an entry in the segments table. Thus:
--   find_seg("šaq") gives "s"
--   find_seg("luḫ", true) gives "h"
-- @param syl  the syllable to inspect
-- @param rev  truthy to scan from the end (coda) instead of the start (onset)
-- @return     the normalized segment
-- Raises an error when no character of the syllable is a known segment.
local function find_seg(syl, rev)
	for i = 1, ulen(syl) do
		-- A negative index makes usub count from the end of the string.
		local pos = rev and -i or i
		local seg = segments[usub(syl, pos, pos)]
		if seg then
			return seg
		end
	end
	error('Could not find a ' .. (rev and 'coda' or 'onset') .. ' for the syllable "' .. syl .. '".')
end
-- Returns the normalized onset of a syllable, i.e. the first recognizable
-- segment found when scanning from the start of the syllable.
function export.find_onset(syl)
	local from_end = false -- scan forwards
	return find_seg(syl, from_end)
end
-- Returns the normalized coda of a syllable, i.e. the first recognizable
-- segment found when scanning backwards from the end of the syllable.
function export.find_coda(syl)
	local from_end = true -- scan backwards
	return find_seg(syl, from_end)
end
-- Turns a list of Hittite syllables into a list of entries, each holding:
--   the syllable itself (at index 1),
--   o = the normalized onset character of the syllable,
--   c = the normalized coda character of the syllable,
-- and adds bit masks ("hashes") of all onsets and all codas. Thus
--   { "it", "id", "et", "ed", hit = true }
-- becomes:
--   {
--     { "it", o = "i", c = "t" },
--     { "id", o = "i", c = "d" },
--     { "et", o = "e", c = "t" },
--     { "ed", o = "e", c = "d" },
--     o_hash = 6, c_hash = 192, hit = true
--   }
-- @param sign  list of syllable strings
-- @return      a new table; the input is not modified
function export.hash_sign(sign)
	local ret = {o_hash = 0, c_hash = 0, hit = true} -- init onset and coda hashes
	for i, syl in ipairs(sign) do
		local onset, coda = export.find_onset(syl), export.find_coda(syl)
		ret[i] = {syl, o = onset, c = coda}
		-- Record which onsets/codas this sign can supply.
		ret.o_hash = bit32.bor(ret.o_hash, sort_order[onset])
		ret.c_hash = bit32.bor(ret.c_hash, sort_order[coda])
	end
	return ret
end
-- Looks up a sign in the sign data and prepares it for fitting.
-- Signs flagged "hit" (having Hittite readings) are converted with
-- export.hash_sign into a fresh table and sorted by onset and coda; any other
-- entry is returned exactly as stored in the sign data.
-- @param sign  key of the sign in sign_list
-- @return      the prepared sign table
function export.copy_sign(sign)
	local new = sign_list[sign]
	if new.hit then -- has Hittite signs
		new = inplace_multikey_sort(export.hash_sign(new))
	end
	return new
end
-- For two adjacent sets of Hittite syllables and a mask of the characters
-- they share, keeps only the syllables that fit the mask and rebuilds the
-- hashes:
--   from "first", the syllables whose coda is in the mask;
--   from "second", the syllables whose onset is in the mask.
-- @param first   hashed sign on the left
-- @param second  hashed sign on the right
-- @param mask    bit mask of the segments allowed at the boundary
-- @return        two new sign tables (filtered first, filtered second)
local function remove_syls(first, second, mask)
	local new_first, new_second = { hit = true }, { hit = true }
	local new_o_hash, new_c_hash = 0, 0

	for _, syl in ipairs(first) do
		if bit32.band(sort_order[syl.c], mask) > 0 then
			table.insert(new_first, syl)
			-- unnecessary, but useful for tracking
			new_o_hash = bit32.bor(new_o_hash, sort_order[syl.o])
		end
	end
	new_first.o_hash = new_o_hash
	new_first.c_hash = mask

	for _, syl in ipairs(second) do
		if bit32.band(sort_order[syl.o], mask) > 0 then
			table.insert(new_second, syl)
			-- The remaining codas matter when "second" is fitted to its own
			-- right-hand neighbour.
			new_c_hash = bit32.bor(new_c_hash, sort_order[syl.c])
		end
	end
	new_second.o_hash = mask
	new_second.c_hash = new_c_hash

	return new_first, new_second
end
-- Bit masks of segments that count as "approximately the same" when the coda
-- of one sign is compared with the onset of the next one.
local related_character_masks = {
	-- voicing alternates
	bit32.bor(sort_order['b'], sort_order['p']),
	bit32.bor(sort_order['d'], sort_order['t']),
	bit32.bor(sort_order['g'], sort_order['k'], sort_order['q']),
	-- "u" patterns next to "w"
	bit32.bor(sort_order['u'], sort_order['w']),
	-- numbers pattern together
	bit32.bor(sort_order['0'], sort_order['1'], sort_order['2'], sort_order['3'], sort_order['4'],
		sort_order['5'], sort_order['6'], sort_order['7'], sort_order['8'], sort_order['9']),
}
-- Collects every group of related characters (such as "p"/"b" or "t"/"d")
-- that is represented in both hashes and returns the union of those groups as
-- a single bit mask; 0 means no group is shared.
local function approx_match(first_hash, second_hash)
	local combined = 0
	for idx = 1, #related_character_masks do
		local group = related_character_masks[idx]
		local in_first = bit32.band(group, first_hash) ~= 0
		if in_first and bit32.band(group, second_hash) ~= 0 then
			combined = bit32.bor(combined, group)
		end
	end
	return combined
end
-- Takes two adjacent signs and removes unlikely Hittite syllables.
-- Filtering only happens when both signs exist and both carry Hittite
-- readings; an exact coda/onset overlap is preferred, with approximate
-- matches as the fallback. In every other case (including a missing first or
-- second sign, for which nothing is implemented yet) both arguments are
-- returned unchanged.
function export.fit_signs(first, second)
	local both_hittite = first and second and first.hit and second.hit
	if not both_hittite then
		return first, second
	end

	-- Characters present both as a coda of "first" and an onset of "second".
	local shared = bit32.band(first.c_hash, second.o_hash)
	if shared == 0 then
		-- No exact overlap: try approximately matching characters instead.
		shared = approx_match(first.c_hash, second.o_hash)
	end

	if shared > 0 then
		return remove_syls(first, second, shared)
	end
	return first, second
end
-- Picks the first reading of every sign, tags it where appropriate, and joins
-- the pieces with "-".
-- Hittite signs have been through export.hash_sign, so their first entry is a
-- table whose index 1 holds the syllable text. For the other kinds the first
-- element of the sign is handed to the matching m_tag function
-- (NOTE(review): presumably a string stored in the sign data -- confirm).
-- Signs carrying none of the known flags are skipped.
-- @param signs  list of sign tables
-- @return       the assembled word as a string
local function assemble_word(signs)
	local word = {}
	for _, sign in ipairs(signs) do
		if sign.hit then -- If Hittite, take first sign
			table.insert(word, sign[1][1])
		elseif sign.sum then -- If Sumerogram, take and tag first sign
			table.insert(word, m_tag.tag_sumerogram(sign[1]))
		elseif sign.akk then -- If Akkadogram, take and tag first sign
			table.insert(word, m_tag.tag_akkadogram(sign[1]))
		elseif sign.hurr then -- If Hurrian, take and tag first sign
			table.insert(word, m_tag.tag_hurrian_tr(sign[1]))
		elseif sign.hatt then -- If Hattic, take and tag first sign
			table.insert(word, m_tag.tag_hattic_tr(sign[1]))
		elseif sign.glossenkeil then -- If Glossenkeil, display it
			table.insert(word, m_tag.glossenkeil())
		end
	end
	return table.concat(word, '-')
end
-- Takes a continuous Cuneiform string and converts it to transliteration.
-- The text is consumed from the left, always preferring the longest key
-- (three, then two, then one character) present in the sign data. After each
-- new sign, it and its predecessor are fitted against each other so that
-- unlikely readings are dropped.
-- @param text  Cuneiform text without separators
-- @return      the transliterated word
-- Raises an error when the text starts with a character sequence that has no
-- entry in the sign data (otherwise the loop could never advance).
function export.transpose(text)
	local signs = {}
	while ulen(text) > 0 do
		local matched = false
		for len = 3, 1, -1 do
			local candidate = usub(text, 1, len)
			if sign_list[candidate] then
				table.insert(signs, export.copy_sign(candidate)) -- add in new sign
				text = usub(text, len + 1) -- truncate string
				matched = true
				break
			end
		end
		if not matched then
			error('Could not find a sign for "' .. usub(text, 1, 1) .. '".')
		end
		-- fit the two most recent signs
		signs[#signs - 1], signs[#signs] = export.fit_signs(signs[#signs - 1], signs[#signs])
	end
	signs[#signs] = export.fit_signs(signs[#signs], nil) -- fit end of word
	return assemble_word(signs)
end
-- Transliteration entry point.
-- Returns nil unless sc is "Xsux". Otherwise every match of the gsub pattern
-- in text is replaced by the result of export.transpose, and the resulting
-- string is passed to m_tag.tag_hittite_tr, whose result is returned.
-- The lang parameter is accepted but not used in this function.
-- NOTE(review): the pattern '+' looks truncated -- presumably a character
-- class preceded the "+" and was lost from this copy; confirm against the
-- authoritative module source.
function export.tr(text, lang, sc)
if sc ~= "Xsux" then
return nil
end
text = mw.ustring.gsub(text, '+', export.transpose)
return m_tag.tag_hittite_tr(text)
end
return export