-- This module transliterates Hebrew-language text per WT:HE TR.
-- The module should preferably not be called directly from templates or other modules.
-- To use it from a template, use {{xlit}}.
-- Within a module, use Module:languages#Language:transliterate.
-- For test cases, see Module:he-translit/testcases.
--
-- Entry point: tr(text, lang, sc)
-- Transliterates the given text, written in the script specified by the code sc
-- and in the language specified by the code lang.
-- When the transliteration fails, it returns nil.
-- Module table returned at the end of the file; public functions are attached to it.
local export = {}
--Contributors: Malku H₂n̥rés, Sartma, Erutuon, Metaknowledge

-- String helpers from the shared utilities module.
local m_str_utils = require("Module:string utilities")
local gcodepoint = m_str_utils.gcodepoint
local match = m_str_utils.match
local s = m_str_utils.gsub
local U = m_str_utils.char

-- Bidirectional control characters should be avoided as much as possible,
-- but they are easily picked up when copying and pasting, so the module needs
-- to account for them.
-- The "-" pieces make U+202A-U+202E and U+2066-U+2069 ranges once this string
-- is placed inside a pattern set.
-- NOTE(review): the link naming the source of this list was lost — restore it.
local bidirectional_control_characters =
	U(0x061C) .. U(0x200E) .. U(0x200F) .. U(0x202A) .. "-" .. U(0x202E)
	.. U(0x2066) .. "-" .. U(0x2069)

-- Frontier patterns marking word boundaries.
-- NOTE(review): in Lua a frontier pattern is "%f[set]"; the set after "%f" is
-- missing on both lines below — presumably lost in transit, confirm and restore.
local word_end = "%f"
local word_start = "%f"

-- Pattern fragments used by the rule tables: V stands for a vowel, C for a consonant.
-- NOTE(review): both look truncated (C is the empty string) — confirm and restore.
local V = "?́?"
local C = ""
-- Direct per-character transliteration map. export.BH passes it as the
-- replacement table in s(text, '.', c), so each single character of the input
-- is looked up here.
-- NOTE(review): every entry below is missing its key (nothing precedes "=");
-- presumably the bracketed Hebrew-character keys were lost — restore before use.
local c = { --direct translit
--full char ie. C
= "ʔ",
= "ḇ",
= "ḡ",
= "ḏ",
= "h",
= "w",
= "z",
= "ḥ",
= "ṭ",
= "y",
= "l",
= "s",
= "ʕ",
= "q",
= "r",
= "ß",
= "ṯ",
--miscellaneous:
-- The single-character placeholders below (' - · ^ + ˊ ˇ) are consumed by the
-- rules in table b.
= "'", --geresh
= "-", --hyphen
= " .", --dot
= "ˊ", --sin dot
= "ˇ", --shin dot
= "·", --dagesh
= "^", --oleh
= "+", --meteg
--niqqud ie. V
= "a",
= "ɔ",
= "ɛ",
= "e",
= "i",
= "ɔ̆",
= "ă",
= "ɛ̆",
= "ə",
= "o",
= "u",
= "ɔ",
}
-- Ordered list of {pattern, replacement} rules for the "BH" transliteration.
-- export.BH walks this list from first to last, so the order of the rules is
-- significant: later rules rely on the placeholders (H, U, E, Ə, ^, ·, +)
-- produced by earlier ones.
-- NOTE(review): several patterns are empty strings or contain empty groups
-- such as "()" and "(?)"; presumably their bracketed character classes were
-- lost — confirm against the canonical module before relying on them.
local b = { --BH
--when different final form
{"", "ḵ"},
{"", "m"},
{"", "n"},
{"", "f"},
{"", "ṣ"},
{"(" .. V .. ")(·?)(+?)(^?)(?'?)", "%5%2%1%4%3"}, --order: s(h)in dot, geresh, dagesh, vowel (niqqud), oleh, meteg
--bgdkft: fricative + dagesh > stop
{"ḇ·", "b"},
{"ḡ·", "g"},
{"ḏ·", "d"},
{"ṯ·", "t"},
{"ḵ·", "k"},
{"f·", "p"},
--s(h)in dot
{"ß(·?)ˇ", "š%1"},
{"ß(·?)ˊ", "ś%1"},
--vowel lengthenings
{"i(?)y", "ī%1"}, --V > long / _{jw}{no V no dagesh}
{"ī(?" .. V .. ")", "iy%1"},
{"ī·", "iy·"},
{"e(?)y", "ē%1"},
{"ē(?" .. V .. ")", "ey%1"},
{"ɛ(?)y", "E%1"}, --see E > ɛ̄ below
{"E(?" .. V .. ")", "ɛy%1"},
{"(" .. C .. "·?)wo", "%1ō"},
-- U is a temporary placeholder protecting "w·" after a vowel from the "w·" > "ū" rule
{"(" .. V .. "?)w·", "%1U"},
{"w·", "ū"},
{"U", "w·"},
{"(" .. C .. "·?)y·", "%1ī"},
--h > circumflex / V_{no V no dagesh}
-- H is a temporary placeholder for an h that follows a vowel
{"(" .. V .. "?)h", "%1H"},
{"H(" .. V .. ")", "h%1"},
{"H·", "h"},
{"e(?)H", "ê%1"},
{"o(?)H", "ô%1"},
{"ɛ(?)H", "ɛ̂%1"},
{"ɔ(?)H", "ɔ̂%1"},
{"a(?)H", "â%1"},
{"(" .. V .. "?%s?)(.)·(%s?" .. V .. ")", "%1%2%2%3"}, --dagesh gemination
{"", ""}, --deletion of unpointed s(h)ins and useless dageshim
--schwa: Ə means "kept"
{"ə" .. word_end, ""},
{"ə()", "Ə%1"},
{"()(" .. C .. ")ə", "%1%2Ə"},
{"E", "ɛ̄"}, --see >E above
{"(" .. C .. "ə?" .. C .. ")ə", "%1Ə"},
{"(" .. C .. ")Ə(" .. C .. ")()", "%1ə%2Ə"},
{word_start .. "(?a?" .. C .. ")ə", "%1Ə"},
-- any schwa not marked as kept is dropped, then the kept ones are restored
{"ə", ""},
{"Ə", "ə"},
{"()a(" .. word_end .. ")", "^a%1%2"}, --final /a/-guttural inversion
--penultimate stress: segolates & -áyiC
{"(" .. C .. ")(%+?".. C .. "ɛ" .. C .. ")" .. word_end, "%1^%2"},
{"(" .. C .. "a)(%+?".. C .. C .. "?a" .. C ..")" .. word_end, "%1^%2"},
{"ayi(" .. C .. ")" .. word_end, "a^yi%1"},
--stress marking
-- NOTE(review): "^" is the pattern anchor only at the start of a pattern, so
-- in "a^" etc. it is expected to match literally — confirm with the gsub in use.
{"a^", "á"},
{"e^", "é"},
{"i^", "í"},
{"o^", "ó"},
{"u^", "ú"},
{"ɛ^", "ɛ́"},
{"ɔ^", "ɔ́"},
{"ā^", "ā́"},
{"ē^", "ḗ"},
{"ī^", "ī́"},
{"ō^", "ṓ"},
{"ū^", "ū́"},
{"ɛ̄^", "ɛ̄́"},
{"ɔ̄^", "ɔ̄́"},
{"ê^", "ế"},
{"ô^", "ố"},
{"ɛ̂^", "ɛ̂́"},
{"ɔ̂^", "ɔ̂́"},
{"ɔyw(" .. word_end .. ")", "ɔw%1"}, --irregular…
{"(" .. V .. "?)()(" .. V .. ")", "%1%2%2%3"}, --dagesh bgdkft gemination
{"f", "p̄"}, --bc p̄ are 2 chars
{"%s%.", "."}, --quotes: " ." > "." (esthetics)
}
--MH
-- Per-character map applied by export.MH_tr to the output of export.BH, via
-- s(export.BH(text), '.', m).
-- NOTE(review): every entry below is missing its key (nothing precedes "=");
-- presumably the bracketed keys were lost — restore before use.
local m = { --direct change
= "d",
= "g",
= "s",
= "″", --gershayim
= "k",
= "i",
= "u",
= "^", --stress marking conversion below
}
-- Ordered list of {pattern, replacement} rules for the "MH" transliteration.
-- export.MH_tr walks this list from first to last after applying table m, so
-- rule order is significant. Upper-case letters (T, Z, C, D, K) are
-- single-character placeholders, expanded to their display form by the final rules.
-- NOTE(review): several patterns are empty strings or contain empty groups
-- "()"; presumably their bracketed character classes were lost — confirm
-- against the canonical module before relying on them.
local l = {
--indirect
{"p̄", "f"},
{"", ""},
{"ḥ'", "ḫ"},
{"ṯ'", "T"},
{"ṭ'", "ẓ"},
{"g'", "j"},
{"z'", "Z"},
{"ṣ'", "C"},
{"d'", "D"},
{"'", "ġ"},
{"(.)%1", "%1"},
{"", "v"},
{"", "K"},
{"", "t"},
{"'", ""},
{"", "'"},
--above: loss of vowel length, loss of gemination, turning n-grams into 1 char, MH mergers.
--schwa
--prefixes
-- {word_start .. "()ə", "%1e"},
-- {"(u)ə", "%1e"},
--initial C clusters
{word_start .. "()ə", "%1e"},
{word_start .. "(" .. C .. ")ə()", "%1e%2"},
--internal
{"(" .. C .. ")ə", "%1e"},
{"(" .. C .. C .. ")ə", "%1e"},
{"", ""}, --deletion of remaining schwa and metegim
--put here not above to avoid e/ə confusion
{"", "a"},
{"", "e"},
{"", "o"},
{"", "é"},
{"", "ó"},
{"(" .. word_start .. "-)(" .. C .. "?" .. C .. "?)" .. word_end, "%1^%2"}, --module-explicit default final stress...
--same articulation > schwa insertion
{"()()", "%1e%2"},
{"()()", "%1e%2"},
{"()()", "%1e%2"},
{"()()", "%1e%2"},
{"()()", "%1e%2"},
{"()()", "%1e%2"},
{"()()", "%1e%2"},
{"()()", "%1e%2"},
{"(K)(K)", "%1e%2"},
{"(r)(r)", "%1e%2"},
{"''", "'e'"},
--a/o, including kol
{"ɔ(" .. C .. C .. ")", "o%1"},
{"ɔ(" .. C .. ")" .. word_end, "o%1"},
{"(" .. word_start .. ")ɔ(^l" .. word_end .. ")", "%1o%2"},
{"()ɔ(^l" .. word_end .. ")", "%1o%2"},
-- {"(m)ɔ(^l" .. word_end .. ")", "%1o%2"},
{"(" .. word_start .. "u)ɔ(^l" .. word_end .. ")", "%1o%2"},
{"(ha)ɔ(^l" .. word_end .. ")", "%1o%2"},
-- every remaining ɔ becomes a
{"ɔ", "a"},
{"(" .. word_start .. C .. C .. "?" .. V .. ")^(" .. C .. "?" .. C .. "?" .. word_end .. ")", "%1%2"}, --…reader-implicit acute accent in monosyllabic
--stress marking
{"a^", "á"},
{"e^", "é"},
{"i^", "í"},
{"o^", "ó"},
{"u^", "ú"},
--glottal stops: kept when {CV}'V,
{"(" .. word_start .. ")'", "%1"},
{"'(" .. C .. ")", "%1"},
{"'(" .. word_end .. ")", "%1"},
--fake digraphs
{"()h", "%1'h"},
--one char > displaying
{"ṣ", "ts"},
{"š", "sh"},
{"T", "t'"},
{"Z", "zh"},
{"C", "ch"},
{"D", "d'"},
{"K", "kh"},
}
--- Shared first pass of the transliteration.
-- Maps every character of `text` through table `c`, then applies the ordered
-- {pattern, replacement} rules of table `b` one after another.
-- Metegim ("+") are left in the result; export.BH_tr strips them afterwards.
-- @param text string to transliterate
-- @return the transliterated string, still containing "+" markers
function export.BH(text)
	text = s(text, '.', c)
	--remove cantillation marks so that it works for quotes too
	-- NOTE(review): the pattern below is empty, which makes this call a no-op;
	-- presumably a character class was lost — confirm and restore.
	text = s(text, "", "")
	for a = 1, #b do
		-- each rule is a pair: [1] = pattern, [2] = replacement
		local rule = b[a]
		text = s(text, rule[1], rule[2])
	end
	return text
end
--- "BH" transliteration: the output of export.BH with the meteg markers removed.
-- @param text string to transliterate
-- @return whatever `s` returns for the final substitution
function export.BH_tr(text)
	local with_metegim = export.BH(text)
	return s(with_metegim, "+", "") --metegim kept for MH
end
--- "MH" transliteration.
-- Runs export.BH (which keeps metegim), maps each character through table `m`,
-- then applies the ordered {pattern, replacement} rules of table `l`.
-- Text containing a gershayim mark and no vowel is treated as an acronym: its
-- fricatives p̄/ḇ/ḵ become p/b/k and the final result is upper-cased.
-- @param text string to transliterate
-- @return the transliterated string
function export.MH_tr(text)
	text = s(export.BH(text), '.', m) --.BH() to keep metegim, m is applied

	local acronym = false
	if match(text, "″") and not match(text, V) then --acronym = gershayim & no V
		text = s(s(s(text, "p̄", "p"), "ḇ", "b"), "ḵ", "k")
		acronym = true
	end

	for a = 1, #l do --in any case, l is applied
		-- each rule is a pair: [1] = pattern, [2] = replacement
		local rule = l[a]
		text = s(text, rule[1], rule[2])
	end

	if acronym then
		text = mw.ustring.upper(text)
	end
	return text
end
--- Entry point used for transliteration.
-- Detects the script through Module:languages when `sc` is not given, returns
-- nil for anything that is not in the "Hebr" script (or fails the match
-- check), and otherwise dispatches on the language code.
-- @param text string to transliterate
-- @param lang language code; "he" and "hbo" are handled
-- @param sc script code, optional
-- @return the transliteration, or nil when the script check fails
function export.tr(text, lang, sc)
	sc = sc or require("Module:languages").getByCode(lang):findBestScript(text):getCode()

	local transliterable = sc == "Hebr" and match(text, "")
	if not transliterable then
		return nil
	end

	if lang == "he" then
		return export.MH_tr(text)
	end
	if lang == "hbo" then --though useless
		return export.BH_tr(text)
	end
end
--- Template-facing helper returning both transliterations of the first
-- positional argument, joined as "<BH>, <MH>".
-- @param frame frame object whose first positional argument is the text
-- @return string holding both transliterations separated by ", "
function export.tr_all(frame)
	-- the transliteration functions expect a string, not the whole args table
	local text = frame.args[1]
	return export.BH_tr(text) .. ", " .. export.MH_tr(text)
end
--Erutuon's code for code points below
-- Disabled debugging aid: when the long comment below is enabled, it wraps `s`
-- so that each substitution that changes the text is logged through mw.log
-- with the Unicode character names of the text before and after the change.
--[[
local Array = require "Module:array"
local function show_code_point_names(text)
if not text then return "" end
local names = Array()
for cp in gcodepoint(text) do
-- Remove HEBREW LETTER, HEBREW POINT, etc.
local name = require "Module:Unicode data".lookup_name(cp)
:gsub(
"^HEBREW (%w+) ",
function(type)
if type == "ACCENT" then return "ACCENT " else return "" end
end)
:lower()
names:insert(name)
end
return names:concat ", "
end
local old_s = s
function s(...)
local old = ...
local new = old_s(...)
if old ~= new then
mw.log(show_code_point_names(old), show_code_point_names(new), ...)
end
return new
end
--]]
-- Hand the module table to require().
return export