-- This module powers {{tl-bay sc}}, which is used to automatically generate
-- Baybayin forms of Tagalog words.
-- Based on ] by ].
-- Adaptation by ].
local export = {}
local lang = require("Module:languages").getByCode("tl")
local sc_Tglg = require("Module:scripts").getByCode("Tglg")
local m_str_utils = require("Module:string utilities")
local tl_utils = require("Module:tl-utilities")
local u = m_str_utils.char
local rmatch = m_str_utils.match
local ugmatch = m_str_utils.gmatch
local rsubn = m_str_utils.gsub
local rsplit = m_str_utils.split
local ulower = m_str_utils.lower
local ulen = m_str_utils.len
-- Combining diacritics, each built from its code point with u().
local AC = u(0x0301) -- combining acute accent (U+0301)
local GR = u(0x0300) -- combining grave accent (U+0300)
local CFLEX = u(0x0302) -- combining circumflex accent (U+0302)
local MACRON = u(0x0304) -- combining macron (U+0304)
local vowel = "aeəiou" -- vowel
-- NOTE(review): V (and C below) are empty strings here, yet later code splices
-- them into patterns as if they were character classes, e.g.
-- "(" .. V .. ")(" .. V .. ")". They look like bracket expressions that were
-- stripped from this copy; restore them from the upstream module.
local V = ""
-- All four combining accents concatenated into one string.
local accent = AC .. GR .. CFLEX .. MACRON
-- The accents plus the characters "#", space, "." and "/".
local separator = accent .. "# ./"
local C = "" -- consonant
-- Not referenced anywhere else in the visible code; presumably meant to be
-- wrapped in a character class by export.show — TODO confirm against upstream.
local supported_chars = "a-zA-Záéíóúâêîôûàèìòùë%-.ñŋ#'/,"
-- Table of base Baybayin letters; export.transcribe() concatenates entries of
-- this table with entries of baybayin_marks.
-- NOTE(review): every entry is written as `= value` with no key, which is not
-- valid Lua. The keys (presumably the Latin/phonemic symbol for each letter)
-- appear to have been stripped from this copy — restore from upstream.
local baybayin_chars = {
= "ᜀ",
= "ᜁ",
= "ᜂ",
= "ᜊ",
= "ᜃ",
= "ᜇ",
= "ᜄ",
= "ᜑ",
= "ᜎ",
= "ᜋ",
= "ᜈ",
= "ᜅ",
= "ᜉ",
= "ᜍ",
= "ᜐ",
= "ᜆ",
= "ᜏ",
= "ᜌ"
}
-- Table of marks appended after a base letter in export.transcribe(); the
-- first entry is the empty string (no mark).
-- NOTE(review): the entries have no keys (invalid Lua as written); presumably
-- they were keyed by vowel / "no vowel" markers — restore from upstream.
local baybayin_marks = {
= "",
= "ᜒ",
= "ᜓ",
= "᜔",
= "᜕"
}
-- Whole-word replacements consulted by export.transcribe() before
-- transcription; the values are the spelled-out forms.
-- NOTE(review): the keys are missing (invalid Lua as written); presumably the
-- abbreviated spellings of "nang" and "manga" — confirm against upstream.
local special_words = {
= "nang",
= "manga"
}
--- Substitute `foo` with `bar` in `term` and hand back only the new string.
-- rsubn() may return more than one value; wrapping the call in parentheses
-- truncates the result list to its first element.
local function rsub(term, foo, bar)
	return (rsubn(term, foo, bar))
end
--- Keep applying rsub() with the same pattern and replacement until the
-- string stops changing, then return that fixed point.
local function rsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		term = rsub(previous, foo, bar)
	until term == previous
	return term
end
--- Collapse every run of whitespace to a single space, then drop one leading
-- and one trailing space if present.
local function canon_spaces(text)
	local collapsed = rsub(text, "%s+", " ")
	local without_leading = rsub(collapsed, "^ ", "")
	return rsub(without_leading, " $", "")
end
--- Rewrite Latin-script text into the internal phonemic string that
-- export.transcribe() then converts syllable by syllable.
-- The function is a fixed sequence of substitutions; their order matters
-- (see the in-line remarks such as "needs to go first").
-- @param text string to rewrite; the caller in this file passes text that has
--   already been lower-cased
-- @param diph when falsy, the "diphthong correction" substitutions near the
--   end of the function are applied
-- @return the rewritten string
-- NOTE(review): many patterns below are empty strings or contain empty
-- captures "()" / "(?)", and the replacement table passed to rsub() has
-- entries without keys (invalid Lua). These look like bracketed character
-- classes and table keys that were stripped from this copy; restore them from
-- the upstream module before relying on this function.
local function get_pronunciation(text, diph)
-- Convert hyphens to dot
text = rsub(text, "%-", ".")
-- canonicalize multiple spaces again, which may have been introduced by hyphens
text = canon_spaces(text)
-- now eliminate punctuation
text = rsub(text, "", "")
-- put # at word beginning and end and double ## at text/foot boundary beginning/end
text = rsub(text, " | ", "# | #")
text = "##" .. rsub(text, " ", "# #") .. "##"
-- Move this early for now
--c, gü/gu+e or i, q
text = rsub(text, "c()", "s%1")
text = rsub(text, "()gü()", "%1ɡw%2")
text = rsub(text, "gü()", "ɡuw%1")
text = rsub(text, "gu()", "ɡ%1") -- Only e, so words like "biguin" will not be read as "bigin"
text = rsub(text, "qu()", "k%1")
text = rsub(text, "ü", "u")
--ll
text = rsub(text, "ll(?)()", "ly%2")
-- Correction for vowels with in-between glottal stop, now default
text = rsub_repeatedly(text, "(" .. V .. ")(" .. V .. ")", "%1.%2")
-- Reenable "j" sound be equivalent to "dy"
-- Ex. gaja = ga(r)ya not gariya
text = rsub(text, "(?)j(".. V .. ")" , "dy%2")
-- handle certain combinations; ch ng and sh handling needs to go first
text = rsub(text, "(?)ch", "ts") --not the real sound
text = rsub(text, "(?)g̃", "ŋ") -- Spanish spelling support
text = rsub(text, "ng", "ŋ")
text = rsub(text, "sh", "ʃ")
--ck
text = rsub(text, "ck", "k") -- foreign sound in case
--x
text = rsub(text, "()x()", "%1s%2")
text = rsub(text, "x", "ks")
--alphabet-to-phoneme
text = rsub(text, "",
--="ɡ": U+0067 LATIN SMALL LETTER G → U+0261 LATIN SMALL LETTER SCRIPT G
{ = "k", = "ɡ", = "ĵ", = "ɲ", = "k", = "b", = "s"})
text = rsub(text, "ɲ", "ny") -- ñ
text = rsub(text, "rr", "r") -- r
--determining whether "y" is a consonant or a vowel
--Baybayin treats as consonant regardless
text = rsub(text, "y(" .. V .. ")", "ɟ%1") -- not the real sound
text = rsub(text,"()y(?)()","%1i%2%3")
text = rsub(text, "()y#", "%1i#")
text = rsub(text, "w(" .. V .. ")","w%1")
-- text = rsub(text,"w(?)()","u%1%2")
-- text = rsub(text, "(" .. C .. ")w#","%1u")
-- ŋ is swapped to Ŋ for the duration of the syllabifier call and swapped back
-- right after it.
text = rsub(text, "ŋ", "Ŋ")
text = rsub(text, "ë", "e") -- mark as e for now
-- The same string is passed as both arguments of the syllabifier.
text = tl_utils.syllabify_from_spelling(text, text)
text = rsub(text, "Ŋ", "ŋ")
-- Remove accent marks
text = tl_utils.remove_accents(text)
text = rsub(text, "sh", "ʃ")
text = rsub(text, "ts", "ĉ") -- ts-not the real sound
text = rsub(text, "j", "ĵ")
if (not diph) then
--Corrections for diphthongs
text = rsub(text,"()i","%1j") --y
text = rsub(text,"()u","%1w") --w
end
text = rsub(text,"dĵ",".ĵ") --/d/ before /j/
text = rsub_repeatedly(text,"(n)(?)()","ŋ%2%3") -- /n/ before /k/ (some proper nouns)
-- After processing pronunciation, Baybayin Start Translate
text = rsub(text, "", "i")
text = rsub(text, "", "u")
return text
end
-- ĵ, ɟ and ć are used internally as single-character stand-ins for sounds.
-- NOTE(review): the sounds they represent were lost from the original comment;
-- confirm against the upstream module.

--- Convert Latin-script text to Baybayin.
-- The text is lower-cased and space-normalised, punctuation is turned into
-- slash placeholders and then into the " ᜵ " / " ᜶ " dividers, the result is
-- run through get_pronunciation(), and each word is finally rewritten
-- syllable by syllable by the nested baybay_syllable() helper.
-- @param text input string
-- @param trad truthy selects every branch guarded by `trad` below: "/" is
--   rendered as " ᜶ " instead of " ᜵ ", the extra word-level rewrites run, and
--   the trailing `post` capture of a syllable is not rendered
-- @param diph forwarded unchanged to get_pronunciation()
-- @param force_r when truthy (and `trad` is falsy) the substitution to "d"
--   inside baybay() is skipped
-- @return the transcribed string, normalised with mw.ustring.toNFC
-- NOTE(review): this copy has lost its bracketed text: several patterns are
-- empty or contain empty captures, tables have entries without keys, and
-- table look-ups such as `words`, `syllables`, `bay_double`, `baybayin_chars`
-- and `baybayin_marks` appear without their index. Those tokens are kept
-- exactly as found; restore them from the upstream module. The only code
-- change made here is declaring `retval` as a local (it used to leak into the
-- global environment on every call).
function export.transcribe(text, trad, diph, force_r)
	text = ulower(text)
	text = canon_spaces(text)
	-- Convert punctuation to kulit
	text = rsub(text, "%s*", "/")
	text = rsub(text, "()%s*", "%1 //")
	local words = rsplit(text, " ")
	for i, word in ipairs(words) do
		if special_words then
			words = special_words
		end
		words = rsub(words, "^%-(" .. V .. ")", "◌%1") -- suffix/infix if vowel, remove glottal stop at start
	end
	text = table.concat(words, " ")
	-- Convert slashes to bantasan, kulit divider
	text = rsub(text, "//", " ᜶ ")
	text = rsub(text, "/", trad and ' ᜶ ' or " ᜵ ")
	text = get_pronunciation(text, diph)
	-- Check if there are errors with vowels again
	text = rsub(text,"()(?)()","%1.%2%3")

	-- Render one syllable. `syll` is the onset plus vowel, `post` is whatever
	-- followed the vowel in the syllable; `post` is only rendered when `trad`
	-- is falsy.
	local function baybay_syllable(syll, post, last_vowel)
		local syll2 = ""
		local bay_double = {
			= "t", = "d",
			= "n", = "s",
			= "g"
		}
		-- Render a single internal character, optionally preceded by an extra
		-- letter (bay_soundpre) for sounds written with two Baybayin letters.
		local function baybay(character)
			local bay_soundpre = ''
			character = rsub(character, "", "g")
			if rmatch(character, '') then
				bay_soundpre = bay_double
				bay_soundpre = baybayin_chars .. baybayin_marks
				if character == 'ĉ' then
					if trad then bay_soundpre = '' end
					character = rsub(character, "", "s")
				else
					character = rsub(character, "", "y")
				end
			end
			if not force_r or trad then
				character = rsub(character, "", "d")
			end
			character = rsub(character, "", "p")
			character = rsub(character, "", "y")
			return bay_soundpre .. baybayin_chars
		end
		if not trad then
			-- Remove /h/ as it is not pronounced in between
			syll = rsub(syll, "(+)(h+)", "%1")
			post = rsub(post, "(h+)", "")
			post = rsub(post, "ɲ", bay_double)
			post = rsub(post, "ɡ", bay_double)
			post = rsub(post, "ʃ", bay_double)
			post = rsub(post, "ĵ", bay_double .. 's')
			post = rsub(post, "ĉ", bay_double .. 's')
			for c in ugmatch(post, '.') do
				syll2 = syll2 .. baybay(c) .. baybayin_marks
			end
		end
		syll = rsub(syll, "(" .. C .. "*)(" .. V .. "+)",
			function(consonant, vowel)
				local bay_char = ''
				if ulen(consonant) == 0 then
					bay_char = baybay(vowel)
				elseif ulen(consonant) == 1 and consonant ~= "◌" then
					bay_char = baybay(consonant) .. baybayin_marks
				elseif rmatch(consonant, "^(.*)ll$") then
					for c in ugmatch(consonant, '^(.)ll$') do
						bay_char = bay_char .. baybay(c) .. baybayin_marks
					end
					bay_char = bay_char .. baybay("l") .. baybayin_marks
					bay_char = bay_char .. baybay("y") .. baybayin_marks
				else
					-- Two character unicode problems
					consonant = rsub(consonant, "()", function(char)
						return bay_double
					end)
					consonant = rsub(consonant, "ĉ", bay_double .. (trad and 'y' or 's'))
					consonant = rsub(consonant, "ɟ", "y")
					if consonant == "◌" then
						bay_char = bay_char .. (vowel == "a" and "◌" or "") .. baybayin_marks
						last_vowel = nil
					else
						for c in ugmatch(consonant, '.') do
							bay_char = bay_char .. baybay(c) .. baybayin_marks
							last_vowel = nil
						end
					end
					bay_char = rsub(bay_char, baybayin_marks .. "$", baybayin_marks)
				end
				return bay_char
			end
		)
		return syll .. syll2
	end

	local words = rsplit(text, " ")
	for i, word in ipairs(words) do
		-- (C)/y/ and --(C)w fixes
		-- /h/ being pronounced like fahm, paham
		if trad then
			word = rsub(word, "()(w)(" .. V .. ")(" .. C .. "*)(+)", "%1u.%2%3%4%5")
			word = rsub(word, "()(ɟ)(" .. V .. ")(" .. C .. "*)(+)", "%1i.%2%3%4%5")
			word = rsub(word, "(" .. C .. "*)(" .. V .. ")(h)(" .. C .. "+)(+)", "%1%2.%3%2%4%5")
		end
		local syllables = rsplit(word, "")
		local last_vowel = nil
		for j = 1, #syllables do
			if rmatch(syllables, V) then
				syllables = rsub(syllables, "^(*)(" .. C .. "*)(" .. V .. "+)(" .. C .. "*)(*)$",
					function(temp1 ,pre, vowel, post, temp2)
						-- Declared local so the intermediate result does not
						-- become a global variable.
						local retval = temp1 .. baybay_syllable(pre .. vowel, post, last_vowel) .. temp2
						last_vowel = rmatch(post, "") and vowel or nil
						return retval
					end
				)
			elseif not rmatch(syllables, "") then
				-- This is only a fallback when no vowel is entered
				syllables = rsub(syllables, "^(*)(" .. C .. "+)(*)$",
					function(temp1 , consonant , temp2)
						return temp1 .. (trad and baybay_syllable(consonant .. "a", "") or baybay_syllable("", consonant)) .. temp2
					end
				)
			end
		end
		words = table.concat(syllables, "")
	end
	text = table.concat(words, " ")
	-- remove # symbols at word and text boundaries
	text = rsub(text, "#", "")
	text = canon_spaces(text)
	return mw.ustring.toNFC(text)
end
--- Template entry point.
-- Reads the arguments of the parent frame, validates them through
-- Module:parameters, transcribes the text with export.transcribe() and
-- returns the result, optionally followed by a parenthesised Latin rendering
-- and optionally wrapped in a script/language <span>.
-- Argument fields read below: args.trad, args.diph, args.r, args.tr,
-- args.disp.
-- @param frame the Scribunto frame; only frame:getParent().args is used
-- @return the assembled wikitext/HTML string
-- NOTE(review): the keys of `params`, the index on `args` in the `text`
-- assignment, and the character classes in the gmatch and rsub patterns
-- appear to have been stripped from this copy (the table entries without keys
-- are not valid Lua); restore them from the upstream module.
function export.show(frame)
local params = {
= {},
= {type = "boolean", default = false},
= {type = "boolean", default = false},
= {type = "boolean", default = false},
= {type = "boolean", default = false},
= {type = "number", default = 0},
}
local parargs = frame:getParent().args
local args = require("Module:parameters").process(parargs, params)
local results = ""
-- Falls back to the current page title when the left-hand operand is falsy.
local text = args or mw.title.getCurrentTitle().text
-- Each match yields a pair; the first capture is transcribed and the second
-- is appended unchanged.
for supported, unsupported in text:gmatch("(*)(*)") do
results = results .. export.transcribe(supported, args.trad, args.diph, args.r) .. unsupported
end
local tr = ""
-- Baybayin to Latin
if args.tr == 1 then
-- Transliterate the generated Baybayin back through the language object.
tr = rsub(lang:transliterate(results, sc_Tglg), "%s()", "%1")
elseif args.tr == 2 then
-- Reuse the input text, turning the slash placeholders into "." and ",".
tr = text
tr = rsub(tr, "", "")
tr = rsub(tr, "//", ".")
tr = rsub(tr, "/", ",")
else
tr = ''
end
-- Wrap a non-empty transliteration in parenthesis and tl-Latn spans.
if tr ~= "" then
tr = " " .. table.concat({
'<span class="mention-gloss-paren annotation-paren">(</span>',
'<span lang="tl-Latn" class="tr Latn">',
tr,
'</span>',
'<span class="mention-gloss-paren annotation-paren">)</span>'
})
end
if args.trad then
results = rsub_repeatedly(results, "() ()", "%1%2")
end
-- With args.disp set, tag the output with the script code as class and the
-- language code as lang attribute.
if args.disp then
results = '<span class="' .. sc_Tglg:getCode() .. '" lang="' .. lang:getCode() .. '">' .. results .. "</span>"
end
return results .. tr
end
return export