Module:okm-translit

Hello, you have come here looking for the meaning of the word Module:okm-translit. In DICTIOUS you will not only get to know all the dictionary meanings for the word Module:okm-translit, but we will also tell you about its etymology, its characteristics and you will know how to say Module:okm-translit in singular and plural. Everything you need to know about the word Module:okm-translit you have here. The definition of the word Module:okm-translit will help you to be more precise and correct when speaking or writing your texts. Knowing the definition of Module:okm-translit, as well as those of other words, enriches your vocabulary and provides you with more and better linguistic resources.

This module will transliterate Middle Korean language text. It is also used to transliterate Early Modern Korean. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:okm-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the transliteration fails, returns nil.

-- Module table; populated below and returned at the end of the file.
local export = {}
-- Shorthand for the Unicode-aware substitution function.
local gsub = mw.ustring.gsub

-- Character data for the Han and Hangul scripts, fetched from Module:scripts.
local scripts = require('Module:scripts')
local chars_Hani = scripts.getByCode('Hani'):getCharacters()
local chars_Hang = scripts.getByCode('Hang'):getCharacters()

-- https://github.com/szc126/rime-slg-korean/blob/main/slg_break_jamo.yaml
-- https://github.com/szc126/rime-slg-korean/blob/main/soolegi_yethangeul.custom.yaml
local tt_complex = {
='ᄇᄉᄀ',
='ᄇᄉᄃ',
='ᄇᄉᄇ',
='ᄇᄉᄉ',
='ᄇᄉᄌ',
='ᄉᄇᄀ',
='ᄉᄉᄉ',
='ᄅᄀᄀ',
='ᄅᄃᄃ',
='ᄅᄇᄇ',
='ᄇᄉᄐ',
='ᄉᄉᄇ',
='ᄌᄌᄒ',
='ᄀᄀ',
='ᄃᄃ',
='ᄇᄇ',
='ᄉᄉ',
='ᄌᄌ',
='ᄂᄀ',
='ᄂᄂ',
='ᄂᄃ',
='ᄂᄇ',
='ᄃᄀ',
='ᄅᄂ',
='ᄅᄅ',
='ᄅᄒ',
='ᄆᄇ',
='ᄇᄀ',
='ᄇᄂ',
='ᄇᄃ',
='ᄇᄉ',
='ᄇᄌ',
='ᄇᄎ',
='ᄇᄐ',
='ᄇᄑ',
='ᄫᄫ',
='ᄉᄀ',
='ᄉᄂ',
='ᄉᄃ',
='ᄉᄅ',
='ᄉᄆ',
='ᄉᄇ',
='ᄉᄋ',
='ᄉᄌ',
='ᄉᄎ',
='ᄉᄏ',
='ᄉᄐ',
='ᄉᄑ',
='ᄉᄒ',
='ᄼᄼ',
='ᄾᄾ',
='ᄋᄀ',
='ᄋᄃ',
='ᄋᄆ',
='ᄋᄇ',
='ᄋᄉ',
='ᄋᅀ',
='ᄋᄋ',
='ᄋᄌ',
='ᄋᄎ',
='ᄋᄐ',
='ᄋᄑ',
='ᄌᄋ',
='ᅎᅎ',
='ᅐᅐ',
='ᄎᄏ',
='ᄎᄒ',
='ᄑᄇ',
='ᄒᄒ',
='ᄀᄃ',
='ᄂᄉ',
='ᄂᄌ',
='ᄂᄒ',
='ᄃᄅ',
='ᄃᄆ',
='ᄃᄇ',
='ᄃᄉ',
='ᄃᄌ',
='ᄅᄀ',
='ᄅᄃ',
='ᄅᄆ',
='ᄅᄇ',
='ᄅᄫ',
='ᄅᄉ',
='ᄅᄌ',
='ᄅᄏ',
='ᄆᄀ',
='ᄆᄃ',
='ᄆᄉ',
='ᄇᄏ',
='ᄇᄒ',
='ᄋᄅ',
='ᄋᄒ',
='ᄐᄐ',
='ᄑᄒ',
='ᄒᄉ',
='ᅙᅙ',

='@ᅩ@ᅡ@',
='@ᅮ@ᅥ@',
='@ᅡ@ᅩ',
='@ᅩ@ᅡ',
='@ᅩ@ᅥ',
='@ᅮ@ᅥ',
='@ᅥ@ᅡ',
='@ᅮᅥ@',
='@ᅩᅡ@',
='@ᅮᅡ@',
='ᅩ@ᅥ@',
='ᅮ@ᅥ@',
='ᅩ@ᅡ@',
='ᅵ@ᅡᅩ',
='ᅵ@ᅡ@',
='ᅵ@ᅥ@',
='@ᅡ@',
='@ᅥ@',
='@ᅡᅩ',
='@ᅥᅩ',
='@ᅥᅮ',
='@ᅩᅩ',
='@ᅩ@',
='@ᅮᅡ',
='@ᅮᅥ',
='@ᅮᅮ',
='@ᅮ@',
='@ᅡᅮ',
='@ᅩᅡ',
='@ᅩᅥ',
='@ᅮᅩ',
='ᅵ@ᅡ',
='ᅩ@ᅡ',
='ᅩ@ᅥ',
='ᅮ@ᅥ',
='ᅵ@ᅥ',
='ᅵ@ᅩ',
='ᅵ@ᅮ',
='ᅩᅡ@',
='ᅮᅥ@',
='ᅩᅥ@',
='ᅮᅡ@',
='ᅮᅥᅳ',
='ᅳᅵᅮ',
='ᅩᅩᅵ',
='ᅮᅵ@',
='ᅳᅥ@',
='ᅵᅩᅵ',
='ᆞᅥ@',
='@ᅡ',
='@ᅥ',
='@ᅩ',
='@ᅮ',
='ᅡ@',
='ᅥ@',
='ᅩᅡ',
='ᅩ@',
='ᅮᅥ',
='ᅮ@',
='ᅳ@',
='ᅡᅩ',
='ᅡᅮ',
='ᅥᅩ',
='ᅥᅮ',
='ᅥᅳ',
='ᅩᅥ',
='ᅩᅩ',
='ᅩᅮ',
='ᅮᅡ',
='ᅮᅮ',
='ᅳᅮ',
='ᅳᅳ',
='ᅵᅡ',
='ᅵᅩ',
='ᅵᅮ',
='ᅵᅳ',
='ᅵᆞ',
='ᆞᅥ',
='ᆞᅮ',
='ᆞ@',
='ᆞᆞ',
='ᅡᅳ',
='ᅳᅡ',
='ᅳᅥ',
='ᅳᅩ',
='ᅵ@',
='ᆞᅡ',

='ᆨᆺᆨ',
='ᆯᆨᆺ',
='ᆯᆮᇂ',
='ᆯᆷᆨ',
='ᆯᆷᆺ',
='ᆯᆸᆺ',
='ᆯᆸᇂ',
='ᆯᆺᆺ',
='ᆷᆺᆺ',
='ᇰᆨᆨ',
='ᆮᆮᆸ',
='ᆮᆺᆨ',
='ᆯᆨᆨ',
='ᆯᆨᇂ',
='ᆯᆯᆿ',
='ᆯᆷᇂ',
='ᆯᆸᆮ',
='ᆯᆸᇁ',
='ᆯᇹᇂ',
='ᆷᆫᆫ',
='ᆷᆸᆺ',
='ᆸᆯᇁ',
='ᆸᆺᆮ',
='ᆺᆺᆨ',
='ᆺᆺᆮ',
='ᆽᆸᆸ',
='ᆨᆨ',
='ᆨᆺ',
='ᆫᆽ',
='ᆫᇂ',
='ᆯᆨ',
='ᆯᆷ',
='ᆯᆸ',
='ᆯᆺ',
='ᆯᇀ',
='ᆯᇁ',
='ᆯᇂ',
='ᆸᆺ',
='ᆺᆺ',
='ᆨᆯ',
='ᆫᆨ',
='ᆫᆮ',
='ᆫᆺ',
='ᆫᇫ',
='ᆫᇀ',
='ᆮᆨ',
='ᆮᆯ',
='ᆯᆫ',
='ᆯᆮ',
='ᆯᆯ',
='ᆯᇦ',
='ᆯᇫ',
='ᆯᆿ',
='ᆯᇹ',
='ᆷᆨ',
='ᆷᆯ',
='ᆷᆸ',
='ᆷᆺ',
='ᆷᇫ',
='ᆷᆾ',
='ᆷᇂ',
='ᆸᆯ',
='ᆸᇁ',
='ᆸᇂ',
='ᆺᆨ',
='ᆺᆮ',
='ᆺᆯ',
='ᆺᆸ',
='ᇰᆨ',
='ᇰᇰ',
='ᇰᆿ',
='ᇰᆺ',
='ᇰᇫ',
='ᇁᆸ',
='ᇂᆫ',
='ᇂᆯ',
='ᇂᆷ',
='ᇂᆸ',
='ᆨᆫ',
='ᆨᆸ',
='ᆨᆾ',
='ᆨᆿ',
='ᆨᇂ',
='ᆫᆫ',
='ᆫᆯ',
='ᆫᆾ',
='ᆮᆮ',
='ᆮᆸ',
='ᆮᆺ',
='ᆮᆽ',
='ᆮᆾ',
='ᆮᇀ',
='ᆯᇰ',
='ᆷᆫ',
='ᆷᆷ',
='ᆷᆽ',
='ᆸᆮ',
='ᆸᆷ',
='ᆸᆸ',
='ᆸᆽ',
='ᆸᆾ',
='ᆺᆷ',
='ᆺᇦ',
='ᆺᇫ',
='ᆺᆽ',
='ᆺᆾ',
='ᆺᇀ',
='ᆺᇂ',
='ᇫᆸ',
='ᇫᇦ',
='ᇰᆷ',
='ᇰᇂ',
='ᆽᆸ',
='ᆽᆽ',
='ᇁᆺ',
='ᇁᇀ',

-- compatibility jamo
='ᄅᄀᄉ',
='ᄅᄇᄉ',
='ᄇᄉᄀ',
='ᄇᄉᄃ',
='ᄀᄀ',
='ᄃᄃ',
='ᄇᄇ',
='ᄀᄉ',
='ᄂᄌ',
='ᄂᄒ',
='ᄅᄀ',
='ᄅᄆ',
='ᄅᄇ',
='ᄅᄉ',
='ᄅᄐ',
='ᄅᄑ',
='ᄅᄒ',
='ᄇᄉ',
='ᄉᄉ',
='ᄌᄌ',
='ᄂᄂ',
='ᄂᄃ',
='ᄂᄉ',
='ᄂᅀ',
='ᄅᄃ',
='ᄅᅀ',
='ᄅᅙ',
='ᄆᄇ',
='ᄆᄉ',
='ᄆᅀ',
='ᄇᄀ',
='ᄇᄃ',
='ᄇᄌ',
='ᄇᄐ',
='ᄫᄫ',
='ᄉᄀ',
='ᄉᄂ',
='ᄉᄃ',
='ᄉᄇ',
='ᄉᄌ',
='ᄋᄋ',
='ᅌᄉ',
='ᅌᅀ',
='ᄒᄒ',
='ᄀ',
='ᄂ',
='ᄃ',
='ᄅ',
='ᄆ',
='ᄇ',
='ᄉ',
='ᄋ',
='ᄌ',
='ᄎ',
='ᄏ',
='ᄐ',
='ᄑ',
='ᄒ',
='ᅟ', -- filler
='ᄝ',
='ᄫ',
='ᅀ',
='ᅌ',
='ᅗ',
='ᅙ',

='@ᅩ@ᅡᅵ',
='@ᅮ@ᅥᅵ',
='@ᅩ@ᅡ',
='@ᅮ@ᅥ',
='@ᅡᅵ',
='@ᅥᅵ',
='ᅩᅡᅵ',
='ᅮᅥᅵ',
='@ᅩᅵ',
='@ᅮᅵ',
='ᅡᅵ',
='@ᅡ',
='ᅥᅵ',
='@ᅥ',
='ᅩᅡ',
='ᅩᅵ',
='@ᅩ',
='ᅮᅥ',
='ᅮᅵ',
='@ᅮ',
='ᅳᅵ',
='ᅡ',
='ᅥ',
='ᅩ',
='ᅮ',
='ᅳ',
='ᅵ',
='ᆞ',
}

local tt = [==[
BREAK	1

# remove hanja from (ex.) 사뎐(辭典)
# caps prob. isn't necessary since the "base" text is actually hangeul?
# Hani regex is a reasonable subset of Hani from ],
# last checked on 20220221
%(+%)	×

# to yale

# non-simple
gᄋ	Ğ # voiced velar fricative /ɣ/
ᄋᄋ	Ő
@ᅮ	yu
@ᅩ	yo
ᅩᅡ	wa
ᅮᅥ	we
ᅵᆞ	yo
ᆞᆞ	yo

# choseong
ᄀ	K
ᄂ	N
ᄃ	T
ᄅ	L
ᄆ	M
ᄇ	P
ᄉ	S
ᄋ	Ø
ᄌ	C
ᄎ	CH
ᄏ	KH
ᄐ	TH
ᄑ	PH
ᄒ	H
ᄝ	◆
ᄫ	Ƃ
ᅗ	◆
ᄛ	◆
ᅌ	Ŋ
ᅀ	Z
ᅙ	Q
ᄼ	◆
ᅎ	◆
ᅔ	◆
ᄾ	◆
ᅐ	◆
ᅕ	◆
ᅟ	× # filler

# jungseong
@	y
ᅡ	a
ᅥ	e
ᅩ	wo
ᅮ	wu
ᅳ	u
ᅵ	i
ᆞ	o
ᅠ	× # filler

# jongseong
ᆨ	k
ᆫ	n
ᆮ	t
ᆯ	l
ᆷ	m
ᆸ	p
ᆺ	s
ᆼ	ø
ᆽ	c
ᆾ	ch
ᆿ	kh
ᇀ	th
ᇁ	ph
ᇂ	h
ᇢ	◆
ᇦ	ƃ
ᇴ	◆
ퟝ	◆
ᇰ	ŋ
ᇫ	z
ᇹ	q

# tone
〮	↑
〯	→

# tone diacritic location
(+)(?)()	%1%3%2

# hyphens within syllables
# CV-y
# CVC-C
# CV-C
# C-V
%-%-%-%-(.-+)(y)	%1-%2
%-%-%-(.-+)()	%1-%2
%-%-%-(.-+)	%1-
%-%-(.-)()	%1-%2

# 子(ᄌᆞ)ㅣ
(%))(%-?)i	%1%2y

Ø	×

BREAK	2

↑	́
→	̌
↓	̀

ğ	G
ő	OO
Ø	NG # capitalized hanja readings
ø	ng
ƃ	W
Ŋ	NG # capitalized hanja readings
ŋ	ng
]==]

-- Normalise the rule table so that every remaining line is "pattern<TAB>replacement".
tt = mw.text.trim(tt)
-- Remove comments: a '#' and everything after it up to the end of the line,
-- together with any whitespace in front of the '#'. Matching only the '#'
-- characters would leave the comment text behind as part of the rules.
tt = mw.ustring.gsub(tt, '%s*#[^\n]*', '')
-- Collapse runs of newlines so no empty lines remain.
tt = mw.ustring.gsub(tt, '\n+', '\n')

-- Character lists that export.tr substitutes into its patterns via string.format.
-- a: the characters listed under "# choseong" in the rule table (ends with the filler)
local a = 'ᄀᄂᄃᄅᄆᄇᄉᄋᄌᄎᄏᄐᄑᄒᄝᄫᅗᄛᅌᅀᅙᄼᅎᅔᄾᅐᅕᅟ'
-- b: '@' plus the characters listed under "# jungseong" (ends with the filler)
local b = '@ᅡᅥᅩᅮᅳᅵᆞᅠ'
-- c: the characters listed under "# jongseong"
local c = 'ᆨᆫᆮᆯᆷᆸᆺᆼᆽᆾᆿᇀᇁᇂᇢᇦᇴퟝᇰᇫᇹ'
-- d: the two tone marks listed under "# tone"
local d = '〮〯'

--- Transliterates a piece of text by applying the module's rule table.
--
-- The text is stripped of <r>/<ruby> tags, decomposed with NFD, passed through
-- the `tt_complex` lookup one character at a time, and then rewritten by each
-- "pattern<TAB>replacement" line of `tt` in order. The two special lines
-- "BREAK<TAB>1" and "BREAK<TAB>2" trigger the extra processing steps below
-- instead of a plain substitution.
--
-- NOTE(review): the character classes built from chars_Hang / chars_Hani assume
-- that getCharacters() returns text suitable for use inside a `[...]` pattern
-- set -- confirm against Module:scripts.
--
-- @param text string to transliterate
-- @param lang language code (not used by this function)
-- @param sc script code (not used by this function)
-- @return the transliterated string, or nil when the text contains no
--         character from chars_Hang
function export.tr(text, lang, sc)
	-- drop <r>, </r>, <ruby> and </ruby> tags
	text = gsub(text, "%<%/?r%>", "")
	text = gsub(text, "%<%/?ruby%>", "")

	-- fail (return nil) when there is nothing in the Hangul character set
	if not mw.ustring.match(text, '[' .. chars_Hang .. ']') then
		return nil
	end

	-- remember whether the input carries any tone mark at all
	local bool_tone_marking = mw.ustring.find(text, ('[%s]'):format(d))

	text = mw.ustring.toNFD(text)

	-- per-character lookup; characters without an entry are left unchanged
	text = gsub(text, '.', tt_complex)

	local hanja_class = '[' .. chars_Hani .. ']+'

	for line in mw.text.gsplit(tt, '\n') do
		local _, __, pattern, repl = mw.ustring.find(line, '(.+)\t(.+)')

		if not (pattern and repl) then
			-- not a "pattern<TAB>replacement" line: skip it instead of
			-- raising an error on a nil concatenation
		elseif pattern == 'BREAK' and repl == '1' then
			-- add period between hanja readings
			text = gsub(text, '(' .. hanja_class .. ')%((.-)%)', function(hanja, reading)
				local separated = gsub(reading, ('([%s]+)'):format(a), '.%1')
				return hanja .. '(' .. separated .. ')'
			end)

			if bool_tone_marking then
				-- move the location of tone marks for easier handling and
				-- mark low tone
				local syllable = ('([%s]+)([%s]+)([%s]*)([%s]*)'):format(a, b, c, d)
				text = gsub(text, syllable, function(initial, medial, final, tone)
					if tone == '' then
						tone = '↓'
					end
					return initial .. medial .. tone .. final
				end)
			end
		elseif pattern == 'BREAK' and repl == '2' then
			text = mw.ustring.lower(text)

			-- hanja readings: replace "hanja(reading)" by the reading in uppercase
			text = gsub(text, '()(' .. hanja_class .. ')%((.-)%)()', function(_start_pos, _hanja, reading, _end_pos)
				-- treat final ieung as null if tones are marked (is this a safe assumption?)
				if bool_tone_marking then
					reading = gsub(reading, 'ø', '')
				end
				-- convert to uppercase
				return (mw.ustring.upper(reading))
			end)
			-- remove hanja reading leading period
			text = gsub(text, '^%.', '')
			text = gsub(text, "'''%.", "'''")
			text = gsub(text, '(%s)%.', '%1')
		else
			-- '×' in the rule table stands for "replace with nothing"
			if repl == '×' then
				repl = ''
			end
			text = gsub(text, pattern, repl)
		end
	end

	-- track failed romanizations
	-- (black diamond instead of U+FFFD to avoid warnings when saving this page)
	if mw.ustring.match(text, '◆') then
		require('Module:debug').track('okm-translit/failed romanization')
	end

	return text
end

return export