Module:sandbox/nan-pron

This module sandbox lacks a documentation subpage. Please create it.
Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox
local export = {}

local data = mw.loadData("Module:nan-pron/data")

local m_str_utils = require("Module:string utilities")

local find = m_str_utils.find
local gsplit = m_str_utils.gsplit
local gsub = m_str_utils.gsub
local sub = m_str_utils.sub
local match = m_str_utils.match
local len = m_str_utils.len
local lower = m_str_utils.lower
local split = m_str_utils.split
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD

-- We use this table to encode digraphs and other multi-byte sequences.
-- Capitalization is encoded as a preceding "^", e.g. Tân → ^ta5n
-- Special treatments for digraphs: the diacritic that goes in between is
--   moved to the end, e.g. ńg → G2, ó͘ → O2, ó̤ → o_2, ṳ́ → u_2
-- Lookup table from POJ digraphs, special letters and combining diacritics (as
-- they appear after NFD decomposition) to the one-character codes used by the
-- internal representation.
-- NOTE(review): this copy of the file had lost every bracketed string key (the
-- entries read ` = "E"`, which is not valid Lua). The keys below were restored
-- from the trailing comments and from how the codes are used elsewhere in this
-- file; verify them against the canonical module before relying on them.
local encoding = {
	ph = "P", th = "T", kh = "K", ng = "G", ch = "c", chh = "C", sh = "S",
	["ɛ"] = "E", ["ⁿ"] = "N",
	["\204\129"] = "2", -- á (U+0301 combining acute)
	["\204\128"] = "3", -- à (U+0300 combining grave)
	["\204\130"] = "5", -- â (U+0302 combining circumflex)
	["\204\131"] = "6", -- ã (POJ T6) (U+0303 combining tilde)
	["\204\140"] = "&", -- ǎ (TL T6) (U+030C combining caron)
	["\204\132"] = "7", -- ā (U+0304 combining macron)
	["\204\141"] = "8", -- a̍ (U+030D combining vertical line above)
	["\204\134"] = "9", -- ă (POJ T9) (U+0306 combining breve)
	["\204\139"] = "0", -- a̋ (TL T9) (U+030B combining double acute)
	["\204\164"] = "_", -- U+0324 combining diaeresis below, as in ṳ and o̤
	["o\205\152"] = "O", -- o͘ (o followed by U+0358 combining dot above right)
}
-- Inverse of `encoding`: maps every internal code back to the text it stands for.
-- Each value of `encoding` becomes a key here (the previous `decoding = key`
-- replaced the table itself with a string on every iteration).
local decoding = {}
for key, val in pairs(encoding) do
	decoding[val] = key
end

-- e.g. Tân → ^ta5n
-- Encodes POJ text into the internal one-character-per-unit form.
-- The input is NFD-decomposed and then run through a chain of byte-based
-- string.gsub substitutions; the encoded string is returned.
-- NOTE(review): several patterns below are empty ("") or contain a bare "(?)".
-- Their character classes appear to have been lost from this copy of the file,
-- so what each of those steps matches cannot be determined here — confirm
-- against the original module.
function export.do_encode(text)
	text = toNFD(text)
		:gsub("",function(c) return "^"..c:lower() end) -- prefixes the lower-cased match with "^" (capitalisation marker)
		:gsub("chh",encoding) -- "chh" is looked up in `encoding` before any shorter sequence
		:gsub("ⁿ",encoding) -- "ⁿ" is looked up in `encoding`
		:gsub("",encoding) -- table lookup of each match in `encoding`
		:gsub("n(?)g","G%1") -- writes "G" followed by the capture taken between "n" and "g"
		:gsub("o(?)\205\152","O%1") -- \205\152 is U+0358; writes "O" followed by the capture
	return text
end

-- e.g. ^ta5n → Tân
-- Decodes the internal form back into text and returns it NFC-normalised.
-- NOTE(review): the patterns "G(?)" and "" look truncated (character classes
-- missing from this copy) — confirm against the original module.
function export.do_decode(text)
	text = text:gsub("G(?)","n%1g") -- rewrites "G" plus its capture as "n" .. capture .. "g"
		:gsub("",decoding) -- table lookup of each match in `decoding`
		:gsub("%^(.)",string.upper) -- "^x" becomes the upper-cased byte x (string.upper receives the capture)
	return toNFC(text)
end
-- simpler version that ignores numbers or uppercase
-- Looks up matches in `decoding` and NFC-normalises the result; unlike
-- export.do_decode it has no "G" or "^" handling. Used in this file when
-- building error messages.
-- NOTE(review): the gsub pattern is empty in this copy (character class
-- apparently lost) — confirm against the original module.
-- Both return values of gsub (string and match count) are passed on to toNFC.
local function decode(text)
	return toNFC(text:gsub("",decoding))
end

-- convert to an internal representation that uses the encoding above
-- and also place the tone at the end
-- and splits 4 and 8 into 4A 4B 8A 8B
-- e.g. Khóng-chú → ^KoG2-cu2
-- Parameters:
--   text              POJ string to convert; a literal "^" in it raises an error.
--   check_diminutive  when truthy, a syllable that encodes to exactly "a2"
--                     raises an error asking for 仔 instead.
-- Returns the converted string (each syllable rewritten as detone..tone).
-- NOTE(review): the syllable pattern ("+"), the tone patterns (""), the lookup
-- on `checked_category` and the keys of the inline {="6T",="9T"} table appear
-- to have lost their bracketed parts in this copy; as written the function is
-- not valid Lua. Confirm against the original module.
function export.poj_to_internal(text, check_diminutive)
	-- "^" is what do_encode uses to mark capitals, so it may not occur in raw input.
	if text:find("^",1,true) then
		error("Hokkien: Invalid character found.")
	end
	text = export.do_encode(text)
	-- Category letter appended to tones 4 and 8: "A" for p/t/k, "B" for h.
	local checked_category = {p="A",t="A",k="A",h="B"}
	text = text:gsub("+",function(syl)
		if check_diminutive and syl == "a2" then
			error("The diminutive should be specified with 仔.")
		end
		-- The 仔 placeholder passes through untouched.
		if syl == "仔" then
			return syl
		end
		local tone = syl:match("")
		local detone = syl:gsub("","")
		local category = checked_category
		if not tone then
			-- No tone code present: "4"..category when a category applies, otherwise "1".
			tone = category and ("4"..category) or "1"
		elseif tone == "8" then
			tone = "8" .. category
		else
			tone = ({="6T",="9T"}) or tone
		end
		-- A "^" at position 2 or later means a capital that is not syllable-initial.
		if detone:find("^",2,true) then
			error("Only the first letter in a syllable may be capitalised.")
		end
		-- The tone is moved to the end of the syllable.
		return detone..tone
	end)
	return text
end

-- Priority list for tone placement: the first of these internal codes that
-- occurs in a final carries the tone mark.
local tone_placement_order = "aOoEeuiymG"
-- Finals that override the priority list; the value is the position within
-- the final after which the tone code is inserted.
local tone_placement_exceptions = {
	["ioa"] = 2,
	["ioaN"] = 2,
	["oa"] = 1,
	["oaN"] = 1,
	["oeh"] = 2,
	["oehN"] = 2,
}
-- convert the internal representation as described above back to POJ
-- this function determines the canonical tone placement
-- Takes a string in the internal representation and returns the result of
-- export.do_decode on the re-assembled syllables. Raises an error when a
-- syllable cannot be decomposed or no position for the tone can be found.
-- NOTE(review): the syllable pattern ("+"), the decomposition pattern, the
-- keys of the inline {="&", ="0"} table and the index on
-- `tone_placement_exceptions` appear to have lost their bracketed parts in
-- this copy; as written the function is not valid Lua. Confirm against the
-- original module.
function export.internal_to_poj(text)
	text = text:gsub("+",function(syl)
		-- i, f, t: the three captures of the decomposition pattern; t is the tone code.
		local i,f,t = syl:match("^(?)(?*h??)(%dT?)?$")
		if not i then
			-- The 仔 placeholder is the only undecomposable syllable that is allowed through.
			if syl == "仔" then
				return syl
			end
			error("Syllable decomposition failed: " .. decode(syl))
		end
		-- Tones 1 and 4 are written without any tone code.
		if t == "1" or t == "4" then
			return i..f
		else
			t = ({="&", ="0"}) or t
		end
		-- tone placement rule: a>O>o>E>e>u>i>y>m>G, exceptions specified above
		local idx = tone_placement_exceptions
		if not idx then
			-- Walk the priority string and stop at the first code present in f.
			for j=1,#tone_placement_order do
				idx = f:find(tone_placement_order:sub(j,j),1,true)
				if idx then break end
			end
		end
		if not idx then
			error("Tone placement failed: " .. decode(syl))
		end
		-- Insert the tone code directly after position idx of f.
		return i..f:sub(1,idx)..t..f:sub(idx+1)
	end)
	return export.do_decode(text)
end

-- Splits a POJ argument into its "/"-separated readings and validates each one.
-- A reading may start with a "<locations>:" prefix, which is split off at the
-- first colon. Every reading is converted with export.poj_to_internal and back
-- with export.internal_to_poj, and must come back unchanged.
-- Parameters:
--   text  raw POJ input, or nil/false.
-- Returns nil when `text` is nil/false; otherwise three tables indexed by
-- reading number: the readings (prefix removed), their internal forms, and the
-- location prefixes (no entry for readings without a prefix).
-- Raises an error naming the expected spelling when a reading does not
-- round-trip.
-- The diminutive check is enabled when the page title contains 子 or 仔,
-- except on the page 明仔早.
function export.poj_check_invalid(text)
	if not text then
		return nil
	end
	local title = mw.title.getCurrentTitle().text
	local check_diminutive = (title:find("子") or title:find("仔")) and title ~= "明仔早"
	local reading = mw.text.split(text, "/", true)
	local internal,loc = {},{}
	for i=1,#reading do
		-- Work on the i-th reading; the per-reading index had been dropped, so
		-- string methods were being called on the list itself.
		local colon = reading[i]:find(':',1,true)
		if colon then
			loc[i], reading[i] = reading[i]:sub(1,colon-1), reading[i]:sub(colon+1)
		end
		internal[i] = export.poj_to_internal(reading[i], check_diminutive)
		local normalized = export.internal_to_poj(internal[i])
		if reading[i] ~= normalized then
			error("Invalid POJ input \"" .. reading[i] .. "\": please change it to \"" .. normalized .. "\"")
		end
	end
	return reading, internal, loc
end

-- Returns true unless some syllable of `text` fails the check in the loop, in
-- which case it returns false at the first such syllable. A syllable fails
-- when the `data.poj.canonical_final` test is falsy or its tone capture is
-- "6" or "9".
-- NOTE(review): the gmatch pattern ("+"), the decomposition pattern and,
-- presumably, an index on `data.poj.canonical_final` have lost their bracketed
-- parts in this copy — confirm against the original module.
local function check_canonical_POJ(text)
	for syl in text:gmatch("+") do
		local i,f,t = syl:match("^(?)(?*h??)(%d)?$")
		if not data.poj.canonical_final or t=="6" or t=="9" then
			return false
		end
	end
	return true
end

-- Prepares one reading for display: every 仔 is replaced by "á" and every "#"
-- is removed. When check_canonical_POJ(internal) is true the reading is
-- replaced by the string assigned below.
-- NOTE(review): that string is just "]" in this copy; it looks like the remains
-- of a wiki link built around `reading` — confirm against the original module.
function export.poj_display_one(reading, internal)
	reading = reading:gsub("仔", "á"):gsub("#", "")
	if check_canonical_POJ(internal) then
		reading = "]"
	end
	return reading
end

-- Formats a list of readings for display and joins them with " / ".
-- Parameters:
--   readings   list of POJ readings; updated in place with the display forms.
--   internals  list of internal forms, parallel to `readings`.
-- Each reading is passed to export.poj_display_one together with the internal
-- form at the same index. (The per-element index had been dropped, so the whole
-- list was being passed and then overwritten.)
function export.poj_display(readings, internals)
	for i = 1, #readings do
		readings[i] = export.poj_display_one(readings[i], internals[i])
	end
	return table.concat(readings, " / ")
end

-- Replacement table used by export.internal_to_tl: internal codes on the left,
-- their TL spelling (or combining diacritic bytes) on the right.
-- NOTE(review): the last two entries have lost their keys in this copy; the
-- values are U+030C (\204\140) and U+030B (\204\139) — confirm the keys against
-- the original module.
local tl_conv = {
	O="oo", u_="ir", o_="er", E="ee", hN="nnh",
	c="ts", C="tsh",
	="\204\140", -- ã → ǎ
	="\204\139", -- ă → a̋
}
-- Finals whose tone position in TL is fixed; the value is the position within
-- the final after which the tone is inserted.
local tl_tone_placement_exceptions = {
	ere=3
}
-- Converts a string in the internal representation to TL and returns the
-- result of export.do_decode on it. Raises an error when a syllable cannot be
-- decomposed or no position for the tone can be found.
-- NOTE(review): many patterns below are empty or contain bare "()" / "(?)" /
-- "(*)" groups, and the lookups on `tl_conv` and `tl_tone_placement_exceptions`
-- have no index; the bracketed parts appear to have been lost from this copy,
-- so the exact matching rules cannot be determined here. Confirm against the
-- original module.
function export.internal_to_tl(text)
	text = text:gsub("仔","a2") -- the diminutive placeholder is spelled out as "a2"
		:gsub("#","") -- drop "#" markers
		:gsub("e()","i%1")		-- eng/ek → ing/ik
		:gsub("o()","u%1")		-- oa/oe/oɛ → ua/ue/uɛ
		:gsub("(h?)N", "nn%1")		-- (h)ⁿ → nn(h)
		:gsub("_?", tl_conv)	-- ɛ/o͘/ṳ/o̤ → ee/oo/ir/er
	-- place tones
		:gsub("+",function(syl)
			-- i, f, t: the three captures of the decomposition pattern; t is the tone digit.
			local i,f,t = syl:match("^(?)(*)(%d)?$")
			if not i then
				error("Syllable decomposition failed: " .. decode(syl))
			end
			if t == "1" or t == "4" then -- no tone diacritic needed
				return i..f
			end
			t = tl_conv or t
			-- tone placement rule: if i or u occurs before other vowel, put on the other vowel
			local idx = tl_tone_placement_exceptions
				or f:match("^?u?()")
				or f:match("^()")
			if not idx then
				error("Tone placement failed: " .. decode(syl))
			end
			-- Insert the tone directly after position idx of f.
			return i..f:sub(1,idx)..t..f:sub(idx+1)
		end)
		:gsub("", tl_conv)		-- ch → ts
	return export.do_decode(text)
end

-- Returns a string when the initial/final combination fails the validity test
-- below, and nil otherwise.
-- NOTE(review): `validInitials`, `moreValidInitials`, `validFinals` and
-- `moreValidFinals` are not defined anywhere in the visible part of this file,
-- and the condition never references `initial`, `final` or `loc` — index
-- expressions appear to be missing. The returned string literal is also
-- truncated (unbalanced quotes) in this copy. Confirm against the original
-- module.
function export.poj_check_syllable(initial, final, loc)
	if not ((validInitials or moreValidInitials) and (validFinals or moreValidFinals)) then
		--error("The syllable " .. initial .. "+" .. final .. " does not appear to be a valid " .. loc .. " POJ syllable.")
		return " .. "]]"
	end
	return nil
end

-- Placeholder for the internal-representation → PSDB conversion.
-- Not implemented yet: the argument is handed back exactly as received.
export.internal_to_psdb = function(text)
	-- TODO
	return text
end

-- Placeholder for the internal-representation → IPA conversion. Currently
-- returns "IPA " .. text .. " " followed by `data.IPA.tone`.
-- NOTE(review): `loc` is not used as written; presumably `data.IPA.tone` was
-- indexed by it and the index was lost from this copy — confirm against the
-- original module.
function export.internal_to_IPA(text,loc)
	-- TODO
	return 'IPA '..text.." "..data.IPA.tone
end

-- Locations returned by export.parse_locations for the header when no
-- location string is given.
local default_location_list = { "Xiamen", "Quanzhou", "Zhangzhou", "Taiwan" }
-- Locations returned by export.parse_locations for the IPA when no location
-- string is given ("Taiwan" is replaced by "Taipei" and "Kaohsiung").
local default_IPA_location_list = { "Xiamen", "Quanzhou", "Zhangzhou", "Taipei", "Kaohsiung" }
-- Header locations that expand to a different list of IPA locations.
-- NOTE(review): the keys of this table are missing from this copy — confirm
-- them against the original module.
local loc_overrides = {
	 = { 'Taipei', 'Kaohsiung' },
	 = { 'Xiamen-d' },
	 = { 'Xiamen', 'Quanzhou', 'Zhangzhou' }
}
-- returns a list of locations for the header, and a list of locations for the IPA
-- these generally coïncide except for the overrides defined above, or
--   when it is tagged with -d (dated) etc.
-- Parameters:
--   loc  comma-separated location codes, or nil/false for the defaults.
-- Returns two lists: header locations and IPA locations.
-- Raises an error for a code flagged by `data.loc.invalid_code_hint` or for a
-- code with no entry in `data.loc.code`.
-- NOTE(review): the lookups on `data.loc.invalid_code_hint`, `data.loc.code`
-- and `loc_overrides` have no index, the 'Taiwan' pattern contains a bare
-- "(?)", and its replacement table has no keys; the bracketed parts appear to
-- have been lost from this copy. The error message on the "cannot be found"
-- branch also looks like it lost a wiki link. Confirm against the original
-- module.
function export.parse_locations(loc)
	if not loc then
		return default_location_list, default_IPA_location_list
	end
	local locations, IPA_locations = {},{}
	for location_abbrev in mw.text.gsplit(loc, ",", true) do
		if data.loc.invalid_code_hint then
			error("Invalid Hokkien location code: " .. location_abbrev .. ", maybe you meant: " .. data.loc.invalid_code_hint)
		end
		local loc_name = data.loc.code
		if not loc_name then
			error("The region label '" .. location_abbrev .. "' cannot be found. Please see ].")
		end
		-- The header list gets the name as looked up, before any rewriting.
		table.insert(locations, loc_name)
		loc_name = gsub(loc_name, '^Taiwan%-?(?)$', {  = 'Taipei',  = 'Kaohsiung',  = 'Taiwan' })
		if loc_overrides then
			-- An override contributes every one of its IPA locations.
			for _,IPA_loc in ipairs(loc_overrides) do
				table.insert(IPA_locations, IPA_loc)
			end
		else
			-- Otherwise the IPA list gets the name with a trailing "-d" removed.
			loc_name = gsub(loc_name, '%-d$', '')
			table.insert(IPA_locations, loc_name)
		end
	end
	return locations, IPA_locations
end

-- used by ] and ]
-- Entry point that builds the pronunciation output for a POJ argument and
-- returns it as one concatenated string.
-- `text` is either the POJ string itself or a table with an `args` field.
-- In the live code only the POJ and TL lines are appended to the output; the
-- PSDB and IPA values are computed but only passed to mw.logObject.
-- NOTE(review): many index expressions appear to have been lost from this copy
-- (`text.args` used twice with no index, `data.boilerplate` and `loc`/`poj`/
-- `internal` used without a per-item index, `ipa` and `data.loc.IPA_available`
-- likewise). Confirm against the original module.
function export.generate_all(text)
	local nan_pronunc
	if type(text) == "table" then
		text, nan_pronunc = text.args, text.args
	end
	
	local output_text = {}
	-- Every output line starts with "\n" when nan_pronunc is a non-empty value, otherwise with "\n*".
	local prefix = (nan_pronunc and nan_pronunc ~= "") and "\n" or "\n*"
	-- Appends prefix .. leading boilerplate .. content .. trailing boilerplate to output_text.
	-- `item` is not referenced as written.
	local function fmt(item, content) --formatting
		table.insert(output_text,
			prefix .. data.boilerplate.leading
				.. (content or "") .. data.boilerplate.trailing)
	end
	
	-- e.g. poj={"koe","ke"}, internal={"koe1","ke1"}, loc={"xm,qz","zz"}
	local poj,internal,loc = export.poj_check_invalid(text)
	-- `display` and `tl` are declared here but never assigned in the live code.
	local locations, IPA_locations, display, tl, psdb, ipa
	-- Assigned but not read anywhere in the live code below.
	local backwards_compatibility = not text:find(":",1,true)
	
	for i=1,#poj do
		locations, IPA_locations = export.parse_locations(loc)
		fmt("POJ", export.poj_display_one(poj,internal))
		fmt("TL", export.internal_to_tl(internal))
		-- PSDB and IPA are only generated when the input contains no "--".
		if not find(text, "%-%-") then
			psdb = export.internal_to_psdb(internal)
			ipa = {} -- store the generated ipa AND which lects have which ipa
			for _,IPA_location in ipairs(IPA_locations) do
				if data.loc.IPA_available then
					local generated_IPA = export.internal_to_IPA(internal,IPA_location)
					if not ipa then
						table.insert(ipa, generated_IPA)
						ipa = {}
					end
					IPA_location = IPA_location:gsub("%-d$","")
					table.insert(ipa, IPA_location)
				end
			end
		end
	-- Debug dump of this iteration's state to the Scribunto log.
	mw.logObject({poj=poj,int=internal,loc=loc,loca=locations,ipaloc=IPA_locations,dis=display,tl=tl,psdb=psdb,ipa=ipa})
	end
	
	return table.concat(output_text)
--[==[
	if not find(text, ":") then
		table.insert(output_text, fmt("LV2")
			.. fmt("POJ", export.poj_display(poj,internal))
			.. fmt("TL", export.poj_to_tl_conv(text)))
		
		if not find(text, "%-%-") then
			local psdb_hash = export.poj_to_psdb_conv(text)
			if not find(psdb_hash, "error") then
				table.insert(output_text, formatting.PSDB.leading .. psdb_hash .. formatting.PSDB.trailing)
			end
			for _, IPA_location in ipairs(IPA_available_list) do
				IPA_location = IPA_location == "Taiwan" and { "Taipei", "Kaohsiung" } or { IPA_location }
				for _, location in ipairs(IPA_location) do
					table.insert(output_text, formatting.IPA.leading .. location_link .. formatting.IPA.trailing)
					local reading_IPA_hash = {}
					for poj_reading in gsplit(text, "/", true) do
						table.insert(reading_IPA_hash, export.generate_IPA(poj_reading, location))
					end
					table.insert(output_text, table.concat(reading_IPA_hash, ", "))
					if #reading_IPA_hash > 1 then
						table.insert(output_text, string.format("]", #reading_IPA_hash))
					end
				end
			end
		end
	else
		for i, poj_reading in ipairs(all_readings) do
			table.insert(output_text, formatting.LV_two.leading)
			
			local location_hash = {}
			for _, location_name in ipairs(locations) do
				table.insert(location_hash, location_link)
			end
			table.insert(output_text, ": " .. table.concat(location_hash, ", ") .. formatting.LV_two.trailing)
			
			table.insert(output_text, formatting.POJ.leading .. export.poj_display(poj_reading) .. formatting.POJ.trailing ..
				formatting.TL.leading .. export.poj_to_tl_conv(poj_reading) .. formatting.TL.trailing)
			
			if not find(poj_reading, "%-%-") then
				local psdb_hash = export.poj_to_psdb_conv(poj_reading)
				if not find(psdb_hash, "error") then
					table.insert(output_text, formatting.PSDB.leading .. psdb_hash .. formatting.PSDB.trailing)
				end
				
				local IPA_readings = {}
				for j, location_name in ipairs(locations) do
					location_name = gsub(location_name, '^Taiwan%-?(?)$', {  = 'Taipei',  = 'Kaohsiung',  = 'Taiwan' })
					loc = {
						 = { 'Taipei', 'Kaohsiung' },
						 = { 'Xiamen-d' },
						 = { 'Xiamen', 'Quanzhou', 'Zhangzhou' }
					}
					location_name = loc or { gsub(location_name, '%-d$', '') }
					for k, location in ipairs(location_name) do
						local loc = gsub(location, '%-d$', '')
						if IPA_available then
							local poj_to_ipa = export.generate_IPA(poj_reading, location)
							if IPA_readings then
								table.insert(IPA_readings, location_link)
							else
								IPA_readings = { j + (k/10), { location_link } }
							end
						end
					end
				end
				for reading, reading_info in pairs(IPA_readings) do
					table.insert(output_text, formatting.IPA.leading .. table.concat(reading_info, ", ") ..
						formatting.IPA.trailing .. reading)
				end
			end
		end
	end
]==]
end

function export.generate_IPA(text, location)
	-- (Wyang) I can't seem to find an example where 'triple' is used.. The code is below: 
	
	--if match(p, "%(") then
	--	p = gsub(p, "", "")
	--	triple = true
	--end
	--if triple then
	--	if tone == "一" then
	--		ipa = (initial .. final .. "一至七 " .. initial .. final .. "一至七 " .. initial .. final .. (i == #tone and "一" or "一至七"))
	--	elseif tone == "二" then
	--		ipa = (initial .. final .. "二至一 " .. initial .. final .. "二至一 " .. initial .. final .. (i == #tone and "二" or "二至一"))
	--	elseif tone == "三" then
	--		ipa = (initial .. final .. "三至二 " .. initial .. final .. "三至二 " .. initial .. final .. (i == #tone and "三" or "三至二"))
	--	elseif tone == "四A" then
	--		ipa = (initial .. final .. "四至八 " .. initial .. final .. "四至八 " .. initial .. final .. (i == #tone and "四" or "四至八"))
	--	elseif tone == "四B" then
	--		final = gsub(final, "ʔ", "(ʔ)")
	--		ipa = (initial .. final .. "四至二 " .. initial .. final .. "四至二 " .. initial .. final .. (i == #tone and "四" or "四至二"))
	--	elseif tone == "五" then
	--		if loc == "Quanzhou" or loc == "Taipei" then
	--			ipa = (initial .. final .. "五 " .. initial .. final .. "五至三 " .. initial .. final .. (i == #tone and "五" or "五至三"))
	--		else
	--			ipa = (initial .. final .. "五 " .. initial .. final .. "五至七 " .. initial .. final .. (i == #tone and "五" or "五至七"))
	--		end
	--	elseif tone == "七" then
	--		ipa = (initial .. final .. "七至一 " .. initial .. final .. "七至三 " .. initial .. final .. (i == #tone and "七" or "七至三"))
	--	elseif tone == "八A" then
	--		ipa = (initial .. final .. "八至四 " .. initial .. final .. "八至四 " .. initial .. final .. (i == #tone and "八" or "八至四"))
	--	elseif tone == "八B" then
	--		final = gsub(final, "ʔ", "(ʔ)")
	--		ipa = (initial .. final .. "八至五 " .. initial .. final .. "八至三 " .. initial .. final .. (i == #tone and "八" or "八至三"))
	--	end
	--end

	if type(text) == "table" then text, location = text.args, text.args end
	
	local tone_from_mark = {
		 = "1", 
		 = "2",
		 = "3",
		 = "4A",  = "4A",  = "4A",
		 = "4B",
		 = "5",
		 = "6",
		 = "7",
		 = "8A",  = "8A",  = "8A",
		 = "8B",
		 = "9",
		 = "9",
	}
	
	local initial_ipa = {
		 = "p",  = "pʰ",  = "m",  = "b",  = "f",
		 = "t",  = "tʰ",  = "n",  = "l",  = "d",
		 = "t͡s",  = "t͡sʰ",  = "d͡z",  = "s",   = "ʃ",
		 = "k",  = "kʰ",  = "ŋ",  = "ɡ", 
		 = "h",  = "ɹ",  = "w",  = "j",  = "",
		
		 = "z",
	}
	
	local final_ipa = {
		 = "a",  = "aʔ",  = "ãʔ",
		 = "ai",  = "aiʔ",  = "ãi",  = "ãiʔ",
		 = "ak̚",  = "am",  = "an",  = "ã", 
		 = "aŋ",  = "ap̚",  = "at̚",
		 = "au",  = "auʔ",  = "ãuʔ",  = "ãu",
		
		 = "e",  = "ɛ",  = "ɛʔ", 
		 = "ɛk̚",  = "ɛŋ",
		 = "eʔ",  = "ẽʔ",  = "ei",  = "iɪk̚", 
		 = "ɛm",  = "ɛn",  = "ẽ", 
		 = "iɪŋ",  = "ɵy",  = "ə",
		 = "əʔ",  = "əm",  = "ən",
		 = "ət̚",  = "ək̚",  = "ɛt̚",  = "ep̚",
		 = "eu",  = "ẽu",
            
		 = "i",  = "ia",  = "iaʔ",
		 = "iãʔ",  = "iak̚", 
		 = "iam",  = "iɛn",  = "iã",
		 = "iaŋ",  = "iap̚",  = "iɛt̚",
		 = "iau",  = "iauʔ",  = "iãuʔ",  = "iãu",
		 = "ie",  = "iɛ",  = "iɛ̃",
		 = "iʔ",  = "ĩʔ",
		 = "im",  = "in",  = "ĩ",  = "iŋ",
		 = "io",  = "iua",  = "iuã",  = "ioʔ",  = "iɔʔ",  = "iɔ",
		 = "iop",  = "iɔk̚",  = "iɔ̃",  = "iom",  = "iɔŋ",
		 = "ip̚",  = "ɯ",  = "ɯʔ",  = "ən",  = "it̚",
		 = "iu",  = "iua",  = "iuʔ",  = "iũ",  = "iuã",  = "iũʔ",
         = "ie",  = "iɛ",  = "iɛ̃", 
		
		 = "m̩",  = "m̩ʔ",
		 = "ŋ̍",  = "ŋ̍ʔ",
		
		 = "o",  = "ɔ",  = "ua",  = "uaʔ",  = "uãʔ",  = "uai",
		 = "uaiʔ",  = "uãiʔ",  = "uãi",  = "uan",  = "uã", 
		 = "uaŋ",  = "uat̚",  = "uak̚",
		 = "ue",  = "ueʔ",  = "uẽʔ",  = "uẽ",  = "uɛ",
		 = "oʔ",  = "ɔʔ",  = "ɔ̃ʔ",  = "ɔi",  = "ɔ̃i",
		 = "ɔk̚",  = "ɔm",  = "ɔ̃",  = "ɔŋ",  = "ɔp̚",
		 = "ɔt̚",  = "ou",
		
		 = "u",  = "ũ",  = "uʔ",  = "ũʔ",
		 = "ui",  = "uĩ",  = "uiʔ",  = "uĩʔ",
		 = "ok̚",  = "om",
		 = "un",  = "oŋ",  = "ut̚",
		
		 = "y",  = "yn",
		
		 = "iai",
		 = "iei",
		 = "ɛ̃",  = "ɛ̃",  = "ɛ̃",
		 = "ɛ̃ʔ",  = "ɛ̃ʔ",
		 = "ɤ",  = "iɤ",
		 = "ɤʔ",  = "iɤʔ",
		 = "ɤ",  = "iɤ",
		 = "ɤʔ",  = "iɤʔ",
		 = "uɛ",
		 = "uɛ̃",
		 = "uɛʔ",
		 = "ɛŋ",  = "ɛk̚",
		 = "eŋ",  = "ek̚",
		 = "eŋ",  = "ek̚",  = "ik̚",
		 = "ɔu",  = "ɔ̃u",
		 = "eŋ",  = "ek̚",
         = "ɔ",
		 = "iɔ",
		 = "ɔʔ",
		 = "iɔʔ",
         = "eu",  = "ẽu",
		 = "eŋ",  = "ek̚",
		 = "en",  = "et̚",
		 = "em",
         = "oŋ",  = "ok̚", 
         = "ioŋ",  = "iok̚",
         = "õ",  = "iõ",
         = "uɛ̃",
         = "z̩", 
         = "ɨ", 
		 = "eŋ",  = "ek̚",
	}
	
	local tone_sandhi = { }
		-- (Wyang) I'm not sure about the 'Xd' ones, when tone X is followed by the diminutive 仔.
	tone_sandhi = {
		 = "7",  = "1",  = "2",  = "8A",  = "2",
		 = "7",  = "3",  = "4A",  = "3",
	}
	tone_sandhi = tone_sandhi
	tone_sandhi = { -- 2 and 4 are special cases
		 = "7",  = "10",
		 = "9",  = "9",  = "11",  = "11",
	}
	tone_sandhi = {
		 = "1",  = "5",  = "2",  = "8A",  = "4B",
		 = "6",  = "6",  = "6",  = "S",  = "S",
	}
	tone_sandhi = {
		 = "1",  = "5",  = "2",  = "8A",  = "4B",
		 = "S1",  = "S1",  = "S1",  = "S2",  = "S2",
	}
	tone_sandhi = {
		 = "1",  = "5",  = "2",  = "4A",  = "4B",
		 = "6",  = "6",  = "6",  = "S",  = "S",
	}
	tone_sandhi = {
		 = "1",  = "5",  = "2",  = "5",  = "4B",
		 = "6",  = "6",  = "6",  = "S",  = "S",
	}
	tone_sandhi = {
		 = "7",  = "1",  = "S1",  = "8B",  = "8B",
		 = "7",  = "3",  = "S2",  = "S2",
	}
	tone_sandhi = {
		 = "7",  = "1",  = "2",  = "S",  = "2", 
		 = "7",  = "3",  = "3",  = "3",
		 = "1",  = "7",
	}
	tone_sandhi = {
		 = "7",  = "1",  = "2",  = "8A",  = "2",
		 = "7",  = "3",  = "4A",  = "3",
	}
	tone_sandhi = {
		 = "7",  = "1",  = "2",  = "4A",  = "4B",
		 = "7",  = "3",  = "7",  = "7",
	}
	tone_sandhi = {
		 = "1",
		 = "5",  = "5",  = "2",  = "2",
	}
	tone_sandhi = {
		 = "7",  = "1",  = "2",  = "8A",  = "2", 
		 = "3",  = "3",  = "4A",  = "3",  = "9",
		 = "1",  = "1",  = "7",  = "7",  = "7",
	}
	tone_sandhi = {
		 = "7",  = "1",  = "2",  = "8A",  = "2",
		 = "7",  = "3",  = "4A",  = "3",  = "9",
		 = "1",  = "1",  = "7",  = "7",  = "7",
	}
	tone_sandhi = {
		 = "1",  = "1",  = "2",  = "8A",  = "2",
		 = "3",  = "3",  = "3",  = "S",  = "S",
	}
	tone_sandhi = {
		 = "1",  = "1",  = "2",  = "8A",  = "2",
		 = "3",  = "3",  = "3",  = "S",  = "S",
	}
	tone_sandhi = {
		 = "7",  = "S1",  = "2",  = "8A",  = "2",
		 = "7",  = "S2",  = "S3",  = "S3",  = "9",
	}
	tone_sandhi = {
		 = "1",  = "8A",  = "S1",  = "4A",  = "S1",
		 = "S2",  = "S2",  = "S2",  = "S3",  = "S3",  = "9",
	}
	tone_sandhi = {
		 = "7",  = "S",  = "2",  = "8A",  = "2",
		 = "7",  = "3",  = "4A",  = "4B",  = "9",
	}
	tone_sandhi = { -- 3 and 4B are special cases
		 = "7",  = "5",  = "8A",
		 = "3",  = "3",  = "4A",  = "3"
	}
	tone_sandhi = { --Xiamen/Zhangzhou-like
		 = "7",  = "5",  = "2",  = "8As",  = "2",
		 = "3",  = "3",  = "3",  = "3"
	}
	tone_sandhi = {
		 = "7",  = "1",  = "1",  = "8A",  = "8B", 
		 = "7",  = "6",  = "3",  = "4A",  = "4B",  = "9"
	}
	tone_sandhi = {
		 = "1",  = "S2",  = "S3",  = "S3",  = "S3",
		 = "S1",  = "S1",  = "8B",  = "8B",
	}
	tone_sandhi = tone_sandhi

	local tone_value = { }
	tone_value = {
		 = "44",  = "53",  = "21",  = "32",  = "32", 
		 = "24",  = "22",  = "4",  = "4",
	}
	tone_value = tone_value
	tone_value = {
		 = "44",  = "31",  = "112",  = "32",  = "32", 
		 = "24",  = "22",  = "53",  = "53",
		 = "11",  = "42",  = "1",  = "4" --sandhi-only tones
	}
	tone_value = {
		 = "33",  = "554",  = "41",  = "5",  = "5",
		 = "24",  = "22",  = "41",  = "24",  = "24",
		 = "2", --sandhi-only
	}
	tone_value = {
		 = "33",  = "554",  = "41",  = "5",  = "5",
		 = "24",  = "33",  = "41",  = "24",  = "24",
		 = "22",  = "2", --sandhi-only
	}
	tone_value = {
		 = "33",  = "554",  = "31",  = "5",  = "5",
		 = "24",  = "22",  = "31",  = "23",  = "23",
		 = "2", --sandhi-only
	}
	tone_value = {
		 = "33",  = "54",  = "21",  = "4",  = "4",
		 = "24",  = "22",  = "21",  = "23",  = "23",
		 = "2", --sandhi-only
	}
	tone_value = {
		 = "44",  = "53",  = "21",  = "32",  = "32",
		 = "24",  = "22",  = "24",  = "4",
		 = "53",  = "21", --sandhi-only
	}
	tone_value = {
		 = "44",  = "53",  = "21",  = "32",  = "32",
		 = "13",  = "22",  = "121",  = "121",
		 = "5", --sandhi-only
	}
	tone_value = {
		 = "44",  = "53",  = "21",  = "32",  = "32", 
		 = "24",  = "22",  = "3",  = "3",
	}
	tone_value = {
		 = "55",  = "53",  = "11",  = "32",  = "32",
		 = "213",  = "33",  = "14",  = "14",
	}
	tone_value = {
		 = "334",  = "21",  = "213",  = "5",  = "5",
		 = "11",  = "53",  = "55",  = "32",  = "32",
		 = "34", --sandhi-only
	}
	tone_value = {
		 = "44",  = "53",  = "11",  = "32",  = "32", 
		 = "24",  = "33",  = "4",  = "4",  = "35"
	}
	tone_value = {
		 = "44",  = "41",  = "21",  = "32",  = "32", 
		 = "23",  = "33",  = "4",  = "4",  = "35"
	}
	tone_value = {
		 = "33",  = "51",  = "11",  = "31",  = "31", 
		 = "13",  = "31",  = "11",  = "5",  = "5",
		 = "1", --sandhi-only
	}
	tone_value = {
		 = "33",  = "51",  = "11",  = "31",  = "31", 
		 = "13",  = "31",  = "11",  = "5",  = "5",
		 = "1", --sandhi-only
	}
	tone_value = {
		 = "44",  = "53",  = "21",  = "32",  = "32", 
		 = "24",  = "33",  = "4",  = "4",  = "35",
		 = "55",  = "11",  = "1", --sandhi-only
	}
	tone_value = {
		 = "33",  = "55",  = "31",  = "5",  = "5", 
		 = "24",  = "33",  = "31",  = "35",  = "35",  = "35",
		 = "53",  = "22",  = "2", --sandhi-only
	}
	tone_value = {
		 = "44",  = "53",  = "21",  = "2",  = "2", 
		 = "24",  = "33",  = "5",  = "5",  = "35",
		 = "55", --sandhi-only
	}
	tone_value = {
		 = "44",  = "53",  = "12",  = "32",  = "32", 
		 = "24",  = "22",  = "54",  = "54"
	}
	tone_value = { --Xiamen/Zhangzhou-like
			 = "44",  = "42",  = "21",  = "32",  = "32", 
			 = "24",  = "22",  = "43",  = "43",  = "4"
	}
	tone_value = {
		 = "33",  = "445",  = "21",  = "3",  = "3", 
		 = "23",  = "55",  = "21",  = "4",  = "4",  = "5"
	}
	tone_value = {
		 = "33",  = "53",  = "31",  = "53",  = "53",
		 = "24",  = "31",  = "3",  = "3",
		 = "22",  = "34",  = "54", --sandhi-only
	}
	tone_value = tone_value
	
	-- Location-specific sandhi rules that depend on the tone that follows.
	-- Parameters:
	--   location  location name; only "Tong'an", "Kinmen" and "Longyan" have rules here.
	--   current   tone code of the syllable being processed.
	--   post      tone code of the following syllable, or nil.
	-- Returns a sandhi tone code, or nothing (nil) when `post` is nil or no rule
	-- below applies, so the caller can fall back with `or`.
	-- NOTE(review): the patterns "^$", "^8$" and "^4$" look truncated (character
	-- classes apparently lost from this copy) — confirm against the original
	-- module.
	local function get_sandhi_from_post(location, current, post)
		if post then
			if location == "Tong'an" then
				if current == "2" then
					if find(post, "^$") or find(post, "^8$") then
						return "7"
					else
						return "5"
					end
				elseif find(current, "^4$") then
					if post == "2" then
						return "10"
					else
						return "12"
					end
				end
			elseif location == "Kinmen" then
				if current == "3" or current == "4B" then
					if find(post, "^$") or find(post, "^4$") then
						return "1"
					else
						return "2"
					end
				end
			elseif location == "Longyan" then
				-- Tones 2 and 3 share the same rule here (the two branches are identical).
				if current == "2" then
					if post == "2" or post == "5" then
						return "3"
					else
						return "2"
					end
				elseif current == "3" then
					if post == "2" or post == "5" then
						return "3"
					else
						return "2"
					end
				elseif find(current, "^4$") then
					-- Before tone 2 or 5 the tone is returned unchanged.
					if post == "2" or post == "5" then
						return current
					else
						return "S"
					end
				elseif current == "7" then
					if post == "2" or post == "5" then
						return "7"
					else
						return "1"
					end
				end
			end
		end
	end
	
	-- Returns `text` after a gsub whose callback receives two captures
	-- (tone_symbol, coda) and answers with `tone_from_mark`.
	-- NOTE(review): the pattern contains bare "+", "(?)" and "*" and
	-- `tone_from_mark` is returned without an index; the bracketed parts appear
	-- to have been lost from this copy (presumably a lookup keyed on the
	-- captures) — confirm against the original module.
	local function get_tone(text)
		local tone = gsub(text, "^+(?)*(?)ⁿ?", function(tone_symbol, coda)
			return tone_from_mark end)
		return tone
	end
	
	-- Adds the nasal marker "ⁿ" to a final.
	-- A final that is just m, mh, ng or ngh is handed back unchanged. Otherwise:
	-- a final ending in o͘ (optionally followed by h) loses the combining dot,
	-- a final ending in plain o (optionally followed by h) is rejected with an
	-- error, and a final ending in ee (optionally followed by h) has "ee"
	-- reduced to "e"; then "ⁿ" is appended.
	local function nasalize(final)
		local bare_nasal = find(final, "^mh?$") or find(final, "^ngh?$")
		if bare_nasal then
			return final
		end
		local adjusted = final
		if find(adjusted, "o͘h?$") then
			adjusted = gsub(adjusted, "͘", "")
		else
			if find(adjusted, "oh?$") then
				error("Invalid POJ: nasal initial cannot go with -" .. adjusted)
			end
			if find(adjusted, "eeh?$") then
				adjusted = gsub(adjusted, "ee", "e")
			end
		end
		return adjusted .. "ⁿ"
	end
	
	local formatting = {
		leading = "<span class=\"IPA\">/",
		trailing = "/</span>"
	}
	
	local tone_superscript = {  = "¹",  = "²",  = "³",  = "⁴",  = "⁵",  = "⁻" }
	local word_result = {}
	local attention = {}
	
	if location ~= 'Xiamen-d' then
		location = gsub(location, '%-d$', '')
	end
	
	text = gsub(text, " ", "-")
	text = gsub(text, ",", "#")
	text = gsub(text, "%-?%.%.%.%-?", "#")
	text = gsub(text, "#$", "")
	text = gsub(text, "#%-?", " ")
	text = toNFD(lower(text))
	-- Main loop: one iteration per space-separated word of the normalised text.
	-- Each word is split on "-" into syllables; the first inner loop analyses
	-- every syllable (initial, final, tone, nasality checks, IPA lookup) and the
	-- second assembles the per-syllable output. The joined result of each word
	-- is appended to `word_result`.
	-- NOTE(review): this loop has lost most of its index expressions and
	-- character classes in this copy (e.g. `initial_ipa] or initial_ipa]`,
	-- `tone_value]`, empty patterns); it is not valid Lua as written and the
	-- per-syllable indexing cannot be recovered from here. Confirm against the
	-- original module.
	for word in gsplit(text, " ", true) do
		local initial, final, tone, diminutive, sandhi, result = {}, {}, {}, {}, {}, {}
		local syllables = split(word, "-", true)
		-- Cache the syllable count on the table itself.
		syllables.length = #syllables
		for index, syllable in ipairs(syllables) do
			-- The 仔 placeholder is read as "a" plus a combining acute, and flagged as diminutive.
			if syllable == "仔" then
				syllable = "a".."́"
				diminutive = true
			end
			local original_syllable = syllable
			syllable = gsub(syllable, "", "")
			if not find(syllable, "") then
				final = match(syllable, "^?h?h?(ngh?)$") or match(syllable, "^h?(mh?)$")
				initial = syllable ~= final and sub(syllable, 1, len(syllable) - len(final)) or "" --original code: "ʔ"
			else
				initial = match(syllable, "^??h?")
				final = sub(syllable, len(initial) + 1, -1)
			end
			-- The tone is read from the part of the original syllable that follows the initial.
			tone = get_tone(sub(original_syllable, len(initial) + 1, -1))
			local nasal_initial = match(initial, "^g?$")
			if nasal_initial then
				if find(final, "ⁿ") then
					error("Too much nasality in POJ. " .. original_syllable .. " should be " .. gsub(original_syllable, "ⁿ", ""))
				end
				if location ~= "Penang" and location ~= "Philippines" and location ~= "Singapore" then --exception for Penang, Philippines and Singapore
					final = nasalize(final)
				end
			end
			
			-- For Longyan, finals ending in "h" have every "h" removed.
			if location == "Longyan" and find(final, "h$") then
				final = gsub(final, "h", "")
			end
			
			local nasal_final = match(final, "^") or match(final, "ⁿ")
			local not_nasal_initial = match(initial, "^$")
			if ((nasal_initial and not nasal_final) or (not_nasal_initial and nasal_final)) and (location ~= "Penang" and location ~= "Philippines" and location ~= "Singapore") then --exception for Penang, Philippines and Singapore
				error("POJ error: nasality of initial and final not synchronized.")
			end
			
			-- Collect whatever poj_check_syllable returns for this syllable.
			table.insert(attention, export.poj_check_syllable(initial, final, location))
			
			initial = initial_ipa] or initial_ipa]
			final = final_ipa] or final_ipa]
				or error("Cannot recognise " .. final .. ".")
			-- In every syllable but the last, "ʔ" is shown as optional.
			if index < syllables.length then
				final = gsub(final, "ʔ", "(ʔ)")
			end
		end
		
		for index = 1, syllables.length do
			sandhi = tone_value]
			-- Sandhi lookup order: following-tone rule, then the tone_sandhi lookups below.
			-- `sandhi_hash` is not referenced again in the code as it appears here.
			local sandhi_hash = get_sandhi_from_post(location, tone, tone)
				or tone_sandhi..(diminutive and "d" or "")]
				or tone_sandhi]
			if index < syllables.length and tone_value ~= tone_value] then
				sandhi = sandhi .. "-" .. tone_value
			end
			table.insert(result, initial .. final .. sandhi)
		end
		table.insert(word_result, table.concat(result, " "))
	end
	return (gsub(formatting.leading .. table.concat(word_result, " ") ..
		formatting.trailing, "", tone_superscript)) .. table.concat(attention)
end


-- Table of PSDB spellings for initials.
-- NOTE(review): every key is missing from this copy (only the values remain),
-- so which initial maps to which spelling cannot be determined here — confirm
-- against the original module.
local psdb_initial = {
	 = "'p",  = "ph",  = "'b",
	 = "'d",  = "'t",
	 = "'k",  = "'q",  = "'g",
	 = "c",  = "z",
	 = "ch",  = "zh",
	 = "s",  = "s",
	 = "j",
	 = "l",  = "'h",
	 = "m",  = "n",  = "ng",
	 = "'"
}

-- Converts one final to its PSDB spelling using the `basic_psdb` table, with
-- separate branches for finals ending in "ⁿ", for finals matching the
-- `.g?$` test, and for finals ending in tone 4 or tone 8. Nasalised results are
-- prefixed with "v". Branches whose `basic_psdb` test fails fall through and
-- return nothing.
-- NOTE(review): every key of `basic_psdb`, the keys of the tone-digit
-- replacement table, the lookups `basic_psdb[...]` and most character classes
-- in the patterns are missing from this copy; the function is not valid Lua as
-- written and the exact matching rules cannot be determined here. Confirm
-- against the original module.
local function psdb_final(text)
	local basic_psdb = {
		--single vowel tone 12357
		 = "af",  = "ar",  = "ax",  = "aa",  = "a",
		 = "y",  = "ie",  = "ix",  = "ii",  = "i",
		 = "w",  = "uo",  = "ux",  = "uu",  = "u",
		 = "ef",  = "ea",  = "ex",  = "ee",  = "e",
		 = "of",  = "or",  = "ox",  = "oo",  = "o",
		 = "oy",  = "oir",  = "oix",  = "ooi",  = "oi",
		 = "'ngf",  = "'ngr",  = "'ngx",  = "'ngg",  = "'ng",
		 = "'mf",  = "'mr",  = "'mx",  = "'mm",  = "'m",
		--double vowel tone 12357
		 = "ay",  = "ae",  = "aix",  = "aai",  = "ai",
		 = "aw",  = "ao",  = "aux",  = "aau",  = "au",
		 = "iaf",  = "iar",  = "iax",  = "iaa",  = "ia",
		 = "iaw",  = "iao",  = "iaux",  = "iaau",  = "iau",
		 = "ioy",  = "ioir",  = "ioix",  = "iooi",  = "ioi",
		 = "iw",  = "iuo",  = "iux",  = "iuu",  = "iu",
		 = "oaf",  = "oar",  = "oax",  = "oaa",  = "oa",
		 = "oay",  = "oae",  = "oaix",  = "oaai",  = "oai",
		 = "oef",  = "oea",  = "oex",  = "oee",  = "oe",
		 = "uy",  = "uie",  = "uix",  = "uii",  = "ui",
		--nasal vowel tone 12357
		--nasal ending tone 12357
		 = "iefn",  = "iern",  = "iexn",  = "ieen",  = "ien",
		 = "iofng",  = "iorng",  = "ioxng",  = "ioong",  = "iong",
		--stopped single vowel tone 48
		 = "ob",  = "op",
		 = "od",  = "ot",
		 = "og",  = "ok",
		--stopped double vowel tone 48
		 = "iob",  = "iop",
		 = "iod",  = "iot",
		 = "iog",  = "iok",
	}
	-- Replace matches with tone digits "1".."8" via the lookup table.
	text = gsub(text, "", { = "1",  = "2",  = "3",  = "4",  = "5",  = "6",  = "7",  = "8"})
	if find(text, "ⁿ$") then
		-- Nasalised final: strip "ⁿ", look up the base form, prefix "v".
		local basic = gsub(text, "ⁿ", "")
		basic = gsub(basic, "^o()$", "oo%1")
		if basic_psdb then
			return "v" .. basic_psdb
		end
	elseif find(text, ".g?$") and not find(text, "^ian$") and not find(text, "^iong$") then
		-- The ending is split off, the base form looked up, and the ending re-attached.
		local basic = gsub(text, "g?()$", "%1")
		local ending = match(text, "(g?)$")
		basic = gsub(basic, "^o()$", "oo%1")
		if basic_psdb then
			return basic_psdb .. ending
		end
	elseif find(text, "ⁿ?4$") and not find(text, "^i?o4$") then
		-- Tone 4: the base form is looked up with tone "7"; the ending letter is
		-- remapped p→b, t→d, k→g, h→q.
		local basic = gsub(text, "(ⁿ?)4$", "%1") .. "7"
		local ending = match(text, "()ⁿ?4$")
		ending = gsub(ending, "",{p = "b", t = "d", k = "g", h = "q"})
		if find(basic, "ⁿ") then
			basic = gsub(basic, "ⁿ", "")
			basic = gsub(basic, "^o()$", "oo%1")
			if basic_psdb then
				return "v" .. basic_psdb .. ending
			end
		else
			if basic_psdb then
				return basic_psdb .. ending
			end
		end
	elseif find(text, "ⁿ?8$") and not find(text, "^i?o8$") then
		-- Tone 8: same as tone 4 but the ending letter is kept as is.
		local basic = gsub(text, "(ⁿ?)8$", "%1") .. "7"
		local ending = match(text, "()ⁿ?8$")
		if find(basic, "ⁿ") then
			basic = gsub(basic, "ⁿ", "")
			basic = gsub(basic, "^o()$", "oo%1")
			if basic_psdb then
				return "v" .. basic_psdb .. ending
			end
		else
			if basic_psdb then
				return basic_psdb .. ending
			end
		end
	else
		return basic_psdb
	end
end

-- Converts a POJ string into another romanization, built from the
-- `psdb_initial` lookup and the `psdb_final` function (presumably PSDB, judging
-- by the function name — TODO confirm).
-- Input: a string, or a table from which `text.args` is read (e.g. a frame).
--   "/" separates readings, " " separates words, "-" separates syllables;
--   "#" markers are discarded. "--" before a syllable marks it as neutral,
--   "仔" stands for the diminutive "á", and "(" flags a triplicated syllable.
-- Returns: the converted readings joined with ", ".
-- NOTE(review): in this copy of the source many bracketed expressions (index
-- expressions, table keys and pattern character classes) appear to be missing;
-- the code is kept verbatim — compare with the live module before relying on it.
function export.poj_to_psdb_conv(text)
	-- Accept a table argument as well as a plain string.
	if type(text) == "table" then text = text.args end
	
	local readings = split(lower(text), "/", true)
	
	for i = 1, #readings do
		-- will ignore # boundary marker
		local parts = split(gsub(readings, "#", ""), " ", true)
		for j = 1, #parts do
			-- Per-word working tables.
			local initial = {}
			local final = {}
			local psdb = {}
			local tone = {}
			local tonesandhi = {}
			local neutral = {}
			-- Rewrite "--" as "-0" so that, after splitting on "-", the syllable
			-- that follows starts with "0" and can be recognised as neutral below.
			parts = gsub(parts, "%-%-", "-0")
			local p = split(parts, "-", true)
			local ar = {}
			local triple = {}
			-- First pass: analyse each syllable (flags, tone, initial, final).
			-- Note that `i` here shadows the reading index of the outer loop.
			for i, item in ipairs(p) do
				-- Diminutive suffix written with the character 仔.
				if find(item, "仔") then
					item = gsub(item, "仔", "á")
					ar = true
				end
				-- A "(" marks a triplicated syllable.
				if find(item, "%(") then
					item = gsub(item, "", "")
					triple = true
				end
				-- Leading "0" was introduced above for "--" (neutral tone).
				if find(item, "^0") then
					item = gsub(item, "0", "")
					neutral = true
				end
				-- Normalise the ơ spellings (and the combining dot) to "oo".
				item = gsub(item, "ớ", "óo")
				item = gsub(item, "ờ", "òo")
				item = gsub(item, "ơ̂", "ôo")
				item = gsub(item, "ơ̄", "ōo")
				item = gsub(item, "ơ̍", "o̍o")
				item = gsub(item, "ơ", "oo")
				item = gsub(item, "͘", "o")
				-- Replace tone marks by placeholder characters (捌/伍/柒/叁).
				-- NOTE(review): the pattern and the table keys are missing in this copy.
				item = gsub(item, "",{ = "捌",  = "伍",  = "柒",  = "叁"})
				-- Pick the citation tone. The first branch distinguishes 八 from 四
				-- by the 捌 placeholder; the patterns of the other branches are
				-- missing in this copy.
				if find(item, "?") or find(item, "?g?") then
					if find(item, "捌") then
						tone = "八"
					else
						tone = "四"
					end
				elseif find(item, "") then
					tone = "二"
				elseif find(item, "") then
					tone = "三"
				elseif find(item, "") then
					tone = "五"
				elseif find(item, "") then
					tone = "七"
				else
					tone = "一"
				end
				-- Map accented letters/placeholders back to plain letters
				-- (keys missing in this copy).
				item = gsub(item, "",{ = "a",  = "i",  = "u",  = "e",  = "o",  = "m",  = "n",  = "",  = "a",  = "i",  = "u",  = "e",  = "o",  = "n",  = "",  = "a",  = "i",  = "u",  = "e",  = "o",  = "",  = "a",  = "i",  = "u",  = "e",  = "o",  = "",  = ""})
				-- Split the syllable into initial and final. A bare "m"/"ng"
				-- (optionally followed by "h") is treated as a final with an
				-- empty initial.
				if sub(item,1,3) == "chh" then
					initial = "chh"
					final = sub(item,4,-1)
				elseif sub(item,1,1) == "m" then
					if sub(item,2,2) == "h" then
						initial = ""
						final = "mh"
					elseif sub(item,2,2) == "" then
						initial = ""
						final = "m"
					else
						initial = "m"
						final = sub(item,2,-1)
					end
				elseif sub(item,1,2) == "ng" then
					if sub(item,3,3) == "h" then
						initial = ""
						final = "ngh"
					elseif sub(item,3,3) == "" then
						initial = ""
						final = "ng"
					else
						initial = "ng"
						final = sub(item,3,-1)
					end
				elseif find(item, "^h") then
					-- Two-letter initial ending in "h".
					initial = sub(item,1,2)
					final = sub(item,3,-1)
				elseif find(item, "^") then
					-- One-letter initial.
					initial = sub(item,1,1)
					final = sub(item,2,-1)
				else
					initial = ""
					final = item
				end
				-- ch/chh/s get an "i" appended when the final starts with "i".
				if find(initial, "^chh?$") or initial == "s" then
					if find(final, "^i") then
						initial = initial .. "i"
					end
				end
				p = item
			end
			-- Second pass: work out the sandhi tone and assemble the output.
			for i = 1, #p do
				-- Sandhi tone for each citation tone; `ar` (diminutive) changes
				-- the result for tones 三 and 七.
				if tone == "一" then
					tonesandhi = "七"
				elseif tone == "二" then
					tonesandhi = "一"
				elseif tone == "三" then
					tonesandhi = ar and "一" or "二"
				elseif tone == "四" then
					tonesandhi = "八"
				elseif tone == "五" then
					tonesandhi = "七"
				elseif tone == "七" then
					tonesandhi = ar and "七" or "三"
				elseif tone == "八" then
					tonesandhi = "四"
				end
				if triple then
					-- Triplication: three copies of the syllable. The first copy
					-- uses a special tone for 五 and 七, otherwise the sandhi tone.
					local tonesandhi1 = nil
					if tone == "五" then
						tonesandhi1 = "五"
					elseif tone == "七" then
						tonesandhi1 = "一"
					end
					psdb = (psdb_initial] or "error")
							..(psdb_final(final..(tonesandhi1 or tonesandhi)) or "error")
							..psdb_initial]
							..psdb_final(final..tonesandhi)
							..psdb_initial]
							..psdb_final(final..(i == #tone and tone or tonesandhi))
				else
					-- Presumably: the last syllable keeps its citation tone and the
					-- others take the sandhi tone (indices missing here — TODO confirm).
					psdb = (psdb_initial] or "error")
							..(psdb_final(final..(i == #tone and tone or tonesandhi)) or "error")
				end
				-- Neutral syllables are prefixed with "~" and rendered with tone 七.
				if neutral then
					psdb = "~" .. (psdb_initial] or "error")
							..(psdb_final(final.."七") or "error")
				end --psdb = p
			end
			parts = table.concat(psdb, "")
		end
		readings = table.concat(parts, " ")
		-- Apostrophe clean-up: collapse runs, drop a leading one, then drop
		-- apostrophes in further contexts (character classes missing in this copy).
		readings = gsub(readings, "'+", "'")
		readings = gsub(readings, "^'", "")
		readings = gsub(readings, "()'", "%1")
		readings = gsub(readings, "()'()", "%1%2")
		readings = gsub(readings, "()'()", "%1%2")
		readings = gsub(readings, "()'g", "%1g")
		readings = gsub(readings, "()'h", "%1h")
	end
	
	-- The outer parentheses drop gsub's second return value (the match count).
	return (gsub(table.concat(readings, ", "),'/()',' / %1'))
end

-- Checks a Peng'im syllable, given as `initial` and `final`, against tables of
-- accepted initials and finals.
-- Parameters:
--   initial, final: the two halves of the syllable.
--   loc: presumably a location/dialect code selecting an extra set of finals
--        from `moreValidFinals` — TODO confirm.
-- Returns: nil when the syllable is accepted, otherwise the string "]".
-- NOTE(review): all table keys and the lookup indices in the final test are
-- missing in this copy of the source, and the returned string looks truncated
-- (possibly a wikilink); the code is kept verbatim.
function export.pengim_check_syllable(initial, final, loc)
	-- Set of accepted initials (value 1 marks membership).
	local validInitials = {
		 = 1,  = 1,  = 1,  = 1, 
		 = 1,  = 1,  = 1,  = 1, 
		 = 1,  = 1,  = 1,  = 1,  = 1,
		 = 1,  = 1,  = 1,  = 1,  = 1,
	}
	-- Set of finals that are always accepted.
	local validFinals = {
		 = 1,  = 1,  = 1,  = 1,  = 1, 
		 = 1,  = 1,  = 1,  = 1,  = 1, 
		 = 1,  = 1,  = 1,  = 1, 
		 = 1,  = 1,  = 1,  = 1,  = 1,  = 1, 
		 = 1,  = 1,  = 1,  = 1, 
		 = 1,  = 1,  = 1, 
		 = 1,  = 1,  = 1,  = 1, 
		 = 1,  = 1,  = 1,  = 1, 
		 = 1,  = 1,  = 1, 
		 = 1,  = 1,  = 1, 
		 = 1,  = 1,  = 1, 
		 = 1,  = 1,  = 1,  = 1,  = 1,  = 1, 
		 = 1,  = 1, 
	}
	-- Six further sets of finals, one per sub-table.
	local moreValidFinals = {
		 = {
			 = 1,  = 1,  = 1,  = 1,
			 = 1,  = 1,  = 1,  = 1, 
			 = 1,  = 1,  = 1,  = 1, 
			 = 1,  = 1,  = 1,  = 1, 
			 = 1,  = 1,  = 1,  = 1, 
			 = 1,  = 1, 
		},
		 = {
			 = 1,  = 1,  = 1,  = 1,
			 = 1,  = 1,  = 1, 
			 = 1,  = 1,  = 1, 
			 = 1,  = 1,  = 1, 
			 = 1,  = 1,  = 1, 
			 = 1,  = 1, 
		},
		 = {
			 = 1,  = 1,  = 1,  = 1,
			 = 1,  = 1, 
			 = 1,  = 1, 
			 = 1,  = 1, 
		},
		 = {
			 = 1,  = 1,  = 1,  = 1, 
			 = 1,  = 1,  = 1, 
			 = 1,  = 1,  = 1, 
			 = 1,  = 1,  = 1, 
			 = 1,  = 1,  = 1, 
			 = 1,  = 1, 
		},
		 = {
			 = 1,  = 1,  = 1,  = 1, 
			 = 1,  = 1,  = 1,  = 1, 
			 = 1,  = 1,  = 1,  = 1, 
			 = 1,  = 1,  = 1, 
		},
		 = {
			 = 1,  = 1,  = 1, 
			 = 1,  = 1,  = 1,  = 1, 
			 = 1,  = 1,  = 1,  = 1, 
			 = 1,  = 1,  = 1,  = 1, 
			 = 1, 
		},
	}
	-- Reject unless the initial is valid AND the final is valid in either table.
	if not (validInitials and (validFinals or moreValidFinals)) then
		return "]"
	end
	return nil
end

-- Replacements applied by export.pengim_to_ipa_conv before the per-character
-- mapping; judging by the name, the keys are sequences of two or more letters.
-- NOTE(review): the keys are missing in this copy of the source.
local pengim_to_ipa_two_letters_above = {
	 = "ɡ",  = "β",  = "ŋ",
	 = "au",
}

-- Mapping to IPA used by export.pengim_to_ipa_conv for its per-character
-- substitution, in three groups: initials, vowels and tone contours. The tone
-- values containing "⁻" have the form "citation⁻sandhi".
-- NOTE(review): the keys are missing in this copy of the source.
local pengim_to_ipa_one_letter = {
	--initials
	 = "m",  = "n",
	 = "p",  = "t",  = "k",
	 = "pʰ",  = "tʰ",  = "kʰ",
	 = "s",  = "h",
	 = "d͡z",
	 = "t͡s",
	 = "t͡sʰ",
	 = "l",
	--vowels
	 = "a",
	 = "e",
	 = "ɯ",
	 = "i",
	 = "o",
	 = "u",
	--tones
	 = "³³⁻²³",
	 = "⁵²⁻³⁵",
	 = "⁵²⁻²¹",
	 = "²¹³⁻⁵⁵",
	 = "²⁻⁴",
	 = "⁵⁵⁻¹¹",
	 = "³⁵⁻¹¹",
	 = "¹¹",
	 = "⁴⁻²",
}

-- Tone contour values written as digit strings. Presumably indexed first by
-- location and then by tone, with several locations sharing one table via the
-- alias assignments below — TODO confirm.
-- NOTE(review): the index expressions and keys are missing in this copy of the
-- source, so as written each assignment simply rebinds the same local.
local pengim_tone_value = {}
pengim_tone_value = {
	 = "33",  = "53",  = "213",  = "2",
	 = "55",  = "35",  = "11",  = "5"
}
pengim_tone_value = pengim_tone_value
pengim_tone_value = pengim_tone_value
pengim_tone_value = pengim_tone_value
pengim_tone_value = {
	 = "33",  = "53",  = "31",  = "2",
	 = "55",  = "313",  = "11",  = "5"
}

-- Sandhi tone contour values written as digit strings. Presumably indexed first
-- by location and then by tone; two entries reuse `pengim_tone_value` — TODO
-- confirm which locations those are.
-- NOTE(review): the index expressions and keys are missing in this copy of the
-- source, so as written each assignment simply rebinds the same local.
local pengim_tone_sandhi = {}
pengim_tone_sandhi = {
	 = "23",
	 = "23",  = "35",
	 = "31",  = "53",
	 = "3",  = "5",
	 = "11",  = "21",  = "",  = "2"
}
pengim_tone_sandhi = {
	 = "23",
	 = "35",  = "35",
	 = "55",  = "55",
	 = "5",  = "5",
	 = "11",  = "21",  = "",  = "2"
}
pengim_tone_sandhi = pengim_tone_value
pengim_tone_sandhi = pengim_tone_value
pengim_tone_sandhi = {
	 = "23",
	 = "31",  = "31",
	 = "55",  = "55",
	 = "5",  = "5",
	 = "11",  = "33",  = "33",  = "2"
}

-- Pattern → replacement fixes applied by export.pengim_to_ipa_conv after the
-- per-character mapping. Three of the replacements end in "%1", i.e. they put
-- back whatever the pattern captured after the rewritten consonant.
-- NOTE(review): the keys are missing or truncated in this copy of the source.
local pengim_to_ipa_fix = {
	 = "b",
	+)"] = "p̚%1",
	+)"] = "k̚%1",
	+)"] = "ʔ%1",
}

-- Per-character replacements producing nasalized vowels, used by the
-- nasalization step of export.pengim_to_ipa_conv. The last entry maps its key
-- to the empty string, i.e. that character is deleted.
-- NOTE(review): the keys are missing in this copy of the source.
local pengim_to_ipa_nasal = {
	 = "ã",
	 = "ẽ", -- ê
	 = "ɯ̃", -- e
	 = "ĩ",
	 = "õ",
	 = "ũ",
	 = "",
}

-- Converts a Peng'im string to IPA through an ordered chain of substitutions.
-- Parameter: text — the Peng'im input string.
-- Returns: the result wrapped in "/" on both sides; every "/" remaining inside
-- it is expanded to "/, /".
-- NOTE(review): several pattern character classes and a table index appear to
-- be missing in this copy of the source; the code is kept verbatim.
function export.pengim_to_ipa_conv(text)
	local result
	-- Multi-letter replacements first. `pairs` gives no ordering guarantee, so
	-- the replacements must not depend on one another.
	for key, val in pairs(pengim_to_ipa_two_letters_above) do
		text = gsub(text, key, val)
	end
	-- Rewrite a final "2" as the full-width digit "２" in the matched context.
	text = gsub(text, "() (+)2$", "%1 %2２")
	-- Leave a character unchanged when the lookup yields nothing.
	local function verbose_function(char) return pengim_to_ipa_one_letter or char end
	-- This should work, but it doesn't convert the tone number in "diên1":
	-- result = gsub(text, ".", pengim_to_ipa_one_letter)
	result = gsub(text, ".", verbose_function)
	-- Append "/" as an end marker for the tone clean-up below.
	result = result .. "/"
	for key, val in pairs(pengim_to_ipa_fix) do
		result = gsub(result, key, val)
	end
	-- Nasalization: run every character of each match through pengim_to_ipa_nasal.
	result = gsub(result, "+nʔ?+", function (a)
		return gsub(a, ".", pengim_to_ipa_nasal)
	end)
	-- Directly before "/", drop a "⁻…" sandhi suffix unless it is "⁻²¹".
	result = gsub(result, "(⁻+)/", function(a) return (a ~= "⁻²¹" and "/" or a .. "/") end)
	-- Same rule before a comma, using "#" as a temporary stand-in that is then removed.
	result = gsub(result, ",", "#")
	result = gsub(result, "(⁻+)#", function(a) return (a ~= "⁻²¹" and "" or a) end)
	result = gsub(result, "#", "")
	-- Remove the end marker, then expand each remaining "/" to "/, /".
	result = gsub(result, "/$", "")
	result = gsub(result, "/", "/, /")
	return "/" .. result .. "/"
end

-- Prepares a Peng'im string for display in three steps: put spaces around each
-- "/" that follows a match of the first pattern, wrap each match of the second
-- pattern in <sup>…</sup>, and strip "#" markers.
-- Returns the formatted string only (no substitution count).
-- NOTE(review): the character classes of the first two patterns appear to be
-- missing in this copy of the source; the strings are reproduced verbatim.
function export.pengim_display(text)
	local spaced = gsub(text, "()/", "%1 / ")
	-- note: originally + but it seems like websites have the final tone within parentheses, if at all
	local superscripted = gsub(spaced, "+", "<sup>%0</sup>")
	local without_markers = gsub(superscripted, "#", "")
	return without_markers
end

-- Converts a Peng'im string with tone digits into a POJ-like spelling with
-- tone diacritics.
-- Parameter: text — "/" separates words/readings, " " separates syllables, and
--   "#" markers are removed first.
-- Returns: the converted words joined with " / ", normalized to NFC.
-- Method: each syllable has its consonants and vowels respelled, then the
-- placeholder "符" is inserted after the letter that should carry the tone, and
-- finally the placeholder and the trailing tone digit are replaced by a
-- combining diacritic.
-- NOTE(review): table keys, index expressions and pattern character classes
-- appear to be missing in this copy of the source; the code is kept verbatim.
function export.pengim_to_pojlike_conv(text)
	-- kind of based on MTR (http://www.ispeakmin.com/bbs/viewthread.php?tid=2784)
	text = gsub(text, "#", "")
	local words = split(text, "/", true)
			
	-- Combining diacritic for each tone (two tones have none).
	local tone_marks = {
		 = '', 
		 = '́',
		 = '̀',
		 = '',
		 = '̂',
		 = '̆', -- this is a breve; MTR: breve; current hokkien dialect convention: hacek; missionary: tilde or breve??
		 = '̄',
		 = '̍'
	}
	
	-- gsub callback for '符(.*)(%d)': a = text after the placeholder, num = tone digit.
	local function get_tone_mark(a, num) return tone_marks .. a end
	
	local function convert_final(x,c,t) -- convert final -g and -b (but not -ng)
		if c=='b' then c='p'
		elseif c=='g' then c='k' end
		return x..c..t
	end
	
	-- Respellings for syllable-initial consonants.
	local cons_correspondences = { ='b', ='g', ='p', ='t',
		='k', ='ph', ='th', ='kh', ='ts', ='tsh',
		='j' }
	
	-- gsub callback: drops the matched "n" and writes "ⁿ" after the optional "h".
	local function nasalization(n,h,t) return h..'ⁿ'..t end
	
	for i, word in ipairs(words) do
		local syllables = split(word, " ", true)
		-- Note that `i` here shadows the word index of the outer loop.
		for i, syllable in ipairs(syllables) do
			syllable = gsub(syllable, '^h?', cons_correspondences)
			syllable = gsub(syllable, '()()(%d)', convert_final)

			syllable = gsub(syllable, '', { ='e', ='ṳ' } )
			syllable = gsub(syllable, 'ao', 'au' )

			syllable = gsub(syllable, '(n)(h?)(%d)', nasalization)

			-- Choose where the tone mark goes and mark the spot with "符".
			if find(syllable, 'uai') then
				syllable = gsub(syllable, 'uai', 'ua符i')
			elseif find(syllable, '') then
				syllable = gsub(syllable, '()i', '%1符i') -- ?i
				syllable = gsub(syllable, 'i()', 'i%1符') -- i?
				syllable = gsub(syllable, '()()', '%1符%2') -- ?u
				syllable = gsub(syllable, '()()', '%1%2符') -- u?
			elseif find(syllable, '') or find(syllable, '^') then
				syllable = gsub(syllable, '()', '%1符')
			elseif find(syllable, 'ngh?%d') then
				-- Syllabic "ng": the mark goes on the "n".
				syllable = gsub(syllable, 'ng(h?)(%d)', 'n符g%1%2')
			elseif find(syllable, 'h?%d') then
				syllable = gsub(syllable, '()(h?)(%d)', '%1符%2%3')
			end
			
			-- Swap the placeholder and trailing tone digit for the diacritic.
			syllable = gsub(syllable, '符(.*)(%d)', get_tone_mark)

			syllables = syllable
		end
		words = table.concat(syllables, ' ')
	end

	-- NFC composes base letters with the combining tone marks where possible.
	return toNFC(table.concat(words, ' / '))
end

-- Module result: the table holding every `export.*` function defined above.
return export
Module:sandbox/nan-pron

Wikious

Boobota

Sagapedia