Module:User:TongcyDai/cpx-pron

Hello, you have come here looking for the meaning of the word Module:User:TongcyDai/cpx-pron. In DICTIOUS you will not only find all the dictionary meanings of the word Module:User:TongcyDai/cpx-pron, but we will also tell you about its etymology and its characteristics, and you will learn how to say Module:User:TongcyDai/cpx-pron in the singular and the plural. Everything you need to know about the word Module:User:TongcyDai/cpx-pron is here. The definition of the word Module:User:TongcyDai/cpx-pron will help you to be more precise and correct when speaking or writing your texts. Knowing the definition of Module:User:TongcyDai/cpx-pron, as well as those of other words, enriches your vocabulary and provides you with more and better linguistic resources.


local export = {}
local m_string_utils = require("Module:string utilities")
local m_table = require("Module:table")
local m_data = require("Module:cpx-pron/data")

local sub = m_string_utils.sub
local find = m_string_utils.find
local gsub = m_string_utils.gsub
local match = m_string_utils.match
local toNFD = mw.ustring.toNFD
local toNFC = mw.ustring.toNFC

-- Single-character markers that may decorate a syllable token.
local SPECIAL_MARKERS = {
	CAPITALIZATION = "^",   -- tested against the first character of a syllable
	NO_ASSIMILATION = "*",  -- tested against the first character (after "^" is removed)
	NO_SANDHI = "#",        -- tested against the last character
	MANUAL_CHANGE = ">",    -- separates the original form from a manually changed form
	SPACE_AFTER = "\\",     -- one backslash, tested against the last character
}

-- String identifiers for the available formatting modes.
local FORMAT_MODES = {
	DEMO = "demo",
	COMPLETE = "complete",
	BRIEF = "brief",
}

-- Supported dialect codes, keyed by their two-letter code.
-- split_dialect_codes consults this table when validating codes.
-- NOTE(review): every value below is the lone string "]", which looks like
-- the residue of bracketed markup that was stripped from the values; the
-- intended display text cannot be recovered from this copy -- confirm
-- against the upstream module.
local dialects = {
	pt = "]",
	nr = "]",
	jk = "]",
	xy = "]",
	ft = "]",
	yy = "]"
}

-- IPA value of each romanised initial, per dialect code.
-- NOTE(review): the `["..."]` keys were missing from this copy of the table
-- (each entry read ` = "p"`), which is a syntax error. The keys below are
-- reconstructed from the romanised initials used elsewhere in the module
-- (e.g. the "ng" initial in split_initial_final and the "w" produced by the
-- assimilation rules); confirm against the upstream module.
local initials = {
	pt = {
		["b"] = "p", ["p"] = "pʰ", ["m"] = "m",
		["d"] = "t", ["t"] = "tʰ", ["n"] = "n", ["l"] = "l",
		["z"] = "t͡s", ["c"] = "t͡sʰ", ["s"] = "ɬ",
		["g"] = "k", ["k"] = "kʰ", ["ng"] = "ŋ", ["h"] = "h",
		[""] = ""
	},
	jk = {
		["b"] = "p", ["p"] = "pʰ", ["m"] = "m",
		["d"] = "t", ["t"] = "tʰ", ["n"] = "n", ["l"] = "l",
		["z"] = "t͡s", ["c"] = "t͡sʰ", ["s"] = "ɬ",
		["g"] = "k", ["k"] = "kʰ", ["ng"] = "ŋ", ["h"] = "h",
		[""] = ""
	},
	nr = {
		["b"] = "p", ["p"] = "pʰ", ["m"] = "m",
		["d"] = "t", ["t"] = "tʰ", ["n"] = "n", ["l"] = "l",
		["z"] = "t͡s", ["c"] = "t͡sʰ", ["s"] = "ɬ",
		["g"] = "k", ["k"] = "kʰ", ["ng"] = "ŋ", ["h"] = "h",
		[""] = ""
	},
	xy = {
		["b"] = "p", ["p"] = "pʰ", ["m"] = "m",
		["d"] = "t", ["t"] = "tʰ", ["n"] = "n", ["l"] = "l",
		["z"] = "t͡s", ["c"] = "t͡sʰ", ["s"] = "ɬ",
		["g"] = "k", ["k"] = "kʰ", ["ng"] = "ŋ", ["h"] = "h",
		[""] = "",
		["w"] = "β",
	},
	yy = {
		["b"] = "p", ["p"] = "pʰ", ["m"] = "m",
		["d"] = "t", ["t"] = "tʰ", ["n"] = "n", ["l"] = "l",
		["z"] = "t͡s", ["c"] = "t͡sʰ", ["s"] = "θ",
		["g"] = "k", ["k"] = "kʰ", ["ng"] = "ŋ", ["h"] = "h",
		[""] = "",
		["w"] = "β",
	},
	ft = {
		["b"] = "p", ["p"] = "pʰ", ["m"] = "m",
		["d"] = "t", ["t"] = "tʰ", ["n"] = "n", ["l"] = "l",
		["z"] = "t͡s", ["c"] = "t͡sʰ", ["s"] = "ɬ",
		["g"] = "k", ["k"] = "kʰ", ["ng"] = "ŋ", ["h"] = "h",
		[""] = "",
		["w"] = "β",
	},
}

-- IPA value of each final, per dialect code.
-- NOTE(review): the `["..."]` keys are missing from every entry in this copy
-- (each entry reads ` = "a"`), so the table does not parse as shown. The
-- romanised keys cannot be recovered reliably from the values alone --
-- restore them from the upstream module.
local finals = {
	pt = {
		 = "a",  = "ɛ",  = "e",  = "i",  = "o",
		 = "ø",  = "ɒ",  = "u",  = "y",
		 = "ai",  = "au",  = "ia",  = "ieu",  = "iu",
		 = "ɔu",  = "ua",  = "uei",  = "ui",  = "yɒ",
		 = "aŋ",  = "ɒŋ",  = "ɛŋ",  = "œŋ",  = "ɔŋ",
		 = "iŋ",  = "iæŋ",  = "uŋ",  = "uaŋ",  = "yŋ",
		 = "yɒŋ",  = "ŋ̍",
		 = "aʔ",  = "ɒʔ",  = "ɛʔ",  = "œʔ",  = "ɔʔ",
		 = "iʔ",  = "iæʔ",  = "uʔ",  = "uaʔ",  = "iɛʔ",
		 = "uoʔ",  = "yʔ",  = "yɒʔ",
	},
	jk = {
		 = "a",  = "e",  = "ɛ",  = "ø",  = "œ", 
		 = "ɒ",  = "o",  = "i",  = "u",  = "y", 
		 = "ie",  = "iɐu",  = "iu",  = "ai",  = "au", 
		 = "ou",  = "uo",  = "uɐi",  = "ui",  = "yø",
		 = "aŋ",  = "ɛŋ",  = "iŋ",  = "uŋ",  = "iɛŋ", 
		 = "ɒŋ",  = "œŋ",  = "uoŋ",  = "ŋ",
		 = "aʔ",  = "ɛʔ",  = "eʔ",  = "oʔ",  = "ɒʔ",
		 = "œʔ",  = "uoʔ"
	},
	nr = {
		 = "a",  = "e",  = "ø",  = "ɒ",  = "o", 
		 = "i",  = "u",  = "y",  = "ia",  = "ieu", 
		 = "iu",  = "ai",  = "au",  = "ɔ",  = "ua", 
		 = "uei",  = "ui",
		 = "aŋ",  = "ᴇŋ",  = "iŋ",  = "oŋ", 
		 = "ɒŋ",  = "œŋ",  = "uəŋ",  = "yŋ",  = "ŋ",
		 = "aʔ",  = "ᴇʔ",  = "iʔ",  = "oʔ",  = "ɒʔ",
		 = "œʔ",  = "uəʔ",  = "yʔ"
	},
	xy = {
		 = "a",  = "ɛ",  = "e",  = "i",  = "o",
		 = "ø",  = "ɒ",  = "u",  = "y",
		 = "ai",  = "au",  = "ia",  = "ieu",  = "iu",
		 = "ɔu",  = "ua",  = "uei",  = "ui",  = "ya",
		 = "aŋ",  = "ɒŋ",  = "ɛŋ",
		 = "iŋ",  = "iɛŋ",  = "yŋ",
		 = "yøŋ",  = "uoŋ",  = "ŋ̍",
		 = "aʔ",  = "ɒʔ",  = "ɛʔ",
		 = "iʔ",  = "iɛʔ",  = "uʔ",
		 = "uoʔ",  = "yʔ",  = "yøʔ",
		 = "iaʔ",  = "uaʔ", -- iah, uah occur only with the pronoun checked tone (代詞促調)
		 = "ã",  = "ĩ",  = "ỹ",  = "ɒ̃",  = "ãĩ", 
		 = "ãũ",  = "ĩã",  = "ĩũ",  = "ũã",  = "ũĩ", 
		 = "ỹã",
	},
	yy = {
		 = "a",  = "e",  = "ø",  = "ɒ",  = "o", 
		 = "i",  = "u",  = "y",  = "ia",  = "iəu", 
		 = "iu",  = "ai",  = "au",  = "ou",  = "ua", 
		 = "uai",  = "oi",  = "ui",  = "ya",
		 = "aŋ",  = "ɛŋ",  = "iŋ",  = "oŋ",  = "uŋ",
		 = "iɛŋ",  = "ɒŋ",  = "œŋ",  = "uaŋ", 
		 = "yɐŋ",  = "yŋ",  = "ŋ",
		 = "aʔ",  = "ɛʔ",  = "iʔ",  = "oʔ",  = "ɒʔ",
		 = "œʔ",  = "iɛʔ",  = "uaʔ",  = "yɐʔ",  = "yʔ",
		 = "ã",  = "ẽ",  = "ø̃",  = "ɒ̃", 
		 = "ĩã",  = "ĩũ",  = "ũã",  = "ũĩ",  = "ỹã"
	},
	ft = {
		 = "a",  = "e",  = "ɒ",  = "ɤ",  = "i", 
		 = "u",  = "ia",  = "iəu",  = "iu",  = "ai", 
		 = "au",  = "ou",  = "ua",  = "uei",  = "ui",
		 = "aŋ",  = "ɛŋ",  = "iŋ",  = "ɒŋ",
		 = "ieŋ",  = "ɯəŋ",  = "ŋ",
		 = "aʔ",  = "ɛʔ",  = "iʔ",  = "ɒʔ",
		 = "ieʔ",  = "ɯəʔ",
		 = "ã",  = "ĩ",  = "ɒ̃", 
		 = "ĩã",  = "ĩũ",  = "ũã",  = "ũĩ"
	}
}

-- Tone categories:
-- 1 yin-ping (陰平) | 2 yang-ping (陽平) | 3 shang (上聲) | 4 yin-qu (陰去) | 5 yang-qu (陽去)
-- | 6A yin-ru A (陰入甲) | 6B yin-ru B (陰入乙) | 7A yang-ru A (陽入甲) | 7B yang-ru B (陽入乙)
-- S1: variant of 1 in non-final position
-- S3: the pronoun checked tone (代詞促調), sounds like the shang tone (上聲) in both Putian and Xianyou after tone sandhi
--
-- Tone letters (as superscript digits) for each tone, per dialect code.
-- NOTE(review): the `["..."]` keys are missing from every entry in this copy,
-- so the table does not parse as shown, and which value belongs to which
-- tone label cannot be established from here -- restore the keys from the
-- upstream module.
local tones = {
	pt = {
		 = "⁵³³",  = "¹³",  = "⁴⁵³",  = "⁴²",
		 = "²¹",  = "¹",  = "²¹",  = "⁴",  = "¹³", 
		 = "⁵⁵",  = "³²",  = "⁴⁵"
	},
	jk = {
		 = "⁵³³",  = "¹³",  = "⁴⁵³",  = "⁴²",
		 = "²¹",  = "¹",  = "²¹",  = "⁴",  = "⁴⁵³", 
		 = "⁵⁵",  = "³²",  = "⁴⁵"
	},
	nr = {
		 = "⁵³³",  = "¹³",  = "⁴⁵³",  = "⁴²",
		 = "²¹",  = "¹",  = "²¹",  = "⁴",  = "¹³", 
		 = "⁵⁵",  = "³²",  = "⁴⁵"
	},
	xy = {
		 = "⁵³³",  = "¹³",  = "³³²",  = "⁴²",
		 = "²¹",  = "¹",  = "²¹",  = "⁴",  = "¹³", 
		 = "⁵⁵",  = "³²"
	},
	yy = {
		 = "⁵³³",  = "¹³",  = "³³²",  = "⁴²",
		 = "²¹",  = "¹",  = "²¹",  = "⁴",  = "¹³", 
		 = "⁵⁵",  = "³²"
	},
	ft = {
		 = "⁵³³",  = "¹³",  = "³³²",  = "⁴²",
		 = "²¹",  = "¹",  = "²¹",  = "⁴",  = "¹³", 
		 = "⁵⁵",  = "³²"
	},
}

-- String-to-string replacement table with a `common` section and one section
-- per dialect code; its consumer is not visible in this part of the file.
-- NOTE(review): the `["..."]` keys are missing from every entry in this copy,
-- so the table does not parse as shown and the spellings being replaced
-- cannot be recovered from here -- restore them from the upstream module.
local corrections = {
	common = {
		 = "ao",  = "ieo",  = "ieo"
	},
	pt = {
		 = "ieo",  = "yor",  = "uei",  = "uei",
		 = "yorh",  = "yorng",
	},
	jk = {
		 = "ie",  = "uo",  = "uai",  = "uai",
		 = "oe",  = "oe",  = "yoe",
	},
	nr = {
		 = "ieo",  = "oo",  = "uei",  = "ua",
		 = "ua",  = "ng",  = "uerng",  = "uerh",
	},
	xy = {
		 = "ieo",  = "uei",  = "yoeh",
		 = "yoeng",  = "ieng",  = "ng",
	},
	yy = {
		 = "ieo",  = "oi", -- or "uai"
		 = "oi", -- or "uai"
	},
	ft = {
		 = "er",  = "ieo",  = "uei",  = "ia",
		 = "ng",  = "uerng",  = "uerng",  = "uerh",
	}
}

--- Strip a trailing nasalization marker from a final.
-- Two spellings are recognised: the superscript "ⁿ" and the digraph "nn".
-- @param final string final to inspect
-- @return the final without its trailing marker (unchanged if it has none)
-- @return the matched "ⁿ", or nil
-- @return the matched "nn", or nil
local function handle_nasalization(final)
	local old_marker = final:match("ⁿ$")
	local new_marker = final:match("nn$")

	local stripped = final
	if old_marker then
		-- the superscript spelling takes precedence over the digraph
		stripped = final:gsub("ⁿ$", "")
	elseif new_marker then
		stripped = final:gsub("nn$", "")
	end

	return stripped, old_marker, new_marker
end
 
-- Tone sandhi tables, one per dialect code. Each dialect holds ten rows of
-- nine entries; every entry is a resulting tone label ("2", "4", "5", "6A",
-- "7A", "S1", "S7").
-- NOTE(review): both levels of `["..."]` keys are missing in this copy (rows
-- read ` = {="5", ...}`), so the table does not parse as shown. Presumably
-- the outer key is the current syllable's tone and the inner key the
-- following syllable's tone -- confirm and restore from the upstream module.
local sandhi_rules = {
	pt = {
		 = {="5", ="5", ="5", ="2", ="2", ="2", ="2", ="5", ="5"},
		 = {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
		 = {="5", ="2", ="5", ="5", ="2", ="2", ="2", ="5", ="2"},
		 = {="S1", ="4", ="S1", ="S1", ="4", ="4", ="4", ="S1", ="4"},
		 = {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
		 = {="S7", ="S7", ="S7", ="S7", ="4", ="4", ="4", ="S7", ="S7"},
		 = {="S1", ="S1", ="S1", ="S1", ="4", ="4", ="4", ="S1", ="S1"},
		 = {="6A", ="6A", ="6A", ="7A", ="4", ="4", ="4", ="6A", ="6A"},
		 = {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
		 = {="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A"},
	},
	jk = {
		 = {="5", ="5", ="5", ="2", ="2", ="2", ="2", ="5", ="5"},
		 = {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
		 = {="5", ="2", ="5", ="5", ="2", ="2", ="2", ="5", ="2"},
		 = {="S1", ="4", ="S1", ="S1", ="4", ="4", ="4", ="S1", ="4"},
		 = {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
		 = {="S7", ="S7", ="S7", ="S7", ="4", ="4", ="4", ="S7", ="S7"},
		 = {="S1", ="S1", ="S1", ="S1", ="4", ="4", ="4", ="S1", ="S1"},
		 = {="6A", ="6A", ="6A", ="7A", ="4", ="4", ="4", ="6A", ="6A"},
		 = {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
		 = {="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A"},
	},
	nr = {
		 = {="5", ="5", ="5", ="5", ="2", ="2", ="2", ="5", ="5"},
		 = {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
		 = {="5", ="2", ="5", ="5", ="2", ="2", ="2", ="5", ="2"},
		 = {="S1", ="S1", ="S1", ="S1", ="4", ="4", ="4", ="S1", ="S1"},
		 = {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
		 = {="S7", ="S7", ="S7", ="S7", ="4", ="4", ="4", ="S7", ="S7"},
		 = {="S1", ="S1", ="S1", ="S1", ="4", ="4", ="4", ="S1", ="S1"},
		 = {="6A", ="6A", ="6A", ="7A", ="4", ="4", ="4", ="6A", ="6A"},
		 = {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
		 = {="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A"},
	},
	xy = {
		 = {="5", ="5", ="5", ="2", ="2", ="2", ="2", ="5", ="5"},
		 = {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
		 = {="5", ="S1", ="5", ="5", ="2", ="2", ="2", ="5", ="S1"},
		 = {="S1", ="S1", ="S1", ="S1", ="4", ="4", ="4", ="S1", ="S1"},
		 = {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
		 = {="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A"},
		 = {="S1", ="S1", ="S1", ="S1", ="4", ="4", ="4", ="S1", ="S1"},
		 = {="6A", ="6A", ="6A", ="7A", ="7A", ="7A", ="7A", ="6A", ="6A"},
		 = {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
		 = {="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A"},
	},
	yy = {
		 = {="5", ="5", ="5", ="2", ="2", ="2", ="2", ="5", ="5"},
		 = {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
		 = {="5", ="S1", ="5", ="5", ="2", ="2", ="2", ="5", ="S1"},
		 = {="S1", ="S1", ="S1", ="S1", ="4", ="4", ="4", ="S1", ="S1"},
		 = {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
		 = {="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A"},
		 = {="S1", ="S1", ="S1", ="S1", ="4", ="4", ="4", ="S1", ="S1"},
		 = {="6A", ="6A", ="6A", ="7A", ="7A", ="7A", ="7A", ="6A", ="6A"},
		 = {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
		 = {="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A"},
	},
	ft = {
		 = {="5", ="5", ="5", ="2", ="2", ="2", ="2", ="5", ="5"},
		 = {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
		 = {="5", ="S1", ="5", ="5", ="2", ="2", ="2", ="5", ="S1"},
		 = {="S1", ="S1", ="S1", ="S1", ="4", ="4", ="4", ="S1", ="S1"},
		 = {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
		 = {="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A"},
		 = {="S1", ="S1", ="S1", ="S1", ="4", ="4", ="4", ="S1", ="S1"},
		 = {="6A", ="6A", ="6A", ="7A", ="7A", ="7A", ="7A", ="6A", ="6A"},
		 = {="5", ="5", ="5", ="S1", ="4", ="4", ="4", ="5", ="5"},
		 = {="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A", ="7A"},
	}
}

-- Initial assimilation tables, one per dialect code. Each dialect is split
-- by the type of the preceding final (the strings returned by
-- get_final_type: "nasal_final", "nasalized_final", "glottal_final",
-- "other_final"); each entry gives the replacement initial. An empty
-- `glottal_final` table means no replacement is listed for that type.
-- Only xy, yy and ft define a `nasalized_final` section.
-- NOTE(review): the `["..."]` keys (the initial being replaced) are missing
-- from every entry in this copy, so the table does not parse as shown --
-- restore them from the upstream module.
local initial_assimilation_rules = {
	pt = {
		nasal_final = {
			 = "m",  = "m",  = "m",
			 = "n",  = "n",  = "n",  = "n",  = "n",  = "n",  = "n",
			 = "ng",  = "ng",  = "ng",  = "ng",  = "ng"
		},
		glottal_final = {}, -- remain unchanged
		other_final = {
			 = "",  = "",
			 = "m",  = "n",  = "l",  = "ng",
			 = "l",  = "l",  = "l",  = "l",  = "l",
			 = "",  = "",  = "",  = ""
		}
	},
	jk = {
		nasal_final = {
			 = "m",  = "m",  = "m",
			 = "n",  = "n",  = "n",  = "n",  = "n",  = "n",  = "n",
			 = "ng",  = "ng",  = "ng",  = "ng",  = "ng"
		},
		glottal_final = {},
		other_final = {
			 = "",  = "",
			 = "m",  = "n",  = "l",  = "ng",
			 = "l",  = "l",  = "l",  = "l",  = "l",
			 = "",  = "",  = "",  = ""
		}
	},
	nr = {
		nasal_final = {
			 = "m",  = "m",  = "m",
			 = "n",  = "n",  = "n",  = "n",  = "n",  = "n",  = "n",
			 = "ng",  = "ng",  = "ng",  = "ng",  = "ng"
		},
		glottal_final = {},
		other_final = {
			 = "",  = "",
			 = "m",  = "n",  = "l",  = "ng",
			 = "l",  = "l",  = "l",  = "l",  = "l",
			 = "",  = "",  = "",  = ""
		}
	},
	xy = {
		nasal_final = {
			 = "m",  = "m",  = "m",
			 = "n",  = "n",  = "n",  = "n",  = "n",  = "n",  = "n",
			 = "ng",  = "ng",  = "ng",  = "ng",  = "ng"
		},
		nasalized_final = {
			 = "m",  = "m",  = "m",
			 = "n",  = "n",  = "n",  = "n",  = "n",  = "n",  = "n",
			 = "",  = "",  = "",
			 = "ng",
			 = ""
		},
		glottal_final = {},
		other_final = {
			 = "w",  = "w",
			 = "m",  = "n",  = "l",  = "ng",
			 = "l",  = "l",  = "l",  = "l",  = "l",
			 = "",  = "",  = "",  = ""
		}
	},
	yy = {
		nasal_final = {
			 = "m",  = "m",  = "m",
			 = "n",  = "n",  = "n",  = "n",  = "n",  = "n",  = "n",
			 = "ng",  = "ng",  = "ng",  = "ng",  = "ng"
		},
		nasalized_final = {
			 = "m",  = "m",  = "m",
			 = "n",  = "n",  = "n",  = "n",  = "n",  = "n",  = "n",
			 = "",  = "",  = "",
			 = "ng",
			 = ""
		},
		glottal_final = {},
		other_final = {
			 = "w",  = "w",
			 = "m",  = "n",  = "l",  = "ng",
			 = "l",  = "l",  = "l",  = "l",  = "l",
			 = "",  = "",  = "",  = ""
		}
	},
	ft = {
		nasal_final = {
			 = "m",  = "m",  = "m",
			 = "n",  = "n",  = "n",  = "n",  = "n",  = "n",  = "n",
			 = "ng",  = "ng",  = "ng",  = "ng",  = "ng"
		},
		nasalized_final = {
			 = "m",  = "m",  = "m",
			 = "n",  = "n",  = "n",  = "n",  = "n",  = "n",  = "n",
			 = "",  = "",  = "",
			 = "ng",
			 = ""
		},
		glottal_final = {}, -- remain unchanged
		other_final = {
			 = "w",  = "w",
			 = "m",  = "n",  = "l",  = "ng",
			 = "l",  = "l",  = "l",  = "l",  = "l",
			 = "",  = "",  = "",  = ""
		}
	},
}

-- BUC spelling of each romanised initial.
-- NOTE(review): the `["..."]` keys were missing from this copy of the table
-- (each entry read ` = "b"`), which is a syntax error. The keys below are
-- reconstructed from the romanised initials used elsewhere in the module,
-- in the same order as the fifteen base entries of `initials`; confirm
-- against the upstream module.
local buc_initials = {
	["b"] = "b",
	["p"] = "p",
	["m"] = "m",
	["d"] = "d",
	["t"] = "t",
	["n"] = "n",
	["l"] = "l",
	["z"] = "c",
	["c"] = "ch",
	["s"] = "s",
	["g"] = "g",
	["k"] = "k",
	["ng"] = "ng",
	["h"] = "h",
	[""] = ""
}

-- Candidate BUC spellings for each final. Every value is a list of
-- {spelling, tone_position} pairs; combine_buc_syllable attaches the tone
-- diacritic to the character at `tone_position` (1-based) of the spelling.
-- NOTE(review): the `["..."]` keys (the romanised finals) are missing from
-- every entry in this copy, so the table does not parse as shown -- restore
-- them from the upstream module.
local buc_finals = {
	 = {{"a", 1}, {"aⁿ", 1}, {"ah", 1}},
	 = {{"e", 1}},
	 = {{"ah", 1}},
	 = {{"ai", 1}},
	 = {{"ang", 1}},
	 = {{"au", 1}},
	 = {{"a̤", 1}, {"a̤ⁿ", 1}, {"a̤h", 1}},
	 = {{"eh", 1}},
	 = {{"eng", 1}},
	 = {{"i", 1}, {"ih", 1}},
	 = {{"ia", 2}, {"iaⁿ", 2}, {"iah", 2}},
	 = {{"iah", 2}},
	 = {{"iah", 2}},
	 = {{"iang", 2}},
	 = {{"a̤u", 2}, {"a̤uⁿ", 2}, {"a̤uh", 2}}, -- on `u`
	 = {{"ih", 1}},
	 = {{"ing", 1}},
	 = {{"iu", 2}},
	 = {{"ng", 1}}, -- actually in the middle of `n` and `g`
	 = {{"eo", 2}, {"eoh", 2}},
	 = {{"e̤", 1}, {"e̤ⁿ", 1}},
	 = {{"e̤h", 1}},
	 = {{"e̤ng", 1}},
	 = {{"eoh", 2}},
	 = {{"eong", 2}},
	 = {{"o̤", 1}, {"o̤ⁿ", 1}, {"o̤h", 1}},
	 = {{"o̤h", 1}},
	 = {{"o̤ng", 1}},
	 = {{"o", 1}},
	 = {{"u", 1}},
	 = {{"ua", 2}, {"uaⁿ", 2}, {"uah", 2}},
	 = {{"uah", 2}},
	 = {{"uang", 2}},
	 = {{"oi", 1}, {"uai", 2}, {"oiⁿ", 1}, {"oih", 1}}, -- on `o`
	 = {{"uh", 1}},
	 = {{"ui", 1}}, -- on `u`
	 = {{"ng", 1}}, -- actually in the middle of `n` and `g`
	 = {{"ṳ", 1}},
	 = {{"ṳh", 1}},
	 = {{"ṳng", 1}},
	 = {{"io̤", 2}, {"io̤ⁿ", 2}, {"io̤h", 2}},
	 = {{"io̤h", 2}},
	 = {{"io̤ng", 2}}
}

-- Combining diacritic that marks each tone in BUC ("" = no diacritic).
-- The `["..."]` keys were missing from this copy (a syntax error); they are
-- restored from the per-line tone names, which follow the tone legend used
-- by this module (1..5, 6A, 6B, 7A, 7B). Keys are strings because tones
-- are parsed out of the input text.
-- The diacritics are written as UTF-8 byte escapes so that they stay
-- visible and unambiguous in the source.
local buc_tones = {
	["1"] = "",             -- yin-ping: no diacritic
	["2"] = "\204\129",     -- yang-ping: U+0301
	["3"] = "\204\130",     -- shang: U+0302
	["4"] = "\204\141",     -- yin-qu: U+030D
	["5"] = "\204\132",     -- yang-qu: U+0304
	["6A"] = "",            -- yin-ru A: spelled with -h, no diacritic
	["6B"] = "\204\132",    -- yin-ru B: U+0304
	["7A"] = "\204\141",    -- yang-ru A: -h plus U+030D
	["7B"] = "\204\141",    -- yang-ru B: -h plus U+030D
}

--- Split a string of dialect codes into a list, validating each code.
-- @param code_string string of dialect codes
-- @return list of codes in input order
-- Raises an error for a code that is not a key of `dialects`.
local function split_dialect_codes(code_string)
	local codes = {}
	-- NOTE(review): the separator class was missing from this copy of the
	-- pattern (it read "+", which only matches literal plus signs). Commas
	-- and whitespace are assumed as separators -- confirm against callers.
	for code in code_string:gmatch("[^,%s]+") do
		-- Validate the individual code; testing the `dialects` table itself
		-- is always truthy and would let every code through.
		if not dialects[code] then
			error("Unsupported dialect: " .. code)
		end
		codes[#codes + 1] = code
	end
	return codes
end

--- Strip the presentation markers from a raw syllable token.
-- Recognised, in this order: a leading capitalization marker, a trailing
-- space-after marker, a trailing comma, and a manual BUC spelling wrapped
-- in braces.
-- @param syllable string raw syllable token
-- @return markers table with `capitalize`, `space_after`, `comma_after`
--   (booleans) and `manual_buc` (string or nil)
-- @return the syllable with all recognised markers removed
local function get_syllable_markers(syllable)
	local markers = {
		capitalize = false,
		space_after = false,
		comma_after = false,
		manual_buc = nil
	}

	if syllable:sub(1, 1) == SPECIAL_MARKERS.CAPITALIZATION then
		markers.capitalize = true
		syllable = syllable:sub(2)
	end

	if syllable:sub(-1) == SPECIAL_MARKERS.SPACE_AFTER then
		markers.space_after = true
		syllable = syllable:sub(1, -2)
	end

	if syllable:sub(-1) == "," then
		markers.comma_after = true
		syllable = syllable:sub(1, -2)
	end

	-- Manual BUC: a non-empty run of characters between "{" and "}".
	-- The pattern in this copy read "{+}" (one or more "{" followed by "}"),
	-- which can never capture a spelling; the class is restored to match
	-- the sub() arithmetic below, which keeps the text between the braces.
	local manual_start, manual_end = syllable:find("{[^}]+}")
	if manual_start then
		markers.manual_buc = syllable:sub(manual_start + 1, manual_end - 1)
		syllable = syllable:sub(1, manual_start - 1) .. syllable:sub(manual_end + 1)
	end

	return markers, syllable
end

--- Split a romanised syllable form (tone already removed) into initial and final.
-- @param options table; `options.form` (string) is required
-- @return initial (string, possibly "")
-- @return final (non-empty string)
-- Raises an error when `form` is missing or when nothing is left for the final.
local function split_initial_final(options)
	if not options or not options.form then
		error("split_initial_final: form is required")
	end

	local form = options.form
	local initial, final

	if form == "ng" then
		-- a bare "ng" is a final on its own, with an empty initial
		initial, final = "", form
	elseif form:sub(1, 2) == "ng" and #form > 2 then
		-- "ng" followed by more material is the initial
		initial, final = "ng", form:sub(3)
	else
		-- NOTE(review): as written this only recognises an optional leading
		-- "h" as the initial; a character class in front of "h?" appears to
		-- have been lost from this copy of the pattern -- confirm against the
		-- upstream module. The match always returns a string (possibly ""),
		-- so the `or ""` fallback is never taken here.
		initial = form:match("^h?") or ""
		final = form:sub(#initial + 1)
	end

	if not final or final == "" then
		error("Invalid form: " .. form .. " (unable to extract final)")
	end

	return initial, final
end

-- Phonological rule application functions
--- Classify a final for the purposes of initial assimilation.
-- @param options table with fields:
--   final   (string, required) romanised final
--   initial (string, optional) initial of the same syllable
--   dialect (string, optional) dialect code
-- @return "nasal_final" (ends in "ng"), "glottal_final" (ends in "h"),
--   "nasalized_final" (ends in "nn", or the xy default rule below applies),
--   otherwise "other_final"
-- Raises an error when `options` is not a table or `final` is nil.
local function get_final_type(options)
	if not options or type(options) ~= "table" then
		error("get_final_type: options must be a table")
	end

	local initial = options.initial
	local final = options.final
	local dialect = options.dialect

	if not final then
		error("get_final_type: final cannot be nil")
	end

	if sub(final, -2) == "ng" then
		return "nasal_final"
	elseif sub(final, -1) == "h" then
		return "glottal_final"
	elseif sub(final, -2) == "nn" then
		return "nasalized_final"
	end

	-- xy default rule: a final that would otherwise be "other_final" counts
	-- as nasalized when its own syllable starts with a nasal initial.
	-- The initial pattern in this copy read "g?", which matches every
	-- string; combined with the self-call that used to sit in this condition
	-- it recursed without bound for any plain final. Once the three suffix
	-- tests above have failed the self-call could only ever answer
	-- "other_final", so it is dropped. The nasal class is reconstructed
	-- (m, n, ng) from the tracking key below.
	-- NOTE(review): confirm the class against the upstream module.
	if dialect == "xy" and initial and match(initial, "^[mn]g?") then
		require("Module:debug").track('cpx-pron/xy-nasal-initial/default-rule')
		return "nasalized_final"
	end

	return "other_final"
end

--- Record a BUC-generation problem under the 'cpx-pron/' tracking prefix.
-- @param reason string appended to the prefix to form the tracking key
local function track_buc_issue(reason)
	local tracking_key = 'cpx-pron/' .. reason
	local debug_module = require("Module:debug")
	debug_module.track(tracking_key)
end

--- Assemble one BUC syllable: initial + final with the tone diacritic attached.
-- @param options table with fields:
--   initial       (string) BUC initial
--   final         (string) BUC final
--   tone          (string) key into `buc_tones`
--   tone_position (number) 1-based index of the character in `final` that
--                 receives the tone diacritic
-- @return the syllable, NFC-normalised
-- Raises an error for an unknown tone or an out-of-range tone position.
local function combine_buc_syllable(options)
	local initial = options.initial
	local final = options.final
	local tone = options.tone
	local tone_position = options.tone_position

	-- Look up the diacritic for this tone (the `[tone]` index was missing
	-- in this copy, leaving the whole table in `tone_mark`).
	local tone_mark = buc_tones[tone]
	if not tone_mark then
		error("Invalid tone: " .. tostring(tone))
	end

	-- Split the final into characters so the diacritic can be attached to
	-- exactly one of them.
	local chars = {}
	for char in mw.ustring.gmatch(final, ".") do
		chars[#chars + 1] = char
	end

	if type(tone_position) ~= "number" or tone_position < 1 or #chars < tone_position then
		error("Invalid tone position: " .. tostring(tone_position))
	end

	-- Append the diacritic to the selected character (this copy
	-- concatenated the whole `chars` table with the mark, a runtime error).
	chars[tone_position] = chars[tone_position] .. tone_mark

	return mw.ustring.toNFC(initial .. table.concat(chars))
end

--- Look up the recorded BUC readings of one character.
-- @param char string character to look up
-- @return the entry stored for `char` in `m_data.buc` (the caller iterates
--   it as a list of readings), or nil when the table or the entry is absent
local function lookup_char_readings(char)
	local readings_by_char = m_data.buc
	if not readings_by_char then
		return nil
	end
	-- Index by the character; this copy returned the whole table and
	-- ignored `char`.
	return readings_by_char[char]
end

-- Convert single PSP syllable to BUC
--- Convert a single PSP syllable to BUC.
-- @param options table with fields:
--   syllable_info (table) reads `manual_buc`, `original_initial`,
--                 `original_final` and `original_tone`
--   char          (string or nil) the character written for this syllable;
--                 when nil the per-character reading table is not consulted
-- @return the BUC syllable, or nil when it cannot be determined uniquely
--   (every nil return is preceded by a track_buc_issue call)
local function convert_to_buc_syllable(options)
	local syllable_info = options.syllable_info
	local char = options.char

	-- A manually specified BUC wins, but is verified first.
	-- NOTE(review): validate_manual_buc is not defined in the visible part
	-- of the file before this point -- confirm it is in scope here.
	if syllable_info.manual_buc then
		local is_valid = validate_manual_buc(syllable_info.manual_buc)
		if not is_valid then
			track_buc_issue("manual form incorrect")
			return nil
		end
		return syllable_info.manual_buc
	end

	local lookup_tone = syllable_info.original_tone
	local lookup_final = syllable_info.original_final

	-- S3 is looked up as tone 3 on the final without its trailing "h".
	if syllable_info.original_tone == "S3" then
		lookup_tone = "3"
		if lookup_final:sub(-1) == "h" then
			lookup_final = lookup_final:sub(1, -2)
		end
	end

	-- Candidate BUC finals for this PSP final.
	local possible_finals = buc_finals[lookup_final]
	if not possible_finals then
		track_buc_issue("no final found")
		return nil
	end

	-- BUC initial.
	-- NOTE(review): the index was missing in this copy; the original
	-- (pre-assimilation) initial is assumed, matching the use of
	-- original_final and original_tone above.
	local initial = buc_initials[syllable_info.original_initial]
	if not initial then
		track_buc_issue("no initial found")
		return nil
	end

	-- Filter the candidate finals. Special case: BUC tone 7B (written with
	-- -h) has merged into tone 2, so a PSP tone-2 syllable without -h may
	-- correspond to a BUC -h spelling; such spellings are flagged with "*".
	local psp_has_h = syllable_info.original_final:match("h$")
	local filtered_finals = {}
	for _, final_info in ipairs(possible_finals) do
		local final, tone_position = final_info[1], final_info[2]
		-- evaluated before any "*" is appended
		local buc_has_h = final:match("h$")
		local should_keep = true
		local use_tone = lookup_tone

		if lookup_tone == "7B" then
			if buc_has_h then
				final = final .. "*"
			else
				should_keep = false
			end
		end

		if buc_has_h and not psp_has_h then
			if lookup_tone == "2" then
				use_tone = "7B"
				final = final .. "*"
			elseif lookup_tone ~= "7B" then
				should_keep = false
			end
		end

		if should_keep then
			filtered_finals[#filtered_finals + 1] = {
				final = final,
				tone_position = tone_position,
				tone = use_tone
			}
		end
	end

	local candidates = {}
	for _, final_info in ipairs(filtered_finals) do
		candidates[#candidates + 1] = combine_buc_syllable({
			initial = initial,
			final = final_info.final,
			tone = final_info.tone,
			tone_position = final_info.tone_position
		})
	end

	if #candidates == 1 then
		return candidates[1]
	end

	-- Without a character (character and syllable counts differ) the
	-- reading table cannot disambiguate.
	if not char then
		if #possible_finals > 1 then
			track_buc_issue("contraction and multiple final found")
			return nil
		end
		local only_final = possible_finals[1]
		if not only_final then
			track_buc_issue("no final found")
			return nil
		end
		return combine_buc_syllable({
			initial = initial,
			final = only_final[1],
			tone = lookup_tone,
			tone_position = only_final[2]
		})
	end

	local char_readings = lookup_char_readings(char)
	if not char_readings then
		track_buc_issue("cannot look up table")
		return nil
	end

	-- Keep the candidates that are attested readings of the character.
	local matches = {}
	for _, candidate in ipairs(candidates) do
		for _, reading in ipairs(char_readings) do
			if candidate == reading then
				matches[#matches + 1] = candidate
			end
		end
	end

	if #matches == 0 then
		track_buc_issue("no matching reading found")
		return nil
	elseif #matches > 1 then
		track_buc_issue("multiple matching readings found")
		return nil
	end

	-- temp: track tone-2 syllables resolved to a 7B spelling
	if syllable_info.original_tone == "2" and matches[1]:match("h%*$") then
		require("Module:debug").track('cpx-pron/2-to-7B')
	end

	return matches[1]
end

--- Build the BUC rendering of a whole word (Putian, dialect code "pt", only).
-- @param options table with fields:
--   syllable_infos (list, required) per-syllable info tables
--   dialect        (string) dialect code; anything but "pt" yields nil
--   word           (any) forwarded unchanged to convert_to_buc_syllable
-- @return the BUC string, or nil when the dialect is not "pt" or any
--   syllable cannot be converted uniquely
-- Raises an error when `syllable_infos` is missing.
local function generate_buc(options)
	if not options.syllable_infos then
		error("Missing required syllable_infos in generate_buc")
	end

	if options.dialect ~= "pt" then
		return nil
	end

	local syllable_infos = options.syllable_infos

	local page_title = mw.title.getCurrentTitle().text
	-- NOTE(review): the pattern below is empty in this copy; a character
	-- class (presumably removing characters that should not be counted)
	-- appears to have been lost -- restore it from the upstream module.
	local chars = (mw.ustring.gsub(page_title, "", ""))
	local char_count = mw.ustring.len(chars)

	-- The per-character reading table is only consulted when characters and
	-- syllables line up one to one.
	local check_char_table = (#syllable_infos == char_count)

	local buc_syllables = {}

	for i, syllable_info in ipairs(syllable_infos) do
		if syllable_info.manual_buc then
			buc_syllables[#buc_syllables + 1] = syllable_info.manual_buc
		else
			local syllable_result = convert_to_buc_syllable({
				syllable_info = syllable_info,
				char = check_char_table and mw.ustring.sub(chars, i, i) or nil,
				word = options.word
			})

			-- Give up as soon as one syllable cannot be uniquely identified.
			if not syllable_result then
				return nil
			end

			if syllable_info.capitalize then
				-- Decompose first so only the base letter is upper-cased.
				local normalized = mw.ustring.toNFD(syllable_result)
				local first_char = mw.ustring.sub(normalized, 1, 1)
				syllable_result = mw.ustring.toNFC(
					mw.ustring.upper(first_char) ..
					mw.ustring.sub(normalized, 2)
				)
			end

			buc_syllables[#buc_syllables + 1] = syllable_result
		end
	end

	-- Join the syllables; the separator after syllable i is chosen by that
	-- syllable's own flags (the `[i]` indices were missing in this copy, so
	-- the whole list was inserted and the flags were read off the list).
	local result = {}
	for i = 1, #buc_syllables do
		result[#result + 1] = buc_syllables[i]
		if i < #buc_syllables then
			local info = syllable_infos[i]
			if info.comma_after then
				result[#result + 1] = ", "
			elseif info.space_after then
				result[#result + 1] = " "
			else
				result[#result + 1] = "-"
			end
		end
	end

	return table.concat(result)
end

--- Parse one raw syllable token into its components.
-- Accepted shape, as far as this function reads it:
--   [^][*]form[>changed_form]tone[-manual_sandhi_tone][#][{manual BUC}][,][\]
-- @param syllable string non-empty raw token
-- @return table with `orig_form`, `changed_form`, `tone_part`,
--   `orig_initial`, `orig_final`, `changed_initial`, `changed_final`,
--   `orig_tone`, `manual_sandhi_tone`, `no_sandhi`, `no_assimilation`,
--   and the BUC-related `capitalize`, `space_after`, `comma_after`,
--   `manual_buc`
-- Raises an error when the token is empty or cannot be parsed.
local function split_syllable(syllable)
	local components = {
		orig_form = nil,
		changed_form = nil,
		tone_part = nil,
		orig_initial = nil,
		orig_final = nil,
		changed_initial = nil,
		changed_final = nil,
		orig_tone = nil,
		manual_sandhi_tone = nil,
		no_sandhi = false,
		no_assimilation = false,
		-- BUC (only for Putian)
		capitalize = false,
		space_after = false,
		comma_after = false,
		manual_buc = nil
	}

	if not syllable or syllable == "" then
		error("Invalid syllable: " .. tostring(syllable))
	end

	local markers, cleaned_syllable = get_syllable_markers(syllable)
	components.capitalize = markers.capitalize
	components.space_after = markers.space_after
	components.comma_after = markers.comma_after
	components.manual_buc = markers.manual_buc
	syllable = cleaned_syllable

	components.no_assimilation = syllable:sub(1, 1) == SPECIAL_MARKERS.NO_ASSIMILATION
	if components.no_assimilation then
		syllable = syllable:sub(2)
	end

	components.no_sandhi = syllable:sub(-1) == SPECIAL_MARKERS.NO_SANDHI
	if components.no_sandhi then
		syllable = syllable:sub(1, -2)
	end

	-- NOTE(review): the tone character classes were missing from the three
	-- patterns below in this copy (they read "(+.*)" and "^(+)%-(+)$", which
	-- only match literal plus signs). Digits are assumed for where the tone
	-- part starts, and alphanumerics for the two sides of a manual sandhi
	-- specification -- confirm against the upstream module.
	if syllable:find(SPECIAL_MARKERS.MANUAL_CHANGE, 1, true) then
		components.orig_form, components.changed_form, components.tone_part =
			syllable:match("(.-)>(.-)(%d+.*)$")
	else
		components.orig_form, components.tone_part =
			syllable:match("(.-)(%d+.*)$")
		components.changed_form = components.orig_form
	end

	-- If the segmentation is not correct
	if not components.orig_form or not components.tone_part then
		error("Invalid syllable format: " .. syllable)
	end

	-- Process form components
	components.orig_initial, components.orig_final =
		split_initial_final({form = components.orig_form})
	components.changed_initial, components.changed_final =
		split_initial_final({form = components.changed_form})

	-- Process tone components ("orig-manual" or just "orig"); plain find
	-- because "-" is a magic character in Lua patterns.
	if components.tone_part:find("-", 1, true) then
		components.orig_tone, components.manual_sandhi_tone =
			components.tone_part:match("^(%w+)%-(%w+)$")
		require("Module:debug").track('cpx-pron/manual sandhi tone')
	else
		components.orig_tone = components.tone_part
	end

	-- Tone 3 on a final ending in "h" is the special tone S3.
	if components.orig_tone == "3" and components.changed_final:sub(-1) == "h" then
		components.orig_tone = "S3"
	end
	-- Tones 6 and 7 split into A (final ends in "h") and B (it does not).
	if components.orig_tone == "6" then
		if components.orig_final:sub(-1) == "h" then
			components.orig_tone = "6A"
		else
			components.orig_tone = "6B"
		end
	elseif components.orig_tone == "7" then
		if components.orig_final:sub(-1) == "h" then
			components.orig_tone = "7A"
		else
			components.orig_tone = "7B"
		end
	end

	-- final validation
	if not (components.orig_initial and components.orig_final and components.orig_tone) then
		error("Unable to parse syllable: " .. syllable)
	end

	return components
end

--- Build the working record for one syllable.
-- @param options table with `syllable` (raw token, parsed by
--   split_syllable) and `is_first_syllable` (copied through unchanged)
-- @return table holding the original and the (initially identical or
--   manually changed) initial/final/tone, the sandhi and assimilation
--   flags, and the BUC-related flags
local function create_syllable_info(options)
	local parts = split_syllable(options.syllable)
	local info = {}

	-- as entered
	info.original_initial = parts.orig_initial
	info.original_final = parts.orig_final
	info.original_tone = parts.orig_tone

	-- working copies; the tone starts out equal to the original tone
	info.changed_initial = parts.changed_initial
	info.changed_final = parts.changed_final
	info.changed_tone = parts.orig_tone

	-- processing flags
	info.no_sandhi = parts.no_sandhi
	info.no_assimilation = parts.no_assimilation
	info.is_first_syllable = options.is_first_syllable
	info.manual_sandhi_tone = parts.manual_sandhi_tone

	-- BUC
	info.capitalize = parts.capitalize
	info.space_after = parts.space_after
	info.comma_after = parts.comma_after
	info.manual_buc = parts.manual_buc

	return info
end

-- Syllable processing functions
--- Build the working records for every syllable of a word.
-- @param options table; `options.word` is a string of whitespace-separated
--   syllable tokens
-- @return list of records from create_syllable_info, in input order; only
--   the first record is created with `is_first_syllable = true`
local function create_syllable_infos(options)
	local infos = {}
	local is_first = true
	for token in options.word:gmatch("%S+") do
		infos[#infos + 1] = create_syllable_info({
			syllable = token,
			is_first_syllable = is_first
		})
		is_first = false
	end
	return infos
end

--- Tidy up a syllable after initial assimilation (modifies it in place).
-- @param options table; `options.syllable` is a syllable record whose
--   `changed_initial` and `changed_final` are read and possibly rewritten
local function post_process_nasalization(options)
	local syllable = options.syllable

	-- Remove duplicate nasalization: after a nasal initial the "nn" marker
	-- on the final is redundant.
	-- The pattern in this copy read "^g?", which matches every string and
	-- so stripped "nn" after any initial; the nasal class (m, n, ng) is
	-- reconstructed.
	-- NOTE(review): confirm the class against the upstream module.
	if syllable.changed_initial:match("^[mn]g?") and
		syllable.changed_final:match("nn$") then
		-- parentheses keep only the string result of gsub
		syllable.changed_final = (syllable.changed_final:gsub("nn$", ""))
	end

	-- Simplify ng-initial syllables: "ng" + "ng" collapses to a bare "ng".
	if syllable.changed_initial == "ng" and
		syllable.changed_final == "ng" then
		syllable.changed_initial = ""
	end
end

--- Determine the sandhi tone of a syllable from the syllable that follows it.
-- @param options table with fields:
--   curr_syllable (table) reads `manual_sandhi_tone` and `original_tone`
--   next_syllable (table or nil) reads `original_tone`
--   dialect       (string) key into `sandhi_rules`
-- @return the manually specified sandhi tone if there is one; the original
--   tone for a final syllable or when no rule is listed; otherwise the
--   tone given by `sandhi_rules`
local function get_sandhi_tone(options)
	local curr_syllable = options.curr_syllable
	local next_syllable = options.next_syllable
	local dialect = options.dialect

	-- Handle manual tone specification
	if curr_syllable.manual_sandhi_tone then
		return curr_syllable.manual_sandhi_tone
	end

	-- Handle final syllable
	if not next_syllable then
		return curr_syllable.original_tone
	end

	-- Apply sandhi rules: dialect -> current tone -> next tone.
	-- The indices were missing in this copy, so the whole `sandhi_rules`
	-- table was returned. Each level is looked up separately so that a
	-- missing dialect or row falls back instead of raising.
	local current_tone = curr_syllable.original_tone
	local next_tone = next_syllable.original_tone

	local dialect_rules = sandhi_rules[dialect]
	local tone_rules = dialect_rules and dialect_rules[current_tone]

	return (tone_rules and tone_rules[next_tone]) or current_tone
end

--- Apply tone sandhi to every syllable of a word (sets `changed_tone` in place).
-- @param options table with fields:
--   dialect        (string) dialect code
--   syllable_infos (list) syllable records
local function apply_sandhi(options)
	local dialect = options.dialect
	local syllable_infos = options.syllable_infos

	-- Tone label used in tracking keys: "S1" and "S3" are kept as they are,
	-- any other "S"-prefixed tone loses the prefix.
	local function format_tone_for_tracking(tone)
		if tone == "S1" or tone == "S3" then
			return tone
		elseif tone:sub(1, 1) == "S" then
			return tone:sub(2)
		end
		return tone
	end

	for i = 1, #syllable_infos do
		-- The `[i]` / `[i + 1]` indices were missing in this copy, so both
		-- variables pointed at the whole list.
		local curr_syllable = syllable_infos[i]
		local next_syllable = syllable_infos[i + 1]

		local original_tone = curr_syllable.original_tone

		-- No sandhi if one of the following conditions is met:
		-- 1. there is a no_sandhi mark
		-- 2. the syllable is followed by a comma
		-- 3. it is the last syllable
		if curr_syllable.no_sandhi or
			curr_syllable.comma_after or
			not next_syllable then
			curr_syllable.changed_tone = curr_syllable.original_tone
		else
			curr_syllable.changed_tone = get_sandhi_tone({
				curr_syllable = curr_syllable,
				next_syllable = next_syllable,
				dialect = dialect
			})
		end

		-- Special tone adjustment for glottal finals
		if curr_syllable.changed_tone == '3' and
			curr_syllable.changed_final:sub(-1) == 'h' then
			curr_syllable.changed_tone = 'S3'
		end

		-- Tracking
		if next_syllable then
			local track_original_tone = format_tone_for_tracking(original_tone)
			local track_next_tone = format_tone_for_tracking(next_syllable.original_tone)
			local track_changed_tone = format_tone_for_tracking(curr_syllable.changed_tone)

			require("Module:debug").track('cpx-pron/sandhi/' .. dialect .. '/' ..
				track_original_tone .. '+' .. track_next_tone .. '/' ..
				track_changed_tone)
		end
	end
end

--- Apply initial assimilation across a word.
-- Each non-first syllable's `changed_initial` is rewritten in place
-- according to the type of the preceding syllable's final.
-- @param options table with fields:
--   dialect        (string) key into `initial_assimilation_rules`
--   syllable_infos (list) syllable records
-- @return list of the same syllable records, in order (empty for empty input)
local function apply_initial_assimilation(options)
	local dialect = options.dialect
	local syllable_infos = options.syllable_infos
	local result = {}

	if #syllable_infos == 0 then
		return result
	end

	-- Handle first syllable (the `[1]` indices were missing in this copy,
	-- which aliased `result` to the input list).
	result[1] = syllable_infos[1]
	result[1].is_first_syllable = true

	local dialect_rules = initial_assimilation_rules[dialect]

	-- Process subsequent syllables
	for i = 2, #syllable_infos do
		local prev_syllable = result[i - 1]
		local curr_syllable = syllable_infos[i]
		-- Store original initial for tracking
		local original_initial = curr_syllable.original_initial
		local rule_applied = false

		-- Manual override after a nasal initial + plain final (xy, yy, ft).
		-- NOTE(review): the nasal-initial class (m, n, ng) is reconstructed;
		-- this copy read "^g?$".
		local is_manual_override = (dialect == "xy" or dialect == "yy" or dialect == "ft") and
			prev_syllable.changed_initial:match("^[mn]g?$") and
			not (prev_syllable.changed_final:sub(-2) == "ng" or
				prev_syllable.changed_final:sub(-1) == "h" or
				prev_syllable.changed_final:sub(-2) == "nn") and
			curr_syllable.changed_initial ~= curr_syllable.original_initial

		if is_manual_override then
			require("Module:debug").track('cpx-pron/xy-nasal-initial/manual-override')
		end

		local original_final_type = get_final_type({
			initial = prev_syllable.changed_initial,
			final = prev_syllable.changed_final,
			dialect = dialect
		})

		-- No initial assimilation if one of the following conditions is met:
		-- 1. there is a no_assimilation mark
		-- 2. the previous syllable is followed by a comma
		-- 3. the initial was already changed manually
		if not curr_syllable.no_assimilation and
			not prev_syllable.comma_after and
			curr_syllable.changed_initial == curr_syllable.original_initial then

			local final_type = original_final_type

			-- Special rule for nasalized finals: a syllable that is itself
			-- nasalized is assimilated as if it followed a nasal final.
			-- NOTE(review): the initial class in the pattern below was lost
			-- from this copy (only the "^" anchor remains, which matches
			-- every initial) -- restore it from the upstream module.
			local should_apply_nasal_rule =
				final_type == "other_final" and
				curr_syllable.original_initial:match("^") and
				get_final_type({
					initial = curr_syllable.original_initial,
					final = curr_syllable.original_final,
					dialect = dialect
				}) == "nasalized_final"

			if should_apply_nasal_rule then
				final_type = "nasal_final"
			end

			-- Mark that we are applying an assimilation rule
			rule_applied = true

			-- Look the replacement up as dialect -> final type -> initial
			-- (the indices were missing in this copy). An absent section
			-- (e.g. no `nasalized_final` for pt) or entry leaves the initial
			-- unchanged; "" is a valid replacement and is kept.
			local type_rules = dialect_rules and dialect_rules[final_type]
			curr_syllable.changed_initial =
				(type_rules and type_rules[curr_syllable.original_initial]) or
				curr_syllable.original_initial

			require("Module:debug").track('cpx-pron/assimilation/' .. dialect .. '/' .. original_final_type .. '/' .. original_initial .. '/' .. curr_syllable.changed_initial)
		end

		-- Track assimilation only for manual override cases
		if not rule_applied and curr_syllable.changed_initial ~= original_initial then
			require("Module:debug").track('cpx-pron/assimilation/' .. dialect .. '/' .. original_final_type .. '/' .. original_initial .. '/' .. curr_syllable.changed_initial)
		end

		-- Post-process nasalization
		post_process_nasalization({
			syllable = curr_syllable,
			dialect = dialect
		})

		result[#result + 1] = curr_syllable
	end
	return result
end

-- Builds the "actual" (post-change) romanization of a word.
--
-- syllable_infos : sequence of syllable tables; each must carry the string
--                  fields changed_initial, changed_final and changed_tone
-- Returns the syllables joined by single spaces ("" for an empty sequence).
--
-- This is the single definition of the helper; declaring the same local
-- function twice only shadows the first copy and wastes a local slot.
local function generate_actual_pronunciation(syllable_infos)
	local pronunciations = {}

	for position, syllable in ipairs(syllable_infos) do
		-- Initial, final and tone are concatenated with no separator.
		pronunciations[position] = syllable.changed_initial ..
			syllable.changed_final ..
			syllable.changed_tone
	end

	return table.concat(pronunciations, " ")
end

-- Looks up the IPA value for one romanization component.
--
-- options.type          : "initials", "finals" or "tones" (selects the table)
-- options.dialect       : dialect code
-- options.value         : romanized initial / final / tone to look up
-- options.syllable_info : optional; only used to build a fuller suggestion in
--                         the "invalid final" error message
--
-- Returns the looked-up value. Raises an error when a required option is
-- missing, when the type is unknown, for the rejected spellings "bh" (initial)
-- and "S5" (tone), and when no entry is found.
--
-- NOTE(review): the table lookups in this copy appear to have lost their
-- `[...]` index expressions (see notes below) -- confirm against upstream.
local function get_ipa_value(options)
	-- Validation
	if not options.type or not options.dialect or not options.value then
		error("Missing required parameter for IPA lookup")
	end

	-- Get the appropriate lookup table
	-- (`initials`, `finals` and `tones` are defined elsewhere in the file)
	local lookup_tables = {
		initials = initials,
		finals = finals,
		tones = tones
	}

	-- NOTE(review): this local shadows the standard `table` library for the
	-- rest of the function. As written it holds the whole `lookup_tables`
	-- table; presumably it was indexed by `options.type` -- confirm.
	local table = lookup_tables
	if not table then
		error("Invalid lookup type: " .. options.type)
	end

	-- Explicitly rejected spellings, each with a suggested replacement.
	if options.type == "initials" and options.value == "bh" then
		error(string.format(
			'Invalid initial "bh" for %s dialect. Please use "w" instead.',
			options.dialect
		))
	end

	if options.type == "tones" and options.value == "S5" then
		error('Invalid tone S5. Please use "6" instead.')
	end

	-- Finals get an extra pass that tries to suggest a corrected spelling
	-- before the generic "invalid" error at the end of the function.
	if options.type == "finals" then
		-- handle_nasalization (defined elsewhere) returns three values that
		-- are used here as: base final, old-style nasal flag, new-style flag.
		local base_final, has_old_nasal, has_new_nasal = handle_nasalization(options.value)
		local nasal_suffix = ""
		
		-- Both spellings use the "nn" suffix; the old one is also tracked.
		if has_old_nasal then
			nasal_suffix = "nn"
			require("Module:debug").track('cpx-pron/deprecated-nasalization')
		elseif has_new_nasal then
			nasal_suffix = "nn"
		end

		-- NOTE(review): `table and table` is just `table` as written;
		-- presumably a per-dialect / per-value lookup -- confirm.
		local result = table and table

		if not result then
			local corrected_final = nil

			-- `corrections` is defined elsewhere in the file.
			-- NOTE(review): the conditions and assigned values below appear to
			-- be missing their index keys -- confirm.
			if corrections.common then
				corrected_final = corrections.common
			elseif corrections and corrections then
				corrected_final = corrections
			elseif has_old_nasal and not has_new_nasal then
				corrected_final = base_final
				nasal_suffix = "nn"
			end

			if corrected_final or (has_old_nasal and not has_new_nasal) then
				local corrected_value = corrected_final or base_final
				if has_old_nasal or has_new_nasal then
					corrected_value = corrected_value .. nasal_suffix
				end

				-- When the syllable is known, suggest the whole corrected
				-- syllable (initial + final + tone) instead of the final only.
				local full_syllable = ""
				if options.syllable_info then
					full_syllable = options.syllable_info.original_initial .. corrected_value .. options.syllable_info.original_tone
				end
				
				-- NOTE(review): gsub("", "") is a no-op as written; the
				-- pattern appears to be missing -- confirm.
				error(string.format(
				    'Invalid final "%s" for %s dialect. Please use "%s" instead.',
				    options.value,
				    options.dialect,
				    full_syllable ~= "" and full_syllable:gsub("", "") or corrected_value
				))
			end
		end
	end

	-- Final lookup for every type.
	-- NOTE(review): same apparently missing index as above.
	local result = table and table

	if not result then
		-- sub(1, -2) drops the last character of the type name, turning e.g.
		-- "finals" into "final" for the message.
		error(string.format(
			"Invalid %s %s for %s.",
			options.type:sub(1, -2),
			options.value,
			options.dialect
		))
	end

	return result
end

-- Resolves the IPA pieces (initial, final, tone) for one syllable.
--
-- options.syllable_info : syllable table; reads changed_initial,
--                         changed_final, original_tone and changed_tone
-- options.dialect       : dialect code forwarded to get_ipa_value
--
-- Returns a table { initial = ..., final = ..., tone = ... }. When the tone
-- was changed, `tone` is the original tone's IPA, then "⁻", then the changed
-- tone's IPA.
local function get_ipa_components(options)
	local syl = options.syllable_info
	local dialect = options.dialect

	-- Small wrapper so each lookup below is a single line.
	local function lookup(kind, value, info)
		return get_ipa_value({
			type = kind,
			dialect = dialect,
			value = value,
			syllable_info = info
		})
	end

	-- Lookups run in the order initial, final, tone, so the first invalid
	-- component is the one that raises.
	local ipa_initial = lookup("initials", syl.changed_initial, syl)
	local ipa_final = lookup("finals", syl.changed_final, syl)
	local ipa_tone = lookup("tones", syl.original_tone, syl)

	if syl.changed_tone ~= syl.original_tone then
		-- The changed tone is looked up without the syllable context.
		local sandhi_tone = lookup("tones", syl.changed_tone)

		if not sandhi_tone then
			error("Invalid sandhi tone: " .. syl.changed_tone .. 
				  " for dialect: " .. dialect)
		end

		ipa_tone = ipa_tone .. "⁻" .. sandhi_tone
	end

	return {
		initial = ipa_initial,
		final = ipa_final,
		tone = ipa_tone
	}
end

-- Produces the superscript prefix that shows a syllable's original initial
-- when that initial has been changed.
--
-- options.syllable_info : syllable table; reads is_first_syllable,
--                         original_initial and changed_initial
-- options.dialect       : dialect code forwarded to get_ipa_value
--
-- Returns "" for a first syllable or an unchanged initial, "<sup>(Ø-)</sup>"
-- when the original initial is empty, and otherwise the original initial's
-- IPA wrapped as "<sup>(…-)</sup>".
local function get_original_initial_display(options)
	local info = options.syllable_info
	local dialect = options.dialect

	-- Nothing to show for first syllables or unchanged initials.
	if info.is_first_syllable then
		return ""
	end
	if info.original_initial == info.changed_initial then
		return ""
	end

	-- The lookup is done before the empty-initial check, so it also runs
	-- (and may raise) for an empty original initial.
	local original_ipa = get_ipa_value({
		type = "initials",
		dialect = dialect,
		value = info.original_initial,
		syllable_info = info
	})

	if info.original_initial == "" then
		return "<sup>(Ø-)</sup>"
	end
	return "<sup>(" .. original_ipa .. "-)</sup>"
end

-- Renders one syllable as an IPA string.
--
-- options.syllable_info : syllable table
-- options.dialect       : dialect code
--
-- Returns the original-initial marker (possibly empty) followed by the IPA
-- initial, final and tone, with no separators.
local function syllable_to_ipa(options)
	-- Both helpers only read these two fields, so one argument table is
	-- built and handed to each of them.
	local helper_args = {
		syllable_info = options.syllable_info,
		dialect = options.dialect
	}

	-- Components are resolved first, then the changed-initial marker.
	local parts = get_ipa_components(helper_args)
	local marker = get_original_initial_display(helper_args)

	return marker .. parts.initial .. parts.final .. parts.tone
end

-- Builds the IPA transcription of a whole word.
--
-- options.syllable_infos : sequence of syllable tables (required)
-- options.dialect        : dialect code
--
-- Returns the per-syllable IPA strings joined by single spaces. Raises an
-- error when `options` or `options.syllable_infos` is missing.
local function generate_ipa(options)
	local syllables = options and options.syllable_infos
	if not syllables then
		error("Missing required syllable_infos in generate_ipa")
	end

	local dialect = options.dialect
	local rendered = {}

	for _, info in ipairs(syllables) do
		rendered[#rendered + 1] = syllable_to_ipa({
			syllable_info = info,
			dialect = dialect
		})
	end

	return table.concat(rendered, " ")
end

-- Processes one "dialect codes : word" pronunciation entry.
--
-- options.dialect_codes : dialect code string, split by split_dialect_codes
-- options.word          : romanized word
-- options.index         : position of this entry in the input
--
-- Returns { dialect_codes, word, index, processed }, where `processed` has
-- one record per dialect with the fields dialect, dialect_position,
-- original, actual, ipa, index and syllable_infos, plus `buc` for the "pt"
-- dialect only.
local function process_pronunciation(options)
	local codes = options.dialect_codes
	local word = options.word
	local entry_index = options.index

	local dialect_list = split_dialect_codes(codes)

	-- The word is parsed once; every dialect then works on its own deep copy.
	local parsed_syllables = create_syllable_infos({
		word = word,
		is_first_syllable = true
	})

	local per_dialect = {}
	for position, dialect in ipairs(dialect_list) do
		-- Apply phonological rules: assimilation first, then sandhi on the
		-- list that assimilation returned (sandhi's return value is unused).
		local syllables = apply_initial_assimilation({
			dialect = dialect,
			syllable_infos = m_table.deepCopy(parsed_syllables)
		})
		apply_sandhi({
			dialect = dialect,
			syllable_infos = syllables
		})

		-- Romanization is generated before the IPA.
		local actual = generate_actual_pronunciation(syllables)
		local ipa = generate_ipa({
			syllable_infos = syllables,
			dialect = dialect
		})

		local record = {
			dialect = dialect,
			dialect_position = position,
			original = word,
			actual = actual,
			ipa = ipa,
			index = entry_index,
			syllable_infos = syllables
		}

		-- BUC is generated only for the "pt" dialect.
		if dialect == "pt" then
			record.buc = generate_buc({
				syllable_infos = syllables,
				dialect = dialect,
				word = word
			})
		end

		per_dialect[#per_dialect + 1] = record
	end

	return {
		dialect_codes = codes,
		word = word,
		processed = per_dialect,
		index = entry_index
	}
end

-- Formatting helper functions
-- Wraps `text` in a span carrying the "zhpron-monospace" CSS class.
-- (An earlier inline Consolas/monospace style was replaced by this class.)
local function font_consolas(text)
	local open_tag = '<span class="zhpron-monospace">'
	local close_tag = '</span>'
	return open_tag .. text .. close_tag
end

-- Wraps `text` in slashes inside a span carrying the "IPA" CSS class.
local function font_ipa(text)
	local open_tag = '<span class="IPA">/'
	local close_tag = '/</span>'
	return open_tag .. text .. close_tag
end

-- Strips input-only markup from a romanization string and wraps the
-- remaining digits in <sup> tags for display.
--
-- text : romanization string; nil/false yields ""
--
-- NOTE(review): the function returns the results of the last gsub directly,
-- so callers in a multi-value context also receive the substitution count as
-- a second return value.
-- NOTE(review): three of the patterns below ("+", "{+}" and "") look as if a
-- bracketed character class was lost; as written "+" matches a literal plus
-- sign, "{+}" matches one or more "{" followed by "}", and "" matches only
-- the empty string (a no-op) -- confirm against upstream.
local function clear_pinging_format(text)
	if not text then
		return ""
	end
	-- Order matters: digits are wrapped in <sup> before the remaining "S"
	-- characters are removed.
	return text:gsub("%-S?%d", "") -- remove tone sandhi
			  :gsub(">+", "") -- remove irregular sound change
			  :gsub("+", "") -- remove special symbols
			  :gsub("{+}", "") -- remove manual BUC
			  :gsub("", "") -- remove A/B
			  :gsub("(%d)", "<sup>%1</sup>") -- superscript tone numbers
			  :gsub("S", "") -- remove "S" in special tones
end

-- Output formatting functions
-- Formats results for the "demo" mode: one line per processed dialect entry
-- of the form "<original syllables> → <actual syllables><br/><IPA>".
--
-- options.results : list of results, each with a `processed` list whose
--                   entries carry `syllable_infos` and `ipa`
-- options.type    : highlight type; "assim" bolds the initials of non-first
--                   syllables, "sandhi" bolds the tones of non-last
--                   syllables; defaults to "default" (no bolding)
--
-- Returns the lines joined by blank lines ("\n\n").
local function format_demo_output(options)
	local results = options.results
	local highlight_type = options.type or "default"
	local output = {}

	for _, result in ipairs(results) do
		for _, processed in ipairs(result.processed) do
			local syllable_infos = processed.syllable_infos
			local orig_parts = {}
			local actual_parts = {}
			
			-- Process each syllable
			for i, syllable_info in ipairs(syllable_infos) do
				local is_first = (i == 1)
				local is_last = (i == #syllable_infos)
				
				-- Process original syllable
				local orig_initial = syllable_info.original_initial
				local orig_final = syllable_info.original_final
				-- Only the first gsub result (the string) is kept; every "S"
				-- is dropped from the tone.
				local orig_tone = syllable_info.original_tone:gsub("S", "")
				
				local orig_text = orig_initial .. orig_final
				
				-- Apply bold formatting based on highlight_type
				-- ("'''" is wikitext bold markup; empty initials are not bolded)
				if highlight_type == "assim" and not is_first and orig_initial ~= "" then
					orig_text = "'''" .. orig_initial .. "'''" .. orig_final
				elseif highlight_type == "sandhi" and not is_last then
					orig_tone = "'''" .. orig_tone .. "'''"
				end
				
				-- Add tone as superscript and clear format
				-- NOTE(review): the trailing gsub("", "") is a no-op as
				-- written; its pattern appears to be missing -- confirm.
				orig_text = orig_text .. orig_tone:gsub("(%d)", "<sup>%1</sup>"):gsub("", "")
				table.insert(orig_parts, orig_text)
				
				-- Process actual syllable
				local actual_initial = syllable_info.changed_initial
				local actual_final = syllable_info.changed_final
				local actual_tone = syllable_info.changed_tone:gsub("S", "")
				
				local actual_text = actual_initial .. actual_final
				
				-- Apply bold formatting based on highlight_type
				if highlight_type == "assim" and not is_first and actual_initial ~= "" then
					-- Bold non-first syllable initials for assim type
					actual_text = "'''" .. actual_initial .. "'''" .. actual_final
				elseif highlight_type == "sandhi" and not is_last then
					-- Bold non-last syllable tones for sandhi type
					actual_tone = "'''" .. actual_tone .. "'''"
				end
				
				-- Add tone as superscript and clear format
				-- NOTE(review): same apparently missing pattern as above.
				actual_text = actual_text .. actual_tone:gsub("(%d)", "<sup>%1</sup>"):gsub("", "")
				table.insert(actual_parts, actual_text)
			end
			
			-- Build the output line
			local line = table.concat(orig_parts, " ")
			line = line .. " → " .. table.concat(actual_parts, " ")
			
			-- Add IPA
			line = line .. "<br/>" .. font_ipa(processed.ipa)
			
			table.insert(output, line)
		end
	end

	return table.concat(output, "\n\n")
end

-- Formats results for the "brief" mode: a small label followed by the
-- cleaned pronunciations joined with " / " in monospace.
--
-- options.results : list of results as built by process_pronunciation
--
-- NOTE(review): this copy has lost several `[...]` expressions: the
-- `seen_pronunciations` and `dialect_codes` lookups have no keys,
-- `result.processed.original` has no element index, and the line that builds
-- the single-dialect label contains a stray `]` and is not valid Lua as
-- shown. The comments below describe the apparent intent only -- confirm
-- against the upstream module.
local function format_brief_output(options)
	local results = options.results
	local output_parts = {}
	local dialect_codes = {}
	local seen_pronunciations = {} 
	local order = {}

	-- Collect pronunciations and dialect codes in their original order
	for _, result in ipairs(results) do
		if result.processed and #result.processed > 0 then
			-- NOTE(review): presumably reads the first processed entry.
			local original = result.processed.original
			local cleared_text = clear_pinging_format(original)

			-- If the cleaned pronunciation has not appeared before, record its order
			-- NOTE(review): presumably keyed by `cleared_text`.
			if not seen_pronunciations then
				seen_pronunciations = {
					original = original,
					index = result.index
				}
				table.insert(order, cleared_text)
			end

			-- Collect dialect codes
			-- NOTE(review): presumably a set keyed by the dialect code.
			for _, processed in ipairs(result.processed) do
				if not dialect_codes then
					dialect_codes = true
				end
			end
		end
	end

	-- Turn the keys of `dialect_codes` into a list (pairs order is unspecified).
	local dialect_codes_array = {}
	for code, _ in pairs(dialect_codes) do
		table.insert(dialect_codes_array, code)
	end

	local output = " " -- "Puxian Min" already written in zh-pron
	-- When exactly one dialect is involved, its name is put before the
	-- romanization label.
	if #dialect_codes_array == 1 then
		output = output .. "<small>(<i>" .. dialects] .. ", "
	else
		output = output .. "<small>(<i>"
	end
	output = output .. "]</i>): </small>"

	-- Generate the pronunciation parts in the original order
	if #order > 0 then
		local formatted = {}
		for _, cleared_text in ipairs(order) do
			table.insert(formatted, cleared_text)
		end
		output = output .. font_consolas(table.concat(formatted, " / "))
	end

	return output
end

-- Formats results for the "complete" mode as nested wikitext list items:
-- per group a dialect-name line, the romanization (plus a "Phonetic" note
-- when the actual form differs), an optional BUC line, and the IPA line.
--
-- options.results : list of results as built by process_pronunciation
--
-- Returns all pieces concatenated without a separator.
--
-- NOTE(review): this function is declared without `local`, so it is created
-- as a global, unlike the other formatters -- consider whether that is
-- intended.
-- NOTE(review): the `grouped` and `dialects` accesses below appear to have
-- lost their `[...]` keys (presumably `grouped[key]` and
-- `dialects[dialect_info.code]`) -- confirm against upstream.
function format_complete_output(options)
	local results = options.results
	local output = {}

	-- Work on a shallow copy so sorting does not reorder the caller's list.
	local ordered_results = {}
	for _, result in ipairs(results) do
		table.insert(ordered_results, result)
	end
	
	table.sort(ordered_results, function(a, b)
		return a.index < b.index
	end)

	for _, result in ipairs(ordered_results) do
		-- Dialect entries with identical original / actual / IPA strings are
		-- merged into one group; `group_keys` keeps first-seen order.
		local grouped = {}
		local group_keys = {}
		
		for _, processed in ipairs(result.processed) do
			local key = processed.original .. "|" .. processed.actual .. "|" .. processed.ipa
			
			if not grouped then
				grouped = {
					data = {
						original = processed.original,
						actual = processed.actual,
						ipa = processed.ipa,
						buc = processed.buc,
						dialect = processed.dialect
					},
					dialects = {}
				}
				table.insert(group_keys, key)
			elseif not grouped.data.buc and processed.buc then
				-- A later entry supplies the BUC that the group lacked; the
				-- group's dialect is switched to that entry's dialect.
				grouped.data.buc = processed.buc
				grouped.data.dialect = processed.dialect
			end
			
			table.insert(grouped.dialects, {
				code = processed.dialect,
				position = processed.dialect_position
			})
		end
		
		for _, key in ipairs(group_keys) do
			local group = grouped
			
			-- List dialects in the order they were given in the input.
			table.sort(group.dialects, function(a, b)
				return a.position < b.position
			end)
			
			local dialect_names = {}
			for _, dialect_info in ipairs(group.dialects) do
				table.insert(dialect_names, dialects)
			end
			
			table.insert(output, "\n** <small>(<i>" .. table.concat(dialect_names, ", ") .. "</i>)</small>")
			
			-- Pouseng Ping'ing
			table.insert(output, "\n*** <small><i>]</i></small>: " ..
				font_consolas(clear_pinging_format(group.data.original)))
			
			-- Show the phonetic form only when it differs after cleaning.
			if clear_pinging_format(group.data.original) ~= clear_pinging_format(group.data.actual) then
				table.insert(output, font_consolas(
					" [<small>Phonetic</small>: " .. clear_pinging_format(group.data.actual)) ..
					"]")
			end
			
			-- BUC
			-- (only when the group's dialect is "pt"; "*" is removed for display)
			if group.data.dialect == "pt" and group.data.buc then
				local displayed_buc = group.data.buc:gsub("%*", "")
				table.insert(output, "\n*** <small><i>]</i></small>: " ..
				font_consolas(displayed_buc))
			end
			
			-- IPA
			table.insert(output, '\n*** <small>Sinological ] ' ..
				'<sup>(])</sup></small>: ' .. font_ipa(group.data.ipa))
		end
	end

	return table.concat(output)
end

-- Main entry point
--
-- Parses a pronunciation string, processes every entry and formats the
-- result.
--
-- text           : pronunciation string, or a table with an `args` field
--                  (in which case the arguments are taken from it)
-- mode           : one of the FORMAT_MODES values; defaults to
--                  FORMAT_MODES.BRIEF
-- highlight_type : highlight type forwarded to the formatter as `type`;
--                  defaults to "default"
--
-- Each entry must have the form "<dialect codes>:<word>". Raises an error
-- for empty input, for an entry that does not match that form, and for an
-- unsupported mode.
--
-- NOTE(review): the argument reads (`text.args or mode`, `text = text.args`)
-- and the gmatch pattern "+" appear to have lost their `[...]` parts --
-- confirm against the upstream module.
function export.rom_display(text, mode, highlight_type)
	if type(text) == "table" then
		-- `text` is reassigned last because the first two lines still read
		-- from the table.
		highlight_type = text.args.type
		mode = text.args or mode
		text = text.args
	end

	-- Parameter validation
	if not text or text == "" then
		error("Invalid input: text must be a non-empty string")
	end

	mode = mode or FORMAT_MODES.BRIEF
	highlight_type = highlight_type or "default"

	local pronunciation_data = {
		results = {},
		mode = mode,
		type = highlight_type
	}

	-- Process each pronunciation in the input
	local index = 1
	for pronunciation in text:gmatch("+") do
		-- The first capture is greedy, so the split happens at the last ":".
		local dialect_codes, word = pronunciation:match("^(.+):(.+)$")
		if not dialect_codes or not word then
			error("Invalid input format: " .. pronunciation)
		end

		local pron_options = {
			dialect_codes = dialect_codes,
			word = word,
			index = index
		}
		table.insert(pronunciation_data.results, 
					process_pronunciation(pron_options))
		index = index + 1
	end

	-- Format output according to the specified mode
	if mode == FORMAT_MODES.BRIEF then
		return format_brief_output(pronunciation_data)
	elseif mode == FORMAT_MODES.COMPLETE then
		return format_complete_output(pronunciation_data)
	elseif mode == FORMAT_MODES.DEMO then
		return format_demo_output(pronunciation_data)
	else
		error("Unsupported mode: " .. mode)
	end
end

-- Convert single BUC syllable to PSP
--
-- input : a syllable string, or a table with an `args` field
--
-- Returns the converted syllable (initial + final + tone digit, lowercased).
-- A nil or empty syllable is returned as is, and the original syllable is
-- returned whenever the conversion raises an error or the final is not found
-- in the lookup table.
--
-- NOTE(review): the keys of both lookup tables, the three initial patterns
-- ("^") and the table index expressions further down appear to have been
-- lost in this copy, so the tables are not valid Lua as shown -- restore them
-- from the upstream module before use.
local function syllable_to_psp(input)
	local buc_to_psp_initials = {
		 = "b",  = "c",  = "z",
		 = "d",  = "g",  = "h",
		 = "k",  = "l",  = "m",
		 = "ng",  = "n",  = "p",
		 = "s",  = "t",  = ""
	}

	local buc_to_psp_finals = {
		 = "a",
		 = "a",
		 = "ah",
		 = "a",
		 = "ai",
		 = "ang",
		 = "ao",
		 = "e",
		 = "e",
		 = "eh",
		 = "e",
		 = "ae",
		 = "eh",
		 = "eng",
		 = "oe",
		 = "oe",
		 = "oeh",
		 = "oeng",
		 = "i",
		 = "ih",
		 = "i",
		 = "ing",
		 = "ia",
		 = "ia",
		 = "iah",
		 = "ia",
		 = "ieng",
		 = "iu",
		 = "ou",
		 = "or",
		 = "or",
		 = "orh",
		 = "or",
		 = "orng",
		 = "o",
		 = "oh",
		 = "o",
		 = "ong",
		 = "u",
		 = "uh",
		 = "ua",
		 = "ua",
		 = "uah",
		 = "ua",
		 = "uang",
		 = "ui",
		 = "uei",
		 = "uei",
		 = "uei",
		 = "uei",
		 = "uei",
		 = "y",
		 = "yh",
		 = "yng",
		 = "yor",
		 = "yor",
		 = "yorh",
		 = "yor",
		 = "yorng",
		 = "ng",
		 = "ieo",
		 = "ieo",
		 = "ieoh",
		 = "ieo"
	}

	-- Handle input parameter
	-- NOTE(review): as written a table input makes `syllable` the whole
	-- `args` table; presumably a specific argument was indexed -- confirm.
	local syllable
	if type(input) == "table" then
		syllable = input.args
	else
		syllable = input
	end

	if not syllable or syllable == "" then
		return syllable
	end

	-- Try to convert the syllable, return original if any error occurs
	local success, result = pcall(function()
		-- Decompose the syllable and check for validity
		-- (NFD separates base letters from combining tone marks)
		local decomposed = mw.ustring.toNFD(syllable)
		if not decomposed then
			return syllable
		end

		-- Extract and remove tone marks
		-- The marks are tested in a fixed order and only the first kind found
		-- is removed.
		local tone = ""
		if decomposed:find("́") then		  -- Tone 2: COMBINING ACUTE ACCENT
			tone = "2"
			decomposed = decomposed:gsub("́", "")
		elseif decomposed:find("̂") then	  -- Tone 3: COMBINING CIRCUMFLEX ACCENT
			tone = "3"
			decomposed = decomposed:gsub("̂", "")
		elseif decomposed:find("̍") then	  -- Tone 4/7: COMBINING VERTICAL LINE ABOVE
			if decomposed:find("h%*$") then   -- Special case: -h* ending -> tone 2
				-- NOTE(review): this inner check repeats the condition of the
				-- enclosing branch, so it can never be true here.
				if not decomposed:find("̍") then  -- If has h* but no vertical line
					return syllable
				end
				tone = "2"
			elseif decomposed:find("h$") then
				tone = "7"
			else
				tone = "4"
			end
			decomposed = decomposed:gsub("̍", "")
		elseif decomposed:find("̄") then	  -- Tone 5: COMBINING MACRON
			tone = "5"
			decomposed = decomposed:gsub("̄", "")
		else
			-- No tone mark: either tone 1 (no -h) or tone 6 (with -h)
			-- (a trailing "h*" counts as tone 1 here)
			if decomposed:find("h$") and not decomposed:find("h%*$") then
				tone = "6"
			else
				tone = "1"
			end
		end

		-- Recompose and check validity
		local normalized = mw.ustring.toNFC(decomposed)
		if not normalized then
			return syllable
		end

		-- Special case: standalone `ng` syllable after tone removal
		if normalized == "ng" then
			return "ng" .. tone
		end

		-- Extract initial
		-- The first two branches take a two-byte initial, the third a
		-- one-byte initial; `sub` here counts bytes, not characters.
		-- NOTE(review): all three patterns are "^" as written, so only the
		-- first branch can ever run; character classes appear to be missing.
		local initial = ""
		if normalized:match("^") then
			initial = normalized:sub(1, 2):lower()
			normalized = normalized:sub(3)
		elseif normalized:match("^") then
			initial = normalized:sub(1, 2):lower()
			normalized = normalized:sub(3)
		elseif normalized:match("^") then
			initial = normalized:sub(1, 1):lower()
			normalized = normalized:sub(2)
		end
		
		-- NOTE(review): presumably `buc_to_psp_initials[initial]` -- confirm.
		local psp_initial = buc_to_psp_initials or ""

		-- Process final
		-- Remove -h* marker if present (affects tone but not final lookup)
		local final = normalized:gsub("h%*$", "")

		-- Look up PSP final
		-- NOTE(review): presumably `buc_to_psp_finals[final]` -- confirm.
		local psp_final = buc_to_psp_finals
		if not psp_final then
			return syllable
		end

		-- Combine all parts to form complete PSP syllable
		return (psp_initial .. psp_final .. tone):lower()
	end)

	-- Return original syllable if conversion failed
	return success and result or syllable
end

-- Convert BUC to PSP (both single syllable and text)
--
-- input : a text string, or a table with an `args` field
--
-- The text is split into syllables and delimiters, each non-delimiter part
-- is passed through syllable_to_psp, and the parts are concatenated again,
-- so the delimiters are preserved. A nil or empty text is returned as is.
--
-- NOTE(review): the delimiter pattern is an empty string in this copy, the
-- delimiter test is "^$", and the `parts` accesses in the last loop have no
-- index (presumably `parts[i]`); these appear to have lost their `[...]`
-- parts -- confirm against the upstream module.
function export.buc_to_psp(input)
	-- Handle input parameter
	-- NOTE(review): as written a table input makes `text` the whole `args`
	-- table; presumably a specific argument was indexed -- confirm.
	local text
	if type(input) == "table" then
		text = input.args
	else
		text = input
	end

	if not text or text == "" then
		return text
	end

	-- Split text into parts by delimiters while keeping delimiters
	local parts = {}
	local last_pos = 1
	local pattern = ""

	-- "()" captures the match position, so each iteration yields the
	-- delimiter's start position and the delimiter itself.
	for pos, delimiter in mw.ustring.gmatch(text, "()("..pattern..")") do
		-- Text between the previous delimiter and this one is a syllable part.
		if pos > last_pos then
			table.insert(parts, mw.ustring.sub(text, last_pos, pos - 1))
		end
		table.insert(parts, delimiter)
		last_pos = pos + mw.ustring.len(delimiter)
	end

	-- Handle the last part
	if last_pos <= mw.ustring.len(text) then
		table.insert(parts, mw.ustring.sub(text, last_pos))
	end

	-- Convert syllables and keep delimiters
	for i = 1, #parts do
		if not parts:match("^$") then
			parts = syllable_to_psp(parts)
		end
	end

	return table.concat(parts)
end

return export