local u = mw.ustring.char
local data = {}
-- Code point ranges stored as a flat list of (start, end) pairs: each odd
-- index holds the start of a range and the following even index its end.
-- Deliberately flat (no per-range subtables) to save memory.
local ranges = {
	0x3007, 0x3007,   -- ideographic number zero
	0x3400, 0x4DBF,   -- CJK Unified Ideographs Extension A
	0x4E00, 0x9FFF,   -- CJK Unified Ideographs
	0xF900, 0xFA6D,   -- CJK Compatibility Ideographs
	0xFA70, 0xFAD9,   -- CJK Compatibility Ideographs (continued)
	0x20000, 0x2A6DF, -- Extension B
	0x2A700, 0x2B739, -- Extension C
	0x2B740, 0x2B81D, -- Extension D
	0x2B820, 0x2CEA1, -- Extension E
	0x2CEB0, 0x2EBE0, -- Extension F
	0x2EBF0, 0x2EE5D, -- Extension I
	0x2F800, 0x2FA1D, -- CJK Compatibility Ideographs Supplement
	0x30000, 0x3134A, -- Extension G
	0x31350, 0x323AF  -- Extension H
}
-- Store the number of list entries (two per range) alongside the list.
ranges.n = #ranges
data.ranges = ranges
-- Characters not included in Unicode, which must be described using IDS.
-- Each value is a string made of a radical character followed by a two-digit
-- number (one entry, "手10車06", carries two such pairs) -- presumably a sort
-- key of radical plus residual stroke count; TODO confirm against the
-- consuming module.
-- NOTE(review): in this copy every entry is missing its key (the text before
-- `=`), so the table does not parse as Lua. The keys look like they were
-- bracketed IDS strings that were stripped; they cannot be recovered from the
-- values alone and must be restored from the authoritative source. The two
-- "龍06" entries presumably had distinct keys.
data.unsupported = {
= "丿01",
= "人03",
= "八09",
= "冖08",
= "冖09",
= "十05",
= "土13",
= "士14",
= "大02",
= "女02",
= "女03",
= "宀37",
= "手08",
= "手09",
= "手10車06",
= "犬01",
= "田05",
= "糹10",
= "艸06",
= "邑11",
= "雨04",
= "龍06",
= "龍06",
}
-- IDS characters paired to the number of characters which must follow them.
-- Keys are written as code points via `u` so that they cannot be lost or
-- mangled in transit: U+2FF0..U+2FFF (Ideographic Description Characters),
-- then U+303E and U+31EF.
-- NOTE(review): the keys were missing from the reviewed copy and have been
-- restored from the per-entry comments, which follow code point order.
data.ids = {
	[u(0x2FF0)] = 2, -- left-to-right
	[u(0x2FF1)] = 2, -- above-to-below
	[u(0x2FF2)] = 3, -- left-to-middle and right
	[u(0x2FF3)] = 3, -- above-to-middle and below
	[u(0x2FF4)] = 2, -- full surround
	[u(0x2FF5)] = 2, -- surround from above
	[u(0x2FF6)] = 2, -- surround from below
	[u(0x2FF7)] = 2, -- surround from left
	[u(0x2FF8)] = 2, -- surround from upper left
	[u(0x2FF9)] = 2, -- surround from upper right
	[u(0x2FFA)] = 2, -- surround from lower left
	[u(0x2FFB)] = 2, -- overlaid
	[u(0x2FFC)] = 2, -- surround from right
	[u(0x2FFD)] = 2, -- surround from lower right
	[u(0x2FFE)] = 1, -- horizontal reflection
	[u(0x2FFF)] = 1, -- rotation
	[u(0x303E)] = 1, -- variant but not equivalent
	[u(0x31EF)] = 2 -- subtraction
}
-- Maps enclosed/squared compatibility characters to the plain ideograph(s)
-- they contain.
-- Keys are written as code points via `u`.
-- NOTE(review): the keys were missing from the reviewed copy. They have been
-- restored by matching the values, in order, against the code charts of the
-- blocks named in the section comments; the entry counts match exactly
-- (U+1F213, which encloses a katakana, has no entry).
data.preconvert = {
	-- Enclosed CJK Letters and Months
	-- Parenthesized ideographs (U+3220..U+3243)
	[u(0x3220)] = "一",
	[u(0x3221)] = "二",
	[u(0x3222)] = "三",
	[u(0x3223)] = "四",
	[u(0x3224)] = "五",
	[u(0x3225)] = "六",
	[u(0x3226)] = "七",
	[u(0x3227)] = "八",
	[u(0x3228)] = "九",
	[u(0x3229)] = "十",
	[u(0x322A)] = "月",
	[u(0x322B)] = "火",
	[u(0x322C)] = "水",
	[u(0x322D)] = "木",
	[u(0x322E)] = "金",
	[u(0x322F)] = "土",
	[u(0x3230)] = "日",
	[u(0x3231)] = "株",
	[u(0x3232)] = "有",
	[u(0x3233)] = "社",
	[u(0x3234)] = "名",
	[u(0x3235)] = "特",
	[u(0x3236)] = "財",
	[u(0x3237)] = "祝",
	[u(0x3238)] = "労",
	[u(0x3239)] = "代",
	[u(0x323A)] = "呼",
	[u(0x323B)] = "学",
	[u(0x323C)] = "監",
	[u(0x323D)] = "企",
	[u(0x323E)] = "資",
	[u(0x323F)] = "協",
	[u(0x3240)] = "祭",
	[u(0x3241)] = "休",
	[u(0x3242)] = "自",
	[u(0x3243)] = "至",
	-- Circled ideographs (U+3244..U+3247)
	[u(0x3244)] = "問",
	[u(0x3245)] = "幼",
	[u(0x3246)] = "文",
	[u(0x3247)] = "箏",
	-- Circled ideographs (U+3280..U+32B0)
	[u(0x3280)] = "一",
	[u(0x3281)] = "二",
	[u(0x3282)] = "三",
	[u(0x3283)] = "四",
	[u(0x3284)] = "五",
	[u(0x3285)] = "六",
	[u(0x3286)] = "七",
	[u(0x3287)] = "八",
	[u(0x3288)] = "九",
	[u(0x3289)] = "十",
	[u(0x328A)] = "月",
	[u(0x328B)] = "火",
	[u(0x328C)] = "水",
	[u(0x328D)] = "木",
	[u(0x328E)] = "金",
	[u(0x328F)] = "土",
	[u(0x3290)] = "日",
	[u(0x3291)] = "株",
	[u(0x3292)] = "有",
	[u(0x3293)] = "社",
	[u(0x3294)] = "名",
	[u(0x3295)] = "特",
	[u(0x3296)] = "財",
	[u(0x3297)] = "祝",
	[u(0x3298)] = "労",
	[u(0x3299)] = "秘",
	[u(0x329A)] = "男",
	[u(0x329B)] = "女",
	[u(0x329C)] = "適",
	[u(0x329D)] = "優",
	[u(0x329E)] = "印",
	[u(0x329F)] = "注",
	[u(0x32A0)] = "項",
	[u(0x32A1)] = "休",
	[u(0x32A2)] = "写",
	[u(0x32A3)] = "正",
	[u(0x32A4)] = "上",
	[u(0x32A5)] = "中",
	[u(0x32A6)] = "下",
	[u(0x32A7)] = "左",
	[u(0x32A8)] = "右",
	[u(0x32A9)] = "医",
	[u(0x32AA)] = "宗",
	[u(0x32AB)] = "学",
	[u(0x32AC)] = "監",
	[u(0x32AD)] = "企",
	[u(0x32AE)] = "資",
	[u(0x32AF)] = "協",
	[u(0x32B0)] = "夜",
	-- Square era name (U+32FF)
	[u(0x32FF)] = "令和",
	-- CJK Compatibility
	[u(0x337B)] = "平成",
	[u(0x337C)] = "昭和",
	[u(0x337D)] = "大正",
	[u(0x337E)] = "明治",
	[u(0x337F)] = "株式会社",
	-- Enclosed Ideographic Supplement
	-- Squared ideographs (U+1F210..U+1F23B, without U+1F213)
	[u(0x1F210)] = "手",
	[u(0x1F211)] = "字",
	[u(0x1F212)] = "双",
	[u(0x1F214)] = "二",
	[u(0x1F215)] = "多",
	[u(0x1F216)] = "解",
	[u(0x1F217)] = "天",
	[u(0x1F218)] = "交",
	[u(0x1F219)] = "映",
	[u(0x1F21A)] = "無",
	[u(0x1F21B)] = "料",
	[u(0x1F21C)] = "前",
	[u(0x1F21D)] = "後",
	[u(0x1F21E)] = "再",
	[u(0x1F21F)] = "新",
	[u(0x1F220)] = "初",
	[u(0x1F221)] = "終",
	[u(0x1F222)] = "生",
	[u(0x1F223)] = "販",
	[u(0x1F224)] = "声",
	[u(0x1F225)] = "吹",
	[u(0x1F226)] = "演",
	[u(0x1F227)] = "投",
	[u(0x1F228)] = "捕",
	[u(0x1F229)] = "一",
	[u(0x1F22A)] = "三",
	[u(0x1F22B)] = "遊",
	[u(0x1F22C)] = "左",
	[u(0x1F22D)] = "中",
	[u(0x1F22E)] = "右",
	[u(0x1F22F)] = "指",
	[u(0x1F230)] = "走",
	[u(0x1F231)] = "打",
	[u(0x1F232)] = "禁",
	[u(0x1F233)] = "空",
	[u(0x1F234)] = "合",
	[u(0x1F235)] = "満",
	[u(0x1F236)] = "有",
	[u(0x1F237)] = "月",
	[u(0x1F238)] = "申",
	[u(0x1F239)] = "割",
	[u(0x1F23A)] = "営",
	[u(0x1F23B)] = "配",
	-- Tortoise-shell-bracketed ideographs (U+1F240..U+1F248)
	[u(0x1F240)] = "本",
	[u(0x1F241)] = "三",
	[u(0x1F242)] = "二",
	[u(0x1F243)] = "安",
	[u(0x1F244)] = "点",
	[u(0x1F245)] = "打",
	[u(0x1F246)] = "盗",
	[u(0x1F247)] = "勝",
	[u(0x1F248)] = "敗",
	-- Circled ideographs (U+1F250..U+1F251)
	[u(0x1F250)] = "得",
	[u(0x1F251)] = "可",
	-- Rounded symbols (U+1F260..U+1F265)
	[u(0x1F260)] = "福",
	[u(0x1F261)] = "祿",
	[u(0x1F262)] = "壽",
	[u(0x1F263)] = "喜",
	[u(0x1F264)] = "囍",
	[u(0x1F265)] = "財",
}
-- Adds preconvert entries for a run of consecutive code points whose
-- replacements are a running number followed by a fixed character.
--   from, to: first and last code point of the run
--   offset:   number used for the first code point; it increases by one for
--             each following code point
--   char:     string appended after the number
local function add_sequences(from, to, offset, char)
	for i = from, to do
		-- Index by the character itself. Assigning to `data.preconvert`
		-- directly would replace the whole table with a single string.
		data.preconvert[u(i)] = (i - from + offset) .. char
	end
end
add_sequences(0x32C0, 0x32CB, 1, "月") -- "1月" .. "12月"
add_sequences(0x3358, 0x3370, 0, "点") -- "0点" .. "24点"
add_sequences(0x33E0, 0x33FE, 1, "日") -- "1日" .. "31日"
-- The 214 radicals in numerical order: the radical numbered n is at index n.
local kangxi_radicals = {
	"一", "丨", "丶", "丿", "乙", "亅", "二", "亠", "人", "儿", -- 1-10
	"入", "八", "冂", "冖", "冫", "几", "凵", "刀", "力", "勹", -- 11-20
	"匕", "匚", "匸", "十", "卜", "卩", "厂", "厶", "又", "口", -- 21-30
	"囗", "土", "士", "夂", "夊", "夕", "大", "女", "子", "宀", -- 31-40
	"寸", "小", "尢", "尸", "屮", "山", "巛", "工", "己", "巾", -- 41-50
	"干", "幺", "广", "廴", "廾", "弋", "弓", "彐", "彡", "彳", -- 51-60
	"心", "戈", "戶", "手", "支", "攴", "文", "斗", "斤", "方", -- 61-70
	"无", "日", "曰", "月", "木", "欠", "止", "歹", "殳", "毋", -- 71-80
	"比", "毛", "氏", "气", "水", "火", "爪", "父", "爻", "爿", -- 81-90
	"片", "牙", "牛", "犬", "玄", "玉", "瓜", "瓦", "甘", "生", -- 91-100
	"用", "田", "疋", "疒", "癶", "白", "皮", "皿", "目", "矛", -- 101-110
	"矢", "石", "示", "禸", "禾", "穴", "立", "竹", "米", "糸", -- 111-120
	"缶", "网", "羊", "羽", "老", "而", "耒", "耳", "聿", "肉", -- 121-130
	"臣", "自", "至", "臼", "舌", "舛", "舟", "艮", "色", "艸", -- 131-140
	"虍", "虫", "血", "行", "衣", "襾", "見", "角", "言", "谷", -- 141-150
	"豆", "豕", "豸", "貝", "赤", "走", "足", "身", "車", "辛", -- 151-160
	"辰", "辵", "邑", "酉", "釆", "里", "金", "長", "門", "阜", -- 161-170
	"隶", "隹", "雨", "靑", "非", "面", "革", "韋", "韭", "音", -- 171-180
	"頁", "風", "飛", "食", "首", "香", "馬", "骨", "高", "髟", -- 181-190
	"鬥", "鬯", "鬲", "鬼", "魚", "鳥", "鹵", "鹿", "麥", "麻", -- 191-200
	"黃", "黍", "黑", "黹", "黽", "鼎", "鼓", "鼠", "鼻", "齊", -- 201-210
	"齒", "龍", "龜", "龠"                                      -- 211-214
}
data.radicals = kangxi_radicals
-- Adds preconvert entries which map radical-form characters to the
-- corresponding character in `data.radicals`.
--   radicals: table mapping a character to a radical number, i.e. an index
--             into `data.radicals`
local function add_radicals(radicals)
	for k, v in pairs(radicals) do
		-- Set one entry per character. Assigning to `data.preconvert`
		-- directly would replace the whole table with `data.radicals`.
		data.preconvert[k] = data.radicals[v]
	end
end

-- NOTE(review): the character keys of both argument tables were missing from
-- the reviewed copy. The radical numbers were listed in code point order of
-- the named Unicode blocks, so the keys are regenerated from the code points.

-- Kangxi radicals
-- U+2F00..U+2FD5 correspond one-to-one with radicals 1..214.
do
	local kangxi = {}
	for number = 1, 214 do
		kangxi[u(0x2F00 + number - 1)] = number
	end
	add_radicals(kangxi)
end

-- CJK Radicals Supplement
-- Radical numbers for U+2E80..U+2EF3 in code point order. U+2E9A is
-- unassigned and has no entry.
do
	local numbers = {
		3, 27, 5, 5, 5,
		9, 13, 16, 18, 18,
		25, 26, 42, 42, 43,
		43, 43, 43, 49, 52,
		58, 58, 61, 61, 64,
		66, 71, 72, 74, 78,
		80, 83, 85, 85, 86,
		87, 87, 90, 93, 94,
		96, 103, 109, 113, 113,
		118, 120, 120, 122, 109,
		122, 122, 122, 123, 123,
		123, 125, 129, 129, 130,
		134, 140, 140, 140, 141,
		145, 146, 146, 147, 148,
		148, 149, 154, 157, 159,
		162, 162, 162, 163, 167,
		168, 168, 168, 169, 170,
		170, 173, 174, 178, 181,
		182, 183, 184, 184, 184,
		184, 185, 187, 188, 194,
		195, 196, 197, 199, 201,
		205, 210, 210, 211, 211,
		212, 212, 213, 213, 213
	}
	local supplement = {}
	local codepoint = 0x2E80
	for i = 1, #numbers do
		if codepoint == 0x2E9A then
			-- Skip the unassigned code point.
			codepoint = codepoint + 1
		end
		supplement[u(codepoint)] = numbers[i]
		codepoint = codepoint + 1
	end
	add_radicals(supplement)
end
return data