-- Documentation for this module can be created on the page Módulo:languages/data/2/doc
-- Short aliases used by every entry below.

-- Builds a string from code points, e.g. u(0xFB21) or u(0x2800).
local u = mw.ustring.char
-- Shared language-data module; the three aliases that follow are fields read from it.
local m_langdata = require("Module:languages/data")
-- Named characters and character ranges (c.acute, c.grave, c.punc, ...) that the entries
-- concatenate into remove_diacritics and standardChars strings.
local c = m_langdata.chars
-- NOTE(review): entries concatenate `p` directly onto strings ("z" .. p), often several times
-- with the same base letter. Presumably `p` was indexed (p[1], p[2], ...) before the bracketed
-- text was lost; confirm against the upstream page.
local p = m_langdata.puaChars
-- NOTE(review): entries assign `s` itself to display_text / entry_name / sort_key. Presumably
-- a string index (s["..."]) was lost the same way; confirm against the upstream page.
local s = m_langdata.shared
-- Result table. NOTE(review): every entry below is written `m = {...}`, which rebinds `m`
-- instead of adding to it, so only the last assignment survives. The per-language keys appear
-- to be missing.
local m = {}
-- Afar. Positional values: name, a numeric identifier, a grouping code and the script code
-- (their exact meaning is defined by the module that consumes this table).
m = {
	"Afar",
	27811,
	"cus-eas",
	"Latn",
	-- Acute accents are listed for removal when forming the entry name.
	entry_name = {
		remove_diacritics = c.acute,
	},
}
m = {
"Abkhaz",
5111,
"cau-abz",
"Cyrl, Geor, Latn",
translit = {
Cyrl = "ab-translit",
Geor = "Geor-translit",
},
override_translit = true,
display_text = {Cyrl = s},
entry_name = {
Cyrl = s,
Latn = s,
},
sort_key = {
Cyrl = {
from = {
"х'ә", -- 3 chars
"гь", "гә", "ӷь", "ҕь", "ӷә", "ҕә", "дә", "ё", "жь", "жә", "ҙә", "ӡә", "ӡ'", "кь", "кә", "қь", "қә", "ҟь", "ҟә", "ҫә", "тә", "ҭә", "ф'", "хь", "хә", "х'", "ҳә", "ць", "цә", "ц'", "ҵә", "ҵ'", "шь", "шә", "џь", -- 2 chars
"ӷ", "ҕ", "ҙ", "ӡ", "қ", "ҟ", "ԥ", "ҧ", "ҫ", "ҭ", "ҳ", "ҵ", "ҷ", "ҽ", "ҿ", "ҩ", "џ", "ә" -- 1 char
},
to = {
"х" .. p,
"г" .. p, "г" .. p, "г" .. p, "г" .. p, "г" .. p, "г" .. p, "д" .. p, "е" .. p, "ж" .. p, "ж" .. p, "з" .. p, "з" .. p, "з" .. p, "к" .. p, "к" .. p, "к" .. p, "к" .. p, "к" .. p, "к" .. p, "с" .. p, "т" .. p, "т" .. p, "ф" .. p, "х" .. p, "х" .. p, "х" .. p, "х" .. p, "ц" .. p, "ц" .. p, "ц" .. p, "ц" .. p, "ц" .. p, "ш" .. p, "ш" .. p, "ы" .. p,
"г" .. p, "г" .. p, "з" .. p, "з" .. p, "к" .. p, "к" .. p, "п" .. p, "п" .. p, "с" .. p, "т" .. p, "х" .. p, "ц" .. p, "ч" .. p, "ч" .. p, "ч" .. p, "ы" .. p, "ы" .. p, "ь" .. p
}
},
},
}
-- Avestan. Positional values: name, a numeric identifier, a grouping code and the script codes.
m = {
	"Avestan",
	29572,
	"ira-cen",
	"Avst, Gujr",
	-- Only the Avst script names a transliteration module.
	translit = {
		Avst = "Avst-translit",
	},
	wikipedia_article = "Avestan",
}
m = {
"Afrikaans",
14196,
"gmw",
"Latn, Arab",
ancestors = "nl",
sort_key = {
Latn = {
remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.diaer .. c.ringabove .. c.cedilla .. "'",
from = {"n"},
to = {"n" .. p}
}
},
}
-- Akan. Positional values: name, a numeric identifier, a grouping code and the script code.
m = {
	"Akan",
	28026,
	"alv-ctn",
	"Latn",
}
-- Amharic. Positional values: name, a numeric identifier, a grouping code and the script code.
m = {
	"Amharic",
	28244,
	"sem-eth",
	"Ethi",
	-- A single transliteration module name, not split by script.
	translit = "Ethi-translit",
}
-- Aragonese. Positional values: name, a numeric identifier, a grouping code and the script code.
m = {
	"Aragonese",
	8765,
	"roa-ibe",
	"Latn",
	ancestors = "roa-oan",
}
-- Arabic. Positional values: name, a numeric identifier, a grouping code and the script codes.
-- NOTE(review): the table is assigned straight to `m`, replacing whatever it held; a
-- per-language key appears to have been lost. Confirm against the upstream page.
m = {
	"Arabic",
	13955,
	"sem-arb",
	"Arab, Hebr, Brai",
	translit = "ar-translit",
	entry_name = {Arab = "ar-entryname"},
	-- Put Judeo-Arabic (Hebrew-script Arabic) under the category header
	-- U+FB21 HEBREW LETTER WIDE ALEF so that it sorts after Arabic-script titles.
	sort_key = {
		Hebr = {
			-- A frontier pattern must be written %f[set]; the bare "^%f" that stood here is
			-- rejected by the pattern matcher. The set is restored as the Hebrew letter range
			-- U+05D0-U+05EA, so the replacement is inserted only in front of a title that
			-- starts with a Hebrew letter.
			-- NOTE(review): the range is inferred from the comment above; confirm against the
			-- upstream page.
			from = {"^%f[" .. u(0x5D0) .. "-" .. u(0x5EA) .. "]"},
			to = {u(0xFB21)},
		},
	},
}
-- Assamese. Positional values: name, a numeric identifier, a grouping code and the script code.
m = {
	"Assamese",
	29401,
	"inc-eas",
	"as-Beng",
	ancestors = "inc-mas",
	translit = "as-translit",
}
m = {
"Avar",
29561,
"cau-ava",
"Cyrl, Latn, Arab",
ancestors = "oav",
translit = {
Cyrl = "cau-nec-translit",
Arab = "ar-translit",
},
override_translit = true,
display_text = {Cyrl = s},
entry_name = {
Cyrl = s,
Latn = s,
},
sort_key = {
Cyrl = {
from = {"гъ", "гь", "гӏ", "ё", "кк", "къ", "кь", "кӏ", "лъ", "лӏ", "тӏ", "хх", "хъ", "хь", "хӏ", "цӏ", "чӏ"},
to = {"г" .. p, "г" .. p, "г" .. p, "е" .. p, "к" .. p, "к" .. p, "к" .. p, "к" .. p, "л" .. p, "л" .. p, "т" .. p, "х" .. p, "х" .. p, "х" .. p, "х" .. p, "ц" .. p, "ч" .. p}
},
},
}
-- Aymara. Positional values: name, a numeric identifier, a grouping code and the script code.
m = {
	"Aymara",
	4627,
	"sai-aym",
	"Latn",
}
-- Azerbaijani, listed with the scripts Latn, Cyrl and fa-Arab.
-- NOTE(review): the table is assigned straight to `m`, replacing whatever it held; a
-- per-language key appears to have been lost. Confirm against the upstream page.
m = {
	"Azerbaijani",
	9292,
	"trk-ogz",
	"Latn, Cyrl, fa-Arab",
	ancestors = "trk-oat",
	dotted_dotless_i = true,
	-- The script key had been dropped, leaving `{ = {...}}`, which does not parse. The marks
	-- listed (kashida, fatha, shadda, ...) belong to the entry's "fa-Arab" script, and because
	-- that code contains a hyphen it has to be written as a bracketed string key.
	entry_name = {
		["fa-Arab"] = {
			remove_diacritics = c.kashida .. c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun,
		},
	},
	-- NOTE(review): `p` is concatenated as-is below; presumably an index (p[n]) was lost,
	-- since e.g. the two Cyrillic "и" replacements are otherwise identical.
	sort_key = {
		Latn = {
			from = {
				"i", -- Ensure "i" comes after "ı".
				"ç", "ə", "ğ", "x", "ı", "q", "ö", "ş", "ü", "w"
			},
			to = {
				"i" .. p,
				"c" .. p, "e" .. p, "g" .. p, "h" .. p, "i", "k" .. p, "o" .. p, "s" .. p, "u" .. p, "z" .. p
			}
		},
		Cyrl = {
			from = {"ғ", "ә", "ы", "ј", "ҝ", "ө", "ү", "һ", "ҹ"},
			to = {"г" .. p, "е" .. p, "и" .. p, "и" .. p, "к" .. p, "о" .. p, "у" .. p, "х" .. p, "ч" .. p}
		},
	},
}
m = {
"Bashkir",
13389,
"trk-kbu",
"Cyrl",
translit = "ba-translit",
override_translit = true,
sort_key = {
from = {"ғ", "ҙ", "ё", "ҡ", "ң", "ө", "ҫ", "ү", "һ", "ә"},
to = {"г" .. p, "д" .. p, "е" .. p, "к" .. p, "н" .. p, "о" .. p, "с" .. p, "у" .. p, "х" .. p, "э" .. p}
},
}
m = {
"Belarusian",
9091,
"zle",
"Cyrl, Latn",
ancestors = "zle-ort",
translit = {Cyrl = "be-translit"},
entry_name = {
remove_diacritics = c.grave .. c.acute,
remove_exceptions = {"Ć", "ć", "Ń", "ń", "Ś", "ś", "Ź", "ź"},
},
sort_key = {
Cyrl = {
from = {"ґ", "ё", "і", "ў"},
to = {"г" .. p, "е" .. p, "и" .. p, "у" .. p}
},
Latn = {
from = {"ć", "č", "dz", "dź", "dž", "ch", "ł", "ń", "ś", "š", "ŭ", "ź", "ž"},
to = {"c" .. p, "c" .. p, "d" .. p, "d" .. p, "d" .. p, "h" .. p, "l" .. p, "n" .. p, "s" .. p, "s" .. p, "u" .. p, "z" .. p, "z" .. p}
},
},
}
m = {
"Bulgarian",
7918,
"zls",
"Cyrl",
ancestors = "cu",
translit = "bg-translit",
entry_name = {remove_diacritics = c.grave .. c.acute},
}
m = {
"Bihari",
135305,
"inc-eas",
"Deva",
ancestors = "inc-mgd",
}
m = {
"Bislama",
35452,
"crp",
"Latn",
ancestors = "en",
}
m = {
"Bambara",
33243,
"dmn-emn",
"Latn",
sort_key = {
from = {"ɛ", "ɲ", "ŋ", "ɔ"},
to = {"e" .. p, "n" .. p, "n" .. p, "o" .. p}
},
}
m = {
"Bengali",
9610,
"inc-eas",
"Beng, Newa",
ancestors = "inc-mbn",
translit = {Beng = "bn-translit"},
}
m = {
"Tibetan",
34271,
"sit-tib",
"Tibt", -- sometimes Deva?
ancestors = "xct",
translit = "Tibt-translit",
override_translit = true,
display_text = s,
entry_name = s,
sort_key = "Tibt-sortkey",
}
-- Breton. Positional values: name, a numeric identifier, a grouping code and the script code.
m = {
"Breton",
12107,
"cel-bry",
"Latn",
ancestors = "xbm",
sort_key = {
-- NOTE(review): both `from` entries are the same string "ch" and both `to` entries are the
-- same expression, so the second pair adds nothing. Presumably the second pattern was a
-- different spelling and `p` carried distinct indices before bracketed text was lost;
-- confirm against the upstream page.
from = {"ch", "ch"},
to = {"c" .. p, "c" .. p}
},
}
m = {
"Catalan",
7026,
"roa-ocr",
"Latn",
ancestors = "roa-oca",
sort_key = {
remove_diacritics = c.grave .. c.acute .. c.diaer .. c.cedilla,
from = {"l·l"},
to = {"ll"}
},
}
m = {
"Chechen",
33350,
"cau-vay",
"Cyrl, Latn, Arab",
translit = {
Cyrl = "cau-nec-translit",
Arab = "ar-translit",
},
override_translit = true,
display_text = {Cyrl = s},
entry_name = {
Cyrl = s,
Latn = s,
},
sort_key = {
Cyrl = {
from = {"аь", "гӏ", "ё", "кх", "къ", "кӏ", "оь", "пӏ", "тӏ", "уь", "хь", "хӏ", "цӏ", "чӏ", "юь", "яь"},
to = {"а" .. p, "г" .. p, "е" .. p, "к" .. p, "к" .. p, "к" .. p, "о" .. p, "п" .. p, "т" .. p, "у" .. p, "х" .. p, "х" .. p, "ц" .. p, "ч" .. p, "ю" .. p, "я" .. p}
},
},
}
m = {
"Chamorro",
33262,
"poz-sus",
"Latn",
sort_key = {
remove_diacritics = "'",
from = {"å", "ch", "ñ", "ng"},
to = {"a" .. p, "c" .. p, "n" .. p, "n" .. p}
},
}
m = {
"Corsican",
33111,
"roa-itd",
"Latn",
sort_key = {
from = {"chj", "ghj", "sc", "sg"},
to = {"c" .. p, "g" .. p, "s" .. p, "s" .. p}
},
}
m = {
"Cree",
33390,
"alg",
"Cans, Latn",
translit = {Cans = "cr-translit"},
}
m = {
"Czech",
9056,
"zlw",
"Latn",
ancestors = "zlw-ocs",
sort_key = {
from = {"á", "č", "ď", "é", "ě", "ch", "í", "ň", "ó", "ř", "š", "ť", "ú", "ů", "ý", "ž"},
to = {"a" .. p, "c" .. p, "d" .. p, "e" .. p, "e" .. p, "h" .. p, "i" .. p, "n" .. p, "o" .. p, "r" .. p, "s" .. p, "t" .. p, "u" .. p, "u" .. p, "y" .. p, "z" .. p}
},
}
m = {
"Old Church Slavonic",
35499,
"zls",
"Cyrs, Glag",
translit = "Cyrs-Glag-translit",
entry_name = {Cyrs = "Cyrs-entryname"},
sort_key = {Cyrs = "Cyrs-sortkey"},
}
m = {
"Chuvash",
33348,
"trk-ogr",
"Cyrl",
ancestors = "xbo",
translit = "cv-translit",
override_translit = true,
sort_key = {
from = {"ӑ", "ё", "ӗ", "ҫ", "ӳ"},
to = {"а" .. p, "е" .. p, "е" .. p, "с" .. p, "у" .. p}
},
}
m = {
"Welsh",
9309,
"cel-bry",
"Latn",
ancestors = "wlm",
sort_key = {
remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. "'",
from = {"ch", "dd", "ff", "ng", "ll", "ph", "rh", "th"},
to = {"c" .. p, "d" .. p, "f" .. p, "g" .. p, "l" .. p, "p" .. p, "r" .. p, "t" .. p}
},
standardChars = "A-IL-PR-UWYa-il-pr-uwy0-9ÂâÊêÎîÔôÛûŴŵŶŷ" .. c.punc,
}
m = {
"Danish",
9035,
"gmq",
"Latn",
ancestors = "gmq-oda",
sort_key = {
from = {"æ", "ø", "å"},
to = {"z" .. p, "z" .. p, "z" .. p}
},
}
m = {
"German",
188,
"gmw",
"Latn, Latf",
ancestors = "gmh",
sort_key = {
remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. c.ringabove,
from = {"ß"},
to = {"ss"}
},
standardChars = "A-Za-z0-9ÄäÖöÜüß" .. c.punc,
}
m = {
"Dhivehi",
32656,
"inc-ins",
"Thaa, Diak",
ancestors = "elu-prk",
translit = {
Thaa = "dv-translit",
Diak = "Diak-translit",
},
override_translit = true,
}
m = {
"Dzongkha",
33081,
"sit-tib",
"Tibt",
ancestors = "xct",
translit = "Tibt-translit",
override_translit = true,
display_text = s,
entry_name = s,
sort_key = "Tibt-sortkey",
}
m = {
"Ewe",
30005,
"alv-gbe",
"Latn",
sort_key = {
remove_diacritics = c.tilde,
from = {"ɖ", "dz", "ɛ", "ƒ", "gb", "ɣ", "kp", "ny", "ŋ", "ɔ", "ts", "ʋ"},
to = {"d" .. p, "d" .. p, "e" .. p, "f" .. p, "g" .. p, "g" .. p, "k" .. p, "n" .. p, "n" .. p, "o" .. p, "t" .. p, "v" .. p}
},
}
m = {
"Greek",
9129,
"grk",
"Grek, Brai",
ancestors = "grc",
translit = {Grek = "el-translit"},
override_translit = true,
entry_name = {Grek = {remove_diacritics = c.caron .. c.diaerbelow .. c.brevebelow}},
sort_key = {Grek = s},
standardChars = "ͺ;΄-ώϜϝ" .. c.punc,
}
m = {
"English",
1860,
"gmw",
"Latn, Brai, Shaw, Dsrt", -- entries in Shaw or Dsrt might require prior discussion
wikimedia_codes = "en, simple",
ancestors = "enm",
sort_key = {
Latn = {
remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.macron .. c.diaer .. c.ringabove .. c.caron .. c.cedilla .. "'",
from = {"æ", "œ"},
to = {"ae", "oe"}
},
},
standardChars = "A-Za-z0-9" .. c.punc .. u(0x2800) .. "-" .. u(0x28FF),
}
m = {
"Esperanto",
143,
"art",
"Latn",
sort_key = {
remove_diacritics = c.grave .. c.acute,
from = {"ĉ", "ĝ", "ĥ", "ĵ", "ŝ", "ŭ"},
to = {"c" .. p, "g" .. p, "h" .. p, "j" .. p, "s" .. p, "u" .. p}
},
standardChars = "A-PRSTUVZa-prstuvzĉĈĝĜĵĴŝŜŭŬ0-9" .. c.punc,
}
m = {
"Spanish",
1321,
"roa-ibe",
"Latn, Brai",
ancestors = "osp",
sort_key = {
Latn = {
remove_diacritics = c.acute .. c.diaer .. c.cedilla,
from = {"ñ"},
to = {"n" .. p}
},
},
standardChars = "A-VXYZa-vxyz0-9ÁáÉéÍíÓóÚúÑñ¿¡" .. c.punc,
}
m = {
"Estonian",
9072,
"urj-fin",
"Latn",
sort_key = {
from = {
"š", "ž", "õ", "ä", "ö", "ü", -- 2 chars
"z" -- 1 char
},
to = {
"s" .. p, "s" .. p, "w" .. p, "w" .. p, "w" .. p, "w" .. p,
"s" .. p
}
},
}
m = {
"Basque",
8752,
"euq",
"Latn",
sort_key = {
from = {"ç", "ñ"},
to = {"c" .. p, "n" .. p}
},
}
m = {
"Persian",
9168,
"ira-swi",
"fa-Arab",
ancestors = "pal", -- "ira-mid"
entry_name = {remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.kashida .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun},
}
m = {
"Fula",
33454,
"alv-fwo",
"Latn, Adlm",
}
m = {
"Finnish",
1412,
"urj-fin",
"Latn",
entry_name = {remove_diacritics = "ˣ"}, -- used to indicate gemination of the next consonant
sort_key = {
remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.dacute .. c.caron .. c.cedilla .. "':",
from = {"ø", "æ", "œ", "ß"},
to = {"o", "ae", "oe", "ss"}
},
}
m = {
"Fijian",
33295,
"poz-occ",
"Latn",
}
m = {
"Faroese",
25258,
"gmq",
"Latn",
ancestors = "non",
sort_key = {
from = {"á", "ð", "í", "ó", "ú", "ý", "æ", "ø"},
to = {"a" .. p, "d" .. p, "i" .. p, "o" .. p, "u" .. p, "y" .. p, "z" .. p, "z" .. p}
},
}
m = {
"French",
150,
"roa-oil",
"Latn, Brai",
ancestors = "frm",
sort_key = {Latn = s},
standardChars = "A-Za-z0-9ÀÂÇÉÈÊËÎÏÔŒÛÙÜàâçéèêëîïôœûùü«»" .. c.punc,
}
m = {
"West Frisian",
27175,
"gmw-fri",
"Latn",
ancestors = "ofs",
sort_key = {
remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer,
from = {"y"},
to = {"i"}
},
standardChars = "A-PR-WYZa-pr-wyz0-9Ææâäàéêëèïìôöòúûüùỳ" .. c.punc,
}
m = {
"Irish",
9142,
"cel-gae",
"Latn, Latg",
ancestors = "mga",
sort_key = {
remove_diacritics = c.acute,
from = {"ḃ", "ċ", "ḋ", "ḟ", "ġ", "ṁ", "ṗ", "ṡ", "ṫ"},
to = {"bh", "ch", "dh", "fh", "gh", "mh", "ph", "sh", "th"}
},
standardChars = "A-IL-PR-Ua-il-pr-u0-9ÁáÉéÍíÓóÚú" .. c.punc,
}
m = {
"Scottish Gaelic",
9314,
"cel-gae",
"Latn, Latg",
ancestors = "mga",
sort_key = {remove_diacritics = c.grave .. c.acute},
standardChars = "A-IL-PR-Ua-il-pr-u0-9ÀàÈèÌìÒòÙù" .. c.punc,
}
m = {
"Galician",
9307,
"roa-ibe",
"Latn",
ancestors = "roa-opt",
sort_key = {
remove_diacritics = c.acute,
from = {"ñ"},
to = {"n" .. p}
},
}
m = {
"Guaraní",
35876,
"tup-gua",
"Latn",
}
m = {
"Gujarati",
5137,
"inc-wes",
"Gujr",
ancestors = "inc-mgu",
translit = "gu-translit",
}
m = {
"Manx",
12175,
"cel-gae",
"Latn",
ancestors = "mga",
sort_key = {remove_diacritics = c.cedilla .. "-"},
standardChars = "A-WYÇa-wyç0-9" .. c.punc,
}
m = {
"Hausa",
56475,
"cdc-wst",
"Latn, Arab",
entry_name = {Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.macron}},
sort_key = {
Latn = {
from = {"ɓ", "b'", "ɗ", "d'", "ƙ", "k'", "sh", "ƴ", "'y"},
to = {"b" .. p, "b" .. p, "d" .. p, "d" .. p, "k" .. p, "k" .. p, "s" .. p, "y" .. p, "y" .. p}
},
},
}
m = {
"Hebrew",
9288,
"sem-can",
"Hebr, Phnx, Brai",
entry_name = {Hebr = {remove_diacritics = u(0x0591) .. "-" .. u(0x05BD) .. u(0x05BF) .. "-" .. u(0x05C5) .. u(0x05C7) .. c.CGJ}},
}
m = {
"Hindi",
1568,
"inc-hnd",
"Deva, Kthi, Newa",
ancestors = "inc-ohi",
translit = {Deva = "hi-translit"},
standardChars = "ँंअ-ऊएऐओ-घच-झट-नप-रलवशसहा-ूेैो-◌्।-॰ड़ढ़" .. c.punc,
}
m = {
"Hiri Motu",
33617,
"crp",
"Latn",
ancestors = "meu",
}
m = {
"Haitian Creole",
33491,
"crp",
"Latn",
ancestors = "fr",
sort_key = {
from = {
"oun", -- 3 chars
"an", "ch", "è", "en", "ng", "ò", "on", "ou", "ui" -- 2 chars
},
to = {
"o" .. p,
"a" .. p, "c" .. p, "e" .. p, "e" .. p, "n" .. p, "o" .. p, "o" .. p, "o" .. p, "u" .. p
}
},
}
m = {
"Hungarian",
9067,
"urj-ugr",
"Latn, Hung",
ancestors = "ohu",
sort_key = {
Latn = {
from = {
"dzs", -- 3 chars
"á", "cs", "dz", "é", "gy", "í", "ly", "ny", "ó", "ö", "ő", "sz", "ty", "ú", "ü", "ű", "zs", -- 2 chars
},
to = {
"d" .. p,
"a" .. p, "c" .. p, "d" .. p, "e" .. p, "g" .. p, "i" .. p, "l" .. p, "n" .. p, "o" .. p, "o" .. p, "o" .. p, "s" .. p, "t" .. p, "u" .. p, "u" .. p, "u" .. p, "z" .. p,
}
},
},
}
m = {
"Armenian",
8785,
"hyx",
"Armn, Brai",
ancestors = "axm",
translit = {Armn = "Armn-translit"},
override_translit = true,
entry_name = {
Armn = {
remove_diacritics = "՛՜՞՟",
from = {"եւ", "<sup>յ</sup>", "<sup>ի</sup>", "<sup>է</sup>"},
to = {"և", "յ", "ի", "է"}
},
},
sort_key = {
Armn = {
from = {
"ու", "եւ", -- 2 chars
"և" -- 1 char
},
to = {
"ւ", "եվ",
"եվ"
}
},
},
}
m = {
"Herero",
33315,
"bnt-swb",
"Latn",
}
m = {
"Interlingua",
35934,
"art",
"Latn",
}
m = {
"Indonesian",
9240,
"poz-mly",
"Latn",
ancestors = "ms",
}
m = {
"Interlingue",
35850,
"art",
"Latn",
type = "appendix-constructed",
entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ},
}
m = {
"Igbo",
33578,
"alv-igb",
"Latn",
entry_name = {remove_diacritics = c.grave .. c.acute .. c.macron},
sort_key = {
from = {"gb", "gh", "gw", "ị", "kp", "kw", "ṅ", "nw", "ny", "ọ", "sh", "ụ"},
to = {"g" .. p, "g" .. p, "g" .. p, "i" .. p, "k" .. p, "k" .. p, "n" .. p, "n" .. p, "n" .. p, "o" .. p, "s" .. p, "u" .. p}
},
}
m = {
"Sichuan Yi",
34235,
"tbq-lol",
"Yiii",
translit = "ii-translit",
}
m = {
"Inupiaq",
27183,
"esx-inu",
"Latn",
sort_key = {
from = {
"ch", "ġ", "dj", "ḷ", "ł̣", "ñ", "ng", "r̂", "sr", "zr", -- 2 chars
"ł", "ŋ", "ʼ" -- 1 char
},
to = {
"c" .. p, "g" .. p, "h" .. p, "l" .. p, "l" .. p, "n" .. p, "n" .. p, "r" .. p, "s" .. p, "z" .. p,
"l" .. p, "n" .. p, "z" .. p
}
},
}
m = {
"Ido",
35224,
"art",
"Latn",
}
m = {
"Icelandic",
294,
"gmq",
"Latn",
ancestors = "non",
sort_key = {
from = {"á", "ð", "é", "í", "ó", "ú", "ý", "þ", "æ", "ö"},
to = {"a" .. p, "d" .. p, "e" .. p, "i" .. p, "o" .. p, "u" .. p, "y" .. p, "z" .. p, "z" .. p, "z" .. p}
},
}
m = {
"Italian",
652,
"roa-itd",
"Latn",
sort_key = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. c.ringabove},
standardChars = "A-IL-VZa-il-vz0-9ÀàÈèÌìÒòÙùÉé" .. c.punc,
}
m = {
"Inuktitut",
29921,
"esx-inu",
"Cans, Latn",
translit = {Cans = "cr-translit"},
override_translit = true,
}
-- Japanese. Positional values: name, a numeric identifier, a grouping code and the script codes.
m = {
"Japanese",
5287,
"jpx",
"Jpan, Brai",
ancestors = "ojp",
-- NOTE(review): the "--]." line below and the "--]=]" after sort_key look like the remains of
-- a long comment that once wrapped sort_key. As written both are ordinary line comments, so
-- sort_key is live. Confirm whether it was meant to be commented out.
--].
sort_key = {
Jpan = {
-- Pairs each `from` string with the `to` string at the same position. The pairs that survive
-- map a katakana letter to the matching hiragana (e.g. "ナ" -> "な").
-- NOTE(review): most `from` entries, and one `to` entry, are empty strings; the original
-- characters appear to have been stripped and cannot be recovered from this file alone.
from = {"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "ナ", "ニ", "", "ネ", "ノ", "", "", "", "", "", "マ", "ミ", "", "メ", "モ", "", "", "", "", "", "", "", "", "", "", "", "", "ン", "", "𛀀"},
to = {"あ", "い", "う", "え", "お", "か", "き", "く", "け", "こ", "さ", "し", "す", "せ", "そ", "た", "ち", "つ", "て", "と", "な", "に", "ぬ", "ね", "の", "は", "ひ", "ふ", "へ", "ほ", "ま", "み", "む", "め", "も", "や", "ゆ", "よ", "ら", "り", "る", "れ", "ろ", "わ", "ゐ", "ゑ", "を", "ん", "", "え"}
},
}, --]=]
}
m = {
"Javanese",
33549,
"poz-sus",
"Latn, Java",
ancestors = "kaw",
translit = {Java = "jv-translit"},
link_tr = true,
sort_key = {
Latn = {
from = {"dh", "é", "è", "ng", "ny", "th"},
to = {"d" .. p, "e" .. p, "e" .. p, "n" .. p, "n" .. p, "t" .. p}
},
},
}
m = {
"Georgian",
8108,
"ccs-gzn",
"Geor, Geok, Hebr", -- Hebr is used to write Judeo-Georgian
ancestors = "oge",
translit = {
Geor = "Geor-translit",
Geok = "Geok-translit",
},
override_translit = true,
entry_name = {remove_diacritics = c.circ},
}
m = {
"Kongo",
33702,
"bnt-kng",
"Latn",
}
m = {
"Kikuyu",
33587,
"bnt-kka",
"Latn",
}
m = {
"Kwanyama",
1405077,
"bnt-ova",
"Latn",
}
m = {
"Kazakh",
9252,
"trk-kno",
"Cyrl, Latn, kk-Arab",
translit = {
Cyrl = {
from = {
"Ё", "ё", "Й", "й", -- 2 chars
"А", "а", "Ә", "ә", "Б", "б", "В", "в", "Г", "г", "Ғ", "ғ", "Д", "д", "Е", "е", "Ж", "ж", "З", "з", "И", "и", "К", "к", "Қ", "қ", "Л", "л", "М", "м", "Н", "н", "Ң", "ң", "О", "о", "Ө", "ө", "П", "п", "Р", "р", "С", "с", "Т", "т", "У", "у", "Ұ", "ұ", "Ү", "ү", "Ф", "ф", "Х", "х", "Һ", "һ", "Ц", "ц", "Ч", "ч", "Ш", "ш", "Щ", "щ", "Ъ", "ъ", "Ы", "ы", "І", "і", "Ь", "ь", "Э", "э", "Ю", "ю", "Я", "я", -- 1 char
},
to = {
"İo", "io", "İ", "i",
"A", "a", "Ä", "ä", "B", "b", "V", "v", "G", "g", "Ğ", "ğ", "D", "d", "E", "e", "J", "j", "Z", "z", "İ", "i", "K", "k", "Q", "q", "L", "l", "M", "m", "N", "n", "Ñ", "ñ", "O", "o", "Ö", "ö", "P", "p", "R", "r", "S", "s", "T", "t", "U", "u", "Ū", "ū", "Ü", "ü", "F", "f", "X", "x", "H", "h", "Ts", "ts", "Tş", "tş", "Ş", "ş", "Ştş", "ştş", "", "", "Y", "y", "I", "ı", "", "", "E", "e", "İu", "iu", "İa", "ia",
}
}
},
override_translit = true,
sort_key = {
Cyrl = {
from = {"ә", "ғ", "ё", "қ", "ң", "ө", "ұ", "ү", "һ", "і"},
to = {"а" .. p, "г" .. p, "е" .. p, "к" .. p, "н" .. p, "о" .. p, "у" .. p, "у" .. p, "х" .. p, "ы" .. p}
},
},
}
m = {
"Greenlandic",
25355,
"esx-inu",
"Latn",
sort_key = {
from = {"æ", "ø", "å"},
to = {"z" .. p, "z" .. p, "z" .. p}
}
}
m = {
"Khmer",
9205,
"mkh-kmr",
"Khmr",
ancestors = "xhm",
translit = "km-translit",
}
m = {
"Kannada",
33673,
"dra",
"Knda",
ancestors = "dra-mkn",
translit = "kn-translit",
}
m = {
"Korean",
9176,
"qfa-kor",
"Kore, Brai",
ancestors = "ko-ear",
translit = {Kore = "ko-translit"},
entry_name = {Kore = s},
}
m = {
"Kanuri",
36094,
"ssa-sah",
"Latn, Arab",
entry_name = {Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.breve}}, -- the sortkey and entry_name are only for standard Kanuri; when dialectal entries get added, someone will have to work out how the dialects should be represented orthographically
sort_key = {
Latn = {
from = {"ǝ", "ny", "ɍ", "sh"},
to = {"e" .. p, "n" .. p, "r" .. p, "s" .. p}
},
},
}
-- Kashmiri. Positional values: name, a numeric identifier, a grouping code and the script codes.
-- NOTE(review): the table is assigned straight to `m`, replacing whatever it held; a
-- per-language key appears to have been lost. Confirm against the upstream page.
m = {
	"Kashmiri",
	33552,
	"inc-dar",
	"ks-Arab, Deva, Shrd, Latn",
	translit = {
		-- This field had lost its key (` = "ks-Arab-translit"`), which does not parse.
		-- "ks-Arab" is the script in the list above that the module name refers to; the
		-- hyphen means it must be written as a bracketed string key.
		["ks-Arab"] = "ks-Arab-translit",
		Deva = "ks-Deva-translit",
		Shrd = "Shrd-translit",
	},
}
-- "kv" IS TREATED AS "koi", "kpv", SEE WT:LT
m = {
"Cornish",
25289,
"cel-bry",
"Latn",
ancestors = "cnx",
sort_key = {
from = {"ch"},
to = {"c" .. p}
},
}
m = {
"Kyrgyz",
9255,
"trk-kip",
"Cyrl, Latn, Arab",
translit = {Cyrl = "ky-translit"},
override_translit = true,
sort_key = {
Cyrl = {
from = {"ё", "ң", "ө", "ү"},
to = {"е" .. p, "н" .. p, "о" .. p, "у" .. p}
},
},
}
m = {
"Latin",
397,
"itc",
"Latn, Ital",
entry_name = {Latn = {remove_diacritics = c.macron .. c.breve .. c.diaer .. c.dinvbreve}},
sort_key = {
Latn = {
from = {"æ", "œ"},
to = {"ae", "oe"}
},
},
standardChars = "A-Za-z0-9Æ挜Ā-ăĒ-ĕĪ-ĭŌ-ŏŪ-ŭȲȳ" .. c.macron .. c.breve .. c.punc,
}
m = {
"Luxembourgish",
9051,
"gmw",
"Latn",
ancestors = "gmw-cfr",
sort_key = {
from = {"ä", "ë", "é"},
to = {"z" .. p, "z" .. p, "z" .. p}
},
}
m = {
"Luganda",
33368,
"bnt-nyg",
"Latn",
entry_name = {remove_diacritics = c.acute .. c.circ},
sort_key = {
from = {"ŋ"},
to = {"n" .. p}
},
}
m = {
"Limburgish",
102172,
"gmw",
"Latn",
ancestors = "dum",
}
m = {
"Lingala",
36217,
"bnt-bmo",
"Latn",
sort_key = {
remove_diacritics = c.acute .. c.circ .. c.caron,
from = {"ɛ", "gb", "mb", "mp", "nd", "ng", "nk", "ns", "nt", "ny", "nz", "ɔ"},
to = {"e" .. p, "g" .. p, "m" .. p, "m" .. p, "n" .. p, "n" .. p, "n" .. p, "n" .. p, "n" .. p, "n" .. p, "n" .. p, "o" .. p}
},
}
m = {
"Lao",
9211,
"tai-swe",
"Laoo",
translit = "lo-translit",
sort_key = "Laoo-sortkey",
standardChars = "0-9ກຂຄງຈຊຍດຕຖທນບປຜຝພຟມຢຣລວສຫອຮຯ-ໝ" .. c.punc,
}
m = {
"Lithuanian",
9083,
"bat",
"Latn",
ancestors = "olt",
entry_name = {remove_diacritics = c.grave .. c.acute .. c.tilde},
sort_key = {
from = {"ą", "č", "ę", "ė", "į", "y", "š", "ų", "ū", "ž"},
to = {"a" .. p, "c" .. p, "e" .. p, "e" .. p, "i" .. p, "i" .. p, "s" .. p, "u" .. p, "u" .. p, "z" .. p}
},
}
m = {
"Luba-Katanga",
36157,
"bnt-lub",
"Latn",
}
-- Latvian. Positional values: name, a numeric identifier, a grouping code and the script code.
m = {
"Latvian",
9078,
"bat",
"Latn",
entry_name = {
-- This attempts to convert vowels with tone marks to vowels either with or without macrons. Specifically, there should be no macrons if the vowel is part of a diphthong (including resonant diphthongs such pìrksts -> pirksts not #pīrksts). What we do is first convert the vowel + tone mark to a vowel + tilde in a decomposed fashion, then remove the tilde in diphthongs, then convert the remaining vowel + tilde sequences to macroned vowels, then delete any other tilde. We leave already-macroned vowels alone: Both e.g. ar and ār occur before consonants. FIXME: This still might not be sufficient.
-- `from` has seven patterns and `to` six replacements; the last pattern (a bare tilde) has no
-- counterpart, matching the "delete any other tilde" step described above.
-- NOTE(review): every capture is the empty `()` and the second pattern is the empty string,
-- although the description above talks about vowels and tone marks. Presumably the bracketed
-- character classes were stripped; confirm against the upstream page before relying on this.
from = {"()" .. c.cedilla, "", "()" .. c.tilde .."?()" .. c.tilde .. "?()", "()" .. c.tilde .."?()" .. c.tilde .."?$", "()" .. c.tilde .. "?()" .. c.tilde .. "?", "()" .. c.tilde, c.tilde},
to = {"%1", c.tilde, "%1%2%3", "%1%2", "%1%2", "%1" .. c.macron}
},
-- NOTE(review): `p` is concatenated as-is; presumably an index (p[n]) was lost.
sort_key = {
from = {"ā", "č", "ē", "ģ", "ī", "ķ", "ļ", "ņ", "š", "ū", "ž"},
to = {"a" .. p, "c" .. p, "e" .. p, "g" .. p, "i" .. p, "k" .. p, "l" .. p, "n" .. p, "s" .. p, "u" .. p, "z" .. p}
},
}
m = {
"Malagasy",
7930,
"poz-bre",
"Latn",
}
m = {
"Marshallese",
36280,
"poz-mic",
"Latn",
sort_key = {
from = {"ā", "ļ", "m̧", "ņ", "n̄", "o̧", "ō", "ū"},
to = {"a" .. p, "l" .. p, "m" .. p, "n" .. p, "n" .. p, "o" .. p, "o" .. p, "u" .. p}
},
}
m = {
"Maori",
36451,
"poz-pep",
"Latn",
sort_key = {
remove_diacritics = c.macron,
from = {"ng", "wh"},
to = {"z" .. p, "z" .. p}
},
}
m = {
"Macedonian",
9296,
"zls",
"Cyrl",
translit = "mk-translit",
entry_name = {
remove_diacritics = c.acute,
remove_exceptions = {"Ѓ", "ѓ", "Ќ", "ќ"}
},
sort_key = {
remove_diacritics = c.grave,
from = {"ѓ", "ѕ", "ј", "љ", "њ", "ќ", "џ"},
to = {"д" .. p, "з" .. p, "и" .. p, "л" .. p, "н" .. p, "т" .. p, "ч" .. p}
},
}
m = {
"Malayalam",
36236,
"dra",
"Mlym",
translit = "ml-translit",
override_translit = true,
}
m = {
"Mongolian",
9246,
"xgn",
"Cyrl, Mong, Latn",
ancestors = "cmg",
translit = {
Cyrl = "mn-translit",
Mong = "Mong-translit",
},
override_translit = true,
display_text = {Mong = s},
entry_name = {
Cyrl = {remove_diacritics = c.grave .. c.acute},
Mong = s,
},
sort_key = {
Cyrl = {
remove_diacritics = c.grave,
from = {"ё", "ө", "ү"},
to = {"е" .. p, "о" .. p, "у" .. p}
},
},
standardChars = "A-PR-UX-Za-pr-ux-zÇÖÜçöüŞşƟƵƶɵЁА-ШЫ-шы-яёҮүӨө—᠊-᠙ᠠ-ᡂ" .. c.punc,
}
-- "mo" IS TREATED AS "ro", SEE WT:LT
m = {
"Marathi",
1571,
"inc-sou",
"Deva, Modi",
ancestors = "omr",
translit = {
Deva = "mr-translit",
Modi = "mr-Modi-translit",
},
entry_name = {
Deva = {
from = {"च़", "ज़", "झ़"},
to = {"च", "ज", "झ"}
},
},
}
m = {
"Malay",
9237,
"poz-mly",
"Latn, ms-Arab",
}
-- Maltese. Positional values: name, a numeric identifier, a grouping code and the script code.
m = {
"Maltese",
9166,
"sem-arb",
"Latn",
ancestors = "sqr",
-- Multi-pass sort key: dotted letters are first parked on placeholder values, the plain
-- letters are then tagged, and the placeholders are finally turned into their output form.
-- NOTE(review): the second-step pattern is the empty capture "()", while its comment names
-- "c", "g" and "z"; presumably a character class was stripped. Likewise every placeholder is
-- the same bare `p`, so the passes cannot tell "ċ", "ġ" and "ż" apart; presumably distinct
-- indices (p[n]) were lost. Confirm against the upstream page.
sort_key = {
from = {
"ċ", "ġ", "ż", -- Convert into PUA so that decomposed form does not get caught by the next step.
"()", -- Ensure "c" comes after "ċ", "g" comes after "ġ" and "z" comes after "ż".
"g" .. p .. "ħ", -- "għ" after initial conversion of "g".
p, p, "ħ", "ie", p -- Convert "ċ", "ġ", "ħ", "ie", "ż" into final output.
},
to = {
p, p, p,
"%1" .. p,
"g" .. p,
"c", "g", "h" .. p, "i" .. p, "z"
}
},
}
m = {
"Burmese",
9228,
"tbq-brm",
"Mymr",
ancestors = "obr",
translit = "my-translit",
override_translit = true,
sort_key = {
from = {"ျ", "ြ", "ွ", "ှ", "ဿ"},
to = {"္ယ", "္ရ", "္ဝ", "္ဟ", "သ္သ"}
},
}
m = {
"Nauruan",
13307,
"poz-mic",
"Latn",
}
m = {
"Norwegian Bokmål",
25167,
"gmq",
"Latn",
wikimedia_codes = "no",
ancestors = "gmq-mno",
sort_key = s,
}
m = {
"Northern Ndebele",
35613,
"bnt-ngu",
"Latn",
entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
}
m = {
"Nepali",
33823,
"inc-pah",
"Deva, Newa",
translit = {Deva = "ne-translit"},
}
m = {
"Ndonga",
33900,
"bnt-ova",
"Latn",
}
m = {
"Dutch",
7411,
"gmw",
"Latn",
ancestors = "dum",
sort_key = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.diaer .. c.ringabove .. c.cedilla .. "'"},
standardChars = "A-Za-z0-9" .. c.punc .. u(0x2800) .. "-" .. u(0x28FF),
}
m = {
"Norwegian Nynorsk",
25164,
"gmq",
"Latn",
ancestors = "gmq-mno",
sort_key = s,
}
m = {
"Norwegian",
9043,
"gmq",
"Latn",
ancestors = "gmq-mno",
sort_key = s,
}
m = {
"Southern Ndebele",
36785,
"bnt-ngu",
"Latn",
entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
}
m = {
"Navajo",
13310,
"apa",
"Latn",
sort_key = {
remove_diacritics = c.acute .. c.ogonek,
from = {
"chʼ", "tłʼ", "tsʼ", -- 3 chars
"ch", "dl", "dz", "gh", "hw", "kʼ", "kw", "sh", "tł", "ts", "zh", -- 2 chars
"ł", "ʼ" -- 1 char
},
to = {
"c" .. p, "t" .. p, "t" .. p,
"c" .. p, "d" .. p, "d" .. p, "g" .. p, "h" .. p, "k" .. p, "k" .. p, "s" .. p, "t" .. p, "t" .. p, "z" .. p,
"l" .. p, "z" .. p
}
},
}
m = {
"Chichewa",
33273,
"bnt-nys",
"Latn",
entry_name = {remove_diacritics = c.acute .. c.circ},
sort_key = {
from = {"ng'"},
to = {"ng"}
},
}
m = {
"Occitan",
14185,
"roa-ocr",
"Latn, Hebr",
ancestors = "pro",
sort_key = {
Latn = {
remove_diacritics = c.grave .. c.acute .. c.diaer .. c.cedilla,
from = {"()·h"},
to = {"%1h"}
},
},
}
m = {
"Ojibwe",
33875,
"alg",
"Cans, Latn",
sort_key = {
Latn = {
from = {"aa", "ʼ", "ii", "oo", "sh", "zh"},
to = {"a" .. p, "h" .. p, "i" .. p, "o" .. p, "s" .. p, "z" .. p}
},
},
}
m = {
"Oromo",
33864,
"cus-eas",
"Latn, Ethi",
}
m = {
"Oriya",
33810,
"inc-eas",
"Orya",
ancestors = "inc-mor",
translit = "or-translit",
}
m = {
"Ossetian",
33968,
"xsc",
"Cyrl, Geor, Latn",
ancestors = "oos",
translit = {
Cyrl = "os-translit",
Geor = "Geor-translit",
},
override_translit = true,
entry_name = {Cyrl = {remove_diacritics = c.grave .. c.acute}},
sort_key = {
Cyrl = {
from = {"ӕ", "гъ", "дж", "дз", "ё", "къ", "пъ", "тъ", "хъ", "цъ", "чъ"},
to = {"а" .. p, "г" .. p, "д" .. p, "д" .. p, "е" .. p, "к" .. p, "п" .. p, "т" .. p, "х" .. p, "ц" .. p, "ч" .. p}
},
},
}
-- Punjabi, listed with the scripts Guru and pa-Arab.
-- NOTE(review): the table is assigned straight to `m`, replacing whatever it held; a
-- per-language key appears to have been lost. Confirm against the upstream page.
m = {
	"Punjabi",
	58635,
	"inc-pan",
	"Guru, pa-Arab",
	ancestors = "inc-opa",
	translit = {
		Guru = "Guru-translit",
		-- This field had lost its key (` = "pa-Arab-translit"`), which does not parse.
		-- "pa-Arab" is the other script in the list above; the hyphen means it must be
		-- written as a bracketed string key.
		["pa-Arab"] = "pa-Arab-translit",
	},
	entry_name = {
		-- Same missing key: the settings below use Arabic-script marks and letters, so they
		-- belong to "pa-Arab".
		["pa-Arab"] = {
			remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.nunghunna,
			from = {"ݨ", "ࣇ"},
			to = {"ن", "ل"}
		},
	},
}
m = {
"Pali",
36727,
"inc-mid",
"Latn, Brah, Deva, Beng, Sinh, Mymr, Thai, Lana, Laoo, Khmr, Cakm",
ancestors = "sa",
translit = {
Brah = "Brah-translit",
Deva = "sa-translit",
Beng = "pi-translit",
Sinh = "si-translit",
Mymr = "pi-translit",
Thai = "pi-translit",
Lana = "pi-translit",
Laoo = "pi-translit",
Khmr = "pi-translit",
Cakm = "Cakm-translit",
},
entry_name = {
Thai = {
from = {"ึ", u(0xF700), u(0xF70F)}, -- FIXME: Not clear what's going on with the PUA characters here.
to = {"ิํ", "ฐ", "ญ"}
},
remove_diacritics = c.VS01
},
sort_key = { -- FIXME: This needs to be converted into the current standardized format.
from = {"ā", "ī", "ū", "ḍ", "ḷ", "m", "ṅ", "ñ", "ṇ", "ṭ", "()()", "()()", "ᩔ", "ᩕ", "ᩖ", "ᩘ", "()ᩛ", "()ᩛ", "ᩤ", u(0xFE00), u(0x200D)},
to = {"a~", "i~", "u~", "d~", "l~", "m~", "n~", "n~~", "n~~~", "t~", "%2%1", "%2%1", "ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", "ᩣ"}
},
}
m = {
"Polish",
809,
"zlw-lch",
"Latn",
ancestors = "zlw-opl",
sort_key = {
from = {"ą", "ć", "ę", "ł", "ń", "ó", "ś", "ź", "ż"},
to = {"a" .. p, "c" .. p, "e" .. p, "l" .. p, "n" .. p, "o" .. p, "s" .. p, "z" .. p, "z" .. p}
},
}
m = {
"Pashto",
58680,
"ira-pat",
"ps-Arab",
}
m = {
"Portuguese",
5146,
"roa-ibe",
"Latn, Brai",
ancestors = "roa-opt",
sort_key = {Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.diaer .. c.cedilla}},
}
m = {
"Quechua",
5218,
"qwe",
"Latn",
}
m = {
"Romansch",
13199,
"roa-rhe",
"Latn",
}
m = {
"Romanian",
7913,
"roa-eas",
"Latn, Cyrl",
sort_key = {
Latn = {
from = {"ă", "â", "î", "ș", "ț"},
to = {"a" .. p, "a" .. p, "i" .. p, "s" .. p, "t" .. p}
},
Cyrl = {
from = {"ӂ"},
to = {"ж" .. p}
},
},
}
m = {
"Russian",
7737,
"zle",
"Cyrl, Brai",
translit = {Cyrl = "ru-translit"},
entry_name = {
Cyrl = {
remove_diacritics = c.grave .. c.acute .. c.diaer,
remove_exceptions = {"Ё", "ё"}
},
},
sort_key = {
Cyrl = {
from = {"ё", "і", "ѣ", "ѳ", "ѵ"},
to = {"е" .. p, "и" .. p, "ь" .. p, "я" .. p, "я" .. p}
},
},
standardChars = "ЁА-яё0-9—" .. c.punc,
}
m = {
"Rwanda-Rundi",
3217514,
"bnt-glb",
"Latn",
entry_name = {remove_diacritics = c.acute .. c.circ .. c.macron .. c.caron},
}
-- Sanskrit, listed with a long set of scripts.
-- NOTE(review): the table is assigned straight to `m`, replacing whatever it held; a
-- per-language key appears to have been lost. Confirm against the upstream page.
m = {
	"Sanskrit",
	11059,
	"inc-old",
	"Deva, Bali, as-Beng, Beng, Bhks, Brah, Gran, Gujr, Guru, Hani, Java, Kawi, Khar, Khmr, Knda, Lana, Laoo, Marc, Mlym, Modi, Mong, mnc-Mong, xwo-Mong, Mymr, Nand, Newa, Orya, Phag, Ranj, Saur, Shrd, Sidd, Sinh, Taml, Tang, Telu, Thai, Tibt, Tirh",
	-- Three fields here had lost their keys (` = "..."`), which does not parse. They are
	-- restored from the hyphenated script codes in the list above, which are the only ones
	-- that need the bracketed string form: "as-Beng" sits next to Beng and shares its module,
	-- and "mnc-Mong" / "xwo-Mong" follow Mong in the same order as in the script list.
	-- NOTE(review): the pairing is inferred from position and naming; confirm against the
	-- upstream page.
	translit = {
		Deva = "sa-translit",
		["as-Beng"] = "sa-Beng-translit",
		Beng = "sa-Beng-translit",
		Brah = "Brah-translit",
		Gujr = "sa-Gujr-translit",
		Java = "sa-Java-translit",
		Khmr = "pi-translit",
		Knda = "sa-Knda-translit",
		Lana = "pi-translit",
		Laoo = "pi-translit",
		Modi = "sa-Modi-translit",
		Mong = "Mong-translit",
		["mnc-Mong"] = "mnc-translit",
		["xwo-Mong"] = "xal-translit",
		Mymr = "pi-translit",
		Orya = "sa-Orya-translit",
		Sinh = "si-translit",
		Thai = "pi-translit",
		Tibt = "Tibt-translit",
	},
	-- NOTE(review): empty here; its contents may have been lost along with the other
	-- bracketed text.
	display_text = {
	},
	entry_name = {
		-- NOTE(review): `s` is assigned as-is; presumably a string index (s["..."]) was lost.
		Mong = s,
		Tibt = s,
		Thai = {
			from = {"ึ", u(0xF700), u(0xF70F)}, -- FIXME: Not clear what's going on with the PUA characters here.
			to = {"ิํ", "ฐ", "ญ"}
		},
		remove_diacritics = c.VS01
	},
	sort_key = {
		Tibt = "Tibt-sortkey",
		-- The rules below are a positional (unkeyed) element of sort_key. `from` has two more
		-- items than `to`: the trailing u(0xFE00) and u(0x200D) have no replacement listed.
		-- NOTE(review): several patterns are empty captures "()()"; presumably character
		-- classes were stripped.
		{ -- FIXME: This needs to be converted into the current standardized format.
			from = {"ā", "ī", "ū", "ḍ", "ḷ", "ḹ", "m", "ṅ", "ñ", "ṇ", "ṛ", "ṝ", "ś", "ṣ", "ṭ", "()()", "()()", "ᩔ", "ᩕ", "ᩖ", "ᩘ", "()ᩛ", "()ᩛ", "ᩤ", u(0xFE00), u(0x200D)},
			to = {"a~", "i~", "u~", "d~", "l~", "l~~", "m~", "n~", "n~~", "n~~~", "r~", "r~~", "s~", "s~~", "t~", "%2%1", "%2%1", "ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", "ᩣ"},
		},
	},
}
m = {
"Sardinian",
33976,
"roa",
"Latn",
}
-- Sindhi, listed with the scripts sd-Arab, Deva, Sind and Khoj.
-- NOTE(review): the table is assigned straight to `m`, replacing whatever it held; a
-- per-language key appears to have been lost. Confirm against the upstream page.
m = {
	"Sindhi",
	33997,
	"inc-snd",
	"sd-Arab, Deva, Sind, Khoj",
	translit = {Sind = "Sind-translit"},
	entry_name = {
		-- This field had lost its key (` = {...}`), which does not parse. The settings use
		-- Arabic-script marks and letters, so they belong to "sd-Arab", the only Arabic
		-- script in the list above; the hyphen means it must be a bracketed string key.
		["sd-Arab"] = {
			remove_diacritics = c.kashida .. c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.superalef,
			from = {"ٱ"},
			to = {"ا"}
		},
	},
	ancestors = "inc-vra",
}
m = {
"Northern Sami",
33947,
"smi",
"Latn",
display_text = {
from = {"'"},
to = {"ˈ"}
},
entry_name = {remove_diacritics = c.macron .. c.dotbelow .. "'ˈ"},
sort_key = {
from = {"á", "č", "đ", "ŋ", "š", "ŧ", "ž"},
to = {"a" .. p, "c" .. p, "d" .. p, "n" .. p, "s" .. p, "t" .. p, "z" .. p}
},
standardChars = "A-PR-VZa-pr-vz0-9ÁáČčĐđŊŋŠšŦŧŽž" .. c.punc,
}
m = {
"Sango",
33954,
"crp",
"Latn",
ancestors = "ngb",
}
-- Serbo-Croatian. Stored under its own language code so the entry is added to
-- `m` instead of replacing the whole table.
m["sh"] = {
	"Serbo-Croatian",
	9301,
	"zls",
	"Latn, Cyrl, Glag",
	wikimedia_codes = "sh, bs, hr, sr",
	-- Latin and Cyrillic list the same diacritics for removal; each script
	-- names the accented letters that are exempt.
	entry_name = {
		Latn = {
			remove_diacritics = c.grave .. c.acute .. c.tilde .. c.macron .. c.dgrave .. c.invbreve,
			remove_exceptions = {"Ć", "ć", "Ś", "ś", "Ź", "ź"}
		},
		Cyrl = {
			remove_diacritics = c.grave .. c.acute .. c.tilde .. c.macron .. c.dgrave .. c.invbreve,
			remove_exceptions = {"З́", "з́", "С́", "с́"}
		},
	},
	-- NOTE(review): some letters share one replacement (e.g. "č" and "ć" both
	-- become "c" .. p). Confirm whether `p` was meant to be indexed so that
	-- they receive distinct sort keys.
	sort_key = {
		Latn = {
			from = {"č", "ć", "dž", "đ", "lj", "nj", "š", "ś", "ž", "ź"},
			to = {"c" .. p, "c" .. p, "d" .. p, "d" .. p, "l" .. p, "n" .. p, "s" .. p, "s" .. p, "z" .. p, "z" .. p}
		},
		Cyrl = {
			from = {"ђ", "з́", "ј", "љ", "њ", "с́", "ћ", "џ"},
			to = {"д" .. p, "з" .. p, "и" .. p, "л" .. p, "н" .. p, "с" .. p, "т" .. p, "ч" .. p}
		},
	},
}
-- Sinhalese. Stored under its own language code (the same prefix as its
-- "si-translit" module) so the entry is added to `m` instead of replacing it.
m["si"] = {
	"Sinhalese",
	13267,
	"inc-ins",
	"Sinh",
	ancestors = "elu-prk",
	translit = "si-translit",
	override_translit = true,
}
-- Slovak. Stored under its own language code so the entry is added to `m`
-- instead of replacing the whole table.
m["sk"] = {
	"Slovak",
	9058,
	"zlw",
	"Latn",
	sort_key = {remove_diacritics = c.acute .. c.circ .. c.diaer},
}
-- Slovene. Stored under its own language code so the entry is added to `m`
-- instead of replacing the whole table.
m["sl"] = {
	"Slovene",
	9063,
	"zls",
	"Latn",
	-- Entry names list tonal/accent marks for removal and map schwa and
	-- barred L to plain E/e and L/l.
	entry_name = {
		remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.dgrave .. c.invbreve .. c.dotbelow,
		from = {"Ə", "ə", "Ł", "ł"},
		to = {"E", "e", "L", "l"}
	},
	-- Sort keys list a further set of marks and replace the caron letters
	-- with their base letter followed by `p`.
	sort_key = {
		remove_diacritics = c.tilde .. c.dotabove .. c.diaer .. c.ringabove .. c.ringbelow .. c.ogonek,
		from = {"č", "š", "ž"},
		to = {"c" .. p, "s" .. p, "z" .. p}
	},
}
-- Samoan. Stored under its own language code so the entry is added to `m`
-- instead of replacing the whole table.
m["sm"] = {
	"Samoan",
	34011,
	"poz-pnp",
	"Latn",
}
-- Shona. Stored under its own language code so the entry is added to `m`
-- instead of replacing the whole table.
m["sn"] = {
	"Shona",
	34004,
	"bnt-sho",
	"Latn",
	entry_name = {remove_diacritics = c.acute},
}
-- Somali. Stored under its own language code so the entry is added to `m`
-- instead of replacing the whole table.
m["so"] = {
	"Somali",
	13275,
	"cus-eas",
	"Latn, Arab, Osma",
	-- Only the Latin script has an entry-name rule.
	entry_name = {Latn = {remove_diacritics = c.grave .. c.acute .. c.circ}},
}
-- Albanian. Stored under its own language code so the entry is added to `m`
-- instead of replacing the whole table.
m["sq"] = {
	"Albanian",
	8748,
	"sqj",
	"Latn, Grek, Elba",
	entry_name = {remove_diacritics = c.acute},
	sort_key = {remove_diacritics = c.circ .. c.tilde .. c.diaer .. c.cedilla},
}
-- Swazi. Stored under its own language code so the entry is added to `m`
-- instead of replacing the whole table.
m["ss"] = {
	"Swazi",
	34014,
	"bnt-ngu",
	"Latn",
	entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
}
-- Sotho. Stored under its own language code so the entry is added to `m`
-- instead of replacing the whole table.
m["st"] = {
	"Sotho",
	34340,
	"bnt-sts",
	"Latn",
	entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
}
-- Sundanese. Stored under its own language code (the same prefix as its
-- "su-translit" module) so the entry is added to `m` instead of replacing it.
m["su"] = {
	"Sundanese",
	34002,
	"poz-msa",
	"Latn, Sund",
	ancestors = "osn",
	-- Transliteration is declared for the Sund script only.
	translit = {Sund = "su-translit"},
}
-- Swedish. Stored under its own language code so the entry is added to `m`
-- instead of replacing the whole table.
m["sv"] = {
	"Swedish",
	9027,
	"gmq",
	"Latn",
	ancestors = "gmq-osw",
}
-- Swahili. Stored under its own language code so the entry is added to `m`
-- instead of replacing the whole table.
m["sw"] = {
	"Swahili",
	7838,
	"bnt-swh",
	"Latn, Arab",
	sort_key = {
		Latn = {
			-- The trigraph "ng'" gets its own key: "ng" followed by `p`.
			from = {"ng'"},
			to = {"ng" .. p}
		},
	},
}
-- Tamil. Stored under its own language code (the same prefix as its
-- "ta-translit" module) so the entry is added to `m` instead of replacing it.
m["ta"] = {
	"Tamil",
	5885,
	"dra",
	"Taml",
	ancestors = "oty",
	translit = "ta-translit",
	override_translit = true,
}
-- Telugu. Stored under its own language code (the same prefix as its
-- "te-translit" module) so the entry is added to `m` instead of replacing it.
m["te"] = {
	"Telugu",
	8097,
	"dra",
	"Telu",
	translit = "te-translit",
	override_translit = true,
}
-- Tajik. Stored under its own language code (the same prefix as its
-- "tg-translit" module) so the entry is added to `m` instead of replacing it.
m["tg"] = {
	"Tajik",
	9260,
	"ira-swi",
	"Cyrl, fa-Arab, Latn",
	ancestors = "fa-cls",
	translit = {Cyrl = "tg-translit"},
	override_translit = true,
	entry_name = {remove_diacritics = c.grave .. c.acute},
	sort_key = {
		Cyrl = {
			-- Each extra Cyrillic letter is replaced by a basic letter
			-- followed by `p`.
			from = {"ғ", "ё", "ӣ", "қ", "ӯ", "ҳ", "ҷ"},
			to = {"г" .. p, "е" .. p, "и" .. p, "к" .. p, "у" .. p, "х" .. p, "ч" .. p}
		},
	},
}
-- Thai. Stored under its own language code (the same prefix as its
-- "th-translit" module) so the entry is added to `m` instead of replacing it.
m["th"] = {
	"Thai",
	9217,
	"tai-swe",
	"Thai, Brai",
	-- Transliteration and sort-key modules are declared for the Thai script only.
	translit = {Thai = "th-translit"},
	sort_key = {Thai = "Thai-sortkey"},
}
-- Tigrinya. Stored under its own language code so the entry is added to `m`
-- instead of replacing the whole table.
m["ti"] = {
	"Tigrinya",
	34124,
	"sem-eth",
	"Ethi",
	translit = "Ethi-translit",
}
-- Turkmen. Stored under its own language code so the entry is added to `m`
-- instead of replacing the whole table.
m["tk"] = {
	"Turkmen",
	9267,
	"trk-ogz",
	"Latn, Cyrl, Arab",
	entry_name = {remove_diacritics = c.macron},
	-- Sort keys are declared per script; each special letter becomes a basic
	-- letter followed by `p`.
	sort_key = {
		Latn = {
			from = {"ç", "ä", "ž", "ň", "ö", "ş", "ü", "ý"},
			to = {"c" .. p, "e" .. p, "j" .. p, "n" .. p, "o" .. p, "s" .. p, "u" .. p, "y" .. p}
		},
		Cyrl = {
			from = {"ё", "җ", "ң", "ө", "ү", "ә"},
			to = {"е" .. p, "ж" .. p, "н" .. p, "о" .. p, "у" .. p, "э" .. p}
		},
	},
}
-- Tagalog. Stored under its own language code (the same prefix as its
-- "tl-translit" module) so the entry is added to `m` instead of replacing it.
m["tl"] = {
	"Tagalog",
	34057,
	"phi",
	"Latn, Tglg",
	translit = {Tglg = "tl-translit"},
	override_translit = true,
	-- Only the Latin script has an entry-name rule.
	entry_name = {Latn = {remove_diacritics = c.grave .. c.acute .. c.circ}},
}
-- Tswana. Stored under its own language code so the entry is added to `m`
-- instead of replacing the whole table.
m["tn"] = {
	"Tswana",
	34137,
	"bnt-sts",
	"Latn",
}
-- Tongan. Stored under its own language code so the entry is added to `m`
-- instead of replacing the whole table.
m["to"] = {
	"Tongan",
	34094,
	"poz-pol",
	"Latn",
	-- Acute is listed for entry names, macron for sort keys.
	entry_name = {remove_diacritics = c.acute},
	sort_key = {remove_diacritics = c.macron},
}
-- Turkish. Stored under its own language code so the entry is added to `m`
-- instead of replacing the whole table.
m["tr"] = {
	"Turkish",
	256,
	"trk-ogz",
	"Latn",
	ancestors = "ota",
	dotted_dotless_i = true,
	sort_key = {
		-- Dotted "i" is rewritten first (to "i" .. p); dotless "ı" then
		-- becomes plain "i", so the dotless letter gets the shorter key.
		from = {
			"i", -- Ensure "i" comes after "ı".
			"ç", "ğ", "ı", "ö", "ş", "ü"
		},
		to = {
			"i" .. p,
			"c" .. p, "g" .. p, "i", "o" .. p, "s" .. p, "u" .. p
		}
	},
}
-- Tsonga. Stored under its own language code so the entry is added to `m`
-- instead of replacing the whole table.
m["ts"] = {
	"Tsonga",
	34327,
	"bnt-tsr",
	"Latn",
}
-- Tatar. Stored under its own language code (the same prefix as its
-- "tt-translit" module) so the entry is added to `m` instead of replacing it.
m["tt"] = {
	"Tatar",
	25285,
	"trk-kbu",
	"Cyrl, Latn, tt-Arab",
	translit = {Cyrl = "tt-translit"},
	override_translit = true,
	dotted_dotless_i = true,
	-- NOTE(review): in the Latin list several letters share one replacement
	-- ("ä"/"ə", "ñ"/"ŋ", "ö"/"ɵ"). Confirm whether `p` was meant to be
	-- indexed so that they receive distinct sort keys.
	sort_key = {
		Cyrl = {
			from = {"ә", "ў", "ғ", "ё", "җ", "қ", "ң", "ө", "ү", "һ"},
			to = {"а" .. p, "в" .. p, "г" .. p, "е" .. p, "ж" .. p, "к" .. p, "н" .. p, "о" .. p, "у" .. p, "х" .. p}
		},
		Latn = {
			-- Dotted "i" is rewritten first (to "i" .. p); dotless "ı" then
			-- becomes plain "i".
			from = {
				"i", -- Ensure "i" comes after "ı".
				"ä", "ə", "ç", "ğ", "ı", "ñ", "ŋ", "ö", "ɵ", "ş", "ü"
			},
			to = {
				"i" .. p,
				"a" .. p, "a" .. p, "c" .. p, "g" .. p, "i", "n" .. p, "n" .. p, "o" .. p, "o" .. p, "s" .. p, "u" .. p
			}
		},
	},
}
-- "tw" IS TREATED AS "ak", SEE WT:LT
-- Tahitian. Stored under its own language code so the entry is added to `m`
-- instead of replacing the whole table.
m["ty"] = {
	"Tahitian",
	34128,
	"poz-pep",
	"Latn",
}
-- Uyghur. Stored under its own language code (the same prefix as its
-- "ug-translit" module) so the entry is added to `m` instead of replacing it.
m["ug"] = {
	"Uyghur",
	13263,
	"trk-kar",
	"ug-Arab, Latn, Cyrl",
	ancestors = "chg",
	translit = {
		-- The script code contains a hyphen, so it must be written as a
		-- bracketed string key. "ug-Arab" is the only script listed above
		-- besides Latn and Cyrl.
		["ug-Arab"] = "ug-translit",
		Cyrl = "ug-translit",
	},
	override_translit = true,
}
-- Ukrainian. Stored under its own language code (the same prefix as its
-- "uk-translit" module) so the entry is added to `m` instead of replacing it.
m["uk"] = {
	"Ukrainian",
	8798,
	"zle",
	"Cyrl",
	ancestors = "zle-ort",
	translit = "uk-translit",
	entry_name = {remove_diacritics = c.grave .. c.acute},
	-- NOTE(review): "ї" and "і" both become "и" .. p. Confirm whether `p`
	-- was meant to be indexed so that they receive distinct sort keys.
	sort_key = {
		from = {
			"ї", -- 2 chars
			"ґ", "є", "і" -- 1 char
		},
		to = {
			"и" .. p,
			"г" .. p, "е" .. p, "и" .. p
		}
	},
	standardChars = "ЄІЇА-ЩЬЮ-щьюяєії" .. c.punc,
}
-- Urdu. Stored under its own language code so the entry is added to `m`
-- instead of replacing the whole table.
m["ur"] = {
	"Urdu",
	1617,
	"inc-hnd",
	"ur-Arab",
	ancestors = "inc-ohi",
	entry_name = {remove_diacritics = c.kashida .. c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.nunghunna},
	-- put Judeo-Urdu (Hebrew-script Urdu) under the category header
	-- U+FB21 HEBREW LETTER WIDE ALEF so that it sorts after Arabic script titles
	sort_key = {
		-- A frontier pattern must be followed by a set ("%f[set]"); a bare
		-- "%f" is a malformed pattern. The set covers the Hebrew block
		-- (U+0591-U+05F4) and the Hebrew presentation forms (U+FB1D-U+FB4F),
		-- so the pattern matches the empty string at the start of a title
		-- whose first character is Hebrew, and the replacement is placed in
		-- front of it.
		-- NOTE(review): the exact code-point ranges are reconstructed from
		-- the comment above; confirm against the intended character set.
		from = {"^%f[" .. u(0x0591) .. "-" .. u(0x05F4) .. u(0xFB1D) .. "-" .. u(0xFB4F) .. "]"},
		to = {u(0xFB21)},
	},
}
-- Uzbek. Stored under its own language code (the same prefix as its
-- "uz-translit" module) so the entry is added to `m` instead of replacing it.
m["uz"] = {
	"Uzbek",
	9264,
	"trk-kar",
	"Latn, Cyrl, fa-Arab",
	ancestors = "chg",
	translit = {Cyrl = "uz-translit"},
	-- NOTE(review): all five Latin sequences become "z" .. p and four
	-- Cyrillic letters become "я" .. p. Confirm whether `p` was meant to be
	-- indexed so that they receive distinct sort keys.
	sort_key = {
		Latn = {
			from = {"oʻ", "gʻ", "sh", "ch", "ng"},
			to = {"z" .. p, "z" .. p, "z" .. p, "z" .. p, "z" .. p}
		},
		Cyrl = {
			from = {"ё", "ў", "қ", "ғ", "ҳ"},
			to = {"е" .. p, "я" .. p, "я" .. p, "я" .. p, "я" .. p}
		},
	},
}
-- Venda. Stored under its own language code so the entry is added to `m`
-- instead of replacing the whole table.
m["ve"] = {
	"Venda",
	32704,
	"bnt-bso",
	"Latn",
}
-- Vietnamese. Stored under its own language code (the same prefix as its
-- "vi-sortkey" module) so the entry is added to `m` instead of replacing it.
m["vi"] = {
	"Vietnamese",
	9199,
	"mkh-vie",
	"Latn, Hani",
	ancestors = "mkh-mvi",
	-- Each script names its own sort-key module.
	sort_key = {
		Latn = "vi-sortkey",
		Hani = "Hani-sortkey",
	},
}
-- Volapük. Stored under its own language code so the entry is added to `m`
-- instead of replacing the whole table.
m["vo"] = {
	"Volapük",
	36986,
	"art",
	"Latn",
}
-- Walloon. Stored under its own language code so the entry is added to `m`
-- instead of replacing the whole table.
m["wa"] = {
	"Walloon",
	34219,
	"roa-oil",
	"Latn",
	ancestors = "fro",
	-- NOTE(review): `s` is the whole m_langdata.shared value; confirm whether
	-- a specific member of it was intended here.
	sort_key = s,
}
-- Wolof. Stored under its own language code so the entry is added to `m`
-- instead of replacing the whole table.
m["wo"] = {
	"Wolof",
	34257,
	"alv-fwo",
	"Latn, Arab",
}
-- Xhosa. Stored under its own language code so the entry is added to `m`
-- instead of replacing the whole table.
m["xh"] = {
	"Xhosa",
	13218,
	"bnt-ngu",
	"Latn",
	entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
}
-- Yiddish. Stored under its own language code (the same prefix as its
-- "yi-translit" module) so the entry is added to `m` instead of replacing it.
m["yi"] = {
	"Yiddish",
	8641,
	"gmw",
	"Hebr",
	ancestors = "gmh",
	translit = "yi-translit",
	-- Pointed letters and ligatures are replaced by unpointed base letters.
	-- NOTE(review): the first and third pairs appear to map a letter to
	-- itself; confirm whether a set of pointed variants was intended there.
	sort_key = {
		from = {"א", "בּ", "ו", "יִ", "ײַ", "פֿ"},
		to = {"א", "ב", "ו", "י", "יי", "פ"}
	},
}
-- Yoruba. Stored under its own language code so the entry is added to `m`
-- instead of replacing the whole table.
m["yo"] = {
	"Yoruba",
	34311,
	"alv-yor",
	"Latn, Arab",
	-- Entry-name and sort-key rules are declared for the Latin script only.
	entry_name = {Latn = {remove_diacritics = c.grave .. c.acute .. c.macron}},
	sort_key = {
		Latn = {
			-- Dotted letters and the digraph "gb" become a base letter
			-- followed by `p`.
			from = {"ẹ", "gb", "ọ", "ṣ"},
			to = {"e" .. p, "g" .. p, "o" .. p, "s" .. p}
		},
	},
}
-- Zhuang. Stored under its own language code (the same prefix as its
-- "za-sortkey" module) so the entry is added to `m` instead of replacing it.
m["za"] = {
	"Zhuang",
	13216,
	"tai",
	"Latn, Hani",
	-- Each script names its own sort-key module.
	sort_key = {
		Latn = "za-sortkey",
		Hani = "Hani-sortkey",
	},
}
-- Chinese. Stored under its own language code (the same prefix as its
-- "zh-generateforms" module) so the entry is added to `m` instead of
-- replacing it.
m["zh"] = {
	"Chinese",
	7850,
	"zhx",
	"Hani, Hant, Hans, Latn, Bopo, Nshu, Brai",
	ancestors = "ltc",
	generate_forms = "zh-generateforms",
	-- A sort-key module is declared for the Hani script only.
	sort_key = {Hani = "Hani-sortkey"},
}
-- Zulu. Stored under its own language code so the entry is added to `m`
-- instead of replacing the whole table.
m["zu"] = {
	"Zulu",
	10179,
	"bnt-ngu",
	"Latn",
	entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
}

-- Hand the table back to whoever loads this module.
return m