A modult a Modul:0IPA/data/doc lapon tudod dokumentálni
local data = {}
--[=[
This should list the language codes of all languages that have a pronunciation
page in the appendix of the form ''Appendix:LANG pronunciation'', e.g.
]. For these languages, the text "key" next to
the generated pronunciation links to such pages; for other languages, it links
to the "LANG phonology" page in Wikipedia (which may or may not exist).
] is responsible for this linking; see format_IPA_full().
]=]
local langs_with_infopages = {
"acw",
"ady",
"ang",
"arc",
"ba",
"bg",
"bo",
"ca",
"cho",
"cmn",
"cs",
"cv",
"cy",
"da",
"de",
"dsb",
"dz",
"egl",
"egy",
"el",
"en",
"enm",
"eo",
"es",
"fa",
"fi",
"fo",
"fr",
"fy",
"ga",
"gd",
"gem-pro",
"got",
"he",
"hi",
"hu",
"hy",
"ii",
"is",
"it",
"iu",
"ja",
"jbo",
"ka",
"kls",
"ko",
"kw",
"la",
"lb",
"liv",
"lt",
"lv",
"mdf",
"mfe",
"mic",
"mk",
"ms",
"mt",
"mul",
"my",
"nan",
"nci",
"nl",
"nn",
"no",
"nov",
"nv",
"pjt",
"pl",
"ps",
"pt",
"ro",
"ru",
"scn",
"sco",
"sga",
"sh",
"sl",
"sq",
"sv",
"sw",
"syc",
"th",
"tl",
"tr",
"tyv",
"ug",
"uk",
"vi",
"vo",
"wlm",
"yi",
"yue",
}
data.langs_with_infopages = {}
-- Convert the list in `langs_with_infopages` to a set.
for _, langcode in ipairs(langs_with_infopages) do
data.langs_with_infopages = true
end
--[=[
This should list the diphthongs of a language (in the form of Lua patterns),
provided they do *NOT* contain semivowel symbols such as /j w ɰ ɥ/ or vowels
with nonsyllabic diacritics such as /i̯ u̯/. For example, list /au/ or /aʊ/,
but do not list /aw/ or /au̯/. The data in this table is used to count the
number of syllables in a word. ] automatically knows how
to correctly handle semivowel symbols and nonsyllabic diacritics.
Any language listed here will automatically have categories of the form
"LANG #-syllable words" generated. In addition, any language listed below under
`langs_to_generate_syllable_count_categories` will also have such categories
generated.
NOTE: There are some additional languages that have these categories.
For example:
* Thai words have these categories added by ].
]=]
data.diphthongs = {
= { -- ]
"u",
},
= {
"a",
"ɔ",
},
= { -- from ] mostly, but /ʌɪ/ is from the OED
"",
"e",
"ʉ",
"ʊ",
"æo",
"ə", -- /iə/ is a diphthong in NZE, but a disyllabic sequence in GA.
-- /ɪə/ is both a disyllabic sequence and a diphthong in old-fashioned RP.
"ə", -- May be a disyllabic sequence in some or all dialects?
},
= {
"i",
"u",
"ː",
},
= { -- ]
"", -- Wikipedia is oddly specific about the second element: ei and ai, but øɪ.
"u",
},
= {
"i",
"u",
},
= {
"i",
"u",
"e",
},
= {
"ə",
"ɪ",
"ʊ",
},
}
--[=[
This should list any languages for which categories of the form
"LANG #-syllable words", e.g. ], should be
generated. Do not list languages here if they have an entry above under
`data.diphthongs`; such languages are automatically added to this list.
]=]
local langs_to_generate_syllable_count_categories = {
"ar", -- Arabic has diphthongs, but they are transcribed
-- with semivowel symbols.
"ary", -- Moroccan Arabic has diphthongs, but they are transcribed
-- with semivowel symbols.
"ca", -- Catalan has diphthongs, but they are generally transcribed using
-- /w/ and /j/, so do not need to be listed (see ].
"es", -- Spanish has diphthongs, but they are transcribed with i̯ etc.
"fi", -- Finnish has diphthongs, but they are now automatically transcribed with
-- the nonsyllabic diacritic
"fr", -- French has diphthongs, but they are transcribed
-- with semivowel symbols: ].
"ka",
"kmr",
"ku",
"mk",
"mt", -- Maltese has diphthongs, but they are transcribed
-- with semivowel symbols.
"pl", -- No diphthongs, properly speaking; sequences of a vowel and /w/ or /j/ though.
"pt", -- Portuguese has diphthongs, but they are transcribed with i̯ or /j/ etc.
"ru", -- No diphthongs, properly speaking; sequences of a vowel and /j/ though.
"sk", -- Slovak has rising diphthongs, /i̯e, i̯a, i̯u, u̯o/, which are probably always spelled with the nonsyllabic diacritic, so do not need to be listed.
"sl", -- No diphthongs, properly speaking; sequences of a vowel, /j/ and /w/ though
"sq", -- ] doesn't mention anything about diphthongs.
"ug", -- No diphthongs.
}
data.langs_to_generate_syllable_count_categories = {}
-- Convert the list in `langs_to_generate_syllable_count_categories` to a set.
for _, langcode in ipairs(langs_to_generate_syllable_count_categories) do
data.langs_to_generate_syllable_count_categories = true
end
-- Also add languages listed under `data.diphthongs`.
for langcode, _ in pairs(data.diphthongs) do
data.langs_to_generate_syllable_count_categories = true
end
-- Languages to use the phonetic not phonemic notation to compute syllables counts.
local langs_to_use_phonetic_notation = {
"es",
"mk",
"ru",
}
data.langs_to_use_phonetic_notation = {}
-- Convert the list in `langs_to_use_phonetic_notation` to a set.
for _, langcode in ipairs(langs_to_use_phonetic_notation) do
data.langs_to_use_phonetic_notation = true
end
-- Non-standard or obsolete IPA symbols.
data.nonstandard = {
--[[ The following symbols consist of more than one character,
so we can't put them in the line below. ]]
"ɑ̢", "d̂", "t̂", "n̂", "l̂", "k̫", "ɔ̗", "ɔ̖",
"[ʦʣʧʤʨʥ?ƍσƺƪƻƾƞᶀᶁᶂᶃᶄᶅᶆᶈᶇᶉᶊᶋƫᶌᶍᶎʓʆλƛłščžǰǧǯẋᵻᵿⱻʚ"
.. "ɷωıȹȸ∅ØƥƭƈƙʠʇʗʖʞɩɼȣяɿʅʮʯᴀᴀᴇGRŒQȡȶȵȴKPT]"
}
-- See valid IPA characters at ].
data.phonemes = {}
data.phonemes = {
"m", "n", "ŋ",
"p", "t", "ʈ", "k",
"pʰ", "tʰ", "ʈʰ", "kʰ",
"t͡s", "t͡ɕ",
"t͡sʰ", "t͡ɕʰ",
"w", "s", "z", "ɬ", "l", "r", "ɕ", "ʑ", "j", "h",
"ɑ", "e", "i", "o", "u",
"ɑː", "eː", "ɛː", "iː", "oː", "øː", "uː", "yː",
"ɑ˥", "e˥", "i˥", "o˥", "u˥",
"ɑː˥", "eː˥", "ɛː˥", "iː˥", "oː˥", "øː˥", "uː˥", "yː˥",
"m˥", "n˥", "ŋ˥", "p˥", "k˥", "k̚˥", "w˥", "l˥", "r˥", "ɕ˥", "j˥", ")˥",
"ɑ˩", "e˩", "i˩", "o˩", "u˩",
"ɑː˩", "eː˩", "ɛː˩", "iː˩", "oː˩", "øː˩", "uː˩", "yː˩",
"m˩", "n˩", "ŋ˩", "p˩", "k˩", "k̚˩", "w˩", "l˩", "r˩", "ɕ˩", "j˩", ")˩",
".", ",", "-",
}
data.phonemes = {
"a", "b", "d", "d͡ʒ", "d͡z", "e", "f", "h", "i", "j", "k",
"l", "m", "n", "o", "p", "r", "s", "t", "t͡s", "t͡ʃ",
"u", "v", "w", "x", "z", "ɡ", "ʃ", "ʒ",
"ˈ", ".", " ", "-",
}
data.phonemes = {
"ɑ", "b", "ɡ", "d", "ɛ", "z", "ɛ", "ə", "tʰ", "ʒ", "i", "l", "χ", "t͡s",
"k", "h", "d͡z", "ʁ", "t͡ʃ", "m", "j", "n", "ʃ", "ɔ", "t͡ʃʰ", "p", "d͡ʒ",
"r", "s", "v", "t", "ɾ", "t͡sʰ", "v", "pʰ", "kʰ", "ɔ", "f", "ŋɡ", "ŋk",
"ŋχ", "u", "ˈ", "ˌ", ".", " ", "ː",
}
data.phonemes = {
"m", "n", "ŋ",
"p", "b", "t", "d", "k", "ɡ",
"f", "v", "s", "z", "ʃ", "ʒ", "x", "ɣ", "ɦ",
"ʋ", "l", "j", "r",
"ɪ", "ʏ", "ɛ", "ə", "ɔ", "ɑ",
"i", "iː", "y", "yː", "u", "uː", "eː", "øː", "oː", "ɛː", "œː", "ɔː", "aː",
"ɛi̯", "œy̯", "ɔi̯", "ɑu̯", "ɑi̯",
"iu̯", "yu̯", "ui̯", "eːu̯", "oːi̯", "aːi̯",
"ˈ", "ˌ", ".", " ", "-",
}
data.phonemes = {
"m", "n",
"p", "t", "k", "ʔ",
"b", "d", "ɡ",
"t͡s", "t͡ʃ",
"d͡z", "d͡ʒ",
"f", "s", "ʃ", "ħ",
"v", "z", "ʒ", "ɣ",
"l", "j", "w",
"r",
"ɪ", "ɛ", "ɔ", "a", "u",
"ɛˤ", "ɔˤ", "aˤ", "əˤ",
"ɛˤː", "ɔˤː", "aˤː", "əˤː", "ɪˤː",
"iː", "ɪː", "ɛː", "ɔː", "aː", "uː",
"ˈ", "ˌ", ".", " ", "‿", "-"
}
return data