Module:IPA/data

Hello, you have come here looking for the meaning of the word Module:IPA/data. In DICTIOUS you will not only get to know all the dictionary meanings for the word Module:IPA/data, but we will also tell you about its etymology, its characteristics and you will know how to say Module:IPA/data in singular and plural. Everything you need to know about the word Module:IPA/data you have here. The definition of the word Module:IPA/data will help you to be more precise and correct when speaking or writing your texts. Knowing the definition of Module:IPA/data, as well as those of other words, enriches your vocabulary and provides you with more and better linguistic resources.

This module provides a list of the languages that have pronunciation info pages, a list of nonstandard IPA symbols, and some lists of phonemes that are used in the phonemic transcriptions of particular languages. These lists are used by Module:IPA to link to pronunciation info pages, to categorize entries that use nonstandard IPA symbols using the nonstandard symbol as sortkey, and to color incorrect phonemic symbols red and categorize entries that use the incorrect phonemic symbols in language-specific categories.

For data associated with individual IPA symbols, see Module:IPA/data/symbols. For the table used by the function that converts X-SAMPA to IPA, see Module:IPA/data/X-SAMPA.

See also: Module:IPA/testcases

nonstandard

m.nonstandard

local list_to_set = require("Module:table").listToSet

-- Table that collects every list defined in this module; it is filled in below
-- and handed back by the `return data` statement at the end of the file.
local data = {}

--[=[
A list of representation types (e.g. /foo/ for phonemic and [foo] for phonetic),
given as a table. The key is the opening symbol, the first value the
representation type, and the second value the closing symbol.
]=]
data.representation_types = {
	["/"] = {"phonemic", "/"},
	["["] = {"phonetic", "]"},
	["⫽"] = {"morphophonemic", "⫽"},
	["⟨"] = {"orthographic", "⟩"},
	-- Rhymes have no closing symbol, hence the empty string.
	-- NOTE(review): the opening key "-" is assumed (rhymes are conventionally
	-- written with a leading hyphen) -- confirm against the consumer of this table.
	["-"] = {"rhyme", ""},
}

--[=[
Set of language codes for every language that has a pronunciation page in the
appendix, named ''Appendix:LANG pronunciation'' (for example,
[[Appendix:Russian pronunciation]]). For a language in this set, the "key" text
shown next to a generated pronunciation links to that appendix page; for any
other language it links to the "LANG phonology" page in Wikipedia, which may or
may not exist. [[Module:IPA]] performs the linking; see format_IPA_full().

Codes are kept in alphabetical order, one row per initial letter.
]=]
data.langs_with_infopages = list_to_set({
	"acw", "ady", "ang", "arc",
	"ba", "bg", "bo",
	"ca", "cho", "cmn", "cs", "cv", "cy",
	"da", "de", "dsb", "dz",
	"egl", "egy", "el", "en", "enm", "eo", "es",
	"fa", "fi", "fo", "fr", "fy",
	"ga", "gd", "got",
	"he", "hi", "hrx", "hu", "hy",
	"id", "ii", "is", "it", "iu",
	"ja", "jbo",
	"ka", "kls", "ko", "kw",
	"la", "lb", "liv", "lt", "lv",
	"mdf", "mfe", "mic", "mk", "mns-nor", "ms", "mt", "mul", "my",
	"nan", "nci", "nl", "nn", "no", "nov", "nv",
	"pjt", "pl", "ps", "pt",
	"ro", "ru",
	"scn", "sco", "sga", "sh", "sl", "sq", "sv", "sw", "syc", "szl",
	"tg", "th", "tl", "tpw", "tr", "tyv",
	"ug", "uk",
	"vi", "vo",
	"wlm",
	"yi", "yue",
	"zlw-mas",
})

--[=[
This should list the diphthongs of a language (in the form of Lua patterns),
provided they do *NOT* contain semivowel symbols such as /j w ɰ ɥ/ or vowels
with nonsyllabic diacritics such as /i̯ u̯/. For example, list /au/ or /aʊ/,
but do not list /aw/ or /au̯/. The data in this table is used to count the
number of syllables in a word. The syllable-counting code automatically knows
how to correctly handle semivowel symbols and nonsyllabic diacritics.

Any language listed here will automatically have categories of the form
"LANG #-syllable words" generated. In addition, any language listed below under
`langs_to_generate_syllable_count_categories` will also have such categories
generated.

NOTE: There are some additional languages that have these categories.
For example:
* Thai words have these categories added by a separate module (the link to it
  is missing from this copy of the text).
]=]
-- NOTE(review): as written, every entry below lacks its key (each starts with a
-- bare ` = {`), which is not valid Lua inside a table constructor, and several
-- patterns look truncated (for example the empty string ""). Presumably the
-- `["langcode"]` keys and the bracketed character classes of the patterns were
-- lost when this text was extracted -- TODO restore them from the upstream module.
data.diphthongs = {
	 = { -- NOTE(review): language-code key and source reference are missing here
		"u",
		},
	 = {
		"a",
		"ɔ",
		},
	 = { -- mostly from a source whose link is missing here, but /ʌɪ/ is from the OED
		"",
		"e",
		"ʉ",
		"ʊ",
		"æo",
		"ə",	-- /iə/ is a diphthong in NZE, but a disyllabic sequence in GA.
						-- /ɪə/ is both a disyllabic sequence and a diphthong in old-fashioned RP.
		"ə",	-- May be a disyllabic sequence in some or all dialects?
		},
	 = {
		"i",
		"u",
		"ː",
		},
	 = {
		"aɪ̯",
		"aʊ̯",
		"oɪ̯",
		"eʊ̯",
	},
	 = {			-- NOTE(review): source reference is missing here
		"",	-- Wikipedia is oddly specific about the second element: ei and ai, but øɪ.
		"u",
		},
	 = {
		"i",
		"u",
		},
	 = {
		"i",
		"u",
		"e",
		},
	 = {
		"ə",
		"ɪ",
		"ʊ",
	},
}

--[=[
Languages for which categories of the form "LANG #-syllable words"
(for example, "Russian 3-syllable words") should be generated.

Do not add a language here if it already has an entry under `data.diphthongs`;
those languages are added to this set automatically.
]=]
local langs_to_generate_syllable_count_categories = list_to_set({
	-- Arabic has diphthongs, but they are transcribed with semivowel symbols.
	"ar",
	-- Moroccan Arabic has diphthongs, but they are transcribed with semivowel
	-- symbols.
	"ary",
	-- Catalan has diphthongs, but they are generally transcribed using /w/ and
	-- /j/, so do not need to be listed.
	"ca",
	-- Spanish has diphthongs, but they are transcribed with i̯ etc.
	"es",
	-- Finnish has diphthongs, but they are now automatically transcribed with
	-- the nonsyllabic diacritic.
	"fi",
	-- French has diphthongs, but they are transcribed with semivowel symbols.
	"fr",
	"hnn",
	-- Indonesian has diphthongs, but they are transcribed with i̯ or /j/ etc.
	"id",
	"ka",
	"kne",
	"kmr",
	"ku",
	"mk",
	-- Malay has diphthongs, but they are transcribed with i̯ or /j/ etc.
	"ms",
	-- Maltese has diphthongs, but they are transcribed with semivowel symbols.
	"mt",
	-- No diphthongs, properly speaking; sequences of a vowel and /w/ or /j/
	-- though.
	"pl",
	-- Portuguese has diphthongs, but they are transcribed with i̯ or /j/ etc.
	"pt",
	-- No diphthongs, properly speaking; sequences of a vowel and /j/ though.
	"ru",
	-- Slovak has rising diphthongs, /i̯e, i̯a, i̯u, u̯o/, which are probably
	-- always spelled with the nonsyllabic diacritic, so do not need to be listed.
	"sk",
	-- No diphthongs, properly speaking; sequences of a vowel, /j/ and /w/ though.
	"sl",
	-- The phonology reference consulted for this language (its link is missing
	-- from this copy) doesn't mention anything about diphthongs.
	"sq",
	-- Tagalog has diphthongs, but they are transcribed with i̯ or /j/ etc.
	"tl",
	"tsg",
	-- No diphthongs.
	"ug",
})
-- Also add languages listed under `data.diphthongs`.
-- Each language code becomes a key of the set. Assigning `true` to the variable
-- itself (instead of indexing it by `langcode`) would throw away the set built
-- above and export a bare boolean.
for langcode in pairs(data.diphthongs) do
	langs_to_generate_syllable_count_categories[langcode] = true
end
-- Export the merged set alongside the other lists in this module.
data.langs_to_generate_syllable_count_categories = langs_to_generate_syllable_count_categories

-- Languages whose syllable counts are computed from the phonetic notation
-- rather than the phonemic one.
data.langs_to_use_phonetic_notation = list_to_set({ "es", "id", "mk", "ms", "ru" })

-- Non-standard or obsolete IPA symbols.
-- NOTE(review): the last entry is an empty string. Given the comment inside the
-- table ("the line below"), it presumably held a bracketed set of
-- single-character symbols that was lost from this copy -- TODO confirm and
-- restore from the upstream module.
data.nonstandard = {
	--[[	The following symbols consist of more than one character,
			so we can't put them in the line below.		]]
	"ɑ̢", "ɔ̗", "ɔ̖",
	""
}

-- See valid IPA characters at [[Module:IPA/data/symbols]].

-- Lists of the symbols accepted in phonemic transcriptions of particular
-- languages.
data.phonemes = {}
-- NOTE(review): this statement assigns to `data.phonemes` itself, replacing the
-- empty table created on the previous line. Presumably it was meant to be
-- `data.phonemes["<langcode>"] = {...}` and the index was lost -- TODO confirm
-- the language code against the upstream module.
-- The list holds plain consonants and vowels, long vowels (ː), the same
-- segments followed by the tone letters ˥ and ˩, and separator characters.
data.phonemes = {
	"m", "n", "ŋ",
	"p", "t", "ʈ", "k",
	"pʰ", "tʰ", "ʈʰ", "kʰ",
	"t͡s", "t͡ɕ",
	"t͡sʰ", "t͡ɕʰ",
	"w", "s", "z", "ɬ", "l", "r", "ɕ", "ʑ", "j", "h",
	"ɑ", "e", "i", "o", "u",
	"ɑː", "eː", "ɛː", "iː", "oː", "øː", "uː", "yː",
	"ɑ˥", "e˥", "i˥", "o˥", "u˥",
	"ɑː˥", "eː˥", "ɛː˥", "iː˥", "oː˥", "øː˥", "uː˥", "yː˥",
	"m˥", "n˥", "ŋ˥", "p˥", "k˥", "k̚˥", "w˥", "l˥", "r˥", "ɕ˥", "j˥", ")˥",
	"ɑ˩", "e˩", "i˩", "o˩", "u˩",
	"ɑː˩", "eː˩", "ɛː˩", "iː˩", "oː˩", "øː˩", "uː˩", "yː˩",
	"m˩", "n˩", "ŋ˩", "p˩", "k˩", "k̚˩", "w˩", "l˩", "r˩", "ɕ˩", "j˩", ")˩",
	".", ",", "-",
}
-- NOTE(review): this assigns to `data.phonemes` itself, so it overwrites any
-- value stored there earlier. A per-language index such as
-- `data.phonemes["<langcode>"]` was presumably intended -- TODO confirm the
-- language code against the upstream module.
-- The list holds single-letter vowels and consonants, affricates written with
-- a tie bar, and finally the stress mark, syllable dot, space and hyphen.
data.phonemes = {
	"a", "b", "d", "d͡ʒ", "d͡z", "e", "f", "h", "i", "j", "k",
	"l", "m", "n", "o", "p", "r", "s", "t", "t͡s", "t͡ʃ",
	"u", "v", "w", "x", "z", "ɡ", "ʃ", "ʒ",
	"ˈ", ".", " ", "-",
}
-- NOTE(review): this assigns to `data.phonemes` itself, so it overwrites any
-- value stored there earlier. A per-language index such as
-- `data.phonemes["<langcode>"]` was presumably intended -- TODO confirm the
-- language code against the upstream module.
-- NOTE(review): "v" appears twice in the list below; check whether one of the
-- two was meant to be a different symbol.
data.phonemes = {
	"ɑ", "b", "ɡ", "d", "e", "z", "ə", "tʰ", "ʒ", "i", "l", "χ", "t͡s",
	"k", "h", "d͡z", "ʁ", "t͡ʃ", "m", "j", "n", "ʃ", "ɔ", "t͡ʃʰ", "p", "d͡ʒ",
	"r", "s", "v", "t", "ɾ", "t͡sʰ", "v", "pʰ", "kʰ", "o", "f", "ŋɡ", "ŋk",
	"ŋχ", "u", "œ", "ʏ", "ˈ", "ˌ", ".", " ", "ː", 
}
-- NOTE(review): this assigns to `data.phonemes` itself, so it overwrites any
-- value stored there earlier. A per-language index such as
-- `data.phonemes["<langcode>"]` was presumably intended -- TODO confirm the
-- language code against the upstream module.
-- Rows, in order: nasals, plosives, fricatives, approximants and /r/, short
-- vowels, remaining and long vowels, diphthongs written with the nonsyllabic
-- diacritic, then stress marks and separators.
data.phonemes = {
	"m", "n", "ŋ",
	"p", "b", "t", "d", "k", "ɡ",
	"f", "v", "s", "z", "ʃ", "ʒ", "x", "ɣ", "ɦ",
	"ʋ", "l", "j", "r",
	"ɪ", "ʏ", "ɛ", "ə", "ɔ", "ɑ",
	"i", "iː", "y", "yː", "u", "uː", "eː", "øː", "oː", "ɛː", "œː", "ɔː", "aː",
	"ɛi̯", "œy̯", "ɔi̯", "ɑu̯", "ɑi̯",
	"iu̯", "yu̯", "ui̯", "eːu̯", "oːi̯", "aːi̯",
	"ˈ", "ˌ", ".", " ", "-",
}
-- NOTE(review): this assigns to `data.phonemes` itself, so it overwrites any
-- value stored there earlier; being the last such assignment, it is the only
-- list that remains in `data.phonemes` when the module returns. A per-language
-- index such as `data.phonemes["<langcode>"]` was presumably intended -- TODO
-- confirm the language code against the upstream module.
-- Rows, in order: consonants, plain vowels, vowels marked with ˤ (short, then
-- long), long vowels, then stress marks and separators (including the tie "‿").
data.phonemes = {
	"m", "n",
	"p", "t", "k", "ʔ",
	"b", "d", "ɡ",
	"t͡s", "t͡ʃ",
	"d͡z", "d͡ʒ",
	"f", "s", "ʃ", "ħ",
	"v", "z", "ʒ", "ɣ",
	"l", "j", "w",
	"r",
	"ɪ", "ɛ", "ɔ", "a", "u",
	"ɛˤ", "ɔˤ", "aˤ", "əˤ",
	"ɛˤː", "ɔˤː", "aˤː", "əˤː", "ɪˤː",
	"iː", "ɪː", "ɛː", "ɔː", "aː", "uː",
	"ˈ", "ˌ", ".", " ", "‿", "-"
}

-- Hand the assembled table back to whoever loads this module.
return data