local export = {}
local m_shared = require("Module:User:Benwing2/place/shared-data")
local m_links = require("Module:links")
local m_strutils = require("Module:string utilities")
-- Uppercase the first character of `label` according to the wiki's content language.
local function ucfirst(label)
	local content_lang = mw.getContentLanguage()
	return content_lang:ucfirst(label)
end
-- Lowercase `label` according to the wiki's content language.
local function lc(label)
	local content_lang = mw.getContentLanguage()
	return content_lang:lc(label)
end
-- Testing switch stored on the export table; off by default.
-- NOTE(review): presumably forces categories to be generated where they normally would not be -- the code that
-- reads this flag is not in this part of the file; confirm against the consumer.
export.force_cat = false -- set to true for testing
------------------------------------------------------------------------------------------
-- Basic utilities --
------------------------------------------------------------------------------------------
-- Strip wiki links (using Module:links) and anything that looks like an HTML tag from `text`.
-- Returns exactly one value: the cleaned string.
function export.remove_links_and_html(text)
	text = m_links.remove_links(text)
	-- string.gsub returns (result, substitution_count). The parentheses drop the count so that callers passing
	-- this function's result straight into another call (e.g. table.insert) don't receive a stray number.
	return (text:gsub("<.->", ""))
end
-- Singularize a possibly-plural placetype. Returns the singular form, or nil when `placetype` is missing or
-- when singularizing leaves it unchanged (i.e. it was not plural to begin with).
function export.maybe_singularize(placetype)
	if placetype then
		local singular = m_strutils.singularize(placetype)
		if singular ~= placetype then
			return singular
		end
	end
	return nil
end
-- Tell whether `placetype` is a pseudo-placetype that categorization should skip: the connectives "and" / "or",
-- or anything beginning with an open parenthesis. The result is truthy or falsy, not a strict boolean.
function export.placetype_is_ignorable(placetype)
	if placetype == "and" or placetype == "or" then
		return true
	end
	-- Parenthesized so that only the match position (or nil) comes back, never find()'s second value.
	return (placetype:find("^%("))
end
-- Map a placetype alias (e.g. "adr") to its canonical placetype (e.g. "administrative region"). Placetypes with
-- no entry in `export.placetype_aliases` are returned unchanged.
function export.resolve_placetype_aliases(placetype)
	-- The alias table must be indexed by the placetype. The bare table is always truthy, so without the index
	-- the `or placetype` fallback is unreachable and callers receive a table instead of a string.
	return export.placetype_aliases[placetype] or placetype
end
-- Look up and resolve any category aliases that need to be applied to a holonym. For example,
-- "country/Republic of China" maps to "Taiwan" for use in categories like "Counties in Taiwan".
-- This also removes any links. Returns the aliased placename if one exists for the holonym's placetype (or an
-- equivalent placetype), otherwise the link-stripped placename itself.
function export.resolve_cat_aliases(holonym_placetype, holonym_placename)
	-- Find the alias map for this placetype, trying each equivalent placetype in turn. The callback has to index
	-- placename_cat_aliases by the candidate placetype; returning the whole table would match on the first try.
	local cat_aliases = export.get_equiv_placetype_prop(holonym_placetype, function(pt)
		return export.placename_cat_aliases[pt]
	end)
	-- Strip links and HTML before the lookup so that linked holonyms still match.
	holonym_placename = export.remove_links_and_html(holonym_placename)
	-- Look the placename up inside the per-placetype map; fall back to the placename when there is no alias.
	local alias = cat_aliases and cat_aliases[holonym_placename]
	return alias or holonym_placename
end
-- Given a placetype, split the placetype into one or more potential "splits", each consisting of
-- a three-element list {PREV_QUALIFIERS, THIS_QUALIFIER, BARE_PLACETYPE}, i.e.
-- (a) the concatenation of zero or more previously-recognized qualifiers on the left, normally
-- canonicalized (if there are zero such qualifiers, the value will be nil);
-- (b) a single recognized qualifier, normally canonicalized (if there is no qualifier, the value will be nil);
-- (c) the "bare placetype" on the right.
-- Splitting between the qualifier in (b) and the bare placetype in (c) happens at each space character, proceeding from
-- left to right, and stops if a qualifier isn't recognized. All placetypes are canonicalized by checking for aliases
-- in placetype_aliases, but no other checks are made as to whether the bare placetype is recognized. Canonicalization
-- of qualifiers does not happen if NO_CANON_QUALIFIERS is specified.
--
-- For example, given the placetype "small beachside unincorporated community", the return value will be
-- {
-- {nil, nil, "small beachside unincorporated community"},
-- {nil, "small", "beachside unincorporated community"},
-- {"small", "[[beachfront]]", "unincorporated community"},
-- {"small [[beachfront]]", "[[unincorporated]]", "community"},
-- }
-- Here, "beachside" is canonicalized to "[[beachfront]]" and "unincorporated" is canonicalized
-- to "[[unincorporated]]", in both cases according to the entry in placetype_qualifiers.
--
-- On the other hand, if given "small former haunted community", the return value will be
-- {
-- {nil, nil, "small former haunted community"},
-- {nil, "small", "former haunted community"},
-- {"small", "former", "haunted community"},
-- }
-- because "small" and "former" but not "haunted" are recognized as qualifiers.
--
-- Finally, if given "former adr", the return value will be
-- {
-- {nil, nil, "former adr"},
-- {nil, "former", "administrative region"},
-- }
-- because "adr" is a recognized placetype alias for "administrative region".
function export.split_qualifiers_from_placetype(placetype, no_canon_qualifiers)
	-- The first split always treats the entire string as the bare placetype, with no qualifiers.
	local splits = {{nil, nil, export.resolve_placetype_aliases(placetype)}}
	local prev_qualifier = nil
	while true do
		-- Peel the leftmost space-delimited word off as a candidate qualifier.
		local qualifier, bare_placetype = placetype:match("^(.-) (.*)$")
		if not qualifier then
			-- No space left: nothing more to split.
			break
		end
		-- Look the candidate up by name. The table has to be indexed here: the bare table is always truthy, which
		-- would make every leading word count as a qualifier and replace it with the table itself.
		local canon = export.placetype_qualifiers[qualifier]
		if not canon then
			-- Unrecognized qualifier: stop splitting, as documented above.
			break
		end
		-- A value of `true` means "display as-is"; a string value is the canonical (e.g. linked) display form.
		local new_qualifier = qualifier
		if not no_canon_qualifiers and canon ~= true then
			new_qualifier = canon
		end
		table.insert(splits, {prev_qualifier, new_qualifier, export.resolve_placetype_aliases(bare_placetype)})
		-- Accumulate the qualifiers seen so far for the next iteration's PREV_QUALIFIERS slot.
		prev_qualifier = prev_qualifier and prev_qualifier .. " " .. new_qualifier or new_qualifier
		placetype = bare_placetype
	end
	return splits
end
-- Given a placetype (which may be pluralized), return an ordered list of equivalent placetypes to look under to find
-- the placetype's properties (such as the category or categories to be inserted). The return value is actually an
-- ordered list of objects of the form {qualifier=QUALIFIER, placetype=EQUIV_PLACETYPE} where EQUIV_PLACETYPE is a
-- placetype whose properties to look up, derived from the passed-in placetype or from a contiguous subsequence of the
-- words in the passed-in placetype (always including the rightmost word in the placetype, i.e. we successively chop
-- off qualifier words from the left and use the remainder to find equivalent placetypes). QUALIFIER is the remaining
-- words not part of the subsequence used to find EQUIV_PLACETYPE; or nil if all words in the passed-in placetype were
-- used to find EQUIV_PLACETYPE. (FIXME: This qualifier is not currently used anywhere.) The placetype passed in always
-- forms the first entry.
function export.get_placetype_equivs(placetype)
	local equivs = {}

	-- Look up the equivalent placetype for `pt` in `placetype_equivs`. If `pt` is plural, also look up the
	-- equivalent for the singularized version. Returns a (possibly empty) list of the equivalents found.
	local function lookup_placetype_equiv(pt)
		local retval = {}
		-- Index the table by the placetype; testing the bare table would always succeed and insert the table.
		local equiv = export.placetype_equivs[pt]
		if equiv then
			table.insert(retval, equiv)
		end
		-- Check for a mapping for the singularized equivalent.
		local sg_pt = export.maybe_singularize(pt)
		local sg_equiv = sg_pt and export.placetype_equivs[sg_pt]
		if sg_equiv then
			table.insert(retval, sg_equiv)
		end
		return retval
	end

	-- Insert `pt` into `equivs`, followed by its singularized form (if `pt` is plural) and then any equivalents
	-- listed in `placetype_equivs` for either form. `qualifier` is the preceding qualifier stored alongside each
	-- entry (see the comment above this function).
	local function do_placetype(qualifier, pt)
		-- FIXME! The qualifier (first arg) is inserted into the table, but isn't
		-- currently used anywhere.
		local function insert(equiv_pt)
			table.insert(equivs, {qualifier=qualifier, placetype=equiv_pt})
		end

		-- First the placetype itself, then its singular, then the placetype_equivs mappings.
		insert(pt)
		local sg_pt = export.maybe_singularize(pt)
		if sg_pt then
			insert(sg_pt)
		end
		for _, placetype_equiv in ipairs(lookup_placetype_equiv(pt)) do
			insert(placetype_equiv)
		end
	end

	-- Successively split off recognized qualifiers and loop over successively greater sets of qualifiers from the left.
	local splits = export.split_qualifiers_from_placetype(placetype)
	for _, split in ipairs(splits) do
		-- Index explicitly rather than unpack(): the leading slots may be nil, and the length of a list with
		-- nil holes (which unpack relies on) is not well defined.
		local prev_qualifier, this_qualifier, bare_placetype = split[1], split[2], split[3]
		if this_qualifier then
			-- First see if the rightmost split-off qualifier is in qualifier_equivs (e.g. 'former' -> 'historical').
			-- If so, create a placetype from the qualifier mapping + the following bare_placetype; then, add
			-- that placetype, and any mapping for the placetype in placetype_equivs.
			local equiv_qualifier = export.qualifier_equivs[this_qualifier]
			if equiv_qualifier then
				do_placetype(prev_qualifier, equiv_qualifier .. " " .. bare_placetype)
			end

			-- Also see if the remaining placetype to the right of the rightmost split-off qualifier has a placetype
			-- equiv, and if so, create placetypes from the qualifier + placetype equiv and qualifier equiv + placetype
			-- equiv, inserting them along with any equivalents. This way, if we are given the placetype "former
			-- alliance", and we have a mapping 'former' -> 'historical' in qualifier_equivs and a mapping 'alliance'
			-- -> 'confederation' in placetype_equivs, we check for placetypes 'former confederation' and (most
			-- importantly) 'historical confederation' and their equivalents (if any) in placetype_equivs. This allows
			-- the user to specify placetypes using any combination of "former/ancient/historical/etc." and
			-- "league/alliance/confederacy/confederation" and it will correctly map to the placetype 'historical
			-- confederation' and in turn to the corresponding category. Similarly, any combination of
			-- "former/ancient/historical/etc." and "protectorate/autonomous territory/dependent territory" will
			-- correctly map to placetype 'historical dependent territory' and in turn to its category.
			local bare_placetype_equiv_list = lookup_placetype_equiv(bare_placetype)
			for _, bare_placetype_equiv in ipairs(bare_placetype_equiv_list) do
				do_placetype(prev_qualifier, this_qualifier .. " " .. bare_placetype_equiv)
				if equiv_qualifier then
					do_placetype(prev_qualifier, equiv_qualifier .. " " .. bare_placetype_equiv)
				end
			end

			-- Then see if the rightmost split-off qualifier is in qualifier_to_placetype_equivs
			-- (e.g. 'fictional *' -> 'fictional location'). If so, add the mapping.
			local qualifier_placetype = export.qualifier_to_placetype_equivs[this_qualifier]
			if qualifier_placetype then
				table.insert(equivs, {qualifier=prev_qualifier, placetype=qualifier_placetype})
			end
		end

		-- Finally, join the rightmost split-off qualifier to the previously split-off qualifiers to form a
		-- combined qualifier, and add it along with bare_placetype and any mapping in placetype_equivs for
		-- bare_placetype.
		local qualifier = prev_qualifier and prev_qualifier .. " " .. this_qualifier or this_qualifier
		do_placetype(qualifier, bare_placetype)
	end
	return equivs
end
-- Apply `fun` to each equivalent of `placetype`, in the order produced by export.get_placetype_equivs(), and
-- return the first truthy result together with the equiv entry ({qualifier=..., placetype=...}) that produced
-- it. Returns nil, nil when no equivalent yields a truthy result. When `placetype` is missing, returns the
-- (single) result of fun(nil) and nil.
function export.get_equiv_placetype_prop(placetype, fun)
	if placetype then
		for _, candidate in ipairs(export.get_placetype_equivs(placetype)) do
			local prop = fun(candidate.placetype)
			if prop then
				return prop, candidate
			end
		end
		return nil, nil
	end
	return fun(nil), nil
end
-- Given a place desc (see top of file) and a holonym object (see top of file), add a key/value into the place desc's
-- `holonyms_by_placetype` field corresponding to the placetype and placename of the holonym. For example, corresponding
-- to the holonym "c/Italy", a key "country" with the list value {"Italy"} will be added to the place desc's
-- `holonyms_by_placetype` field. If there is already a key with that place type, the new placename will be added to the
-- end of the value's list.
function export.key_holonym_into_place_desc(place_desc, holonym)
	-- Holonyms without a placetype cannot be keyed.
	if not holonym.placetype then
		return
	end

	local placename = holonym.placename
	-- Record the placename under the holonym's placetype and under every equivalent placetype.
	for _, equiv in ipairs(export.get_placetype_equivs(holonym.placetype)) do
		-- Create the placetype -> placenames map on first use.
		local by_placetype = place_desc.holonyms_by_placetype
		if not by_placetype then
			by_placetype = {}
			place_desc.holonyms_by_placetype = by_placetype
		end
		-- Key by placetype: start a new list for a placetype not seen before, otherwise append to its list.
		-- (Without the placetype index, every placename would land in one flat list and the per-placetype
		-- lookup described in the comment above this function could never work.)
		local placenames = by_placetype[equiv.placetype]
		if placenames then
			table.insert(placenames, placename)
		else
			by_placetype[equiv.placetype] = {placename}
		end
	end
end
------------------------------------------------------------------------------------------
-- Placename and placetype data --
------------------------------------------------------------------------------------------
-- This is a map from aliases to their canonical forms. Any placetypes appearing
-- as keys here will be mapped to their canonical forms in all respects, including
-- the display form. Contrast 'placetype_equivs', which apply to categorization and
-- other processes but not to display.
export.placetype_aliases = {
= "country",
= "constituent country",
= "province",
= "autonomous province",
= "region",
= "autonomous region",
= "administrative region",
= "special administrative region",
= "state",
= "archipelago",
= "borough",
= "canton",
= "council area",
= "community development block",
= "Crown dependency",
= "census-designated place",
= "census-designated place",
= "county",
= "county borough",
= "county-level city",
= "collectivity",
= "community",
= "autonomous community",
= "unincorporated community",
= "continent",
= "civil parish",
= "dependency",
= "department",
= "district",
= "district municipality",
= "division",
= "French prefecture",
= "governorate",
= "governorate",
= "Indian reservation",
= "island",
= "London borough",
= "local government area",
= "local government district",
= "metropolitan borough",
= "metropolitan city",
= "mountain",
= "municipality",
= "municipal district",
= "oblast",
= "autonomous oblast",
= "okrug",
= "autonomous okrug",
= "parish",
= "parish municipality",
= "peninsula",
= "prefecture",
= "prefecture-level city",
= "prefecture-level city",
= "autonomous prefecture",
= "republic",
= "autonomous republic",
= "river",
= "regional county municipality",
= "regional district",
= "regional municipality",
= "royal borough",
= "regional unit",
= "rural municipality",
= "territorial authority",
= "territory",
= "autonomous territory",
= "union territory",
= "tribal jurisdictional area",
= "township",
= "township municipality",
= "united township municipality",
= "valley",
= "voivodeship",
= "Welsh community",
= "mountain range",
= "department capital",
= "home rule city",
= "home rule municipality",
= "subprovincial city",
= "subprovincial city",
= "subprovincial district",
= "sub-prefectural city",
= "sub-prefectural city",
= "non-metropolitan county",
= "inner city area",
}
-- These qualifiers can be prepended onto any placetype and will be handled correctly.
-- For example, the placetype "large city" will be displayed as such but otherwise
-- treated exactly as if "city" were specified. Links will be added to the remainder
-- of the placetype as appropriate, e.g. "small voivodeship" will display as
-- "small [[voivodeship]]" because "voivodeship" has an entry in placetype_links.
-- If the value is a string, the qualifier will display according to the string.
-- Note that these qualifiers do not override placetypes with entries elsewhere that
-- contain those same qualifiers. For example, the entry for "former colony" in
-- placetype_equivs will apply in preference to treating "former colony" as equivalent
-- to "colony". Also note that if an entry like "former colony" appears in either
-- placetype_equivs or cat_data, the non-qualifier portion won't automatically be
-- linked, so it needs to be specifically included in placetype_links if linking is
-- desired.
export.placetype_qualifiers = {
-- generic qualifiers
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true, -- left tributary
= true, -- right tributary
= true, -- for use in opposition to "ancient" in another definition
-- "former" qualifiers
= true,
= true,
= true,
= true,
= true,
= "historical",
= true,
= true,
= true,
= true,
-- sea qualifiers
= true,
= true,
= true,
= "]",
= "coastal",
= "]",
= "]",
= true,
-- political status qualifiers
= "]",
= "]",
= "]",
= "]",
-- monetary status/etc. qualifiers
= true,
= true,
= "]",
= "]",
-- city vs. rural qualifiers
= true,
= "]",
= true,
= true,
= true,
= true,
= true,
-- land use qualifiers
= "]",
= "]",
= true,
= "]",
= "]",
-- business use qualifiers
= "]",
= "]",
= "]",
= "]",
= "]",
= "]",
= "]",
-- religious qualifiers
= true,
= true,
= true,
= true,
-- qualifiers for nonexistent places
= true,
= true,
-- directional qualifiers
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
-- misc. qualifiers
= true,
= true,
= true,
}
-- If there's an entry here, the corresponding placetype will use the text of the
-- value, which should be used to add links. If the value is true, a simple link
-- will be added around the whole placetype. If the value is "w", a link to
-- Wikipedia will be added around the whole placetype.
export.placetype_links = {
= "w",
= "w",
= "w",
= "w",
= "w",
= "] ]",
= true,
= "w",
= "w",
= true,
= true,
= true,
= "] ]",
= true,
= "w",
= true,
= true,
= true,
= true,
= "w",
= "w",
= "w",
= "w",
= true,
= true, -- Philippines
= true, -- Spanish-speaking countries; Philippines
= true,
= "w",
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= "] and largest city",
= true,
= true,
= true, -- Australia
= true,
= true, -- United States
= true,
= true,
= true,
= "w", -- Northwest Territories, Canada
= true,
= true,
= "w",
= true,
= true,
= true,
= true,
= true,
= "w", -- India
= true, -- Italy, Switzerland
= true,
= true,
= true,
= "] region",
= true,
= "w", -- Taiwan
= "w", -- Taiwan
= "w", -- China
= true,
= true,
= true,
= true,
= true,
= true,
= "] ]",
= true,
= "w",
= "w",
= "w",
= "]",
= "w",
= true,
= true,
= "] ]",
= "] ]",
= "w",
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= "] ]",
= "w",
= "w",
= "w",
= "] ]", -- Canada
= true,
= "former ]",
= "former ]",
= "former ]",
= "former ]",
= "former ] ]",
= "w", -- Italy
= "]",
= "w",
= true, -- Nigeria
= true,
= true,
= true,
= "w", -- China (historical)
= "w", -- Poland (historical)
= true,
= true,
= "] ]",
= "] ]",
= "] ]",
= "] ]",
= true,
= "w",
= true,
= "w",
= "w",
= "w",
= "w",
= "w",
= true,
= "w", -- Ukraine
= true,
= "]",
= "w", -- United States
= "w", -- Canada
= "] ]",
= "] area",
= "w",
= "w",
= "w",
= "w",
= "w",
= true,
= true,
= true,
= true,
= "] ]",
= "w",
= "w",
= "w",
= "w",
= "] with ]",
= "w",
= "]",
= "w",
= true,
= true,
= "]",
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= true,
= "] town", -- England
= true,
= true,
= "]", -- Taiwan
= "]", -- Taiwan
= true,
= true,
= "] ]",
= "w",
= true,
= "] with ]",
= "w",
= true,
= true,
= "]",
= "w",
= "w",
= true,
= "w",
= "w",
= "w",
= true,
= "]",
= true,
= "]",
= true,
= true,
= "]",
= true,
= true,
= "w",
= true,
= "w",
= true,
= true,
= true,
= "]", -- China (type of economic development zone)
= true,
= true,
= "] ]",
= "w",
= "w",
= "w",
= "w",
= true,
= "] ]",
= "] area",
= "w",
= "w",
= "w",
= true,
= "w",
= "w", -- Hong Kong
= "w",
= "w",
= "]", -- Taiwan
= true,
= true,
= true,
= true, -- Isle of Man
= true, -- Australia
= true,
= "w",
= true,
= "] town",
= "]",
= "w",
= "w", -- China; North Korea; Indonesia; East Timor
= "w",
= true,
= true,
= true,
= "w",
= true,
= "w",
= "w",
= true,
= true,
= true,
= "] ]",
= true,
= true,
= "w",
= "w",
= "w",
= true,
= true,
= "w",
= true,
= true,
= "w",
= true,
= "]",
-- can't use templates in this code
= "] with ]",
= true,
= "w",
= "w",
= "w",
= true,
= "w",
= "w",
= true,
= "w",
= "]",
= "w",
= "w",
= "] area",
= "w",
= "w",
= "]",
= true, -- Poland
= true,
= true,
= true,
= "]",
}
-- In this table, the key qualifiers should be treated the same as the value qualifiers for
-- categorization purposes. This is overridden by cat_data, placetype_equivs and
-- qualifier_to_placetype_equivs.
-- Every entry currently maps to "historical". export.get_placetype_equivs() consults this table for the
-- rightmost split-off qualifier and, when a mapping exists, also tries "<value> <bare placetype>" as a
-- placetype to look up.
export.qualifier_equivs = {
= "historical",
= "historical",
= "historical",
= "historical",
= "historical",
-- This needs to be here. If we take it out, 'historic province' won't properly
-- map to 'historical political subdivision'.
= "historical",
= "historical",
= "historical",
= "historical",
}
-- In this table, any placetypes containing these qualifiers that do not occur in placetype_equivs
-- or cat_data should be mapped to the specified placetypes for categorization purposes. Entries here
-- are overridden by cat_data and placetype_equivs.
-- export.get_placetype_equivs() consults this table for the rightmost split-off qualifier and, when a mapping
-- exists, adds the mapped placetype (e.g. "fictional location") as an extra equivalent regardless of what the
-- bare placetype is.
export.qualifier_to_placetype_equivs = {
= "fictional location",
= "mythological location",
}
-- In this table, the key placetypes should be treated the same as the value placetypes for
-- categorization purposes. Entries here are overridden by cat_data.
-- NOTE: 'coal town', 'county town', 'ghost town', 'resort town', 'ski resort town',
-- 'spa town', etc. aren't mapped to 'town' because they aren't necessarily towns.
export.placetype_equivs = {
= "capital city",
= "administrative centre",
= "administrative centre",
= "administrative centre",
= "confederation",
= "ancient settlement",
= "ancient settlement",
= "ancient settlement",
= "ancient settlement",
= "island",
= "province",
= "dependent territory",
= "polity",
= "neighborhood", -- not completely correct, barangays are formal administrative divisions of a city
= "neighborhood", -- not completely correct, in some countries barrios are formal administrative divisions of a city
= "polity",
= "area",
= "borough",
= "polity",
= "peninsula",
= "capital city",
= "capital city",
= "city", -- should be 'former city' if we distinguish that
= "city",
= "neighborhood",
= "county",
= "island",
= "village",
= "dependent territory",
= "historical political subdivision",
= "municipality",
= "village",
= "municipality",
= "confederation",
= "region",
= "county-administered city",
= "prefecture-level city",
= "dependent territory",
= "dependent territory",
= "capital city",
= "dependent territory",
= "ancient settlement",
= "ancient settlement",
= "municipality",
= "municipality",
= "capital city",
= "administrative centre",
= "volcano",
= "polity",
= "polity",
= "polity",
= "dependent territory",
= "territory",
= "Indian reserve",
= "village", -- should be "hamlet" but hamlet in turn redirects to village
= "region",
= "valley",
= "island",
= "village",
= "city",
= "city",
= "town",
= "town",
= "administrative centre",
= "moor",
= "town",
= "town",
-- We try to list all top-level polities and political subdivisions here and classify them
-- accordingly. (Note that the following entries also apply to anything preceded by "former",
-- "ancient", "historic", "medieval", etc., according to qualifier_equivs.) Anything we don't
-- list will be categorized as if the qualifier were absent, e.g. "ancient city" will be
-- categorized as a city and "former sea" as a sea.
= "historical political subdivision",
= "historical political subdivision",
= "historical political subdivision",
= "historical political subdivision",
= "historical polity",
= "historical political subdivision",
= "historical polity",
= "historical polity",
= "historical settlement",
= "historical polity",
= "historical polity",
= "historical political subdivision",
= "historical political subdivision",
= "historical polity",
= "historical political subdivision",
= "historical polity",
= "historical political subdivision",
= "historical political subdivision",
= "historical political subdivision",
= "historical polity",
= "historical political subdivision",
= "historical political subdivision",
= "historical political subdivision",
= "historical polity",
= "historical polity",
= "historical polity",
= "historical political subdivision",
= "historical settlement",
= "historical polity",
= "historical polity",
= "historical political subdivision",
= "historical polity",
= "historical political subdivision",
= "historical political subdivision",
= "historical political subdivision",
= "historical political subdivision",
= "historical political subdivision",
= "historical political subdivision",
= "historical political subdivision",
= "historical political subdivision",
= "historical political subdivision",
= "historical political subdivision",
= "historical polity",
= "historical polity",
= "historical polity",
-- The following could refer either to a state of a country (a subdivision)
-- or a state = sovereign entity. The latter appears more common (e.g. in
-- various "ancient states" of East Asia).
= "historical polity",
= "historical political subdivision",
= "historical political subdivision",
= "historical political subdivision",
= "historical settlement",
= "historical settlement",
= "historical political subdivision",
= "city",
= "municipality",
= "volcano",
= "city",
= "town",
= "neighborhood",
= "country",
= "municipality",
= "island",
= "capital city",
= "polity",
= "polity",
= "confederation",
= "capital city",
= "local government district",
= "local government district",
= "unincorporated community",
= "village", -- not necessarily true, but usually is the case
= "region",
= "city",
= "town",
= "ancient capital",
= "ancient capital",
= "ancient settlement",
= "ancient settlement",
= "ancient settlement",
= "ancient settlement",
= "ancient settlement",
= "ancient settlement",
= "ancient settlement",
= "ancient settlement",
= "city",
= "county",
= "neighborhood",
= "country",
= "town",
= "moor",
= "district",
= "township",
= "mountain",
= "region",
= "municipality",
= "capital city",
= "park",
= "neighborhood",
= "town",
= "county",
= "local government district",
= "collectivity",
= "department",
= "dependent territory",
= "mountain pass",
= "village", -- not necessarily true, but usually is the case
= "city",
= "town",
= "dependent territory",
= "capital city",
= "capital city",
= "municipality",
= "city",
= "borough",
= "capital city",
= "administrative centre",
= "village", -- not necessarily true, but usually is the case
= "district",
= "county",
= "county",
= "county seat",
= "city",
= "peninsula",
= "capital city",
= "park",
= "city",
= "town",
= "river",
= "ghost town",
= "subprovincial city",
= "region",
= "suburb",
= "metro station",
= "continent",
= "district",
= "town",
= "county",
= "city", -- should be 'former city' if we distinguish that
= "metro station",
= "territory",
= "unrecognized country",
= "neighborhood",
= "township",
= "town",
= "neighborhood", -- not completely correct, wards are formal administrative divisions of a city
}
-- These contain transformations applied to certain placenames to convert them
-- into displayed form. For example, if any of "country/US", "country/USA" or
-- "country/United States of America" (or "c/US", etc.) are given, the result
-- will be displayed as "United States".
export.placename_display_aliases = {
= {
= "Valencia",
},
= {
= "New York City",
= "Washington, D.C.",
= "Washington, D.C.",
= "Washington, D.C.",
},
= {
= "Armenia",
= "Bosnia and Herzegovina",
= "Czech Republic",
= "Eswatini",
= "Ireland",
= "Ivory Coast",
= "North Macedonia",
= "North Macedonia",
= "North Macedonia",
= "United Arab Emirates",
= "United Kingdom",
= "United States",
= "United States",
= "United States",
= "United States",
= "United States",
= "Vatican City",
},
= {
= "North Ostrobothnia",
= "South Ostrobothnia",
= "Northern Savonia",
= "Southern Savonia",
= "Päijänne Tavastia",
= "Tavastia Proper",
= "Åland Islands",
},
= {
= "Kabardino-Balkar Republic",
= "Tuva Republic",
},
= {
= "Mecklenburg-Vorpommern",
= "State of Mexico",
},
= {
= "United States Virgin Islands",
= "United States Virgin Islands",
},
}
-- These contain transformations applied to the displayed form of certain
-- placenames to convert them into the form they will appear in categories.
-- For example, either of "country/Myanmar" and "country/Burma" will be
-- categorized into categories with "Burma" in them (but the displayed form
-- will respect the form as input). (NOTE, the choice of names here should not
-- be taken to imply any political position; it is just this way because it has
-- always been this way.)
export.placename_cat_aliases = {
= {
= "Nenets Autonomous Okrug",
= "Khanty-Mansi Autonomous Okrug",
= "Khanty-Mansi Autonomous Okrug",
},
= {
= "City of Glasgow",
= "City of Edinburgh",
= "City of Aberdeen",
= "City of Dundee",
= "Na h-Eileanan Siar",
},
= {
-- will categorize into e.g. "Cities in Burma".
= "Burma",
= "Artsakh",
= "China",
= "Taiwan",
= "Bosnia and Herzegovina",
= "Democratic Republic of the Congo",
= "Republic of the Congo",
},
= {
= "Isle of Anglesey",
},
= {
= "North Brabant",
= "North Holland",
= "South Holland",
= "Fujian",
},
= {
-- Only needs to include cases that aren't just shortened versions of the
-- full federal subject name (i.e. where words like "Republic" and "Oblast"
-- are omitted but the name is not otherwise modified). Note that a couple
-- of minor variants are recognized as display aliases, meaning that they
-- will be canonicalized for display as well as categorization.
= "Republic of Bashkortostan",
= "Chechen Republic",
= "Chuvash Republic",
= "Kabardino-Balkar Republic",
= "Kabardino-Balkar Republic",
= "Karachay-Cherkess Republic",
= "Republic of North Ossetia-Alania",
= "Republic of North Ossetia-Alania",
= "Sakha Republic",
= "Sakha Republic",
= "Sakha Republic",
= "Tuva Republic",
= "Udmurt Republic",
},
}
-- This contains placenames that should be preceded by an article (almost always "the").
-- The table is two-level: each outer entry holds a map whose values are the article to use.
-- NOTE: There are multiple ways that placenames can come to be preceded by "the":
-- 1. Listed here.
-- 2. Given in the shared place data (the module loaded at the top of this file as m_shared) with an
-- initial "the". All such placenames are added to this map by the code just below the map.
-- 3. The placetype of the placename has holonym_article = "the" in its cat_data.
-- 4. A regex in placename_the_re matches the placename.
-- Note that "the" is added only before the first holonym in a place description.
export.placename_article = {
-- This should only contain info that can't be inferred from the shared place data (m_shared).
= {
= "the",
= "the",
},
= {
= "the",
},
= {
= "the",
},
= {
= "the",
= "the",
},
= {
= "the",
= "the",
= "the",
= "the",
= "the",
= "the",
= "the",
= "the",
= "the",
},
= {
= "the",
},
}
-- Regular expressions to apply to determine whether we need to put 'the' before
-- a holonym. The key "*" applies to all holonyms, otherwise only the regexes
-- for the holonym's placetype apply.
-- Each value is a list of pattern strings; "^" anchors a pattern at the start of the placename and "$" at the
-- end. NOTE(review): these look like Lua patterns rather than true regular expressions -- the code that applies
-- them is not in this part of the file; confirm.
export.placename_the_re = {
-- We don't need entries for peninsulas, seas, oceans, gulfs or rivers
-- because they have holonym_article = "the".
= {"^Isle of ", " Islands$", " Mountains$", " Empire$", " Country$", " Region$", " District$", "^City of "},
= {"^Bay of "},
= {"^Lake of "},
= {"^Republic of ", " Republic$"},
= {"^Republic of ", " Republic$"},
-- NOTE(review): " egion$" looks like it has lost a leading character class for the "R"/"r"; verify.
= {" egion$"},
= {" River$"},
= {"^Shire of "},
= {"^Shire of "},
-- Unlike the entries above, these two patterns are not anchored and match anywhere in the placename.
= {" Reservation", " Nation"},
= {" Reservation", " Nation"},
}
-- Now extract from the shared place data all the other places that need "the"
-- prefixed. For every polity key of the form "the X" (after removing any ", ..." suffix), record
-- export.placename_article[DIVTYPE]["X"] = "the" for each of the polity's division types.
for _, group in ipairs(m_shared.polities) do
	for key, value in pairs(group.data) do
		-- Chop off ", England" and such from the end. Use a fresh local instead of assigning to the loop
		-- variable, and parenthesize to keep only the string (gsub also returns a substitution count).
		local placename = (key:gsub(", .*$", ""))
		local base = placename:match("^the (.*)$")
		if base then
			-- A per-polity divtype overrides the group default; one of the two must be present.
			local divtype = value.divtype or group.default_divtype
			if not divtype then
				-- NOTE(review): this message appears to have lost the name of the module it refers to.
				error("Group in ] is missing a default_divtype key")
			end
			-- Normalize a single divtype to a one-element list.
			if type(divtype) ~= "table" then
				divtype = {divtype}
			end
			for _, dt in ipairs(divtype) do
				-- Create the per-divtype map on demand, then add the bare placename to it. Assigning "the" to
				-- export.placename_article itself would destroy every entry in the table.
				local articles = export.placename_article[dt]
				if not articles then
					articles = {}
					export.placename_article[dt] = articles
				end
				articles[base] = "the"
			end
		end
	end
end
-- If any of the following holonyms are present, the associated holonyms are automatically added
-- to the end of the list of holonyms for display and categorization purposes.
-- FIXME: There are none here currently and the mechanism is broken in that it doesn't properly
-- check for the presence of the holonym already. Don't add any without fixing this, or we'll
-- get redundantly-displayed holonyms in the common case where e.g. "Alabama, USA" is specified.
-- See below under cat_implications.
-- FIXME: Consider implementing a handler to automatically add implications for all political
-- subdivisions listed in the groups in the shared place data (m_shared), with the containing polity
-- as the implicand. That way, if someone writes e.g. {{place|en|village|s/Thuringia}}, it will
-- automatically display as if written {{place|en|village|s/Thuringia|c/Germany}}.
-- The table is intentionally empty for now (see the first FIXME).
export.general_implications = {
}
-- If any of the following holonyms are present, the associated holonyms are automatically added
-- to the end of the list of holonyms for categorization (but not display) purposes.
-- Each implied holonym is written as a "placetype/placename" string, and an entry may imply several
-- holonyms at once (e.g. {"country/Russia", "continent/Asia"}).
-- FIXME: We should implement an implication handler to add cat_implications for all political
-- subdivisions listed in the groups in the shared place data (m_shared), with the containing polity
-- as the implicand. (This should be a handler not a preprocessing step to save memory.) Before
-- doing that, we should fix the implication mechanism to not add a holonym if the holonym
-- already exists or a conflicting holonym exists, where "conflicting" means a different holonym
-- of the same placetype as the holonym being added. Hence, if e.g. two countries have a province of
-- the same name, and we have an entry for one of the provinces, we won't add that province's country
-- if the other country is already specified.
export.cat_implications = {
= {
= {"continent/Europe"},
= {"continent/Europe"},
= {"continent/Europe"},
= {"continent/Europe"},
= {"continent/Europe"},
= {"continent/Europe"},
= {"continent/Europe"},
= {"continent/Europe"},
= {"continent/Asia"},
= {"continent/Asia"},
= {"continent/Asia"},
= {"continent/Asia"},
= {"continent/Asia"},
= {"continent/Asia"},
= {"continent/Asia"},
= {"continent/Asia"},
= {"continent/Asia"},
= {"continent/Asia"},
= {"continent/Africa"},
= {"continent/Africa"},
= {"continent/Africa"},
= {"continent/Africa"},
= {"continent/Africa"},
= {"continent/Central America"},
= {"continent/North America"},
= {"continent/Oceania"},
= {"continent/Oceania"},
= {"continent/Oceania"},
= {"country/Russia", "continent/Asia"},
= {"country/Russia", "continent/Asia"},
= {"constituent country/Wales", "continent/Europe"},
= {"continent/Europe"},
= {"country/Palestine", "continent/Asia"},
}
}
-- Run the place-category handler for GROUP on PLACETYPES and PLACENAME. A group may supply its
-- own handler in its `place_cat_handler` field; when it does not, the shared default handler
-- (`m_shared.default_place_cat_handler`) is used. Every value the handler returns is passed
-- back to the caller unchanged.
local function call_place_cat_handler(group, placetypes, placename)
	local cat_handler = group.place_cat_handler
	if not cat_handler then
		cat_handler = m_shared.default_place_cat_handler
	end
	return cat_handler(group, placetypes, placename)
end
------------------------------------------------------------------------------------------
-- Category and display handlers --
------------------------------------------------------------------------------------------
-- Shared category handler for city-like placetypes. It pluralizes PLACETYPE and, for the first
-- group in `m_shared.polities` whose place-cat handler recognizes the holonym
-- HOLONYM_PLACETYPE/HOLONYM_PLACENAME, builds category names of the form
-- "<Plural placetype> in <key>".
--
-- Parameters:
--   placetype: singular placetype of the entry; pluralized with m_strutils.pluralize().
--   holonym_placetype, holonym_placename: the holonym to look up in each polity group.
--   allow_if_holonym_is_city: if truthy, also categorize when the matched entry has `is_city` set.
--   no_containing_polity: if truthy, don't add the "... in <containing polity>" category.
--   extracats: optional list of extra category names appended to the result.
-- Returns a table wrapping the list of categories, or nothing if no group yields a usable entry.
--
-- NOTE(review): several subscript expressions appear to have been lost in this copy: the test on
-- `m_shared.generic_place_types`, the lookup `group.data`, and the key of the returned table
-- (`= retcats`, which is not valid Lua as shown). Confirm against the upstream module.
local function city_type_cat_handler(placetype, holonym_placetype, holonym_placename, allow_if_holonym_is_city,
no_containing_polity, extracats)
local plural_placetype = m_strutils.pluralize(placetype)
if m_shared.generic_place_types then
for _, group in ipairs(m_shared.polities) do
-- Find the appropriate key format for the holonym (e.g. "pref/Osaka" -> "Osaka Prefecture").
local key, _ = call_place_cat_handler(group, holonym_placetype, holonym_placename)
if key then
local value = group.data
if value then
-- Use the group's value_transformer to ensure that 'is_city', 'containing_polity'
-- and 'british_spelling' keys are present if they should be.
value = group.value_transformer(group, key, value)
-- Skip former places always, and skip city holonyms unless the caller opted in.
if not value.is_former_place and (not value.is_city or allow_if_holonym_is_city) then
-- Categorize both in key, and in the larger polity that the key is part of,
-- e.g. a city whose holonym resolves to "Osaka Prefecture" goes in both
-- "Cities in Osaka Prefecture" and "Cities in Japan". (But don't do the latter if
-- no_containing_polity_cat is set.)
if plural_placetype == "neighborhoods" and value.british_spelling then
plural_placetype = "neighbourhoods"
end
local retcats = {ucfirst(plural_placetype) .. " in " .. key}
if value.containing_polity and not value.no_containing_polity_cat and not no_containing_polity then
table.insert(retcats, ucfirst(plural_placetype) .. " in " .. value.containing_polity)
end
if extracats then
for _, cat in ipairs(extracats) do
table.insert(retcats, cat)
end
end
-- Return on the first group that produces categories; later groups are not consulted.
return {
= retcats
}
end
end
end
end
end
end
-- Category handler for capital-type placetypes. Given the holonym
-- HOLONYM_PLACETYPE/HOLONYM_PLACENAME and the full place description PLACE_DESC, it first
-- collects the ordinary city categories (unless NON_CITY is truthy) by trying
-- city_type_cat_handler("city", ...) on each holonym of PLACE_DESC, then adds a capital category:
-- "<Capital cat> of <containing polity>" if a polity group recognizes the holonym and has a
-- containing polity, else the plain capital category, else "Capital cities" when no capital
-- category is known for the holonym placetype.
-- Returns the resulting inner-data table, or nothing when HOLONYM_PLACETYPE is nil/false.
--
-- NOTE(review): several subscripts/keys appear to have been lost in this copy: the key in the
-- default `inner_data` table (`= {}`, not valid Lua as shown), both lookups on
-- `m_shared.placetype_to_capital_cat` (identical as shown, so the retry is a no-op), the lookup
-- `group.data`, and presumably the sub-table that the `table.insert(inner_data, ...)` calls
-- target. Confirm against the upstream module.
local function capital_city_cat_handler(holonym_placetype, holonym_placename, place_desc, non_city)
-- The first time we're called we want to return something; otherwise we will be called
-- for later-mentioned holonyms, which can result in wrongly classifying into e.g.
-- 'National capitals'.
if holonym_placetype then
-- Simulate the loop in find_cat_specs() over holonyms so we get the proper
-- 'Cities in ...' categories as well as the capital category/categories we add below.
local inner_data
if not non_city and place_desc.holonyms then
for _, holonym in ipairs(place_desc.holonyms) do
local h_placetype, h_placename = holonym.placetype, holonym.placename
h_placename = export.resolve_cat_aliases(h_placetype, h_placename)
inner_data = export.get_equiv_placetype_prop(h_placetype,
function(pt) return city_type_cat_handler("city", pt, h_placename) end)
-- Stop at the first holonym that yields city categories.
if inner_data then
break
end
end
end
if not inner_data then
inner_data = {
= {}
}
end
-- Now find the appropriate capital-type category for the placetype of the holonym,
-- e.g. 'State capitals'. If we recognize the holonym among the known holonyms in
-- the shared place data, also add a category like 'State capitals of the United States'.
-- Truncate e.g. 'autonomous region' to 'region', 'union territory' to 'territory' when looking
-- up the type of capital category, if we can't find an entry for the holonym placetype itself
-- (there's an entry for 'autonomous community').
local capital_cat = m_shared.placetype_to_capital_cat
if not capital_cat then
capital_cat = m_shared.placetype_to_capital_cat
end
if capital_cat then
capital_cat = ucfirst(capital_cat)
local inserted_specific_variant_cat = false
for _, group in ipairs(m_shared.polities) do
-- Find the appropriate key format for the holonym (e.g. "pref/Osaka" -> "Osaka Prefecture").
local key, _ = call_place_cat_handler(group, holonym_placetype, holonym_placename)
if key then
local value = group.data
if value then
-- Use the group's value_transformer to ensure that 'containing_polity'
-- is present if it should be.
value = group.value_transformer(group, key, value)
if value.containing_polity and not value.no_containing_polity_cat then
table.insert(inner_data, capital_cat .. " of " .. value.containing_polity)
inserted_specific_variant_cat = true
break
end
end
end
end
-- Fall back to the generic capital category when no polity-specific one was added.
if not inserted_specific_variant_cat then
table.insert(inner_data, capital_cat)
end
else
-- We didn't recognize the holonym placetype; just put in 'Capital cities'.
table.insert(inner_data, "Capital cities")
end
return inner_data
end
end
-- This is used to add pages to base holonym categories like 'en:Places in Merseyside, England'
-- (and 'en:Places in England') for any pages that have 'co/Merseyside' as their holonym.
-- It also handles cities (e.g. 'en:Places in Boston', along with 'en:Places in Massachusetts, USA'
-- and 'en:Places in the United States') for any pages that have 'city/Boston' as their holonym.
--
-- The function works in two phases:
--   1. Try each group in `m_shared.polities`; the first group whose place-cat handler yields a
--      key with data returns "Places in <key>" (plus "Places in <containing polity>" unless
--      suppressed).
--   2. Otherwise, if HOLONYM_PLACETYPE is "city", try each group in `m_shared.cities`, following
--      an `alias_of` redirect if present, and add the city plus its containing polities provided
--      that no conflicting polity is mentioned in PLACE_DESC.
-- Returns a table wrapping the category list, or nothing if the holonym isn't recognized.
--
-- NOTE(review): many subscripts/keys appear to have been lost in this copy: `group.data`,
-- `city_group.data` (twice), `polity_group.data`, `place_desc.holonyms_by_placetype` (three
-- times), the second `key_divtype == divtype` comparison, and the keys of both returned tables
-- (`= retcats`, not valid Lua as shown). Confirm against the upstream module.
local function generic_cat_handler(holonym_placetype, holonym_placename, place_desc)
for _, group in ipairs(m_shared.polities) do
-- Find the appropriate key format for the holonym (e.g. "pref/Osaka" -> "Osaka Prefecture").
local key, _ = call_place_cat_handler(group, holonym_placetype, holonym_placename)
if key then
local value = group.data
if value then
-- Use the group's value_transformer to ensure that 'containing_polity' and 'no_containing_polity_cat'
-- keys are present if they should be.
value = group.value_transformer(group, key, value)
-- Categorize both in key, and in the larger polity that the key is part of,
-- e.g. a holonym resolving to "Osaka Prefecture" goes in both "Places in Osaka Prefecture"
-- and "Places in Japan".
local retcats = {"Places in " .. key}
if value.containing_polity and not value.no_containing_polity_cat then
table.insert(retcats, "Places in " .. value.containing_polity)
end
return {
= retcats
}
end
end
end
-- Check for cities mentioned as holonyms.
if holonym_placetype == "city" then
for _, city_group in ipairs(m_shared.cities) do
local value = city_group.data
-- Follow an alias entry to the canonical city entry; a dangling alias is a data error.
if value and value.alias_of then
local new_value = city_group.data
if not new_value then
error("City '" .. holonym_placename .. "' has an entry with non-existent alias_of='" .. value.alias_of .. "'")
end
holonym_placename = value.alias_of
value = new_value
end
if value then
-- Check if any of the city's containing polities are explicitly mentioned. If not, make sure
-- that no other polities of the same sort are mentioned.
local containing_polities = m_shared.get_city_containing_polities(city_group, holonym_placename, value)
local containing_polities_match = false
local containing_polities_mismatch = false
for _, polity in ipairs(containing_polities) do
local bare_polity, linked_polity = m_shared.construct_bare_and_linked_version(polity)
local divtype = polity.divtype or city_group.default_divtype
-- Callback for get_equiv_placetype_prop(): true if some holonym equals this polity's bare name.
local function holonym_matches_polity(placetype)
if not place_desc.holonyms_by_placetype then
return false
end
for _, holonym in ipairs(place_desc.holonyms_by_placetype) do
if holonym == bare_polity then
return true
end
end
return false
end
containing_polities_match = export.get_equiv_placetype_prop(divtype, holonym_matches_polity)
if containing_polities_match then
break
end
-- No match for this polity: any holonym at all of this divtype counts as a conflict.
containing_polities_mismatch = export.get_equiv_placetype_prop(divtype,
function(pt) return not not place_desc.holonyms_by_placetype end)
if containing_polities_mismatch then
break
end
end
-- No mismatching containing polities, so add categories for the city and
-- its containing polities.
if not containing_polities_mismatch then
local retcats = {"Places in " .. holonym_placename}
for _, polity in ipairs(containing_polities) do
local divtype = polity.divtype or city_group.default_divtype
local drop_dead_now = false
-- Find the group and key corresponding to the polity.
for _, polity_group in ipairs(m_shared.polities) do
local key = polity
if polity_group.placename_to_key then
key = polity_group.placename_to_key(key)
end
local value = polity_group.data
if value then
value = polity_group.value_transformer(polity_group, key, value)
local key_divtype = value.divtype or polity_group.default_divtype
if key_divtype == divtype or type(key_divtype) == "table" and key_divtype == divtype then
table.insert(retcats, "Places in " .. key)
if value.no_containing_polity_cat then
-- Stop adding containing polities if no_containing_polity_cat
-- is found. (Used for 'United Kingdom'.)
drop_dead_now = true
end
break
end
end
end
if drop_dead_now then
break
end
end
return {
= retcats
}
end
end
end
end
end
-- This is used to add pages to "bare" categories like 'en:Georgia, USA' for the entry naming the US state of Georgia
-- and any foreign-language terms that are translations of the state of Georgia. We look at the page title (or its
-- overridden value in pagename=), as well as the glosses in t=/t2= etc. and the modern names in modern=. We need to
-- pay attention to the entry placetypes specified so we don't overcategorize; e.g. the US state of Georgia and the
-- country of Georgia have different names in Russian, and if we just looked for matching names, we'd get both
-- Russian terms categorized into both 'ru:Georgia, USA' and 'ru:Georgia'.
--
-- Parameters:
--   args: argument table; reads `args.pagename` (optional), `args.t` and `args.modern` (both iterated with ipairs).
--   place_descs: list of place descriptions; reads each one's `placetypes` list.
-- Returns the list of bare category names collected (possibly empty).
--
-- NOTE(review): the linked example terms in the comment above were lost in this copy and have been paraphrased.
-- In check_term(), `city_group.data` appears to have lost its subscript (as shown, it only tests that the group
-- has a data table) -- confirm against the upstream module.
function export.get_bare_categories(args, place_descs)
local bare_cats = {}
-- Gather every placetype (including equivalents) of every place description, skipping pseudo-placetypes.
local possible_placetypes = {}
for _, place_desc in ipairs(place_descs) do
for _, placetype in ipairs(place_desc.placetypes) do
if not export.placetype_is_ignorable(placetype) then
local equivs = export.get_placetype_equivs(placetype)
for _, equiv in ipairs(equivs) do
table.insert(possible_placetypes, equiv.placetype)
end
end
end
end
local city_in_placetypes = false
for _, placetype in ipairs(possible_placetypes) do
-- Check to see whether any variant of 'city' is in placetypes, e.g. 'capital city', 'subprovincial city',
-- 'metropolitan city', 'prefecture-level city', etc.
if placetype == "city" or placetype:find(" city$") then
city_in_placetypes = true
break
end
end
-- Look TERM up among the known polities (and cities, if applicable) and append any bare
-- categories found to `bare_cats`.
local function check_term(term)
-- Treat Wikipedia links like local ones.
term = term:gsub("%[%[w:", "[["):gsub("%[%[wikipedia:", "[[")
term = export.remove_links_and_html(term)
-- Drop a leading article so that e.g. "the X" is looked up as "X".
term = term:gsub("^the ", "")
for _, group in ipairs(m_shared.polities) do
-- Try to find the term among the known polities.
local cat, bare_cat = call_place_cat_handler(group, possible_placetypes, term)
if bare_cat then
table.insert(bare_cats, bare_cat)
end
end
if city_in_placetypes then
for _, city_group in ipairs(m_shared.cities) do
local value = city_group.data
if value then
table.insert(bare_cats, value.alias_of or term)
-- No point in looking further as we don't (currently) have categories for two distinct cities with
-- the same name.
break
end
end
end
end
-- FIXME: Should we only do the following if the language is English (requires that the lang is passed in)?
check_term(args.pagename or mw.title.getCurrentTitle().subpageText)
for _, t in ipairs(args.t) do
check_term(t)
end
for _, modern in ipairs(args.modern) do
check_term(modern)
end
return bare_cats
end
-- This is used to augment the holonyms associated with a place description with the containing polities. For example,
-- given the following:
-- # The {{w|City of Penrith}}, {{place|en|a=a|lgarea|in|s/New South Wales}}.
-- We auto-add Australia as another holonym so that the term gets categorized into
-- the corresponding Australia-level category (NOTE(review): the category link here was lost in this copy).
-- To avoid over-categorizing we need to check to make sure no other countries are specified as holonyms.
--
-- PLACE_DESCS is a list of place descriptions. Each description's `holonyms` list is modified in place: for every
-- qualifying holonym a new holonym {placetype = ..., placename = <containing polity>, no_display = true} is
-- appended and registered via export.key_holonym_into_place_desc(). Nothing is returned.
--
-- NOTE(review): `group.data`, `place_desc.holonyms_by_placetype` and the assignment
-- `containing_type = containing_type` (a no-op as shown, despite the comment about taking the first element)
-- appear to have lost their subscripts in this copy -- confirm against the upstream module.
function export.augment_holonyms_with_containing_polity(place_descs)
for _, place_desc in ipairs(place_descs) do
if place_desc.holonyms then
-- Collected separately and appended after the loop, so the list isn't mutated while being iterated.
local new_holonyms = {}
for _, holonym in ipairs(place_desc.holonyms) do
if holonym.placetype and not export.placetype_is_ignorable(holonym.placetype) then
local possible_placetypes = {}
local equivs = export.get_placetype_equivs(holonym.placetype)
for _, equiv in ipairs(equivs) do
table.insert(possible_placetypes, equiv.placetype)
end
for _, group in ipairs(m_shared.polities) do
-- Try to find the term among the known polities.
local key, _ = call_place_cat_handler(group, possible_placetypes, holonym.placename)
if key then
local value = group.data
if value then
value = group.value_transformer(group, key, value)
if not value.no_containing_polity_cat and value.containing_polity and
value.containing_polity_type then
local existing_polities_of_type
local containing_type = value.containing_polity_type
-- Returns a truthy value if PLACE_DESC already has holonyms of PLACETYPE or an equivalent.
local function get_existing_polities_of_type(placetype)
return export.get_equiv_placetype_prop(placetype,
function(pt) return place_desc.holonyms_by_placetype end
)
end
-- Usually there's a single containing type but write as if more than one can be
-- specified (e.g. {"administrative region", "region"}).
if type(containing_type) == "string" then
existing_polities_of_type = get_existing_polities_of_type(containing_type)
else
for _, containing_pt in ipairs(containing_type) do
existing_polities_of_type = get_existing_polities_of_type(containing_pt)
if existing_polities_of_type then
break
end
end
end
if existing_polities_of_type then
-- Don't augment. Either the containing polity is already specified as a holonym,
-- or some other polity is, which we consider a conflict.
else
if type(containing_type) == "table" then
-- If the containing type is a list, use the first element as the canonical
-- variant.
containing_type = containing_type
end
-- Don't side-effect holonyms while processing them.
table.insert(new_holonyms, {placetype = containing_type,
placename = value.containing_polity, no_display = true})
end
end
end
end
end
end
end
for _, new_holonym in ipairs(new_holonyms) do
table.insert(place_desc.holonyms, new_holonym)
export.key_holonym_into_place_desc(place_desc, new_holonym)
end
end
end
-- FIXME, consider doing cities as well.
end
-- Inner data returned by cat handler for districts, neighborhoods, etc.
-- Builds a table with eight entries that all share the same VALUE and returns it. ITSELF_DEST is
-- optional; when truthy it is meant to override part of the result.
-- NOTE(review): the eight table keys are missing in this copy (so the constructor is not valid
-- Lua as shown), and `retval = itself_dest` replaces the whole table as shown, which presumably
-- was an assignment to a single key -- confirm against the upstream module.
local function district_inner_data(value, itself_dest)
local retval = {
= value,
= value,
= value,
= value,
= value,
= value,
= value,
= value,
}
if itself_dest then
retval = itself_dest
end
return retval
end
-- Cat handler for districts and areas. Districts are tricky because they can
-- either be political subdivisions or city neighborhoods. We handle this as follows:
-- (1) For countries etc. where they can be political subdivisions, an entry under
-- "district" will be inserted for the country with something similar to the following:
--
-- {
-- = {"Districts of Foo"},
-- = {"Neighborhoods in Foo"},
-- = {"Neighborhoods in Foo"},
-- = {"Neighborhoods in Foo"},
-- ...
-- }
--
-- This way, a district in a city will categorize under "Neighborhoods in Foo"
-- while some other district will categorize under "Districts of Foo".
-- (2) For the remaining countries, we have a cat_handler that returns the following
-- for all known countries and primary subdivisions:
--
-- {
-- = {"Neighborhoods in Foo"},
-- = {"Neighborhoods in Foo"},
-- = {"Neighborhoods in Foo"},
-- ...
-- }
--
-- This way, a district under a city will still categorize under "Neighborhoods in Foo"
-- while other districts won't categorize.
--
-- NOTE(review): the keys of the example tables above were lost in this copy.
--
-- The function returns district_inner_data() for the first group in `m_shared.polities` whose
-- place-cat handler recognizes the holonym, using "Neighbourhoods in <key>" when the entry has
-- `british_spelling` set and "Neighborhoods in <key>" otherwise; it returns nothing if no group
-- matches. The PLACETYPE parameter is not used in the body as shown.
-- NOTE(review): `group.data` appears to have lost its subscript in this copy -- confirm against
-- the upstream module.
local function district_cat_handler(placetype, holonym_placetype, holonym_placename)
for _, group in ipairs(m_shared.polities) do
-- Find the appropriate key format for the holonym (e.g. "pref/Osaka" -> "Osaka Prefecture").
local key, _ = call_place_cat_handler(group, holonym_placetype, holonym_placename)
if key then
local value = group.data
if value then
-- Run the group's value_transformer before reading 'british_spelling' from the entry.
value = group.value_transformer(group, key, value)
if value.british_spelling then
return district_inner_data({"Neighbourhoods in " .. key})
else
return district_inner_data({"Neighborhoods in " .. key})
end
end
end
end
end
-- Cat handler that returns a "Cities in <holonym placename>" category when the holonym's
-- placetype equals the divtype recorded for it in
-- `m_shared.chinese_provinces_and_autonomous_regions` (defaulting to "province"); otherwise it
-- returns nothing. PLACE_DESC is not used in the body as shown.
-- NOTE(review): the lookup on `m_shared.chinese_provinces_and_autonomous_regions` appears to have
-- lost its subscript, and the key of the returned table is missing (not valid Lua as shown) --
-- confirm against the upstream module.
local function chinese_subcity_cat_handler(holonym_placetype, holonym_placename, place_desc)
local spec = m_shared.chinese_provinces_and_autonomous_regions
if spec and holonym_placetype == (spec.divtype or "province") then
return {
= {"Cities in " .. holonym_placename}
}
end
end
-- Return true if HOLONYM_PLACENAME, with links removed and converted to lowercase, contains any
-- of the strings in ALREADY_SEEN_STRINGS; otherwise return false.
--
-- Parameters:
--   holonym_placename: the holonym placename, possibly containing wiki links.
--   already_seen_strings: a single string or a list of strings to look for. They are compared as
--     given (not lowercased here), so callers should pass lowercase strings.
--
-- The strings are matched as literal substrings, not as Lua patterns. Without plain matching, a
-- string containing a pattern-magic character (e.g. the "-" in a hyphenated placetype, or ".")
-- would silently fail to match itself or would match unrelated text.
function export.check_already_seen_string(holonym_placename, already_seen_strings)
	local canon_placename = lc(m_links.remove_links(holonym_placename))
	-- Accept a bare string as shorthand for a one-element list.
	if type(already_seen_strings) ~= "table" then
		already_seen_strings = {already_seen_strings}
	end
	for _, already_seen_string in ipairs(already_seen_strings) do
		-- init = 1, plain = true: literal substring search.
		if canon_placename:find(already_seen_string, 1, true) then
			return true
		end
	end
	return false
end
-- Prefix display handler that adds a prefix such as "Metropolitan Borough of " to the display
-- form of holonyms. We make sure the holonym doesn't contain the prefix or some variant already.
-- We do this by checking if any of the strings in ALREADY_SEEN_STRINGS, either a single string or
-- a list of strings, or the prefix if ALREADY_SEEN_STRINGS is omitted, are found in the holonym
-- placename, ignoring case and links. If the prefix isn't already present, we create a link that
-- uses the raw form as the link destination but the prefixed form as the display form, unless the
-- holonym already has a link in it, in which case we just add the prefix.
--
-- Returns the display string for the holonym.
-- NOTE(review): the final return statement appears to have lost its link-building pieces in this
-- copy (as shown it returns PREFIX followed by " ]" and never uses the placename) -- confirm
-- against the upstream module.
local function prefix_display_handler(prefix, holonym_placename, already_seen_strings)
-- Prefix (or a variant) already present: display the placename unchanged.
if export.check_already_seen_string(holonym_placename, already_seen_strings or lc(prefix)) then
return holonym_placename
end
-- Placename already contains a link: just prepend the prefix rather than nesting links.
if holonym_placename:find("%[%[") then
return prefix .. " " .. holonym_placename
end
return prefix .. " ]"
end
-- Suffix display handler that adds a suffix such as " parish" to the display form of holonyms.
-- Works identically to prefix_display_handler but for suffixes instead of prefixes.
--
-- Returns the display string for the holonym.
-- NOTE(review): the final return statement appears to have lost its link-building pieces in this
-- copy (as shown it returns "] " followed by SUFFIX and never uses the placename) -- confirm
-- against the upstream module.
local function suffix_display_handler(suffix, holonym_placename, already_seen_strings)
-- Suffix (or a variant) already present: display the placename unchanged.
if export.check_already_seen_string(holonym_placename, already_seen_strings or lc(suffix)) then
return holonym_placename
end
-- Placename already contains a link: just append the suffix rather than nesting links.
if holonym_placename:find("%[%[") then
return holonym_placename .. " " .. suffix
end
return "] " .. suffix
end
-- Display handler for counties. Irish counties are displayed with a "County" prefix (added via
-- prefix_display_handler). Others are displayed as-is.
-- HOLONYM_PLACETYPE is not used in the body as shown.
-- NOTE(review): `unlinked_placename` is computed but never used as shown, and the condition only
-- tests that the `m_shared.irish_counties` / `m_shared.northern_irish_counties` tables exist; the
-- lookups appear to have lost their subscripts in this copy -- confirm against the upstream module.
-- (The linked example in the original comment was also lost.)
local function county_display_handler(holonym_placetype, holonym_placename)
local unlinked_placename = m_links.remove_links(holonym_placename)
if m_shared.irish_counties or
m_shared.northern_irish_counties then
return prefix_display_handler("County", holonym_placename)
end
return holonym_placename
end
-- Display handler for boroughs. New York City boroughs are displayed as-is. Others are suffixed
-- with "borough" (via suffix_display_handler).
-- HOLONYM_PLACETYPE is not used in the body as shown.
-- NOTE(review): `unlinked_placename` is computed but never used as shown, and the condition only
-- tests that the `m_shared.new_york_boroughs` table exists; the lookup appears to have lost its
-- subscript in this copy -- confirm against the upstream module.
local function borough_display_handler(holonym_placetype, holonym_placename)
local unlinked_placename = m_links.remove_links(holonym_placename)
if m_shared.new_york_boroughs then
-- Hack: don't display "borough" after the names of NYC boroughs
return holonym_placename
end
return suffix_display_handler("borough", holonym_placename)
end
-- Display handler for prefectures. Japanese prefectures are displayed with a capitalized
-- "Prefecture" suffix. Others are displayed with a lowercase "prefecture" suffix. The suffix is
-- added via suffix_display_handler.
-- HOLONYM_PLACETYPE is not used in the body as shown.
-- NOTE(review): `unlinked_placename` is computed but never used as shown, and the suffix choice
-- only tests that the `m_shared.japanese_prefectures` table exists; the lookup appears to have
-- lost its subscript in this copy -- confirm against the upstream module. (The linked examples in
-- the original comment were also lost.)
local function prefecture_display_handler(holonym_placetype, holonym_placename)
local unlinked_placename = m_links.remove_links(holonym_placename)
local suffix = m_shared.japanese_prefectures and "Prefecture" or "prefecture"
return suffix_display_handler(suffix, holonym_placename)
end
------------------------------------------------------------------------------------------
-- Categorization data --
------------------------------------------------------------------------------------------
export.cat_data = {
= {
preposition = "of",
= {
= {true},
},
},
= {
article = "the",
preposition = "of",
},
= {
preposition = "of",
= {
= {true},
},
},
= {
= {
= {true},
},
},
= {
article = "the",
preposition = "of",
= {
= {"Ancient settlements", "Historical capitals"},
},
},
= {
= {
= {"Ancient settlements"},
},
},
= {
cat_handler = function(holonym_placetype, holonym_placename, place_desc)
return district_cat_handler("area", holonym_placetype, holonym_placename)
end,
},
= {
preposition = "of",
},
= {
= {
= {true},
},
},
= {
preposition = "of",
fallback = "city",
},
= {
preposition = "of",
= {
= {true},
},
},
= {
preposition = "of",
affix_type = "Suf",
no_affix_strings = "oblast",
},
= {
preposition = "of",
affix_type = "Suf",
no_affix_strings = "okrug",
},
= {
preposition = "of",
= {
= {"Districts and autonomous regions of +++"},
},
= {
= {true},
},
},
= {
preposition = "of",
= {
= {true},
},
},
= {
preposition = "of",
= {
= {true},
},
},
= {
= {
= {true},
},
},
= {
preposition = "of",
display_handler = borough_display_handler,
cat_handler = function(holonym_placetype, holonym_placename, place_desc)
if holonym_placetype == "county" then
local cat_form = holonym_placename .. ", England"
if not m_shared.english_counties then
cat_form = "the " .. cat_form
if not m_shared.english_counties then
cat_form = nil
end
end
if cat_form then
return {
= {"Districts of " .. cat_form, "Districts of England"}
}
end
end
if (holonym_placetype == "country" or holonym_placetype == "constituent country") and
holonym_placename == "England" then
return {
= {"Districts of +++"},
}
end
end,
= {
= {"Boroughs of +++, USA"},
},
= {
= {"Boroughs in +++"},
},
= {
= {"Boroughs in +++, USA"},
},
= {
= {"Boroughs in +++, USA"},
},
},
= {
article = "the",
preposition = "of",
= {
= {"Borough seats of +++, USA"},
},
},
= {
preposition = "of",
fallback = "river",
},
= {
preposition = "of",
affix_type = "suf",
= {
= {true},
},
},
= {
article = "the",
preposition = "of",
cat_handler = capital_city_cat_handler,
= {
= {true},
},
},
= {
affix_type = "Suf",
},
= {
cat_handler = function(holonym_placetype, holonym_placename, place_desc)
if holonym_placetype == "state" then
return city_type_cat_handler("census-designated place", holonym_placetype, holonym_placename)
end
end,
= {
= {true},
},
},
= {
cat_handler = function(holonym_placetype, holonym_placename, place_desc)
return city_type_cat_handler("city", holonym_placetype, holonym_placename)
end,
= {
= {true},
= {true},
},
},
= {
= {
= {"City-states", "Cities", "Countries", "Countries in +++", "National capitals"},
= {"City-states", "Cities", "Countries", "National capitals"},
},
},
= {
preposition = "of",
affix_type = "suf",
= {
= {"Civil parishes of +++"},
},
},
= {
preposition = "of",
= {
= {"Polities"},
= {true},
},
},
= {
preposition = "of",
},
= {
preposition = "of",
= {
= {"Communes of +++, Chile", "Communes of Chile"},
= {true},
},
},
= {
affix_type = "suf",
no_affix_strings = "block",
},
= {
preposition = "of",
fallback = "country",
},
= {
= {
= {true},
},
},
= {
preposition = "of",
affix_type = "suf",
= {
= {true},
= {true},
},
},
= {
= {
= {true, "Countries"},
= {true},
},
},
= {
preposition = "of",
-- UNITED STATES
cat_handler = function(holonym_placetype, holonym_placename, place_desc)
local spec = m_shared.us_states
if spec and holonym_placetype == "state" and not spec.county_type then
return {
= {"Counties of " .. holonym_placename .. ", USA"}
}
end
end,
display_handler = county_display_handler,
= {
},
= {
= {"Traditional counties of +++"},
},
= {
= {"Traditional counties of +++"},
},
= {
= {"Polities"},
= {true},
},
},
= {
= {
= {"Cities in +++"},
},
},
= {
preposition = "of",
affix_type = "suf",
fallback = "borough",
},
= {
article = "the",
preposition = "of",
-- UNITED STATES
cat_handler = function(holonym_placetype, holonym_placename, place_desc)
local spec = m_shared.us_states
if spec and holonym_placetype == "state" and not spec.county_type then
return {
= {"County seats of " .. holonym_placename .. ", USA"}
}
end
end,
},
= {
article = "the",
preposition = "of",
fallback = "town",
},
= {
preposition = "of",
affix_type = "suf",
holonym_article = "the",
= {
= {true},
},
},
= {
preposition = "of",
= {
= {"Dependent territories"},
= {"Dependent territories of +++"},
},
},
= {
= {
= {true},
},
},
= {
preposition = "of",
fallback = "river",
},
= {
preposition = "of",
affix_type = "suf",
cat_handler = function(holonym_placetype, holonym_placename, place_desc)
return district_cat_handler("district", holonym_placetype, holonym_placename)
end,
= {
= {"Districts and autonomous regions of +++"},
},
-- No default. Countries for which districts are political subdivisions will get entries.
},
= {
preposition = "of",
affix_type = "suf",
no_affix_strings = {"district", "municipality"},
fallback = "municipality",
},
= {
preposition = "of",
= {
= {true},
},
},
= {
preposition = "of",
},
= {
preposition = "of",
},
= {
preposition = "of",
= {
= {true},
},
},
= {
preposition = "of",
= {
= {true},
},
},
= {
= {
= {true},
},
},
= {
= {
= {true},
},
},
= {
article = "the",
preposition = "of",
= {
= {"Prefectures of +++", "Departmental capitals"},
},
},
= {
-- Nigeria
preposition = "of",
},
= {
cat_handler = function(holonym_placetype, holonym_placename, place_desc)
local function check_for_recognized(divlist, default_divtype, placename_to_key)
local key = placename_to_key and placename_to_key(holonym_placename) or holonym_placename
local spec = divlist
if not spec then
key = "the " .. key
spec = divlist
end
if spec and holonym_placetype == (spec.divtype or default_divtype) then
return {
= {"Ghost towns in " .. key}
}
end
end
return (
check_for_recognized(m_shared.us_states, "state", function(placename) return placename .. ", USA" end) or
check_for_recognized(m_shared.canadian_provinces_and_territories, "province") or
check_for_recognized(m_shared.australian_states_and_territories, "state")
)
end,
= {
= {true},
= {true},
},
},
= {
preposition = "of",
affix_type = "suf",
},
= {
-- China (historical subdivision)
preposition = "of",
},
= {
-- Poland (historical subdivision)
preposition = "of",
affix_type = "Pref",
},
= {
preposition = "of",
holonym_article = "the",
= {
= {true},
},
},
= {
= {
= {true},
},
},
= {
= {
= {true},
},
},
= {
article = "the",
preposition = "of",
= {
= {"Historical settlements", "Historical capitals"},
},
},
= {
preposition = "of",
= {
= {"Traditional counties of +++"},
},
= {
= {"Traditional counties of +++"},
},
= {
= {"Historical political subdivisions"},
},
},
= {
preposition = "of",
= {
= {true},
},
},
= {
= {
= {true},
},
},
= {
preposition = "of",
= {
= {true},
},
},
= {
= {
= {"Historical and traditional regions"},
},
},
= {
= {
= {"Historical settlements"},
},
},
= {
preposition = "of",
affix_type = "Suf",
},
= {
= {
= {true},
},
},
= {
plural = "kibbutzim",
= {
= {true},
},
},
= {
preposition = "of",
affix_type = "Suf",
= {
= {true},
},
},
= {
= {
= {true},
},
},
= {
article = "the",
fallback = "city",
},
= {
preposition = "of",
affix_type = "suf",
affix = "district",
cat_handler = function(holonym_placetype, holonym_placename, place_desc)
if holonym_placetype == "county" then
local cat_form = holonym_placename .. ", England"
if not m_shared.english_counties then
cat_form = "the " .. cat_form
if not m_shared.english_counties then
cat_form = nil
end
end
if cat_form then
return {
= {"Districts of " .. cat_form, "Districts of England"}
}
end
end
if (holonym_placetype == "country" or holonym_placetype == "constituent country") and
holonym_placename == "England" then
return {
= {"Districts of +++"},
}
end
end,
},
= {
preposition = "of",
affix_type = "pref",
affix = "borough",
fallback = "local government district",
},
= {
preposition = "of",
= {
= {"Seas"},
},
},
= {
preposition = "of",
affix_type = "Pref",
no_affix_strings = {"borough", "city"},
fallback = "local government district",
},
= {
preposition = "of",
affix_type = "Pref",
no_affix_strings = {"metropolitan", "city"},
fallback = "city",
= {
= {"Metropolitan cities of Italy"},
},
},
= {
= {
= {true},
},
},
= {
= {
= {true},
},
},
= {
= {
= {true},
},
},
= {
preposition = "of",
affix_type = "Pref",
no_affix_strings = "district",
fallback = "municipality",
},
= {
preposition = "of",
= {
= {true, "Municipalities of Austria"},
= {true},
},
= {
= {"Municipalities of +++, Brazil"},
= {true},
},
= {
= {"Municipalities of +++, Finland", "Municipalities of Finland"},
= {true},
},
= {
= {"Municipalities of +++, Netherlands", "Municipalities of the Netherlands"},
= {true},
},
= {
= {"Municipalities of +++, Philippines", "Municipalities of the Philippines"},
= {true},
},
= {
= {true},
},
},
= {
= {
= {true},
},
},
= {
preposition = "of",
cat_handler = function(holonym_placetype, holonym_placename, place_desc)
return city_type_cat_handler("neighborhood", holonym_placetype, holonym_placename,
"allow if holonym is city", "no containing polity")
end,
},
= {
-- China (type of economic development zone)
preposition = "in",
},
= {
article = "the",
preposition = "of",
cat_handler = function(holonym_placetype, holonym_placename, place_desc)
return capital_city_cat_handler(holonym_placetype, holonym_placename, place_desc, "non-city")
end,
= {
= {"Capital cities"},
},
},
= {
preposition = "of",
affix_type = "Suf",
},
= {
holonym_article = "the",
= {
= {true},
},
},
= {
preposition = "of",
affix_type = "Suf",
},
= {
preposition = "of",
affix_type = "suf",
= {
= {"Parishes of +++, USA"},
},
},
= {
preposition = "of",
fallback = "municipality",
= {
= {"Parishes of +++", "Municipalities of Canada"},
},
},
= {
article = "the",
preposition = "of",
= {
= {"Parish seats of +++, USA"},
},
},
= {
= {
= {true},
},
},
= {
= {
= {true},
},
},
= {
preposition = "of",
= {
= {"Regions of +++"},
},
},
= {
-- Include this empty so we don't categorize 'planned community' into
-- villages, as 'community' does.
},
= {
= {
= {true},
},
},
= {
preposition = "of",
display_handler = prefecture_display_handler,
= {
= {true},
},
},
= {
-- China
cat_handler = chinese_subcity_cat_handler,
= {
= {"Cities in +++"},
},
},
= {
preposition = "of",
= {
= {true},
= {true},
},
},
= {
preposition = "of",
affix_type = "Suf",
},
= {
holonym_article = "the",
= {
= {true},
= {true},
},
},
= {
preposition = "of",
= {
= {true},
},
},
= {
preposition = "of",
= {
= {true},
},
= {
= {true},
},
= {
= {true},
},
= {
= {"Counties and regions of +++"},
},
= {
= {true},
},
= {
= {true},
},
= {
= {true},
},
= {
= {true},
},
= {
= {true},
},
= {
= {true},
},
= {
= {true},
},
= {
= {true},
},
= {
= {true},
},
},
= {
preposition = "of",
affix_type = "Pref",
no_affix_strings = "district",
fallback = "district",
= {
= {"Regional districts of +++"},
},
},
= {
preposition = "of",
affix_type = "Suf",
no_affix_strings = {"municipality", "county"},
fallback = "municipality",
},
= {
preposition = "of",
affix_type = "Pref",
no_affix_strings = "municipality",
fallback = "municipality",
},
= {
preposition = "of",
},
= {
preposition = "of",
= {
= {true},
},
},
= {
holonym_article = "the",
cat_handler = function(holonym_placetype, holonym_placename, place_desc)
return city_type_cat_handler("river", holonym_placetype, holonym_placename)
end,
= {
= {true},
= {true},
},
},
= {
preposition = "of",
affix_type = "Pref",
no_affix_strings = {"royal", "borough"},
fallback = "local government district",
},
= {
affix_type = "Suf",
},
= {
preposition = "of",
affix_type = "Pref",
no_affix_strings = "municipality",
fallback = "municipality",
},
= {
preposition = "of",
},
= {
holonym_article = "the",
= {
= {true},
},
},
= {
preposition = "of",
= {
= {true},
},
},
= {
= {
= {true},
},
},
= {
preposition = "of",
= {
= {true},
},
},
= {
-- China
preposition = "in",
},
= {
= {
= {true},
},
},
= {
preposition = "of",
= {
= {true},
},
= {
= {true},
},
},
= {
preposition = "of",
affix_type = "suf",
cat_handler = function(holonym_placetype, holonym_placename, place_desc)
return district_cat_handler("subdivision", holonym_placetype, holonym_placename)
end,
},
= {
preposition = "of",
},
= {
preposition = "of",
},
= {
-- China
cat_handler = chinese_subcity_cat_handler,
= {
= {"Cities in +++"},
},
},
= {
-- China
preposition = "of",
},
= {
preposition = "of",
cat_handler = function(holonym_placetype, holonym_placename, place_desc)
return city_type_cat_handler("suburb", holonym_placetype, holonym_placename,
"allow if holonym is city", "no containing polity")
end,
},
= {
affix_type = "suf",
no_affix_strings = {"tehsil", "tahsil"},
},
= {
preposition = "of",
= {
= {"Polities"},
= {true},
},
},
= {
cat_handler = function(holonym_placetype, holonym_placename, place_desc)
return city_type_cat_handler("town", holonym_placetype, holonym_placename)
end,
= {
= {true},
= {true},
},
},
= {
= {
= {true},
},
},
= {
preposition = "of",
fallback = "municipality",
= {
= {"Townships in +++", "Townships in Canada", "Municipalities of Canada"},
},
},
= {
= {
= {"Historical and traditional regions"},
},
},
= {
preposition = "of",
fallback = "river",
},
= {
cat_handler = function(holonym_placetype, holonym_placename, place_desc)
if holonym_placetype == "state" then
return city_type_cat_handler("unincorporated community", holonym_placetype, holonym_placename)
end
end,
= {
= {true},
},
},
= {
preposition = "of",
article = "a",
},
= {
article = "a",
fallback = "local government district",
},
= {
article = "a",
fallback = "local government district",
},
= {
article = "a",
fallback = "township municipality",
},
= {
article = "a",
= {
= {true},
},
},
= {
= {
= {"Countries"},
},
},
= {
= {
= {true},
},
},
= {
cat_handler = function(holonym_placetype, holonym_placename, place_desc)
return city_type_cat_handler("village", holonym_placetype, holonym_placename)
end,
= {
= {true},
= {true},
},
},
= {
preposition = "of",
= {
= {"Villages in +++", "Villages in Canada", "Municipalities of Canada"},
},
},
= {
preposition = "of",
holonym_article = "the",
},
= {
plural = "volcanoes",
= {
= {true},
},
},
= {
preposition = "of",
affix_type = "suf",
affix = "community",
= {
= {"Communities of +++"},
},
},
= {
cat_handler = generic_cat_handler,
},
}
-- Now augment the category data with political subdivisions extracted from the
-- shared data. We don't need to do this if there's already an entry under "default"
-- for the divtype of the containing polity.
--
-- For every polity in the shared data that declares `poldiv` and/or `miscdiv`
-- lists, make sure `export.cat_data` has an entry for the singularized
-- subdivision type, and under that entry register a per-polity sub-entry for
-- each divtype of the polity.
--
-- NOTE(review): the table subscripts in this loop were missing (every access
-- read or overwrote `export.cat_data` / `export.placetype_equivs` as a whole,
-- and two table fields had no key at all, which does not even parse). They
-- have been restored from context: entries are indexed by `sgdiv`, the
-- per-polity key is `dt .. "/" .. key` (placetype/placename), and the inner
-- keys are "default" and "itself". Confirm these spellings against the
-- hand-written entries of `export.cat_data` earlier in the file.
for _, group in ipairs(m_shared.polities) do
	for key, value in pairs(group.data) do
		value = group.value_transformer(group, key, value)
		if value.poldiv or value.miscdiv then
			-- Only the bare version of the key is needed here; the linked version
			-- (second return value) is intentionally dropped.
			local bare_key = m_shared.construct_bare_and_linked_version(key)
			local divtype = value.divtype or group.default_divtype
			if type(divtype) ~= "table" then
				divtype = {divtype}
			end
			-- Pass 1 handles political subdivisions, pass 2 miscellaneous ones.
			-- (Kept as an explicit two-pass loop because either list may be nil.)
			for pass = 1, 2 do
				local list
				if pass == 1 then
					list = value.poldiv
				else
					list = value.miscdiv
				end
				if list then
					for _, div in ipairs(list) do
						-- Normalize a bare string into the list form so that div[1]
						-- is always the (plural) subdivision name.
						if type(div) == "string" then
							div = {div}
						end
						local sgdiv = m_strutils.singularize(div[1])
						for _, dt in ipairs(divtype) do
							if not export.cat_data[sgdiv] then
								-- If there is an entry in placetype_equivs, it will be ignored once
								-- we insert an entry in cat_data. For example, "traditional county" is
								-- listed as a miscdiv of Scotland and Northern Ireland but it's also
								-- an entry in placetype_equivs. Once we insert an entry here for
								-- "traditional county", it will override placetype_equivs. To get
								-- around that, simulate the effect of placetype_equivs using a
								-- fallback = "..." entry.
								local equiv = export.placetype_equivs[sgdiv]
								if equiv then
									export.cat_data[sgdiv] = {
										preposition = "of",
										fallback = equiv,
									}
								else
									export.cat_data[sgdiv] = {
										preposition = "of",
										["default"] = {
										},
									}
								end
							end
							local entry = export.cat_data[sgdiv]
							-- Skip polities whose divtype is already covered by a "default"
							-- entry; otherwise add an explicit per-polity entry.
							if not entry["default"] or not entry["default"][dt] then
								-- {true} presumably requests the stock category name; when the
								-- bare key differs from the key, spell the category out explicitly.
								local itself_dest = bare_key == key and {true} or {ucfirst(div[1]) .. " of " .. key}
								if sgdiv == "district" then
									-- see comment above under district_cat_handler().
									local neighborhoods_in = value.british_spelling and "Neighbourhoods in " .. key or "Neighborhoods in " .. key
									local inner_data = district_inner_data({neighborhoods_in}, itself_dest)
									entry[dt .. "/" .. key] = inner_data
								else
									entry[dt .. "/" .. key] = {
										["itself"] = itself_dest,
									}
								end
							end
						end
					end
				end
			end
		end
	end
end
return export