Jump to content

Module:Sandbox/AbstractWikipedia/Wikidata

From Meta, a Wikimedia project coordination wiki
Module documentation

This is the Wikidata module of the Abstract Wikipedia template-renderer prototype.

It defines various helper functions that access Wikidata items and lexemes, and return certain properties of them in a convenient format (e.g. a boolean, a string or a table, depending on the required data).


local p = {}

local gf = require("Module:Sandbox/AbstractWikipedia/GrammaticalFeatures")


-- Helper function to safely explore a path in a table.
-- Parameters:
--   data - the value to start from (normally a table)
--   ...  - the successive keys to follow, in order
-- Returns the value found at the end of the path. As soon as a key would
-- have to be looked up in something that is not a table (including nil, i.e.
-- a missing field), the offending key is reported through mw.log and nil is
-- returned. With no keys at all, data itself is returned.
local function explorePath(data, ...)
	-- Collect the varargs explicitly: the implicit `arg` table is a deprecated
	-- compatibility feature and no longer exists from Lua 5.2 onwards.
	for _, key in ipairs({...}) do
		if type(data) ~= "table" then
			mw.log("Cannot access path element "..tostring(key))
			return nil
		end
		data = data[key]
	end
	return data
end


-- Returns the list of demonyms (together with their features) for a q_id in a
-- given language.
-- Parameters:
--   q_id - id of the item whose best "P1549" statements are read
--   lang - language code to match; defaults to the global `language`
-- Returns a (possibly empty) list of tables of the form
--   { label = <text>, features = { <item id>, ... }, lexeme = <id or nil> }
-- where features holds the item ids found in the "P518" qualifiers and lexeme
-- is derived from the first "P7018" qualifier, when present.
function p.getDemonyms (q_id, lang)
	lang = lang or language -- language should be a global variable
	local demonyms = mw.wikibase.getBestStatements( q_id, "P1549" )
	local result = {}
	for _, demonym_table in ipairs(demonyms) do
		local value = explorePath(demonym_table, "mainsnak", "datavalue", "value")
		-- Only keep statements whose text is in the requested language
		if (value and explorePath(value, "language") == lang) then
			local label = explorePath(value, "text")
			if label then
				local demonym = { label = label, features = {} }
				-- Keep these local: as globals they would leak out of the
				-- module and persist between calls.
				local feature_tables = explorePath(demonym_table, "qualifiers", "P518")
				if feature_tables then
					for _, feature_table in ipairs(feature_tables) do
						local feature = explorePath(feature_table, "datavalue", "value", "id")
						if feature then
							table.insert(demonym.features, feature)
						end
					end
				end
				-- A demonym can be linked to a lexeme
				local sense_id = explorePath(demonym_table, "qualifiers", "P7018", 1, "datavalue", "value", "id")
				if sense_id then
					-- Only the first value returned by splitLexemeId is kept
					demonym.lexeme = mw.wikibase.lexeme.splitLexemeId(sense_id)
				end
				table.insert(result, demonym)
			end
		end
	end
	return result
end

-- Looks up the label of an Item in one language.
-- When lang is omitted, the globally-defined variable `language` is used.
-- If the Item has no label in that language, the miss is reported through
-- mw.log and the placeholder "<missing label>" is returned instead.
function p.getLabel (q_id, lang)
	if not lang then
		lang = language -- language should be a global variable
	end
	local found = mw.wikibase.getLabelByLang( q_id, lang )
	if found then
		return found
	end
	-- There could be some fallback logic here (i.e. use another language)
	mw.log("Missing label of "..q_id.." in language "..lang)
	return "<missing label>"
end

-- Return the property associated with a certain q_id through a p_id
-- Currently only the first "best" property is returned
-- Parameters:
--   q_id          - id of the entity to read from
--   p_id          - id of the property to read
--   expected_type - optional; when given, the datavalue's "type" field must
--                   equal it
-- Returns the "value" field of the first best statement's main datavalue, or
-- nil when there is no such statement or datavalue.
-- Raises an error when expected_type is given and does not match the actual
-- type of the datavalue.
function p.getProperty (q_id, p_id, expected_type)
	local property = mw.wikibase.getBestStatements( q_id, p_id )
	property = explorePath(property, 1, "mainsnak", "datavalue")
	if (property and expected_type) then
		local actual_type = explorePath(property, "type")
		if expected_type ~= actual_type then
			-- tostring() keeps the message intact when the datavalue has no
			-- type at all; concatenating nil would raise an unrelated error.
			error("Property "..p_id.." expected to yield type "..expected_type..". Got "..tostring(actual_type))
		end
	end
	return explorePath(property, "value")
end

-- Gives the id of the item that q_id points to through property p_id.
-- The property value is fetched with p.getProperty, which requires it to be
-- of type "wikibase-entityid"; nil is returned when no id can be found.
function p.getItemId (q_id, p_id)
	local entity_value = p.getProperty(q_id, p_id, "wikibase-entityid")
	local item_id = explorePath(entity_value, "id")
	return item_id
end

-- For items with different male and female labels, returns both labels,
-- and possible lexemes associated with them. This relies on annotations
-- of the "male form of label" (P3321) "female form of label" (P2521) on items.
-- Additionally, if an item has a "literal translation" (P2441) property linked
-- to a lexeme, it will fetch that as an "unspecified" gender.
-- See discussion in https://phabricator.wikimedia.org/T320263.
-- Parameters:
--   q_id - id of the item to read
--   lang - language code to match; defaults to the global `language`
-- Returns a table with the keys "male", "female" and "unspecified". Each
-- value is { label = <text>, lexeme = <id or nil> } for the first statement
-- in the requested language, or an empty table when there is none.
function p.getGenderedLabels (q_id, lang)
	lang = lang or language -- language should be a global variable
	local result = {}
	for gender, property in pairs({ male = "P3321", female = "P2521", unspecified = "P2441"}) do
		local statements = mw.wikibase.getBestStatements( q_id, property )
		-- We need to find the label in the right language
		for _, statement in ipairs(statements) do
			if (explorePath(statement, "mainsnak", "datavalue", "value", "language") == lang) then
				local lexeme_id
				-- Kept local so it does not leak out as a global
				local sense_id = explorePath(statement, "qualifiers", "P7018", 1, "datavalue", "value", "id")
				if sense_id then
					-- Only the first value returned by splitLexemeId is kept
					lexeme_id = mw.wikibase.lexeme.splitLexemeId(sense_id)
				end
				local text = explorePath(statement, "mainsnak", "datavalue", "value", "text")
				result[gender] = { label = text, lexeme = lexeme_id }
				break
			end
		end
		if not result[gender] then
			result[gender] = {}
		end
	end
	return result
end


-- Tells whether an entity carries a date of death: true when it has at least
-- one best statement for property P570, false otherwise.
function p.isDead ( q_id )
	local death_statements = mw.wikibase.getBestStatements( q_id, 'P570' )
	local statement_count = #death_statements
	return 0 < statement_count
end

-- Tells whether a q-id references a human being, i.e. whether
-- getReferencedEntityId finds Q5 for it through property P31.
function p.isHuman ( q_id )
	local human = "Q5"
	local found = mw.wikibase.getReferencedEntityId( q_id, 'P31', { human } )
	return found == human
end

-- Lookup from the item id found in "sex or gender" (P21) to the gender name
-- reported by p.getHumanGender. Ids missing from this table are "unknown".
-- Handling of non-binary gender is language dependent and would have to
-- be done in a language-specific implementation.
local human_gender_by_item = {
	Q6581097 = "masculine",
	Q2449503 = "masculine",
	Q44148 = "masculine",
	Q6581072 = "feminine",
	Q1052281 = "feminine",
	Q43445 = "feminine",
	Q1097630 = "other",
	Q48270 = "other",
}

-- Returns "masculine", "feminine", "other" or "unknown"
-- according to "sex or gender" property
function p.getHumanGender ( q_id )
	local gender_item = p.getItemId( q_id, "P21" )
	-- A missing property (nil) falls through to "unknown" as well
	local gender_name = human_gender_by_item[gender_item]
	if gender_name == nil then
		return "unknown"
	end
	return gender_name
end

-- Gives the grammatical gender attached to a lexeme through property P5185
-- (e.g. "masculine", "feminine"), or nil when the lexeme has none.
-- The gender comes from gf.features_map when the gender item is listed
-- there; otherwise the item's English label is used.
function p.getGrammaticalGender ( q_id )
	local gender_item = p.getItemId(q_id, "P5185")
	if gender_item then
		local known_feature = gf.features_map[gender_item]
		if known_feature then
			return known_feature.gender
		end
		-- fallback to English label
		return p.getLabel(gender_item, "en")
	end
	return nil
end

-- Turns a wikidata feature (a q-id) into a table of category-feature
-- pairings. Features listed in gf.features_map are returned from there.
-- For any other feature a single pairing is built: the value is the
-- feature's English label, and the category is the English label of its
-- "instance of" (P31) item, or the q-id itself when it has no such item.
function p.expandFeature ( q_id )
	local mapped = gf.features_map[q_id]
	if mapped then
		return mapped
	end
	-- Fallback: find category by means of the "instance of" property
	local category
	local class_id = p.getItemId( q_id, "P31")
	if class_id then
		category = p.getLabel(class_id, "en")
	else
		category = q_id
	end
	local feature_label = p.getLabel(q_id, "en")
	local pairing = {}
	pairing[category] = feature_label
	return pairing
end


-- Gets the lemma in the rendering language or falls back to first language
-- represented.
-- Parameters:
--   wdLexeme  - lexeme object providing getLemma and getLemmas
--   lexeme_id - id of the lexeme, used only in log messages
-- Returns two values: the lemma and the language actually used. Both are nil
-- when the lexeme has no lemma at all.
-- Note that language should be a globally-defined variable.
function p.getLemma (wdLexeme, lexeme_id)
	local lemma, used_language = wdLexeme:getLemma(language)
	if not lemma then
		-- Kept local so it does not leak out as a global
		local lemmas = wdLexeme:getLemmas()
		local first = lemmas and lemmas[1]
		if not first then
			-- Without this guard, indexing the missing entry would raise
			mw.log("Lexeme "..tostring(lexeme_id).." has no lemma in any language.")
			return nil, nil
		end
		-- Each entry is read as a { lemma, language } pair
		lemma = first[1]
		used_language = first[2]
		mw.log("Lexeme "..tostring(lexeme_id).." has no lemma for language "..tostring(language)..". Using instead language "..tostring(used_language)..".")
	end
	return lemma, used_language
end

-- Gives the part of speech of a lexeme, based on its lexical category.
-- The value comes from gf.categories_map when the category is listed there;
-- otherwise the category's English label is returned.
function p.getPOS (wdLexeme)
	local category_id = wdLexeme:getLexicalCategory()
	local mapped = gf.categories_map[category_id]
	if not mapped then
		-- fallback: Use English label
		return p.getLabel( category_id, "en" )
	end
	return mapped
end

return p