EK100MIR

Paused

EK100MIR / lavila /utils /evaluation_ek100mir.py

Upload 29 files

b3660df verified over 1 year ago

10 kB

	# Copyright (c) Meta Platforms, Inc. and affiliates.
	# All rights reserved.

	# This source code is licensed under the license found in the
	# LICENSE file in the root directory of this source tree.

	# Part of the code is from
	# `https://github.com/mwray/Joint-Part-of-Speech-Embeddings/tree/main/src/evaluation/NDCG.py`
	# and
	# `https://github.com/mwray/Joint-Part-of-Speech-Embeddings/tree/main/src/evaluation/mAP.py`
	# Modified by Yue Zhao

	import numpy as np


	def calculate_DCG(similarity_matrix, relevancy_matrix, k_counts):
	"""
	Calculates the Discounted Cumulative Gain (DCG) between two modalities for
	the first modality.
	DCG = \sum_{i=1}^k \frac{rel_i}{log_2(i + 1)}
	i.e. the sum of the k relevant retrievals which is calculated as the scaled
	relevancy for the ith item. The scale is designed such that early
	retrievals are more important than later retrievals.
	Params:
	- similarity_matrix: matrix of size n1 x n2 where n1 is the number of
	items in the first modality and n2 is the number of items in the
	second modality. The [ith,jth] element is the predicted similarity
	between the ith item from the first modality and the jth item from
	the second modality.
	- relevancy_matrix: matrix of size n1 x n2 (see similarity_matrix
	above). The [ith, jth] element is the semantic relevancy between the
	ith item from the first modality and the jth item from the second
	modality.
	- k_counts: matrix of size n1 x n2 (see similarity_matrix above) which
	includes information on which items to use to calculate the DCG for
	(see calculate_k_counts for more info on this matrix).
	Returns:
	- The DCG for each item in the first modality, a n1 length vector.
	"""
	x_sz, y_sz = similarity_matrix.shape
	ranks = np.argsort(similarity_matrix)[:, ::-1]
	# Create vector of size (n,) where n is the length of the last dimension in
	# similarity matrix
	# This vector is of the form log(i+1)
	logs = np.log2(np.arange(y_sz) + 2)
	# Convert logs into the divisor for the DCG calculation, of size similarity
	# matrix
	divisors = np.repeat(np.expand_dims(logs, axis=0), x_sz, axis=0)

	# mask out the sorted relevancy matrix to only use the first k relevant
	# retrievals for each item.
	columns = np.repeat(np.expand_dims(np.arange(x_sz), axis=1), y_sz, axis=1)
	numerators = relevancy_matrix[columns, ranks] * k_counts
	# Calculate the final DCG score (note that this isn't expected to sum to 1)
	return np.sum(numerators / divisors, axis=1)


	def calculate_k_counts(relevancy_matrix):
	"""
	Works out the maximum number of allowed retrievals when working out the
	Discounted Cumulative Gain. For each query the DCG only uses the first k
	items retrieved which constitute the k relevant items for that query
	(otherwise the nDCG scores can be deceptively high for bad rankings).
	Params:
	- relevancy_matrix: matrix of size n1 x n2 where n1 is the number of
	items in the first modality and n2 is the number of items in the
	second modality. The [ith, jth] element is the semantic relevancy
	between the ith item from the first modality and the jth item from
	the second modality.
	Returns:
	- Matrix of size n1 x n2 (see relevancy matrix for more info). This is
	created as a mask such that if the [ith, jth] element is 1 it
	represents a valid item to use for the calculation of DCG for the
	ith item after sorting. For example, if relevancy matrix of:
	[[1, 0.5, 0],
	[0, 0 , 1]]
	is given, then the k_counts matrix will be:
	[[1, 1, 0],
	[1, 0, 0]]
	i.e. the first row has 2 non-zero items, so the first two retrieved
	items should be used in the calculation. In the second row there is
	only 1 relevant item, therefore only the first retrieved item should
	be used for the DCG calculation.
	"""
	return (np.sort(relevancy_matrix)[:, ::-1] > 0).astype(int)


	def calculate_IDCG(relevancy_matrix, k_counts):
	"""
	Calculates the Ideal Discounted Cumulative Gain (IDCG) which is the value
	of the Discounted Cumulative Gain (DCG) for a perfect retrieval, i.e. the
	items in the second modality were retrieved in order of their descending
	relevancy.
	Params:
	- relevancy_matrix: matrix of size n1 x n2 where n1 is the number of
	items in the first modality and n2 is the number of items in the
	second modality. The [ith, jth] element is the semantic relevancy
	between the ith item from the first modality and the jth item from
	the second modality.
	- k_counts: matrix of size n1 x n2 (see similarity_matrix above) which
	includes information on which items to use to calculate the DCG for
	(see calculate_k_counts for more info on this matrix).
	"""
	return calculate_DCG(relevancy_matrix, relevancy_matrix, k_counts)


	def calculate_nDCG(similarity_matrix, relevancy_matrix, k_counts=None, IDCG=None, reduction='mean'):
	"""
	Calculates the normalised Discounted Cumulative Gain (nDCG) between two
	modalities for the first modality using the Discounted Cumulative Gain
	(DCG) and the Ideal Discounted Cumulative Gain (IDCG).
	nDCG = \frac{DCG}{IDCG}
	Params:
	- similarity_matrix: matrix of size n1 x n2 where n1 is the number of
	items in the first modality and n2 is the number of items in the second
	modality. The [ith,jth] element is the predicted similarity between
	the ith item from the first modality and the jth item from the second
	modality.
	- relevancy_matrix: matrix of size n1 x n2 (see similarity_matrix
	above). The [ith, jth] element is the semantic relevancy between the
	ith item from the first modality and the jth item from the second
	modality.
	- k_counts: optional parameter: matrix of size n1 x n2 (see
	similarity_matrix above) which includes information on which items to
	use to calculate the DCG for (see calculate_k_counts for more info on
	this matrix). This will be calculated using calculate_IDCG if not
	present, but should be pre-processed for efficiency.
	- IDCG: Optional parameter which includes the pre-processed Ideal
	Discounted Cumulative Gain (IDCG). This is a vector of size n1 (see
	similarity_matrix above) which contains the IDCG value for each item
	from the first modality. This will be calculated using calculate_IDCG
	if not present, but should be pre-processed for efficiency.
	- reduction: what to use to reduce the different nDCG scores. By
	default this applies np.mean across all different queries.
	Returns:
	- The nDCG values for the first modality.
	"""
	if k_counts is None:
	k_counts = calculate_k_counts(relevancy_matrix)
	DCG = calculate_DCG(similarity_matrix, relevancy_matrix, k_counts)
	if IDCG is None:
	IDCG = calculate_IDCG(relevancy_matrix, k_counts)
	if reduction == 'mean':
	return np.mean(DCG / IDCG)
	elif reduction is None:
	return DCG / IDCG


	def calculate_mAP(sim_mat, relevancy_matrix):
	"""
	Computes the mean average precision according to the following formula of
	average precision:
	\frac{\sum_{k=1}^n p(k) x rel(k)}{num_rel_docs}
	where p(k) is the precision at k, rel(k) is an indicator function
	determining whether the kth returned item is relevant or not and
	num_rel_docs is the number of relevant items to find within the search.
	The mean average precision is the mean of the average precision for each
	query item (i.e row in the matrix)
	This function takes in two parameters:
	- sim_mat: a NxM matrix which represents the similarity between two
	modalities (with modality 1 being of size N and modality 2 of size M).
	- relevancy_matrix: an NxM matrix which represents the relevancy between two
	modalities of items (with modality 1 being of size N and modality 2 of
	size M).
	"""
	# Find the order of the items in modality 2 according to modality 1
	ranked_order = (-sim_mat).argsort()
	ranked_sim_mat = sim_mat[np.arange(sim_mat.shape[0])[:, None], ranked_order]
	# re-order the relevancy matrix to accommodate the proposals
	ranked_rel_mat = relevancy_matrix[np.arange(relevancy_matrix.shape[0])[:, None], ranked_order]

	# find the number of relevant items found at each k
	cumulative_rel_mat = np.cumsum(ranked_rel_mat, axis=1)
	# Mask this ensuring that it is non zero if the kth term is 1 (rel(k) above)
	cumulative_rel_mat[ranked_rel_mat != 1] = 0
	# find the divisor for p(k)
	divisor = np.arange(ranked_rel_mat.shape[1]) + 1

	# find the number of relevant docs per query item
	number_rel_docs = np.sum(ranked_rel_mat == 1, axis=1)

	# find the average precision per query, within np.sum finds p(k) * rel(k)
	avg_precision = np.sum(cumulative_rel_mat / divisor, axis=1) / number_rel_docs
	mAP = np.mean(avg_precision)
	return mAP


	def get_mAP(similarity_matrix, rel_matrix):
	vis_map = calculate_mAP(similarity_matrix, rel_matrix)
	txt_map = calculate_mAP(similarity_matrix.T, rel_matrix.T)
	return vis_map, txt_map, (vis_map + txt_map) / 2


	def get_nDCG(similarity_matrix, rel_matrix):
	vis_k_counts = calculate_k_counts(rel_matrix)
	txt_k_counts = calculate_k_counts(rel_matrix.T)
	vis_IDCG = calculate_IDCG(rel_matrix, vis_k_counts)
	txt_IDCG = calculate_IDCG(rel_matrix.T, txt_k_counts)
	vis_nDCG = calculate_nDCG(similarity_matrix, rel_matrix, k_counts=vis_k_counts, IDCG=vis_IDCG)
	txt_nDCG = calculate_nDCG(similarity_matrix.T, rel_matrix.T, k_counts=txt_k_counts, IDCG=txt_IDCG)
	return vis_nDCG, txt_nDCG, (vis_nDCG + txt_nDCG) / 2