Spaces:

kpfadnis
/

InspectorRAGet

Running

App Files Files

InspectorRAGet / src /utilities /aggregators.ts

kpfadnis

chore (maintenance): Update dependencies, copyright and minimal support for chat template.

e23b66d 9 months ago

raw

history blame

8.23 kB

	/**
	*
	* Copyright 2023-2025 InspectorRAGet Team
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*
	**/

	import { countBy, intersection, union, range } from 'lodash';

	import {
	Aggregator,
	AggregationConfidenceLevels,
	AggregationStatistics,
	MetricValue,
	} from '@/src/types';
	import { castToNumber, castToValue } from '@/src/utilities/metrics';

	/**
	 * Aggregates per-annotator scores into their arithmetic mean.
	 *
	 * String scores are converted with `castToNumber(score, references)` before
	 * averaging; numeric scores are used unchanged.
	 */
	export const meanAggregator: Aggregator = {
	  name: 'mean',
	  displayName: 'Mean',
	  /**
	   * @param scores - Scores to aggregate; must contain at least one entry.
	   * @param references - Metric values handed to `castToNumber` for string scores.
	   * @returns The mean and the population standard deviation (both rounded to
	   * two decimals) plus a confidence level: HIGH when every score is
	   * identical, LOW when every score is distinct, MEDIUM otherwise.
	   * @throws TypeError when `scores` is empty.
	   */
	  apply: (
	    scores: number[] | string[],
	    references: MetricValue[],
	  ): AggregationStatistics => {
	    // Fail with an explicit message instead of the opaque "Reduce of empty
	    // array with no initial value" TypeError an empty input would trigger.
	    if (scores.length === 0) {
	      throw new TypeError(
	        'meanAggregator: cannot aggregate an empty list of scores',
	      );
	    }

	    // Step 1: Cast scores to numbers
	    const numericScores = scores.map((score) =>
	      typeof score === 'string' ? castToNumber(score, references) : score,
	    );

	    // Step 2: Calculate aggregate value & standard deviation
	    // (variance is divided by N, i.e. population standard deviation)
	    const mean =
	      numericScores.reduce((sum, score) => sum + score, 0) /
	      numericScores.length;
	    const variance =
	      numericScores.reduce(
	        (sum, score) => sum + Math.pow(score - mean, 2),
	        0,
	      ) / numericScores.length;
	    const std = Math.sqrt(variance);

	    // Step 3: Calculate confidence level from the number of distinct raw
	    // scores. A single distinct value means every annotator agreed.
	    const numberOfUniqueValues = Object.keys(countBy(scores)).length;
	    const confidence =
	      numberOfUniqueValues === 1
	        ? AggregationConfidenceLevels.HIGH
	        : numberOfUniqueValues === scores.length
	          ? AggregationConfidenceLevels.LOW
	          : AggregationConfidenceLevels.MEDIUM;

	    return {
	      // Number.EPSILON nudges values such as 1.005 so they round up as expected
	      value: Math.round((mean + Number.EPSILON) * 100) / 100,
	      std: Math.round((std + Number.EPSILON) * 100) / 100,
	      confidence,
	    };
	  },
	};

	/**
	 * Aggregates per-annotator scores into their median.
	 *
	 * For an even number of scores the lower of the two middle values is used
	 * (the middle values are not averaged), so the median is always one of the
	 * submitted scores.
	 */
	export const medianAggregator: Aggregator = {
	  name: 'median',
	  displayName: 'Median',
	  /**
	   * @param scores - Scores to aggregate; must contain at least one entry.
	   * @param references - Metric values handed to `castToNumber` for string
	   * scores and to `castToValue` for the resulting median.
	   * @returns The median converted with `castToValue(median, references)`, the
	   * root of the mean squared deviation of the scores from that median
	   * (rounded to two decimals), and a confidence level: HIGH when every score
	   * is identical, LOW when every score is distinct, MEDIUM otherwise.
	   * @throws TypeError when `scores` is empty.
	   */
	  apply: (
	    scores: number[] | string[],
	    references: MetricValue[],
	  ): AggregationStatistics => {
	    // Fail with an explicit message instead of the opaque "Reduce of empty
	    // array with no initial value" TypeError an empty input would trigger.
	    if (scores.length === 0) {
	      throw new TypeError(
	        'medianAggregator: cannot aggregate an empty list of scores',
	      );
	    }

	    // Step 1: Cast scores to numbers
	    const numericScores = scores.map((score) =>
	      typeof score === 'string' ? castToNumber(score, references) : score,
	    );

	    // Step 2: Sort the numeric scores (ascending, without mutating the input)
	    const sortedNumericScores = numericScores.toSorted((a, b) => a - b);

	    // Step 3: Calculate aggregate value & standard deviation
	    // floor((n - 1) / 2) is the middle index for odd n and the lower of the
	    // two middle indices for even n.
	    const median =
	      sortedNumericScores[Math.floor((sortedNumericScores.length - 1) / 2)];
	    // NOTE: the deviation is measured around the median, not the mean.
	    const std = Math.sqrt(
	      sortedNumericScores.reduce(
	        (sum, score) => sum + Math.pow(score - median, 2),
	        0,
	      ) / sortedNumericScores.length,
	    );

	    // Step 4: Calculate confidence level from the number of distinct raw
	    // scores. A single distinct value means every annotator agreed.
	    const numberOfUniqueValues = Object.keys(countBy(scores)).length;
	    const confidence =
	      numberOfUniqueValues === 1
	        ? AggregationConfidenceLevels.HIGH
	        : numberOfUniqueValues === scores.length
	          ? AggregationConfidenceLevels.LOW
	          : AggregationConfidenceLevels.MEDIUM;

	    return {
	      value: castToValue(median, references),
	      std: Math.round((std + Number.EPSILON) * 100) / 100,
	      confidence,
	    };
	  },
	};

	/**
	 * Aggregates per-annotator scores by majority vote.
	 *
	 * The winning value is reported as the string key produced by `countBy`, so
	 * numeric scores come back as strings. When no value wins, the literal
	 * 'Indeterminate' is returned instead.
	 */
	export const majorityAggregator: Aggregator = {
	  name: 'majority',
	  displayName: 'Majority',
	  /**
	   * @param scores - Scores to aggregate; must contain at least one entry.
	   * @param references - Metric values handed to `castToNumber`.
	   * @returns The majority value (or 'Indeterminate'), the population standard
	   * deviation of the numeric scores around their mean (rounded to two
	   * decimals), and the confidence of the vote.
	   * @throws TypeError when `scores` is empty.
	   */
	  apply: (
	    scores: number[] | string[],
	    references: MetricValue[],
	  ): AggregationStatistics => {
	    // Fail with an explicit message instead of the opaque TypeError an empty
	    // input would trigger when the first counter entry is read.
	    if (scores.length === 0) {
	      throw new TypeError(
	        'majorityAggregator: cannot aggregate an empty list of scores',
	      );
	    }

	    // Step 1: Count occurrences and order the values by descending count
	    const counter: { [key: string]: number } = countBy(scores);
	    const sortedCounter = Object.entries(counter);
	    sortedCounter.sort((x, y) => y[1] - x[1]);

	    // Step 2: Number of unique values, most common value and its count
	    const numberOfAnnotators = scores.length;
	    const numberOfUniqueValues = sortedCounter.length;
	    const mostCommonValue = sortedCounter[0][0];
	    const mostCommonValueCount = sortedCounter[0][1];
	    const half = Math.ceil(numberOfAnnotators / 2);

	    // Numeric distance between the winner and the runner-up. Evaluated lazily
	    // because a runner-up only exists once the vote is not unanimous.
	    const distanceToRunnerUp = (): number =>
	      Math.abs(
	        castToNumber(mostCommonValue, references) -
	          castToNumber(sortedCounter[1][0], references),
	      );

	    // Step 3: Calculate standard deviation of the numeric scores
	    const numericScores = scores.map((score) =>
	      typeof score === 'string' ? castToNumber(score, references) : score,
	    );
	    const mean =
	      numericScores.reduce((sum, score) => sum + score, 0) /
	      numericScores.length;
	    const std = Math.sqrt(
	      numericScores.reduce(
	        (sum, score) => sum + Math.pow(score - mean, 2),
	        0,
	      ) / numericScores.length,
	    );

	    // Step 4: Calculate aggregate value and confidence level
	    let value = 'Indeterminate';
	    let confidence = AggregationConfidenceLevels.LOW;

	    if (mostCommonValueCount === numberOfAnnotators) {
	      // Unanimous vote
	      value = mostCommonValue;
	      confidence = AggregationConfidenceLevels.HIGH;
	    } else if (
	      // Every annotator picked a different value ...
	      (numberOfUniqueValues === numberOfAnnotators &&
	        numberOfUniqueValues > 1) ||
	      // ... or the votes are spread over more than half as many values as
	      // there are annotators ...
	      numberOfUniqueValues > half ||
	      // ... or the winner lacks half of the votes and is more than one step
	      // away from the runner-up.
	      (mostCommonValueCount < half &&
	        numberOfUniqueValues === half &&
	        distanceToRunnerUp() > 1)
	    ) {
	      value = 'Indeterminate';
	      confidence = AggregationConfidenceLevels.LOW;
	    } else if (numberOfUniqueValues === 2 && distanceToRunnerUp() < 2) {
	      // Two neighbouring values: report the winner with medium confidence
	      value = mostCommonValue;
	      confidence = AggregationConfidenceLevels.MEDIUM;
	    } else {
	      value = mostCommonValue;
	      confidence = AggregationConfidenceLevels.LOW;
	    }

	    return {
	      value: value,
	      std: Math.round((std + Number.EPSILON) * 100) / 100,
	      confidence: confidence,
	    };
	  },
	};

	/**
	 * Returns the union of all scores: every value that appears in at least one
	 * of the given arrays, without duplicates (as computed by lodash `union`).
	 * NOTE: Applies only to arrays of numbers or strings
	 */
	export const unionAggregator: Aggregator = {
	  name: 'union',
	  displayName: 'Union',
	  apply: (scores: number[][] | string[][]): (number | string)[] => {
	    return union(...scores);
	  },
	};

	/**
	 * Returns the intersection of all scores: only the values that appear in
	 * every one of the given arrays (as computed by lodash `intersection`).
	 * NOTE: Applies only to arrays of numbers or strings
	 */
	export const intersectionAggregator: Aggregator = {
	  name: 'intersection',
	  displayName: 'Intersection',
	  apply: (scores: number[][] | string[][]): (number | string)[] => {
	    return intersection(...scores);
	  },
	};

	/**
	 * Returns every value chosen by at least a majority of annotators, where a
	 * majority means `Math.ceil(numberOfAnnotators / 2)` annotators or more.
	 * NOTE: Applies only to arrays of numbers or strings
	 *
	 * NOTE(review): `name` / `displayName` are identical to those of
	 * `majorityAggregator`; confirm both are never registered in the same lookup.
	 */
	export const majorityUnionAggregator: Aggregator = {
	  name: 'majority',
	  displayName: 'Majority',
	  /**
	   * @param scores - One array of chosen values per annotator.
	   * @returns The values selected by at least half (rounded up) of the
	   * annotators, without duplicates, in order of first appearance. An empty
	   * `scores` yields an empty array.
	   */
	  apply: (scores: number[][] | string[][]): (number | string)[] => {
	    // Step 1: Determine number of annotators and the majority threshold
	    const numberOfAnnotators = scores.length;
	    const majorityThreshold = Math.ceil(numberOfAnnotators / 2);

	    // Step 2: Count how many annotators chose each value.
	    // Counting votes covers EVERY group of annotators; looking only at
	    // contiguous index ranges would miss groups such as annotators 0 and 2
	    // out of 3.
	    const votes = new Map<number | string, number>();
	    for (const annotatorScores of scores) {
	      // De-duplicate first so that an annotator listing a value twice still
	      // contributes a single vote.
	      new Set<number | string>(annotatorScores).forEach((item) => {
	        votes.set(item, (votes.get(item) ?? 0) + 1);
	      });
	    }

	    // Step 3: Keep values chosen by at least a majority of annotators
	    // (Map iteration follows insertion order, i.e. first appearance)
	    const selected: (number | string)[] = [];
	    votes.forEach((voteCount, item) => {
	      if (voteCount >= majorityThreshold) {
	        selected.push(item);
	      }
	    });
	    return selected;
	  },
	};