kpfadnis's picture
chore (maintenance): Update dependencies, copyright and minimal support for chat template.
e23b66d
raw
history blame
14.7 kB
/**
*
* Copyright 2023-2025 InspectorRAGet Team
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
**/
import { countBy, isNumber } from 'lodash';
import { Metric, MetricValue } from '@/src/types';
/**
 * Textual definitions keyed by a lowercase metric identifier.
 *
 * NOTE(review): `naturalness` carries the same text as `coherence`, and
 * `appropriateness` the same text as `specificity` — confirm this duplication
 * is intentional.
 */
export const MetricDefinitions = {
  // Conversational quality
  coherence: 'The response is coherent, natural, and not dismissive.',
  naturalness: 'The response is coherent, natural, and not dismissive.',

  // Informational quality
  specificity:
    'The response provides appropriate amount of useful information.',
  appropriateness:
    'The response provides appropriate amount of useful information.',

  // Grounding
  faithfulness: 'The response is faithful and grounded on the context.',

  // Free-form annotator remarks
  feedback:
    "Annotator's comments about quality of response, potential issues etc.",
};
/**
 * Numeric codes for annotator agreement, ordered so that a larger number
 * means stronger agreement (0 = none ... 3 = absolute).
 */
export const AgreementLevels = {
  ABSOLUTE_AGREEMENT: 3,
  HIGH_AGREEMENT: 2,
  LOW_AGREEMENT: 1,
  NO_AGREEMENT: 0,
};
/**
 * Explanatory text for each agreement category, keyed by a short label
 * (`Absolute`, `High`, `Low`, `No`).
 *
 * The `High` text previously read "less that 2 units apart"; the typo is
 * corrected to "less than".
 */
export const AgreementLevelDefinitions = {
  Absolute: 'All annotators selected a same value for a given metric.',
  High: 'Majority of annotators selected a same value for a given metric and the most common value and the 2nd most common value were less than 2 units apart.',
  Low: 'Majority of annotators selected a same value for a given metric.',
  No: 'Majority of annotators selected different values for a given metric.',
};
/**
 * Produces the text to show for a metric value.
 *
 * - A number is rounded to at most two decimals, with trailing zeros dropped
 *   (`2` -> `"2"`, `3.14159` -> `"3.14"`).
 * - A string is replaced by the `displayValue` of the reference whose `value`
 *   is strictly equal to it. If no reference matches, or the match has no
 *   truthy `displayValue`, the string is returned unchanged.
 *
 * @param value raw metric value
 * @param references optional reference metric values used for the lookup
 * @returns text representation of the value
 */
export function extractMetricDisplayValue(
  value: string | number,
  references?: MetricValue[],
): string {
  // Numeric values never consult the references.
  if (typeof value !== 'string') {
    return parseFloat(value.toFixed(2)).toString();
  }

  // No references: nothing to translate.
  if (!references) {
    return value;
  }

  const match = references.find((entry) => entry.value === value);
  return match && match.displayValue ? match.displayValue : value;
}
/**
 * Returns the label to show for a metric.
 *
 * Uses `metric.displayName` when it is truthy; otherwise derives a label from
 * `metric.name` by upper-casing its first character and lower-casing the rest.
 *
 * @param metric metric whose label is needed
 * @returns label for the metric
 */
export function extractMetricDisplayName(metric: Metric): string {
  if (metric.displayName) {
    return metric.displayName;
  }

  const initial = metric.name.charAt(0).toUpperCase();
  const remainder = metric.name.slice(1).toLowerCase();
  return initial + remainder;
}
/**
 * Maps a numeric value back to a reference's `value` (used for categorical
 * metrics).
 *
 * The reference is located by strict equality on `numericValue`. The input
 * number is returned unchanged when no references are given, when none
 * matches, or when the matching reference's `value` is falsy.
 *
 * @param value numeric value to convert
 * @param references reference metric values
 * @returns the matching reference's `value`, or the original number
 */
export function castToValue(
  value: number,
  references?: MetricValue[],
): string | number {
  if (!references) {
    return value;
  }

  const match = references.find((entry) => entry.numericValue === value);
  return match && match.value ? match.value : value;
}
/**
 * Converts a metric value to a number.
 *
 * - Numbers are returned as-is.
 * - Strings without references: `'N/A'` and `''` become `0`; anything else
 *   goes through `parseFloat` (which may yield `NaN`).
 * - Strings with references: the reference whose `key` field (default
 *   `value`) strictly equals the string is looked up, and its own
 *   `numericValue` is returned when that is a number. Otherwise the string
 *   goes through `parseFloat`; note that `'N/A'` and `''` are NOT special-cased
 *   on this path.
 *
 * @param value value to convert
 * @param references optional reference metric values
 * @param key reference field to match the string against
 * @returns numeric representation of the value
 */
export function castToNumber(
  value: string | number,
  references?: MetricValue[],
  key?: 'value' | 'displayValue',
): number {
  if (typeof value !== 'string') {
    return value;
  }

  if (!references) {
    return value === 'N/A' || value === '' ? 0 : parseFloat(value);
  }

  const lookupField = key ? key : 'value';
  const match = references.find((entry) => entry[lookupField] === value);

  const hasOwnNumeric =
    match !== undefined &&
    Object.prototype.hasOwnProperty.call(match, 'numericValue') &&
    typeof match.numericValue === 'number';

  return hasOwnNumeric && match ? (match.numericValue as number) : parseFloat(value);
}
/**
 * Aggregates scores by their arithmetic mean, rounded to two decimals.
 *
 * Each distinct score is converted with `castToNumber` (using
 * `metric.values` as references) and weighted by how often it occurs.
 *
 * Agreement level:
 * - ABSOLUTE_AGREEMENT when every score is the same,
 * - NO_AGREEMENT when every score is distinct,
 * - HIGH_AGREEMENT otherwise.
 *
 * Throws a TypeError when `scores` is empty.
 *
 * @param metric metric under consideration
 * @param scores individual scores to aggregate
 * @returns agreement level and the mean value
 */
function computeMean(
  metric: Metric,
  scores: string[] | number[],
): { level: number; value: number | string } {
  // Occurrences per distinct score; keys are the stringified scores.
  const tally: { [key: string]: number } = countBy(scores);
  const tallyEntries = Object.entries(tally);

  // Highest occurrence count among the distinct scores.
  const [, topCount] = [...tallyEntries].sort((x, y) => y[1] - x[1])[0];
  const distinctCount = tallyEntries.length;

  // Weighted sum, accumulated in the tally's own key order.
  const total = tallyEntries.reduce(
    (acc, [score, occurrences]) =>
      acc + castToNumber(score, metric.values) * occurrences,
    0,
  );
  const mean = Math.round((total / scores.length + Number.EPSILON) * 100) / 100;

  let level = AgreementLevels.HIGH_AGREEMENT;
  if (topCount === scores.length) {
    level = AgreementLevels.ABSOLUTE_AGREEMENT;
  } else if (distinctCount === scores.length) {
    level = AgreementLevels.NO_AGREEMENT;
  }

  return { level, value: mean };
}
/**
 * Aggregates scores by their median.
 *
 * Scores are converted to numbers (strings via `castToNumber` with
 * `metric.values` as references) and sorted ascending. For an even number of
 * scores the LOWER of the two middle elements is taken (no averaging). The
 * median is then passed through `castToValue` with `metric.values`.
 *
 * Agreement level:
 * - ABSOLUTE_AGREEMENT when every score is the same,
 * - NO_AGREEMENT when every score is distinct,
 * - HIGH_AGREEMENT otherwise.
 *
 * Throws a TypeError when `scores` is empty.
 *
 * @param metric metric under consideration
 * @param scores individual scores to aggregate
 * @returns agreement level and the median value
 */
function computeMedian(
  metric: Metric,
  scores: string[] | number[],
): { level: number; value: number | string } {
  // Occurrences per distinct score, most frequent first.
  const tally: { [key: string]: number } = countBy(scores);
  const ranked = Object.entries(tally).sort((x, y) => y[1] - x[1]);
  const distinctCount = ranked.length;
  const topCount = ranked[0][1];

  // Numeric view of the scores, in ascending order.
  const numeric: number[] = [];
  for (const score of scores) {
    numeric.push(
      typeof score === 'string' ? castToNumber(score, metric.values) : score,
    );
  }
  const ascending = numeric.toSorted((a, b) => a - b);

  // floor((n - 1) / 2) is the middle index for odd n and the lower-middle
  // index for even n.
  const median = ascending[Math.floor((ascending.length - 1) / 2)];

  let level = AgreementLevels.HIGH_AGREEMENT;
  if (topCount === scores.length) {
    level = AgreementLevels.ABSOLUTE_AGREEMENT;
  } else if (distinctCount === scores.length) {
    level = AgreementLevels.NO_AGREEMENT;
  }

  return { level, value: castToValue(median, metric.values) };
}
/**
 * Aggregates a value distribution by majority vote.
 *
 * Decision order (first match wins):
 * 1. The most common value was chosen by all annotators
 *    -> ABSOLUTE_AGREEMENT with that value.
 * 2. There are as many distinct values as annotators
 *    -> NO_AGREEMENT with 'Indeterminate'.
 * 3. With `half = ceil(numberOfAnnotators / 2)`: more than `half` distinct
 *    values, OR (top count below `half`, exactly `half` distinct values, and
 *    the two most common values more than 1 apart numerically)
 *    -> NO_AGREEMENT with 'Indeterminate'.
 * 4. Exactly two distinct values that are less than 2 apart numerically
 *    -> HIGH_AGREEMENT with the most common value.
 * 5. Otherwise -> LOW_AGREEMENT with the most common value.
 *
 * Numeric distance is computed with `castToNumber` using `metric.values`.
 * The returned value is the counter key, i.e. always a string.
 * Throws a TypeError when `counter` has no entries.
 *
 * @param metric metric under consideration
 * @param counter distribution of values (value -> number of occurrences)
 * @param numberOfAnnotators number of annotators
 * @returns agreement level and the majority value
 */
function computeMajority(
  metric: Metric,
  counter: { [key: string]: number },
  numberOfAnnotators: number,
): { level: number; value: number | string } {
  // Distribution ordered by occurrence count, most frequent first.
  const ranked = Object.entries(counter).sort((x, y) => y[1] - x[1]);
  const distinctCount = ranked.length;
  const [topValue, topCount] = ranked[0];

  if (topCount === numberOfAnnotators) {
    return { level: AgreementLevels.ABSOLUTE_AGREEMENT, value: topValue };
  }
  if (distinctCount === numberOfAnnotators) {
    return { level: AgreementLevels.NO_AGREEMENT, value: 'Indeterminate' };
  }

  const half = Math.ceil(numberOfAnnotators / 2);

  // Evaluated lazily: only valid when a second-ranked value exists, which the
  // guards in front of each call ensure.
  const topTwoGap = (): number =>
    Math.abs(
      castToNumber(topValue, metric.values) -
        castToNumber(ranked[1][0], metric.values),
    );

  const tooManyDistinct = distinctCount > half;
  const weakAndFarApart =
    topCount < half && distinctCount === half && topTwoGap() > 1;
  if (tooManyDistinct || weakAndFarApart) {
    return { level: AgreementLevels.NO_AGREEMENT, value: 'Indeterminate' };
  }

  if (distinctCount === 2 && topTwoGap() < 2) {
    return { level: AgreementLevels.HIGH_AGREEMENT, value: topValue };
  }

  return { level: AgreementLevels.LOW_AGREEMENT, value: topValue };
}
/**
 * Aggregates the per-annotator (or per-run) values of a metric into a single
 * value plus an agreement level.
 *
 * Dispatch on `metric.aggregator`:
 * - `'average'` / `'mean'` -> `computeMean`
 * - `'median'`             -> `computeMedian`
 * - anything else truthy   -> `computeMajority`
 *
 * When the metric has no aggregator, or when `entries` yields no scores, the
 * result is `{ level: NO_AGREEMENT, value: undefined }`. The empty case is
 * handled here because the compute helpers read the first element of the
 * value distribution and would throw on an empty one.
 *
 * `metric.author` does not influence the result: the previous implementation
 * branched on it but ran identical code in both branches.
 *
 * @param metric metric under consideration
 * @param entries map whose values each expose a `value` property holding one
 *   score
 * @returns agreement level and aggregated value (`undefined` when nothing
 *   could be aggregated)
 */
export function calculateAggregateValue(
  metric: Metric,
  entries: { [key: string]: any },
): { level: number; value: number | string | undefined } {
  const nothingToAggregate = {
    level: AgreementLevels.NO_AGREEMENT,
    value: undefined,
  };

  // Checked first so `entries` is not touched when no aggregator is set.
  if (!metric.aggregator) {
    return nothingToAggregate;
  }

  const scores: string[] | number[] = Object.values(entries).map(
    (entry) => entry.value,
  );
  if (scores.length === 0) {
    return nothingToAggregate;
  }

  switch (metric.aggregator) {
    case 'average':
    case 'mean':
      return computeMean(metric, scores);
    case 'median':
      return computeMedian(metric, scores);
    default:
      return computeMajority(metric, countBy(scores), scores.length);
  }
}
/**
 * Merges a two-level `group -> key -> value` object into `target`, in place.
 *
 * For every `(group, key, value)` in `source`:
 * - if `target` has no own `group`, the group is created holding that key;
 * - else if the group already has its own `key`, `value` is added with `+=`;
 * - else the key is set to `value`.
 *
 * A falsy `source` is a no-op. A source group without keys does not create a
 * group in `target`.
 *
 * @param source object to read from
 * @param target object that is mutated to receive the merged values
 */
export function mergeAgreementObjects({
  source,
  target,
}: {
  source: object;
  target: object;
}) {
  if (!source) {
    return;
  }

  const hasOwn = (holder: object, property: string): boolean =>
    Object.prototype.hasOwnProperty.call(holder, property);

  // Indexable view of `target`; the signature does not constrain value types.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const merged = target as Record<string, Record<string, any>>;

  for (const [group, entry] of Object.entries(source)) {
    for (const [key, value] of Object.entries(entry)) {
      if (!hasOwn(merged, group)) {
        merged[group] = { [key]: value };
      } else if (hasOwn(merged[group], key)) {
        merged[group][key] += value;
      } else {
        merged[group][key] = value;
      }
    }
  }
}
/**
 * Maps a numeric value of a numerical metric to the label of the bin it falls
 * into.
 *
 * `metric.range` is read as `[lower, upper, step]`. Bins are
 * `[lower + i * step, lower + (i + 1) * step]` for as long as the bin's upper
 * edge does not exceed `upper`; a trailing partial bin is not generated. Bin
 * edges are rounded to two decimals, both for the label (`"start-end"`) and
 * for the membership test, and both edges are inclusive — a value sitting
 * exactly on a shared edge is assigned to the lower bin.
 *
 * The value is returned unchanged when it is not a number, the metric is not
 * `'numerical'`, the range is missing or does not have three elements, the
 * step is not a positive number, or no bin contains the value.
 *
 * Fixes over the previous implementation:
 * - The loop bound is compared with a small tolerance. Accumulated
 *   floating-point error (e.g. `0.2 + 0.1 === 0.30000000000000004`) used to
 *   drop the last bin for ranges such as `[0, 0.3, 0.1]`.
 * - A zero, negative or NaN step is rejected instead of looping forever.
 *
 * @param value value to bin
 * @param metric metric describing the type and range
 * @param n currently unused; kept for call-site compatibility
 * @returns bin label, or the original value when it cannot be binned
 */
export function bin(
  value: number | string,
  metric: Metric,
  n?: number,
): number | string {
  if (typeof value !== 'number' || metric.type !== 'numerical') {
    return value;
  }

  const range = metric.range;
  if (!range || range.length !== 3) {
    return value;
  }

  const [lower, upper, step] = range;

  // `!(step > 0)` also rejects NaN; such a step could never terminate.
  if (!(step > 0)) {
    return value;
  }

  // Slack for the accumulated rounding error of `lower + idx * step + step`.
  const tolerance = step * 1e-9;

  for (let idx = 0; lower + idx * step + step <= upper + tolerance; idx++) {
    const start = parseFloat((lower + idx * step).toFixed(2));
    const end = parseFloat((lower + idx * step + step).toFixed(2));
    if (start <= value && value <= end) {
      return `${start}-${end}`;
    }
  }

  return value;
}
/**
 * Comparator for ordering aggregated metric values by their `key`.
 *
 * - `'average'` / `'mean'` aggregators: numeric ordering. Number keys are
 *   subtracted directly; string keys are parsed with `parseFloat` first.
 *   Mixed key types compare as equal.
 * - `'majority'` aggregator (string keys only; anything else compares as
 *   equal): `'Indeterminate'` sorts before every other key. Otherwise, if
 *   both keys are found in `metric.values`, they are ordered by
 *   `numericValue` when both have one, else by the entries' `value` counts.
 *   Keys not found in `metric.values` fall back to `localeCompare`.
 * - Any other aggregator (or none): plain `>` comparison of the keys.
 *
 * Fixes over the previous implementation:
 * - `'mean'` is treated like `'average'`, matching the aggregator dispatch
 *   elsewhere in this module; previously string keys under `'mean'` were
 *   compared lexicographically (`"10"` before `"9"`).
 * - The fallback returns `0` for equal keys instead of `-1`, so the
 *   comparator is consistent.
 *
 * @param a first aggregated entry
 * @param b second aggregated entry
 * @param metric metric the entries belong to
 * @returns negative, zero or positive number, as expected by `Array.sort`
 */
export function compareMetricAggregatedValues(
  a: { key: string | number; value: number },
  b: { key: string | number; value: number },
  metric: Metric,
): number {
  const aggregator = metric.aggregator;

  if (aggregator === 'average' || aggregator === 'mean') {
    if (typeof a.key === 'number' && typeof b.key === 'number') {
      return a.key - b.key;
    }
    if (typeof a.key === 'string' && typeof b.key === 'string') {
      return parseFloat(a.key) - parseFloat(b.key);
    }
    return 0;
  }

  if (aggregator === 'majority') {
    // Default for non-string keys: preserve the existing order.
    if (typeof a.key !== 'string' || typeof b.key !== 'string') {
      return 0;
    }
    const aKey = a.key;
    const bKey = b.key;

    // 'Indeterminate' goes first; two of them are equal.
    const aIndeterminate = aKey === 'Indeterminate';
    const bIndeterminate = bKey === 'Indeterminate';
    if (aIndeterminate || bIndeterminate) {
      if (aIndeterminate === bIndeterminate) {
        return 0;
      }
      return aIndeterminate ? -1 : 1;
    }

    // Loose equality is deliberate: keys are strings while a reference's
    // `value` may be numeric (e.g. 1 == "1").
    const aValue = metric.values?.find((entry) => entry.value == aKey);
    const bValue = metric.values?.find((entry) => entry.value == bKey);

    if (aValue && bValue) {
      // `isNumber` already rejects undefined and null.
      if (isNumber(aValue.numericValue) && isNumber(bValue.numericValue)) {
        return aValue.numericValue - bValue.numericValue;
      }
      if (typeof a.value === 'number' && typeof b.value === 'number') {
        return a.value - b.value;
      }
    }

    // String comparison with non-ASCII support.
    return aKey.localeCompare(bKey);
  }

  if (a.key === b.key) {
    return 0;
  }
  return a.key > b.key ? 1 : -1;
}