Spaces:

kpfadnis
/

InspectorRAGet

Running

File size: 14,661 Bytes

/**
 *
 * Copyright 2023-2025 InspectorRAGet Team
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 **/

import { countBy, isNumber } from 'lodash';
import { Metric, MetricValue } from '@/src/types';

/**
 * Human-readable description strings, keyed by what appear to be metric names.
 *
 * NOTE(review): `coherence` and `naturalness` carry identical text, as do
 * `specificity` and `appropriateness` — confirm whether the shared wording is
 * intentional or a copy/paste leftover.
 */
export const MetricDefinitions = {
  coherence: 'The response is coherent, natural, and not dismissive.',
  naturalness: 'The response is coherent, natural, and not dismissive.',
  specificity:
    'The response provides appropriate amount of useful information.',
  appropriateness:
    'The response provides appropriate amount of useful information.',
  faithfulness: 'The response is faithful and grounded on the context.',
  feedback:
    "Annotator's comments about quality of response, potential issues etc.",
};

/**
 * Numeric codes for the agreement level attached to an aggregated metric value.
 * The codes run from 0 (no agreement) up to 3 (absolute agreement); the
 * aggregation functions in this file return one of these as `level`.
 */
export const AgreementLevels = {
  ABSOLUTE_AGREEMENT: 3,
  HIGH_AGREEMENT: 2,
  LOW_AGREEMENT: 1,
  NO_AGREEMENT: 0,
};

/**
 * Human-readable explanation of each agreement level, keyed by a short level
 * label (`Absolute`, `High`, `Low`, `No`).
 */
export const AgreementLevelDefinitions = {
  Absolute: 'All annotators selected a same value for a given metric.',
  // Fixed typo in the user-facing text: "less that" -> "less than".
  High: 'Majority of annotators selected a same value for a given metric and the most common value and the 2nd most common value were less than 2 units apart.',
  Low: 'Majority of annotators selected a same value for a given metric.',
  No: 'Majority of annotators selected different values for a given metric.',
};

/**
 * Produce the text shown for a metric value.
 *
 * - Numbers are rounded to at most two decimal places (trailing zeros dropped).
 * - Strings are looked up in `references` by their `value`; when the matching
 *   reference has a non-empty `displayValue`, that is returned instead.
 * - Otherwise the string is returned unchanged.
 *
 * @param value raw metric value
 * @param references optional reference metric values used for the lookup
 * @returns text to display
 */
export function extractMetricDisplayValue(
  value: string | number,
  references?: MetricValue[],
): string {
  // Numeric values: fixed to 2 decimals, then re-parsed to strip trailing zeros
  if (typeof value === 'number') {
    return String(parseFloat(value.toFixed(2)));
  }

  // String values: try to find a reference whose raw value matches
  const match = references
    ? references.find((entry) => entry.value === value)
    : undefined;

  // Prefer the reference's display value when one is defined and non-empty
  if (match && match.displayValue) {
    return match.displayValue;
  }

  // Fallback: show the raw string
  return value;
}

/**
 * Resolve the label to show for a metric.
 *
 * Uses `metric.displayName` when it is set and non-empty; otherwise derives a
 * label from `metric.name` by upper-casing its first character and
 * lower-casing the rest.
 *
 * @param metric metric under consideration
 * @returns display label
 */
export function extractMetricDisplayName(metric: Metric): string {
  if (metric.displayName) {
    return metric.displayName;
  }

  const rawName = metric.name;
  const head = rawName.charAt(0).toUpperCase();
  const tail = rawName.slice(1).toLowerCase();
  return head + tail;
}

/**
 * Map a numeric value back to a metric value via the reference list
 * (used for 'categorical' metrics).
 *
 * The lookup matches on `numericValue`. If no reference matches, or the
 * matching reference has an empty/falsy `value`, the number itself is returned.
 *
 * @param value numeric value to convert
 * @param references reference metric values
 * @returns the reference's `value` when found, otherwise the input number
 */
export function castToValue(
  value: number,
  references?: MetricValue[],
): string | number {
  // Locate the reference whose numeric value equals the input (if any)
  const match = references
    ? references.find((entry) => entry.numericValue === value)
    : undefined;

  // Use the reference's value only when it is present and truthy
  return match && match.value ? match.value : value;
}

/**
 * Convert a metric value to a number.
 *
 * - Numbers are returned unchanged.
 * - Without `references`, the strings 'N/A' and '' map to 0 and every other
 *   string goes through `parseFloat`.
 * - With `references`, the string is matched against each reference's `key`
 *   field (`value` when `key` is omitted). A match with an own, numeric
 *   `numericValue` yields that number; anything else falls back to
 *   `parseFloat(value)`, which is NaN for non-numeric text such as 'N/A'.
 *
 * @param value metric value to convert
 * @param references reference metric values
 * @param key reference field to match the string against
 * @returns numeric representation (may be NaN)
 */
export function castToNumber(
  value: string | number,
  references?: MetricValue[],
  key?: 'value' | 'displayValue',
): number {
  // Already numeric: nothing to do
  if (typeof value !== 'string') {
    return value;
  }

  // No references: special-case the "missing" markers, parse the rest
  if (!references) {
    return value === 'N/A' || value === '' ? 0 : parseFloat(value);
  }

  // References available: match on the requested field (defaults to `value`)
  const field = key ? key : 'value';
  const match = references.find((entry) => entry[field] === value);

  const hasNumericValue =
    match &&
    match.hasOwnProperty('numericValue') &&
    typeof match.numericValue === 'number';

  if (hasNumericValue) {
    return match.numericValue;
  }

  // No usable reference: best-effort parse of the raw string
  return parseFloat(value);
}

/**
 * Aggregate scores by arithmetic mean and grade the agreement between them.
 *
 * The mean is computed over the numeric form of every score (via
 * `castToNumber` with the metric's reference values) and rounded to two
 * decimal places. The agreement level is:
 *  - ABSOLUTE_AGREEMENT when every score is the same,
 *  - NO_AGREEMENT when every score is different,
 *  - HIGH_AGREEMENT otherwise.
 *
 * Throws a TypeError when `scores` is empty (there is no most common value).
 *
 * @param metric metric under consideration
 * @param scores individual scores to aggregate
 * @returns agreement level and rounded mean
 */
function computeMean(
  metric: Metric,
  scores: string[] | number[],
): { level: number; value: number | string } {
  // Tally how often each distinct score occurs (keys are stringified scores)
  const tally: { [key: string]: number } = countBy(scores);

  // Rank distinct scores from most to least frequent
  const ranked = Object.entries(tally).sort((x, y) => y[1] - x[1]);
  const distinctCount = ranked.length;
  const topCount = ranked[0][1];

  // Weighted sum: numeric form of each distinct score times its frequency
  const total = Object.entries(tally).reduce(
    (acc, [label, occurrences]) =>
      acc + castToNumber(label, metric.values) * occurrences,
    0,
  );

  // Round to 2 decimals; EPSILON nudges values sitting just below a .5 boundary
  const mean = Math.round((total / scores.length + Number.EPSILON) * 100) / 100;

  // Grade agreement: all identical -> absolute, all distinct -> none, else high
  let level = AgreementLevels.HIGH_AGREEMENT;
  if (topCount === scores.length) {
    level = AgreementLevels.ABSOLUTE_AGREEMENT;
  } else if (distinctCount === scores.length) {
    level = AgreementLevels.NO_AGREEMENT;
  }

  return { level, value: mean };
}

/**
 * Aggregate scores by median and grade the agreement between them.
 *
 * Scores are converted to numbers (via `castToNumber` with the metric's
 * reference values), sorted ascending, and the middle element is picked. For
 * an even number of scores the lower of the two middle elements is used; no
 * averaging takes place. The picked number is mapped back through
 * `castToValue` before being returned. The agreement level is:
 *  - ABSOLUTE_AGREEMENT when every score is the same,
 *  - NO_AGREEMENT when every score is different,
 *  - HIGH_AGREEMENT otherwise.
 *
 * Throws a TypeError when `scores` is empty (there is no most common value).
 *
 * @param metric metric under consideration
 * @param scores individual scores to aggregate
 * @returns agreement level and median value
 */
function computeMedian(
  metric: Metric,
  scores: string[] | number[],
): { level: number; value: number | string } {
  // Tally how often each distinct score occurs
  const tally: { [key: string]: number } = countBy(scores);

  // Rank distinct scores from most to least frequent
  const ranked = Object.entries(tally).sort((x, y) => y[1] - x[1]);
  const distinctCount = ranked.length;
  const topCount = ranked[0][1];

  // Numeric view of the scores, ordered ascending (copy; input is not mutated)
  const ascending = [
    ...scores.map((score) =>
      typeof score === 'string' ? castToNumber(score, metric.values) : score,
    ),
  ].sort((a, b) => a - b);

  // Middle index; for even lengths this is the lower of the two middle slots
  const median = ascending[Math.floor((ascending.length - 1) / 2)];

  // Grade agreement: all identical -> absolute, all distinct -> none, else high
  let level = AgreementLevels.HIGH_AGREEMENT;
  if (topCount === scores.length) {
    level = AgreementLevels.ABSOLUTE_AGREEMENT;
  } else if (distinctCount === scores.length) {
    level = AgreementLevels.NO_AGREEMENT;
  }

  return { level, value: castToValue(median, metric.values) };
}

/**
 * Aggregate by majority vote and grade the agreement between annotators.
 *
 * Outcomes, checked in this order:
 *  1. ABSOLUTE_AGREEMENT: the most common value was chosen by every annotator.
 *  2. NO_AGREEMENT ('Indeterminate'): every annotator chose a different value.
 *  3. NO_AGREEMENT ('Indeterminate'): the number of distinct values exceeds
 *     half the annotators (rounded up), OR the most common value has fewer
 *     votes than that half, the number of distinct values equals it, and the
 *     two most common values are more than 1 unit apart.
 *  4. HIGH_AGREEMENT: exactly two distinct values, less than 2 units apart.
 *  5. LOW_AGREEMENT: everything else.
 *
 * Distances are measured on the numeric form of the values (`castToNumber`
 * with the metric's reference values). Throws a TypeError when `counter` is
 * empty.
 *
 * @param metric metric under consideration
 * @param counter distribution of values (value -> number of votes)
 * @param numberOfAnnotators number of annotators
 * @returns agreement level and majority value (or 'Indeterminate')
 */
function computeMajority(
  metric: Metric,
  counter: { [key: string]: number },
  numberOfAnnotators: number,
): { level: number; value: number | string } {
  // Rank distinct values from most to least voted
  const ranked = Object.entries(counter).sort((x, y) => y[1] - x[1]);
  const distinctCount = ranked.length;
  const topValue = ranked[0][0];
  const topCount = ranked[0][1];

  // Half of the annotators, rounded up
  const half = Math.ceil(numberOfAnnotators / 2);

  // Distance between the two most voted values; evaluated lazily because a
  // runner-up only exists when there are at least two distinct values
  const topTwoGap = (): number =>
    Math.abs(
      castToNumber(topValue, metric.values) -
        castToNumber(ranked[1][0], metric.values),
    );

  // Everyone picked the same value
  if (topCount === numberOfAnnotators) {
    return { level: AgreementLevels.ABSOLUTE_AGREEMENT, value: topValue };
  }

  // Everyone picked a different value
  if (distinctCount === numberOfAnnotators) {
    return { level: AgreementLevels.NO_AGREEMENT, value: 'Indeterminate' };
  }

  // Votes too scattered, or no majority and the leading values far apart
  if (
    distinctCount > half ||
    (topCount < half && distinctCount === half && topTwoGap() > 1)
  ) {
    return { level: AgreementLevels.NO_AGREEMENT, value: 'Indeterminate' };
  }

  // Two camps that sit close together
  if (distinctCount === 2 && topTwoGap() < 2) {
    return { level: AgreementLevels.HIGH_AGREEMENT, value: topValue };
  }

  // Anything else counts as low agreement on the most voted value
  return { level: AgreementLevels.LOW_AGREEMENT, value: topValue };
}

/**
 * Aggregate the per-entry values of a metric with the metric's configured
 * aggregator.
 *
 * - 'average' / 'mean' -> `computeMean`
 * - 'median'           -> `computeMedian`
 * - any other aggregator -> `computeMajority`
 *
 * When the metric has no aggregator, or `entries` holds no values, the result
 * is `{ level: NO_AGREEMENT, value: undefined }`.
 *
 * The previous implementation branched on `metric.author`, but both branches
 * were identical, so the author does not influence the result. It also threw a
 * TypeError for empty `entries`, because every aggregator reads the most common
 * value unconditionally.
 *
 * @param metric metric under consideration
 * @param entries map of entries; each entry's `value` field is aggregated
 * @returns agreement level and aggregated value
 */
export function calculateAggregateValue(
  metric: Metric,
  entries: { [key: string]: any },
) {
  // Nothing to aggregate with
  if (!metric.aggregator) {
    return {
      level: AgreementLevels.NO_AGREEMENT,
      value: undefined,
    };
  }

  const scores: string[] | number[] = Object.values(entries).map(
    (entry) => entry.value,
  );

  // Nothing to aggregate: same shape as the "no aggregator" result
  if (scores.length === 0) {
    return {
      level: AgreementLevels.NO_AGREEMENT,
      value: undefined,
    };
  }

  switch (metric.aggregator) {
    case 'average':
    case 'mean':
      return computeMean(metric, scores);
    case 'median':
      return computeMedian(metric, scores);
    default:
      return computeMajority(metric, countBy(scores), scores.length);
  }
}

/**
 * Merge a two-level object (`group -> key -> value`) into `target`, in place.
 *
 * For every `group`/`key` pair in `source`:
 *  - if `target[group][key]` already exists, `value` is added to it with `+=`;
 *  - otherwise the key (and, if needed, the group) is created with `value`.
 *
 * Groups in `source` that contain no keys do not create anything in `target`.
 * A falsy `source` leaves `target` untouched.
 *
 * @param source object to merge from
 * @param target object to merge into (mutated)
 */
export function mergeAgreementObjects({
  source,
  target,
}: {
  source: object;
  target: object;
}) {
  if (!source) {
    return;
  }

  // Indexable view of the same target object
  const sink = target as Record<string, Record<string, any>>;

  for (const [group, entry] of Object.entries(source)) {
    for (const [key, value] of Object.entries(entry)) {
      // First key seen for a group that is new to the target
      if (!sink.hasOwnProperty(group)) {
        sink[group] = { [key]: value };
        continue;
      }

      const bucket = sink[group];
      if (bucket.hasOwnProperty(key)) {
        bucket[key] += value;
      } else {
        bucket[key] = value;
      }
    }
  }
}

/**
 * Map a numeric value of a 'numerical' metric to the label of the bin it falls
 * into, e.g. `"0.2-0.3"`.
 *
 * Bins are derived from `metric.range`, read as `[min, max, step]`: bin `idx`
 * spans `min + idx * step` to `min + (idx + 1) * step`, with both ends rounded
 * to two decimals for the label and for the membership test. A value sitting
 * exactly on a boundary is assigned to the lower bin (first match wins).
 *
 * The value is returned unchanged when it is not a number, the metric is not
 * 'numerical', the range is missing or does not have exactly three elements,
 * the step is not a positive finite number, or no bin contains the value.
 *
 * Fixes over the previous version:
 *  - The loop bound is checked with a small tolerance. Accumulated
 *    floating-point error (e.g. `0.2 + 0.1 > 0.3`) used to drop the last bin,
 *    so values inside it were returned unbinned.
 *  - A zero or negative step no longer risks an endless loop.
 *
 * @param value value to bin
 * @param metric metric under consideration
 * @param n not read by this function; kept for signature compatibility
 * @returns bin label, or the original value when it cannot be binned
 */
export function bin(value: number | string, metric: Metric, n?: number) {
  if (typeof value !== 'number' || metric.type !== 'numerical') {
    return value;
  }
  if (!metric.range || metric.range.length !== 3) {
    return value;
  }

  const [lower, upper, step] = metric.range;

  // A non-positive or non-finite step can never advance towards the upper bound
  if (!Number.isFinite(step) || step <= 0) {
    return value;
  }

  // Absorb floating-point noise in the bound check without admitting extra bins
  const tolerance = step * 1e-9;
  const roundTo2 = (x: number): number => parseFloat(x.toFixed(2));

  for (let idx = 0; lower + idx * step + step <= upper + tolerance; idx++) {
    const start = roundTo2(lower + idx * step);
    const end = roundTo2(lower + idx * step + step);
    if (start <= value && value <= end) {
      return `${start}-${end}`;
    }
  }

  return value;
}

/**
 * Sort comparator for aggregated metric values; the ordering depends on the
 * metric's aggregator.
 *
 * - 'average' / 'mean': keys are compared numerically (string keys are parsed
 *   with `parseFloat`). Keys of mixed types compare as equal.
 * - 'majority' (string keys only; anything else compares as equal):
 *     - 'Indeterminate' sorts before every other key;
 *     - if both keys match an entry in `metric.values` and both entries have a
 *       numeric `numericValue`, those numbers are compared;
 *     - else if both matched and both `value` fields are numbers, those are
 *       compared;
 *     - otherwise the keys are compared with `localeCompare`.
 * - any other aggregator: keys are compared with `>`; equal keys compare as
 *   equal.
 *
 * Fixes over the previous version:
 *  - 'mean' is treated like 'average' (the aggregation code in this file
 *    accepts both names for the same computation). Previously 'mean' fell
 *    through to the generic comparison, which orders string keys
 *    lexicographically ('10' before '9').
 *  - The generic comparison returns 0 for equal keys instead of -1, so the
 *    comparator is consistent.
 *
 * @param a first aggregated value
 * @param b second aggregated value
 * @param metric metric under consideration
 * @returns negative, zero or positive number, as expected by `Array.prototype.sort`
 */
export function compareMetricAggregatedValues(
  a: { key: string | number; value: number },
  b: { key: string | number; value: number },
  metric: Metric,
): number {
  if (metric.aggregator === 'average' || metric.aggregator === 'mean') {
    if (typeof a.key === 'number' && typeof b.key === 'number') {
      return a.key - b.key;
    }
    if (typeof a.key === 'string' && typeof b.key === 'string') {
      return parseFloat(a.key) - parseFloat(b.key);
    }
    return 0;
  }

  if (metric.aggregator === 'majority') {
    if (typeof a.key === 'string' && typeof b.key === 'string') {
      // 'Indeterminate' always sorts first
      const aIndeterminate = a.key === 'Indeterminate';
      const bIndeterminate = b.key === 'Indeterminate';
      if (aIndeterminate || bIndeterminate) {
        if (aIndeterminate === bIndeterminate) {
          return 0;
        }
        return aIndeterminate ? -1 : 1;
      }

      // Loose equality is kept from the original so that a numeric reference
      // value still matches its stringified key
      const aValue = metric.values?.find((entry) => entry.value == a.key);
      const bValue = metric.values?.find((entry) => entry.value == b.key);
      if (aValue && bValue) {
        // Prefer the numeric values declared on the references
        if (isNumber(aValue.numericValue) && isNumber(bValue.numericValue)) {
          return aValue.numericValue - bValue.numericValue;
        }
        // Otherwise fall back to the numeric `value` fields of the operands
        if (typeof a.value === 'number' && typeof b.value === 'number') {
          return a.value - b.value;
        }
      }

      // String comparison with non-ASCII support
      return a.key.localeCompare(b.key);
    }

    // Non-string keys: preserve the existing order
    return 0;
  }

  // Generic ordering for every other aggregator
  if (a.key === b.key) {
    return 0;
  }
  return a.key > b.key ? 1 : -1;
}