/**
 *
 * Copyright 2023-2025 InspectorRAGet Team
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 **/

import { isEmpty, isNumber } from 'lodash';

import { hash } from '@/src/utilities/strings';
import { snakeCaseKeys } from '@/src/utilities/objects';
import {
  Data,
  MetricValue,
  RawData,
  TaskEvaluation,
  DisqualificationReason,
  DisqualifiedTasks,
  Task,
  RetrievedDocument,
  Notification,
} from '@/src/types';

/**
 * Kinds of data errors recorded as the `kind` of a disqualification reason.
 */
export const DataErrorKinds = {
  MISSING_METRIC: 'MISSING METRIC',
  MISSING_MODEL: 'MISSING MODEL',
  MISSING_VALUE: 'MISSING VALUE',
};

/**
 * Own-property check that does not rely on the target object's own
 * `hasOwnProperty` member. The dictionaries inspected in this module are
 * built from input data, so a key named `hasOwnProperty` must not be able to
 * shadow the builtin.
 *
 * @param target object to inspect
 * @param key property key to look for
 * @returns `true` when `key` is an own property of `target`
 */
function hasOwn(target: object, key: PropertyKey): boolean {
  return Object.prototype.hasOwnProperty.call(target, key);
}

/**
 * Sorts metric values in place, in ascending order.
 *
 * - Two string values: compared by `numericValue` when both have a numeric
 *   one, otherwise by locale-aware string comparison.
 * - Two number values: compared numerically.
 * - Any other combination: treated as equal (relative order preserved).
 *
 * @param values metric values to sort; the array itself is reordered
 */
function sortMetricValues(values: MetricValue[]) {
  values.sort((a, b) => {
    // For string values
    if (typeof a.value === 'string' && typeof b.value === 'string') {
      // Do direct comparison on numeric values, if both exist.
      // `isNumber` already rejects `undefined` and `null`.
      if (isNumber(a.numericValue) && isNumber(b.numericValue)) {
        return a.numericValue - b.numericValue;
      }

      // Do string comparison with non-ASCII support
      return a.value.localeCompare(b.value);
    }

    // For numerical values, do direct value comparison
    if (typeof a.value === 'number' && typeof b.value === 'number') {
      return a.value - b.value;
    }

    // Default: Preserve same order
    return 0;
  });
}

/**
 * Records an evaluation (and therefore its task) as disqualified.
 *
 * If the task currently sits in `evaluationsPerTask`, all of its evaluations
 * are moved to `disqualifiedTasks` together with the new one. Otherwise the
 * evaluation and reasons are appended to the task's existing disqualified
 * entry, or a new entry is created. Both dictionaries are mutated in place.
 *
 * @param reasons reasons why the evaluation is disqualified
 * @param evaluation evaluation being disqualified
 * @param disqualifiedTasks disqualified tasks, keyed by task ID
 * @param evaluationsPerTask qualified evaluations, keyed by task ID
 */
function disqualifyEvaluation(
  reasons: DisqualificationReason[],
  evaluation: TaskEvaluation,
  disqualifiedTasks: DisqualifiedTasks,
  evaluationsPerTask: { [key: string]: TaskEvaluation[] },
) {
  const taskId = evaluation.taskId;

  // Case 1: Task was qualified so far; move it to the disqualified tasks list
  if (hasOwn(evaluationsPerTask, taskId)) {
    // Step 1.a: Copy evaluations to move
    const qualifiedEvaluations = evaluationsPerTask[taskId];

    // Step 1.b: Remove task from qualified tasks list
    delete evaluationsPerTask[taskId];

    // Step 1.c: Add to disqualified tasks list
    disqualifiedTasks[taskId] = {
      reasons: reasons,
      evaluations: [...qualifiedEvaluations, evaluation],
    };
    return;
  }

  // Case 2: Task is already disqualified; extend its entry
  if (hasOwn(disqualifiedTasks, taskId)) {
    disqualifiedTasks[taskId].reasons = [
      ...disqualifiedTasks[taskId].reasons,
      ...reasons,
    ];
    disqualifiedTasks[taskId].evaluations.push(evaluation);
    return;
  }

  // Case 3: First time this task is seen; create a disqualified entry
  disqualifiedTasks[taskId] = {
    reasons: reasons,
    evaluations: [evaluation],
  };
}

/**
 * Validates raw data and splits it into qualified data and disqualified tasks.
 *
 * A task is disqualified when any of its evaluations lacks an annotation (or
 * an annotation `value`) for a numerical/categorical metric, or when it does
 * not have an evaluation for every model listed in `data.models`.
 * Evaluations of models that are not listed in `data.models` are dropped
 * when they are otherwise valid.
 *
 * NOTE: `values` arrays of `data.metrics` are sorted in place, so the input
 * object is modified by this call.
 *
 * @param data raw data to process
 * @returns tuple of processed data (qualified tasks/evaluations only),
 * disqualified tasks keyed by task ID, and notifications (currently never
 * populated by this function)
 */
export function processData(
  data: RawData,
): [Data, DisqualifiedTasks, Notification[]] {
  // Step 0: Define notifications
  const notifications: Notification[] = [];

  // Step 1: Identify all plottable metrics and required model IDs
  const plottableMetrics = data.metrics.filter(
    (metric) => metric.type === 'numerical' || metric.type === 'categorical',
  );
  const requiredModelIDs = new Set(data.models.map((model) => model.modelId));

  /**
   * Step 2: Disqualify tasks based on following guidelines
   * 1. Only preserve evaluations for models specified in the models sections
   * 2. If task does not have evaluations for all the models from models section
   * 3. If task does not have every metric from metrics section for all the models from models section
   */
  const disqualifiedTasks: DisqualifiedTasks = {};
  const evaluationsPerTask: { [key: string]: TaskEvaluation[] } = {};

  // Step 2.a: Iterate over every evaluation entry
  data.evaluations.forEach((evaluation) => {
    // Step 2.a.i: Verify annotations for all plottable metrics exist
    const disqualificationReasons: DisqualificationReason[] = [];
    plottableMetrics.forEach((metric) => {
      if (!hasOwn(evaluation.annotations, metric.name)) {
        disqualificationReasons.push({
          kind: DataErrorKinds.MISSING_METRIC,
          data: metric.name,
        });
      } else if (isEmpty(evaluation.annotations[metric.name])) {
        disqualificationReasons.push({
          kind: DataErrorKinds.MISSING_VALUE,
          data: metric.name,
        });
      } else {
        // One reason is recorded per evaluator entry lacking a "value"
        for (const evaluator of Object.keys(
          evaluation.annotations[metric.name],
        )) {
          if (!hasOwn(evaluation.annotations[metric.name][evaluator], 'value')) {
            disqualificationReasons.push({
              kind: DataErrorKinds.MISSING_VALUE,
              data: metric.name,
            });
          }
        }
      }
    });

    // Step 2.a.ii: If annotations are incomplete, disqualify evaluation and associated task
    if (!isEmpty(disqualificationReasons)) {
      disqualifyEvaluation(
        disqualificationReasons,
        evaluation,
        disqualifiedTasks,
        evaluationsPerTask,
      );
      return;
    }

    // Step 2.a.iii: Only add if evaluation belongs to one of the models specified in the models section
    if (!requiredModelIDs.has(evaluation.modelId)) {
      return;
    }

    if (hasOwn(evaluationsPerTask, evaluation.taskId)) {
      evaluationsPerTask[evaluation.taskId].push(evaluation);
    } else if (hasOwn(disqualifiedTasks, evaluation.taskId)) {
      // Task is already disqualified; keep its evaluations together
      disqualifiedTasks[evaluation.taskId].evaluations.push(evaluation);
    } else {
      evaluationsPerTask[evaluation.taskId] = [evaluation];
    }
  });

  // Step 3: Verify evaluations exist for every model from the models section
  // Builds one MISSING_MODEL reason per required model without an evaluation.
  // The check is always performed: comparing only the number of evaluations
  // against the number of models would miss a task with duplicate
  // evaluations for one model and none for another.
  const missingModelReasons = (
    evaluations: TaskEvaluation[],
  ): DisqualificationReason[] => {
    const availableModelIDs = new Set(
      evaluations.map((evaluation) => evaluation.modelId),
    );
    return [...requiredModelIDs]
      .filter((modelId) => !availableModelIDs.has(modelId))
      .map((modelId) => {
        return { kind: DataErrorKinds.MISSING_MODEL, data: modelId };
      });
  };

  // Step 3.a: Check first in all disqualified tasks
  Object.keys(disqualifiedTasks).forEach((taskId) => {
    const reasons = missingModelReasons(disqualifiedTasks[taskId].evaluations);

    // Step 3.a.i: Update disqualified task's reasons
    if (!isEmpty(reasons)) {
      disqualifiedTasks[taskId].reasons = [
        ...disqualifiedTasks[taskId].reasons,
        ...reasons,
      ];
    }
  });

  // Step 3.b: Check in qualified tasks
  Object.keys(evaluationsPerTask).forEach((taskId) => {
    const reasons = missingModelReasons(evaluationsPerTask[taskId]);

    // Step 3.b.i: Move task from qualified task list to disqualified task list
    if (!isEmpty(reasons)) {
      disqualifiedTasks[taskId] = {
        reasons: reasons,
        evaluations: evaluationsPerTask[taskId],
      };
      delete evaluationsPerTask[taskId];
    }
  });

  // Step 4: Flatten qualified tasks into qualified evaluations list
  // Step 4.a: Retain unique qualified task IDs, annotators and qualified evaluations
  const uniqueQualifiedTaskIds = new Set<string>();
  const annotators = new Set<string>();
  const qualifiedEvaluations: TaskEvaluation[] = [];

  // Step 4.b: Iterate over each qualified task
  Object.keys(evaluationsPerTask).forEach((taskId) => {
    uniqueQualifiedTaskIds.add(taskId);
    evaluationsPerTask[taskId].forEach((evaluation) => {
      Object.keys(evaluation.annotations).forEach((metric) => {
        const entry = evaluation.annotations[metric];
        Object.keys(entry).forEach((annotator) => annotators.add(annotator));
      });
      qualifiedEvaluations.push(evaluation);
    });
  });

  // Step 5: Create a list of qualified tasks
  const tasksMap = new Map<string, Task>(
    data.tasks.map((task): [string, Task] => [task.taskId, task]),
  );
  const qualifiedTasks: Task[] = [];
  uniqueQualifiedTaskIds.forEach((taskId) => {
    // Task IDs referenced only by evaluations (no task entry) are skipped
    const task = tasksMap.get(taskId);
    if (task) {
      qualifiedTasks.push(task);
    }
  });

  return [
    {
      name: data.name || 'Example',
      // Hash is computed before metric values are sorted below
      exampleId: hash(JSON.stringify(data)),
      models: data.models,
      metrics: data.metrics.map((metric) => {
        // Step 1: Sort metric values (in place), if present
        if (metric.values) {
          sortMetricValues(metric.values);
        }

        // Step 2: Return with additional attributes
        return {
          ...metric,
          ...(metric.type === 'categorical' &&
            metric.values && {
              minValue: metric.values[0],
              maxValue: metric.values[metric.values.length - 1],
            }),
          ...(metric.type === 'numerical' &&
            metric.range &&
            metric.range.length >= 2 && {
              minValue: metric.range[0],
              maxValue: metric.range[1],
            }),
        };
      }),
      ...(data.filters && { filters: data.filters }),
      tasks: qualifiedTasks.map((task) => {
        return {
          ...task,
          taskType: task.taskType,
        };
      }),
      documents: data.documents,
      evaluations: qualifiedEvaluations,
      annotators: Array.from(annotators),
      numTasks: qualifiedTasks.length,
    },
    disqualifiedTasks,
    notifications,
  ];
}

/**
 * Assembles the object to be exported. Key order matches the exported file
 * layout: name, exampleId, filters, models, metrics, documents, tasks,
 * evaluations.
 *
 * @param data processed data providing name, IDs, filters, models and metrics
 * @param documents documents to export; the key is omitted when falsy
 * @param tasks tasks to export
 * @param evaluations evaluations to export, reduced to their exported fields
 * @returns object ready for serialization
 */
function buildExportPayload(
  data: Data,
  documents: RawData['documents'],
  tasks: RawData['tasks'],
  evaluations: Data['evaluations'],
): RawData {
  return {
    name: data.name,
    ...(data.exampleId && { exampleId: data.exampleId }),
    ...(data.filters && { filters: data.filters }),
    models: data.models,
    metrics: data.metrics,
    ...(documents && { documents: documents }),
    tasks: tasks,
    evaluations: evaluations.map((evaluation) => {
      return {
        taskId: evaluation.taskId,
        modelId: evaluation.modelId,
        modelResponse: evaluation.modelResponse,
        annotations: evaluation.annotations,
        ...(evaluation.contexts && { contexts: evaluation.contexts }),
      };
    }),
  };
}

/**
 * Exports data as a JSON file named `analytics.json` by clicking a temporary
 * download link added to the DOM.
 *
 * When `tasks` is provided it replaces `data.tasks` in the export. If it has
 * a different number of tasks than `data.tasks`, the export is reduced to the
 * evaluations of those tasks and to the documents referenced by their
 * contexts.
 *
 * NOTE: tasks in `tasks` without a `flagged` property get `flagged = false`
 * set on them in place.
 *
 * @param data processed data to export
 * @param tasks optional tasks to export instead of `data.tasks`
 * @returns `true` when the download was triggered, `false` when `data` is
 * not provided
 */
export function exportData(
  data: Data | undefined,
  tasks: Task[] | undefined,
): boolean {
  // Step 0: Verify if data is provided
  if (!data) {
    // Return "false" indicating failure
    return false;
  }

  let dataToExport: RawData;

  if (!tasks) {
    // Step 1: No tasks specified, export data as is
    dataToExport = buildExportPayload(
      data,
      data.documents,
      data.tasks,
      data.evaluations,
    );
  } else {
    // Step 1.a: Update flagged property
    tasks.forEach((task) => {
      if (!hasOwn(task, 'flagged')) {
        task.flagged = false;
      }
    });

    if (data.tasks.length !== tasks.length) {
      // Step 1.b: Create reduced analytics data, if not all tasks are specified
      // Step 1.b.i: Build documents map
      const documentsMap: Map<string, RetrievedDocument> = new Map(
        data.documents?.map((doc): [string, RetrievedDocument] => [
          doc.documentId,
          doc,
        ]),
      );

      // Step 1.b.ii: Necessary variables
      const relevantDocuments: Set<RetrievedDocument> = new Set();
      const relevantTaskIds: Set<string> = new Set();

      // Step 1.b.iii: Iterate over tasks to identify referenced documents/relevant context
      tasks.forEach((task) => {
        // Add task ID to relevant task ID set
        relevantTaskIds.add(task.taskId);

        if (documentsMap.size !== 0) {
          task.contexts?.forEach((context) => {
            // Add referenced document to relevant documents list
            if (typeof context !== 'string') {
              const referenceDocument = documentsMap.get(context.documentId);
              if (referenceDocument) {
                relevantDocuments.add(referenceDocument);
              }
            }
          });
        }
      });

      // Step 1.b.iv: Create an object to be exported
      dataToExport = buildExportPayload(
        data,
        relevantDocuments.size !== 0
          ? Array.from(relevantDocuments)
          : undefined,
        tasks,
        data.evaluations.filter((evaluation) =>
          relevantTaskIds.has(evaluation.taskId),
        ),
      );
    } else {
      // Step 1.c: Create an object to be exported by copying over tasks information
      dataToExport = buildExportPayload(
        data,
        data.documents,
        tasks,
        data.evaluations,
      );
    }
  }

  // Step 2: Create <a> tag
  const element = document.createElement('a');

  // Step 2.a: Set attributes
  element.setAttribute(
    'href',
    'data:application/json;charset=utf-8, ' +
      encodeURIComponent(JSON.stringify(snakeCaseKeys(dataToExport))),
  );
  element.setAttribute('download', 'analytics.json');

  // Step 2.b: Add to DOM tree and click it
  document.body.appendChild(element);
  element.click();

  // Step 2.c: Cleanup
  document.body.removeChild(element);

  // Step 3: Return "true" indicating success
  return true;
}