/**
 *
 * Copyright 2023-2025 InspectorRAGet Team
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 **/

'use client';

import { isEmpty } from 'lodash';
import cx from 'classnames';
import { useEffect, useMemo, useState, memo } from 'react';

import { Tabs, TabList, Tab, TabPanels, TabPanel } from '@carbon/react';
import {
  DataVis_4,
  StringText,
  ChartRadar,
  UserData,
  ChartMultitype,
  Compare,
  HeatMap_03,
} from '@carbon/icons-react';

import { Data, TaskEvaluation } from '@/src/types';
import { calculateAggregateValue } from '@/src/utilities/metrics';
import { useDataStore } from '@/src/store';
import { useBackButton } from '@/src/hooks/useBackButton';

import Task from '@/src/views/task/Task';
import ExampleTile from '@/src/components/example-tile/ExampleTile';
import DisabledTab from '@/src/components/disabled/DisabledTab';
import DataCharacteristics from '@/src/views/data-characteristics/DataCharacteristics';
import PredictionsTable from '@/src/views/predictions-table/PredictionsTable';
import PerformanceOverview from '@/src/views/performance-overview/PerformanceOverview';
import AnnotatorBehavior from '@/src/views/annotator-behavior/AnnotatorBehavior';
import ModelBehavior from '@/src/views/model-behavior/ModelBehavior';
import ModelComparator from '@/src/views/model-comparator/ModelComparator';
import MetricBehavior from '@/src/views/metric-behavior/MetricBehavior';

import classes from './Example.module.scss';

// ===================================================================================
//                                   MAIN FUNCTION
// ===================================================================================
export default memo(function Example({ data }: { data: Data }) {
  // Step 1: Initialize state and necessary variables
  const [selectedTaskId, setSelectedTaskId] = useState<string | undefined>(
    undefined,
  );

  // Step 2: Run effects
  // Step 2.a: Set example data in data store
  const { set: setData } = useDataStore();
  useEffect(() => {
    setData(data);
  }, [data]);

  // Step 2.b: Restrict to `numerical` and `categorical` metrics
  const [eligibleMetricsMap, eligibleMetrics] = useMemo(() => {
    const metricMap = Object.fromEntries(
      data.metrics
        .filter(
          (metric) =>
            metric.type === 'numerical' || metric.type === 'categorical',
        )
        .map((metric) => [metric.name, metric]),
    );
    return [metricMap, Object.values(metricMap)];
  }, [data.metrics]);
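  // The memo below reshapes `data.evaluations` into per-metric lists. An
  // illustrative sketch of the resulting `evaluationsPerMetric` value (all
  // field values below are made up):
  //
  //   {
  //     faithfulness: [
  //       {
  //         taskId: 'task_1',
  //         modelId: 'model_a',
  //         modelResponse: '...',
  //         annotations: { ... },
  //         faithfulness: { ... },          // this metric's annotations
  //         faithfulness_agg: { ... },      // only when computable
  //         query: 'last user turn or ID',  // only when recoverable
  //         // ...plus one property per applicable filter
  //       },
  //     ],
  //   }
  //
  // `filters` is the companion map from each filter name to its observed
  // values, e.g., { domain: ['finance', 'travel'] }.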
  // Step 2.c: Build evaluations for each metric
  const [evaluationsPerMetric, filters] = useMemo(() => {
    // Step 2.c.i: Initialize model names, tasks map, queries map (used to
    // visualize the task table), applicable filters and the per-metric
    // evaluations map
    const modelNames: { [key: string]: string } = Object.fromEntries(
      data.models.map((model) => [model.modelId, model.name]),
    );
    const tasks = new Map();
    const queries = new Map<string, string>();
    const applicableFilters: { [key: string]: Set<string> } = data.filters
      ? Object.fromEntries(
          data.filters.map((filter) => [filter, new Set<string>()]),
        )
      : {};
    const evaluationsPerMetricMap: { [key: string]: TaskEvaluation[] } = {};

    // Step 2.c.ii: Iterate over each task to populate the tasks map, queries
    // map and applicable filters
    data.tasks.forEach((task) => {
      // Step 2.c.ii.*: Add to tasks map
      tasks.set(task.taskId, task);

      // Step 2.c.ii.**: Add to queries map, preferring the last input turn's
      // `text` (or `content`, for chat-style turns) and falling back to the
      // task ID
      if (typeof task.input === 'string') {
        queries.set(task.taskId, task.input);
      } else if (Array.isArray(task.input) && task.input.length > 0) {
        const lastTurn = task.input[task.input.length - 1];
        if (lastTurn.hasOwnProperty('text') && lastTurn['text']) {
          queries.set(task.taskId, lastTurn['text']);
        } else if (
          lastTurn.hasOwnProperty('role') &&
          ['system', 'developer', 'user', 'assistant'].includes(
            lastTurn['role'],
          ) &&
          lastTurn.hasOwnProperty('content') &&
          lastTurn['content']
        ) {
          queries.set(task.taskId, lastTurn['content']);
        } else {
          queries.set(task.taskId, task.taskId);
        }
      } else {
        queries.set(task.taskId, task.taskId);
      }

      // Step 2.c.ii.***: Add filters with value, if requested
      if (data.filters) {
        for (const filter of data.filters) {
          if (task.hasOwnProperty(filter)) {
            const value = task[filter];
            if (typeof value === 'string') {
              applicableFilters[filter].add(value);
            } else if (Array.isArray(value)) {
              value.forEach((v) => {
                if (typeof v === 'string') {
                  applicableFilters[filter].add(v);
                }
              });
            }
          }
        }
      }
    });

    // Step 2.c.iii: Remove filters with a single value
    for (const key in applicableFilters) {
      if (applicableFilters[key].size < 2) {
        delete applicableFilters[key];
      }
    }

    // Step 2.c.iv: Iterate over each evaluation
    data.evaluations?.forEach((evaluation) => {
      // Step 2.c.iv.*: Fetch relevant task
      const task = tasks.get(evaluation.taskId);

      // Step 2.c.iv.**: Identify values for applicable filters for the
      // current task
      const filterValues = {};
      if (task && !isEmpty(applicableFilters)) {
        for (const filter in applicableFilters) {
          if (task.hasOwnProperty(filter)) {
            filterValues[filter] = task[filter];
          }
        }
      }

      // Step 2.c.iv.***: Iterate over each annotation
      for (const metricName in evaluation.annotations) {
        // Process only eligible metrics
        if (!eligibleMetricsMap.hasOwnProperty(metricName)) {
          continue;
        }

        // Compute the aggregate statistic across annotators
        const aggregateStatistic = calculateAggregateValue(
          eligibleMetricsMap[metricName],
          evaluation.annotations[metricName],
        );

        // Build the metric-wise evaluation record
        const record: TaskEvaluation = {
          taskId: evaluation.taskId,
          modelId: evaluation.modelId,
          modelResponse: evaluation.modelResponse,
          annotations: evaluation.annotations,
          [metricName]: evaluation.annotations[metricName],
          ...(aggregateStatistic && {
            [`${metricName}_agg`]: aggregateStatistic,
          }),
          ...(queries.has(evaluation.taskId) && {
            query: queries.get(evaluation.taskId),
          }),
          ...filterValues,
        };

        // Append it to the metric's list, creating the list on first use
        if (!evaluationsPerMetricMap.hasOwnProperty(metricName)) {
          evaluationsPerMetricMap[metricName] = [];
        }
        evaluationsPerMetricMap[metricName].push(record);
      }
    });
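    // At this point each eligible metric maps to one record per evaluation,
    // i.e., per (task, model) pair: with 2 models fully evaluated on 10
    // tasks, each list holds up to 20 entries.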
    // Step 2.c.v: Sort evaluations based on model
    for (const evaluations of Object.values(evaluationsPerMetricMap)) {
      evaluations.sort((a, b) => {
        // Step 2.c.v.*: Compare model names, if available
        if (
          modelNames.hasOwnProperty(a.modelId) &&
          modelNames.hasOwnProperty(b.modelId)
        ) {
          return modelNames[a.modelId].localeCompare(modelNames[b.modelId]);
        }

        // Step 2.c.v.**: Compare model IDs (fallback)
        return a.modelId.localeCompare(b.modelId);
      });
    }

    // Step 2.c.vi: Return
    return [
      evaluationsPerMetricMap,
      Object.fromEntries(
        Object.entries(applicableFilters).map(([filter, vals]) => [
          filter,
          [...vals],
        ]),
      ),
    ];
  }, [data.evaluations, data.tasks, data.models, eligibleMetricsMap]);

  // Step 2.d: Register browser back-button handling
  useBackButton();
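  // The render below has two layers: a task-level drill-down, shown once a
  // task is selected from any of the tabbed views, and the example-level
  // analyses organized as tabs.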
  // Step 3: Return
  return (
    <>
      {selectedTaskId && (
        <Task
          taskId={selectedTaskId}
          onClickBack={() => {
            setSelectedTaskId(undefined);
          }}
        />
      )}
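      {/* Example-level analyses stay mounted (hidden via CSS) while a task
          is open; the `hidden` class is assumed to exist in
          Example.module.scss. */}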
      <div className={cx(classes.page, selectedTaskId && classes.hidden)}>
        <ExampleTile data={data} />
        <Tabs>
          <TabList aria-label="Example analysis tabs">
            <Tab renderIcon={DataVis_4}>Data Characteristics</Tab>
            <Tab renderIcon={StringText}>Predictions</Tab>
            <Tab renderIcon={UserData}>Annotator Behavior</Tab>
            <Tab renderIcon={ChartRadar}>Performance Overview</Tab>
            <Tab renderIcon={ChartMultitype}>Model Behavior</Tab>
            <Tab renderIcon={Compare}>Model Comparator</Tab>
            <Tab renderIcon={HeatMap_03}>Metric Behavior</Tab>
          </TabList>
          <TabPanels>
            <TabPanel>
              <DataCharacteristics
                evaluationsPerMetric={evaluationsPerMetric}
                filters={filters}
              />
            </TabPanel>
            <TabPanel>
              <PredictionsTable
                evaluationsPerMetric={evaluationsPerMetric}
                models={data.models}
                filters={filters}
              />
            </TabPanel>
            <TabPanel>
              <AnnotatorBehavior
                evaluationsPerMetric={evaluationsPerMetric}
                metrics={eligibleMetrics.filter(
                  (metric) => metric.author === 'human',
                )}
                filters={filters}
              />
            </TabPanel>
            <TabPanel>
              <PerformanceOverview
                evaluationsPerMetric={evaluationsPerMetric}
                models={data.models}
                metrics={eligibleMetrics}
              />
            </TabPanel>
            <TabPanel>
              <ModelBehavior
                evaluationsPerMetric={evaluationsPerMetric}
                metrics={eligibleMetrics}
                filters={filters}
                onTaskSelection={(taskId) => {
                  setSelectedTaskId(taskId);
                }}
              />
            </TabPanel>
            <TabPanel>
              {data.models.length === 1 ? (
                <DisabledTab />
              ) : (
                <ModelComparator
                  evaluationsPerMetric={evaluationsPerMetric}
                  models={data.models}
                  metrics={eligibleMetrics}
                  filters={filters}
                  onTaskSelection={(taskId) => {
                    setSelectedTaskId(taskId);
                  }}
                />
              )}
            </TabPanel>
            <TabPanel>
              {eligibleMetrics.length === 1 ? (
                <DisabledTab />
              ) : (
                <MetricBehavior
                  evaluationsPerMetric={evaluationsPerMetric}
                  metrics={eligibleMetrics}
                  onTaskSelection={(taskId) => {
                    setSelectedTaskId(taskId);
                  }}
                />
              )}
            </TabPanel>
          </TabPanels>
        </Tabs>
      </div>
    </>
  );
});