/**
 *
 * Copyright 2023-2025 InspectorRAGet Team
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 **/
'use client';

import { isEmpty } from 'lodash';
import cx from 'classnames';
import { useEffect, useMemo, useState, memo } from 'react';
import { Tabs, TabList, Tab, TabPanels, TabPanel } from '@carbon/react';
import {
  DataVis_4,
  StringText,
  ChartRadar,
  UserData,
  ChartMultitype,
  Compare,
  HeatMap_03,
} from '@carbon/icons-react';

import { Data, TaskEvaluation } from '@/src/types';
import { calculateAggregateValue } from '@/src/utilities/metrics';
import { useDataStore } from '@/src/store';
import { useBackButton } from '@/src/hooks/useBackButton';
import Task from '@/src/views/task/Task';
import ExampleTile from '@/src/components/example-tile/ExampleTile';
import DisabledTab from '@/src/components/disabled/DisabledTab';
import DataCharacteristics from '@/src/views/data-characteristics/DataCharacteristics';
import PredictionsTable from '@/src/views/predictions-table/PredictionsTable';
import PerformanceOverview from '@/src/views/performance-overview/PerformanceOverview';
import AnnotatorBehavior from '@/src/views/annotator-behavior/AnnotatorBehavior';
import ModelBehavior from '@/src/views/model-behavior/ModelBehavior';
import ModelComparator from '@/src/views/model-comparator/ModelComparator';
import MetricBehavior from '@/src/views/metric-behavior/MetricBehavior';

import classes from './Example.module.scss';
// ===================================================================================
// MAIN FUNCTION
// ===================================================================================
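/**
 * Example analysis view.
 *
 * Renders the evaluation results for a single analytics file (`data`) as a set
 * of tabs: data characteristics, predictions, annotator behavior, performance
 * overview, model behavior, model comparator, and metric behavior. Selecting a
 * task from any view opens that task in an overlay. The caller is expected to
 * pass an already parsed and validated `Data` object.
 */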
export default memo(function Example({ data }: { data: Data }) {
  // Step 1: Initialize state and necessary variables
  const [selectedTaskId, setSelectedTaskId] = useState<string | undefined>(
    undefined,
  );

  // Step 2: Run effects
  // Step 2.a: Set example data in data store
  const { set: setData } = useDataStore();
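  // Note: `set` from the data store is assumed to be referentially stable
  // across renders, which is why it is left out of the dependency array of
  // the effect below.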
  useEffect(() => {
    setData(data);
  }, [data]);
  // Step 2.b: Restrict to `numerical` and `categorical` metrics
  const [eligibleMetricsMap, eligibleMetrics] = useMemo(() => {
    const metricMap = Object.fromEntries(
      data.metrics
        .filter(
          (metric) =>
            metric.type === 'numerical' || metric.type === 'categorical',
        )
        .map((metric) => [metric.name, metric]),
    );
    return [metricMap, Object.values(metricMap)];
  }, [data.metrics]);
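  // Illustrative shape only (the metric name below is hypothetical):
  //   eligibleMetricsMap = { faithfulness: { name: 'faithfulness', type: 'numerical', ... }, ... }
  //   eligibleMetrics    = Object.values(eligibleMetricsMap)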
  // Step 2.c: Build evaluations for each metric
  const [evaluationsPerMetric, filters] = useMemo(() => {
    // Step 2.c.i: Initialize model names, tasks map, queries map (used to
    // visualize the task table), applicable filters, and the evaluations per
    // metric map
    const modelNames: { [key: string]: string } = Object.fromEntries(
      data.models.map((model) => [model.modelId, model.name]),
    );
    const tasks = new Map<string, any>();
    const queries = new Map<string, string>();
    const applicableFilters: { [key: string]: Set<string> } = data.filters
      ? Object.fromEntries(
          data.filters.map((filter) => [filter, new Set<string>()]),
        )
      : {};
    const evaluationsPerMetricMap: { [key: string]: TaskEvaluation[] } = {};
    // Step 2.c.ii: Iterate over each task to populate the tasks map, queries
    // map, and applicable filters
    data.tasks.forEach((task) => {
      // Step 2.c.ii.*: Add to tasks map
      tasks.set(task.taskId, task);

      // Step 2.c.ii.**: Add to queries map (use the raw string input, else the
      // `text` or `content` of the last input message, else fall back to the
      // task ID)
      const lastInput = Array.isArray(task.input)
        ? task.input[task.input.length - 1]
        : undefined;
      if (typeof task.input === 'string') {
        queries.set(task.taskId, task.input);
      } else if (
        lastInput &&
        lastInput.hasOwnProperty('text') &&
        lastInput['text']
      ) {
        queries.set(task.taskId, lastInput['text']);
      } else if (
        lastInput &&
        lastInput.hasOwnProperty('role') &&
        (lastInput['role'] === 'system' ||
          lastInput['role'] === 'developer' ||
          lastInput['role'] === 'user' ||
          lastInput['role'] === 'assistant') &&
        lastInput.hasOwnProperty('content') &&
        lastInput['content']
      ) {
        queries.set(task.taskId, lastInput['content']);
      } else {
        queries.set(task.taskId, task.taskId);
      }
      // Step 2.c.ii.***: Add filter values, if filters are requested
      if (data.filters) {
        for (const filter of data.filters) {
          if (task.hasOwnProperty(filter)) {
            const value = task[filter];
            if (typeof value === 'string') {
              applicableFilters[filter].add(value);
            } else if (Array.isArray(value)) {
              value.forEach((v) => {
                if (typeof v === 'string') {
                  applicableFilters[filter].add(v);
                }
              });
            }
          }
        }
      }
    });

    // Step 2.c.iii: Remove filters with fewer than two values
    for (const key in applicableFilters) {
      if (applicableFilters[key].size < 2) {
        delete applicableFilters[key];
      }
    }
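    // Illustrative shape at this point (the filter name and values below are hypothetical):
    //   applicableFilters = { domain: Set { 'finance', 'legal' }, ... }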
    // Step 2.c.iv: Iterate over each evaluation
    data.evaluations?.forEach((evaluation) => {
      // Step 2.c.iv.*: Fetch relevant task
      const task = tasks.get(evaluation.taskId);

      // Step 2.c.iv.**: Identify values of the applicable filters for the
      // current task
      const filters = {};
      if (task && !isEmpty(applicableFilters)) {
        for (const filter in applicableFilters) {
          if (task.hasOwnProperty(filter)) {
            filters[filter] = task[filter];
          }
        }
      }

      // Step 2.c.iv.***: Iterate over each annotation
      for (const metricName in evaluation.annotations) {
        // Process only eligible metrics
        if (!eligibleMetricsMap.hasOwnProperty(metricName)) {
          continue;
        }

        // Compute the aggregate statistic
        const aggregateStatistic = calculateAggregateValue(
          eligibleMetricsMap[metricName],
          evaluation.annotations[metricName],
        );

        // Build the metric-wise evaluation record
        const record = {
          taskId: evaluation.taskId,
          modelId: evaluation.modelId,
          modelResponse: evaluation.modelResponse,
          annotations: evaluation.annotations,
          [metricName]: evaluation.annotations[metricName],
          ...(aggregateStatistic && {
            [`${metricName}_agg`]: aggregateStatistic,
          }),
          ...(queries.has(evaluation.taskId) && {
            query: queries.get(evaluation.taskId),
          }),
          ...filters,
        };

        // Add the record to the metric-wise evaluations map
        if (evaluationsPerMetricMap.hasOwnProperty(metricName)) {
          evaluationsPerMetricMap[metricName].push(record);
        } else {
          evaluationsPerMetricMap[metricName] = [record];
        }
      }
    });
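    // Illustrative entry (the metric name and values below are hypothetical):
    //   evaluationsPerMetricMap['faithfulness'] = [
    //     { taskId: 'task_1', modelId: 'model_a', modelResponse: '...',
    //       annotations: { ... }, faithfulness: { ... }, faithfulness_agg: { ... },
    //       query: '...', ...filter values },
    //     ...
    //   ];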
    // Step 2.c.v: Sort evaluations based on model
    for (const evaluations of Object.values(evaluationsPerMetricMap)) {
      evaluations.sort((a, b) => {
        // Step 2.c.v.*: Compare model names, if available
        if (
          modelNames.hasOwnProperty(a.modelId) &&
          modelNames.hasOwnProperty(b.modelId)
        ) {
          return modelNames[a.modelId].localeCompare(modelNames[b.modelId]);
        }

        // Step 2.c.v.**: Compare model IDs (fallback)
        return a.modelId.localeCompare(b.modelId);
      });
    }

    // Step 2.c.vi: Return
    return [
      evaluationsPerMetricMap,
      Object.fromEntries(
        Object.entries(applicableFilters).map(([filter, vals]) => [
          filter,
          [...vals],
        ]),
      ),
    ];
  }, [data.evaluations, data.tasks, data.models, data.filters, eligibleMetricsMap]);
  // Step 2.d: Register browser back-button handling
  useBackButton();
  // Step 3: Return
  return (
    <div className={classes.page}>
      <div className={cx(classes.taskOverlay, selectedTaskId && classes.active)}>
        {selectedTaskId && (
          <Task
            taskId={selectedTaskId}
            onClose={() => {
              setSelectedTaskId(undefined);
            }}
          />
        )}
      </div>
      <div className={classes.headerContainer}>
        <ExampleTile data={data} disableNavigation={true} expanded={false} />
      </div>
      <div className={classes.analysisContainer}>
        <Tabs>
          <TabList
            className={classes.tabList}
            aria-label="Metrics tab"
            contained
            fullWidth
          >
            <Tab key={'data-characteristics-tab'} renderIcon={DataVis_4}>
              Data Characteristics
            </Tab>
            <Tab key={'predictions-tab'} renderIcon={StringText}>
              Predictions
            </Tab>
            <Tab key={'annotator-behavior-tab'} renderIcon={UserData}>
              Annotator Behavior
            </Tab>
            <Tab key={'overview-tab'} renderIcon={ChartRadar}>
              Performance Overview
            </Tab>
            <Tab key={'model-behavior-tab'} renderIcon={ChartMultitype}>
              Model Behavior
            </Tab>
            <Tab key={'model-comparator-tab'} renderIcon={Compare}>
              Model Comparator
            </Tab>
            <Tab key={'metric-behavior-tab'} renderIcon={HeatMap_03}>
              Metric Behavior
            </Tab>
          </TabList>
          <TabPanels>
            <TabPanel key={'data-characteristics-panel'}>
              <DataCharacteristics
                tasks={data.tasks}
                filters={filters}
              ></DataCharacteristics>
            </TabPanel>
            <TabPanel key={'predictions-panel'}>
              <PredictionsTable
                tasks={data.tasks}
                models={data.models}
                evaluations={data.evaluations}
                filters={filters}
              ></PredictionsTable>
            </TabPanel>
            <TabPanel key={'annotator-behavior-panel'}>
              <AnnotatorBehavior
                evaluationsPerMetric={evaluationsPerMetric}
                models={data.models}
                metrics={data.metrics.filter(
                  (metric) => metric.author === 'human',
                )}
                filters={filters}
              ></AnnotatorBehavior>
            </TabPanel>
            <TabPanel key={'performance-overview-panel'}>
              <PerformanceOverview
                evaluationsPerMetric={evaluationsPerMetric}
                models={data.models}
                metrics={eligibleMetrics}
                filters={filters}
                numTasks={data.numTasks}
              ></PerformanceOverview>
            </TabPanel>
            <TabPanel key={'model-behavior-panel'}>
              <ModelBehavior
                evaluationsPerMetric={evaluationsPerMetric}
                models={data.models}
                metrics={eligibleMetrics}
                filters={filters}
                onTaskSelection={(taskId) => {
                  setSelectedTaskId(taskId);
                }}
              ></ModelBehavior>
            </TabPanel>
            <TabPanel key={'model-comparator-panel'}>
              {data.models.length === 1 ? (
                <DisabledTab
                  message={'Nothing to see here in the absence of multiple models.'}
                />
              ) : (
                <ModelComparator
                  evaluationsPerMetric={evaluationsPerMetric}
                  models={data.models}
                  metrics={eligibleMetrics}
                  filters={filters}
                  onTaskSelection={(taskId) => {
                    setSelectedTaskId(taskId);
                  }}
                ></ModelComparator>
              )}
            </TabPanel>
            <TabPanel key={'metric-behavior-panel'}>
              {eligibleMetrics.length === 1 ? (
                <DisabledTab message="Nothing to see here in the absence of multiple metrics." />
              ) : (
                <MetricBehavior
                  evaluationsPerMetric={evaluationsPerMetric}
                  models={data.models}
                  metrics={eligibleMetrics}
                  filters={filters}
                  onTaskSelection={(taskId) => {
                    setSelectedTaskId(taskId);
                  }}
                ></MetricBehavior>
              )}
            </TabPanel>
          </TabPanels>
        </Tabs>
      </div>
    </div>
  );
}); | |