/**
 *
 * Copyright 2023-2025 InspectorRAGet Team
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 **/
'use client';

import { countBy, isEmpty } from 'lodash';
import cx from 'classnames';
import { useState, useMemo, useEffect, useRef } from 'react';
import { Tile, Button, Slider } from '@carbon/react';
import { WarningAlt } from '@carbon/icons-react';
import { ScatterChart } from '@carbon/charts-react';

import { useTheme } from '@/src/theme';
import { Model, Metric, TaskEvaluation } from '@/src/types';
import {
  castToNumber,
  AgreementLevels,
  extractMetricDisplayName,
} from '@/src/utilities/metrics';
import { calculateFisherRandomization } from '@/src/utilities/significance';
import { areObjectsIntersecting } from '@/src/utilities/objects';
import { hash } from '@/src/utilities/strings';
import Filters from '@/src/components/filters/Filters';
import TasksTable from '@/src/views/tasks-table/TasksTable';
import ModelSelector from '@/src/components/selectors/ModelSelector';
import MetricSelector from '@/src/components/selectors/MetricSelector';
import { getModelColorPalette } from '@/src/utilities/colors';

import '@carbon/charts-react/styles.css';
import classes from './ModelComparator.module.scss';
// ===================================================================================
// TYPES
// ===================================================================================
type StatisticalInformation = {
  p: number;
  distributionA: number[];
  meanA: number;
  distributionB: number[];
  meanB: number;
  taskIds?: string[];
};
interface Props {
  evaluationsPerMetric: { [key: string]: TaskEvaluation[] };
  models: Model[];
  metrics: Metric[];
  filters: { [key: string]: string[] };
  onTaskSelection: (taskId: string) => void;
}
// ===================================================================================
// COMPUTE FUNCTIONS
// ===================================================================================
/**
 * Build an array containing evaluations only for the selected models for each task.
 *
 * Eligibility criteria:
 *
 * 1. A task must have evaluations for both selected models.
 *
 * 2. Each evaluation must have an agreement value for the selected metric.
 *
 * @param evaluations evaluations for all tasks
 * @param modelA first selected model
 * @param modelB second selected model
 * @param metric selected metric
 * @param selectedFilters currently applied filters
 * @param selectedMetricRange optional value range for numerical metrics
 * @returns an array of evaluation pairs, one pair per eligible task
 */
function extractEvaluationsPerTask(
  evaluations: TaskEvaluation[],
  modelA: Model,
  modelB: Model,
  metric: string,
  selectedFilters: { [key: string]: string[] },
  selectedMetricRange?: number[],
) {
  // Step 1: Initialize necessary variables
  const modelEvaluationsPerTask: { [key: string]: TaskEvaluation[] } = {};

  // Step 2: Add evaluation to the task's entry, if it meets the eligibility criteria
  evaluations.forEach((evaluation) => {
    if (
      (evaluation.modelId === modelA.modelId ||
        evaluation.modelId === modelB.modelId) &&
      evaluation[`${metric}_agg`].level !== AgreementLevels.NO_AGREEMENT &&
      (!isEmpty(selectedFilters)
        ? areObjectsIntersecting(selectedFilters, evaluation)
        : true)
    ) {
      const modelEvaluationsForTask =
        modelEvaluationsPerTask[evaluation.taskId];
      if (modelEvaluationsForTask) {
        modelEvaluationsForTask.push(evaluation);
      } else {
        modelEvaluationsPerTask[evaluation.taskId] = [evaluation];
      }
    }
  });

  // Step 3: Retain only those tasks which have evaluations for both models
  // and where one or more models have an aggregate value in the selected range
  return Object.values(modelEvaluationsPerTask).filter(
    (entry) =>
      entry.length === 2 &&
      (selectedMetricRange
        ? (entry[0][`${metric}_agg`].value >= selectedMetricRange[0] &&
            entry[0][`${metric}_agg`].value <= selectedMetricRange[1]) ||
          (entry[1][`${metric}_agg`].value >= selectedMetricRange[0] &&
            entry[1][`${metric}_agg`].value <= selectedMetricRange[1])
        : true),
  );
}
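
// Illustrative only: a minimal sketch of how `extractEvaluationsPerTask` might
// be called; the model IDs, metric name, and empty filter object below are
// hypothetical and not taken from the application itself.
//
//   const pairs = extractEvaluationsPerTask(
//     evaluationsPerMetric['faithfulness'], // all evaluations for one metric
//     { modelId: 'model_a', name: 'Model A' } as Model,
//     { modelId: 'model_b', name: 'Model B' } as Model,
//     'faithfulness',
//     {}, // no filters applied
//   );
//   // => [[evaluationForModelA, evaluationForModelB], ...]: one pair per task
//   //    where both models were judged and annotators reached agreement.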
/**
 * Run a statistical significance test based on the Fisher randomization method.
 * @param evaluationsPerMetric evaluations per metric
 * @param metrics metrics
 * @param modelA first selected model
 * @param modelB second selected model
 * @param selectedMetric if `undefined`, run for all metrics in the `evaluationsPerMetric` object
 * @param selectedFilters currently applied filters
 * @param selectedMetricRange optional value range for numerical metrics
 * @returns a map from metric name to its statistical information
 */
function runStatisticalSignificanceTest(
  evaluationsPerMetric: { [key: string]: TaskEvaluation[] },
  metrics: Metric[],
  modelA: Model,
  modelB: Model,
  selectedMetric: Metric | undefined,
  selectedFilters: { [key: string]: string[] },
  selectedMetricRange?: number[],
) {
  // Step 1: Initialize necessary variables
  const evaluationsPerMetricPerTask: { [key: string]: TaskEvaluation[][] } = {};

  // Step 2: Retain evaluations for tasks where both models have an agreement value
  if (selectedMetric) {
    const evaluationsPerTask = extractEvaluationsPerTask(
      evaluationsPerMetric[selectedMetric.name],
      modelA,
      modelB,
      selectedMetric.name,
      selectedFilters,
      selectedMetricRange,
    );
    if (evaluationsPerTask.length !== 0) {
      evaluationsPerMetricPerTask[selectedMetric.name] = evaluationsPerTask;
    }
  } else {
    Object.keys(evaluationsPerMetric).forEach((metric) => {
      const evaluationsPerTask = extractEvaluationsPerTask(
        evaluationsPerMetric[metric],
        modelA,
        modelB,
        metric,
        selectedFilters,
        selectedMetricRange,
      );
      if (evaluationsPerTask.length !== 0) {
        evaluationsPerMetricPerTask[metric] = evaluationsPerTask;
      }
    });
  }

  // Step 3: Compute each model's value distribution for every metric
  const distributionA: { [key: string]: number[] } = {};
  const distributionB: { [key: string]: number[] } = {};
  const taskIds: { [key: string]: string[] } = {};
  Object.keys(evaluationsPerMetricPerTask).forEach((metric) => {
    const metricValues = metrics.find((entry) => entry.name === metric)?.values;
    taskIds[metric] = evaluationsPerMetricPerTask[metric].map(
      (entry) => entry[0].taskId,
    );
    distributionA[metric] = evaluationsPerMetricPerTask[metric].map((entry) =>
      castToNumber(
        entry[0].modelId === modelA.modelId
          ? entry[0][`${metric}_agg`].value
          : entry[1][`${metric}_agg`].value,
        metricValues,
      ),
    );
    distributionB[metric] = evaluationsPerMetricPerTask[metric].map((entry) =>
      castToNumber(
        entry[1].modelId === modelB.modelId
          ? entry[1][`${metric}_agg`].value
          : entry[0][`${metric}_agg`].value,
        metricValues,
      ),
    );
  });

  // Step 4: Compute the p-value and means for every metric by comparing distributions
  const information: { [key: string]: StatisticalInformation } = {};
  Object.keys(evaluationsPerMetricPerTask).forEach((metric) => {
    const [p, meanA, meanB] = calculateFisherRandomization(
      distributionA[metric],
      distributionB[metric],
    );
    information[metric] = {
      p: p,
      distributionA: distributionA[metric],
      meanA: meanA,
      distributionB: distributionB[metric],
      meanB: meanB,
      taskIds: taskIds[metric],
    };
  });

  return information;
}
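
// Illustrative only: the shape of the map returned above, keyed by metric name
// (the metric name and all numbers below are made up for illustration).
//
//   {
//     faithfulness: {
//       p: 0.0312,                    // Fisher randomization p-value
//       distributionA: [2, 3, 1, 3],  // model A's per-task aggregate scores
//       meanA: 2.25,
//       distributionB: [1, 2, 1, 2],  // model B's per-task aggregate scores
//       meanB: 1.5,
//       taskIds: ['t1', 't2', 't3', 't4'],
//     },
//   }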
// ===================================================================================
// RENDER FUNCTIONS
// ===================================================================================
function prepareScatterPlotData(
  modelA: string,
  distributionA: number[],
  modelB: string,
  distributionB: number[],
  taskIds?: string[],
) {
  // Step 1: Verify that both distributions have the same length
  if (distributionA.length !== distributionB.length) {
    return [];
  }

  // Step 2: Collate model-wise predictions per task
  const distributions: { values: number[]; taskId: string }[] = [];
  distributionA.forEach((valueA, index) => {
    distributions.push({
      taskId: taskIds ? taskIds[index] : `${index}`,
      values: [valueA, distributionB[index]],
    });
  });

  // Step 3: Sort primarily on model B's value, breaking ties on model A's value
  distributions.sort(
    (a, b) => a.values[1] - b.values[1] || a.values[0] - b.values[0],
  );

  // Step 4: Prepare chart data
  const chartData: { [key: string]: string | number }[] = [];
  distributions.forEach((entry, idx) => {
    // Model A record
    chartData.push({
      group: modelA,
      key: idx,
      value: entry.values[0],
      ...(taskIds && { taskId: entry.taskId }),
    });
    // Model B record
    chartData.push({
      group: modelB,
      key: idx,
      value: entry.values[1],
      ...(taskIds && { taskId: entry.taskId }),
    });
  });

  return chartData;
}
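
// Illustrative only: `prepareScatterPlotData` flattens each task into two
// records (one per model) in the format Carbon's ScatterChart consumes. For
// hypothetical inputs ('A', [3, 1], 'B', [2, 2]), the output would resemble:
//
//   [
//     { group: 'A', key: 0, value: 1 }, { group: 'B', key: 0, value: 2 },
//     { group: 'A', key: 1, value: 3 }, { group: 'B', key: 1, value: 2 },
//   ]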
function renderResult(
  statisticalInformationPerMetric: { [key: string]: StatisticalInformation },
  metric: Metric,
  modelA: Model,
  modelB: Model,
  numEvaluations: number,
  modelColors: { [key: string]: string },
  modelOrder: string[],
  theme?: string,
) {
  if (statisticalInformationPerMetric.hasOwnProperty(metric.name)) {
    return (
      <div
        key={'statisticalInformation-metric-' + metric.name}
        className={classes.performanceInformation}
      >
        <h5>
          <strong>{extractMetricDisplayName(metric)}</strong>
        </h5>
        <Tile className={classes.tile}>
          <div className={classes.tileContent}>
            <span className={classes.tileContentInformation}>p-value</span>
            <span
              className={classes.tileContentValue}
              suppressHydrationWarning={true}
            >
              {statisticalInformationPerMetric[metric.name]['p'].toFixed(4)}
            </span>
            <span
              className={classes.tileContentDecision}
              suppressHydrationWarning={true}
            >
              {statisticalInformationPerMetric[metric.name]['p'] <= 0.05
                ? 'Significant'
                : 'Not significant'}
            </span>
          </div>
        </Tile>
        <ScatterChart
          data={prepareScatterPlotData(
            modelA.name,
            statisticalInformationPerMetric[metric.name].distributionA,
            modelB.name,
            statisticalInformationPerMetric[metric.name].distributionB,
            statisticalInformationPerMetric[metric.name].taskIds,
          )}
          options={{
            axes: {
              left: {
                mapsTo: 'value',
                ...(metric.type === 'numerical' &&
                  typeof metric.minValue === 'number' &&
                  typeof metric.maxValue === 'number' && {
                    domain: [metric.minValue, metric.maxValue],
                  }),
                ...(metric.type === 'categorical' &&
                  typeof metric.minValue !== 'number' &&
                  typeof metric.maxValue !== 'number' && {
                    domain: [
                      castToNumber(metric.minValue?.value || 0, metric.values),
                      castToNumber(metric.maxValue?.value || 4, metric.values),
                    ],
                  }),
                title: extractMetricDisplayName(metric),
              },
              bottom: {
                mapsTo: 'key',
                ticks: {
                  values: [],
                },
                title: `Tasks (${
                  statisticalInformationPerMetric[metric.name].distributionA
                    .length
                }/${numEvaluations})`,
              },
            },
            width: '500px',
            height: '500px',
            toolbar: {
              enabled: false,
            },
            color: {
              scale: modelColors,
            },
            legend: {
              order: modelOrder,
            },
            theme: theme,
          }}
        ></ScatterChart>
      </div>
    );
  } else {
    return null;
  }
}
// ===================================================================================
// MAIN FUNCTION
// ===================================================================================
export default function ModelComparator({
  evaluationsPerMetric,
  models,
  metrics,
  filters,
  onTaskSelection,
}: Props) {
  // Step 1: Initialize state and necessary variables
  const [windowWidth, setWindowWidth] = useState<number>(
    global?.window ? window.innerWidth : 0,
  );
  const [modelA, setModelA] = useState<Model>(models[0]);
  const [modelB, setModelB] = useState<Model>(models[1]);
  const [selectedMetric, setSelectedMetric] = useState<Metric | undefined>(
    undefined,
  );
  const [selectedFilters, setSelectedFilters] = useState<{
    [key: string]: string[];
  }>({});
  const [statisticalInformationPerMetric, setStatisticalInformationPerMetric] =
    useState<{ [key: string]: StatisticalInformation } | undefined>(undefined);
  const [modelColors, modelOrder] = getModelColorPalette(models);
  const [selectedMetricRange, setSelectedMetricRange] = useState<number[]>();
  const chartRef = useRef(null);
  // Step 2: Run effects
  // Step 2.a: Window resizing
  useEffect(() => {
    const handleWindowResize = () => {
      setWindowWidth(window.innerWidth);
    };

    // Step: Add event listener
    window.addEventListener('resize', handleWindowResize);

    // Step: Cleanup to remove event listener
    return () => {
      window.removeEventListener('resize', handleWindowResize);
    };
  }, []);

  // Step 2.b: Fetch theme
  const { theme } = useTheme();
  // Step 2.c: Bucket human and algorithmic metrics
  const [humanMetrics, algorithmMetrics] = useMemo(() => {
    const hMetrics: Metric[] = [];
    const aMetrics: Metric[] = [];
    Object.values(metrics).forEach((metric) => {
      if (metric.author === 'human') {
        hMetrics.push(metric);
      } else if (metric.author === 'algorithm') {
        aMetrics.push(metric);
      }
    });
    return [hMetrics, aMetrics];
  }, [metrics]);
  // Step 2.d: Reset selected metric range, only applicable for numerical metrics
  useEffect(() => {
    if (
      selectedMetric &&
      selectedMetric.type === 'numerical' &&
      selectedMetric.range
    ) {
      setSelectedMetricRange([
        selectedMetric.range[0],
        selectedMetric.range[1],
      ]);
    } else {
      setSelectedMetricRange(undefined);
    }
  }, [selectedMetric]);
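
  // Illustrative only: for a hypothetical numerical metric whose `range` is
  // [0, 1, 0.1] (min, max, step), the effect above resets the slider selection
  // to [0, 1]; categorical metrics carry no range, so the selection is cleared.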
  // Step 2.e: Identify visible evaluations
  const filteredEvaluations = useMemo(() => {
    if (selectedMetric) {
      // Step 1: Identify evaluations for the selected models
      const evaluationsForSelectedModels = evaluationsPerMetric[
        selectedMetric.name
      ].filter(
        (evaluation) =>
          (evaluation.modelId === modelA.modelId ||
            evaluation.modelId === modelB.modelId) &&
          (!isEmpty(selectedFilters)
            ? areObjectsIntersecting(selectedFilters, evaluation)
            : true),
      );

      // Step 2: Collate evaluations per task ID
      const evaluationsPerTask: { [key: string]: { [key: string]: number } } =
        {};
      evaluationsForSelectedModels.forEach((evaluation) => {
        const entry = evaluationsPerTask[evaluation.taskId];
        if (entry) {
          entry[evaluation.modelId] =
            evaluation[`${selectedMetric.name}_agg`].value;
        } else {
          evaluationsPerTask[evaluation.taskId] = {
            [evaluation.modelId]:
              evaluation[`${selectedMetric.name}_agg`].value,
          };
        }
      });

      // Step 3: Only select tasks where the models' aggregate values differ
      // and one or more models have an aggregate value in the selected range
      const visibleEvaluationTaskIds = Object.keys(evaluationsPerTask).filter(
        (taskId) =>
          Object.keys(countBy(Object.values(evaluationsPerTask[taskId])))
            .length > 1 &&
          (selectedMetricRange
            ? (Object.values(evaluationsPerTask[taskId])[0] >=
                selectedMetricRange[0] &&
                Object.values(evaluationsPerTask[taskId])[0] <=
                  selectedMetricRange[1]) ||
              (Object.values(evaluationsPerTask[taskId])[1] >=
                selectedMetricRange[0] &&
                Object.values(evaluationsPerTask[taskId])[1] <=
                  selectedMetricRange[1])
            : true),
      );

      // Step 4: Return evaluations for the selected tasks where the models' aggregate values differ
      return evaluationsForSelectedModels.filter((evaluation) =>
        visibleEvaluationTaskIds.includes(evaluation.taskId),
      );
    }
    return [];
  }, [
    evaluationsPerMetric,
    selectedMetric,
    selectedFilters,
    modelA,
    modelB,
    selectedMetricRange,
  ]);
  // Step 2.f: Reset statistical information, if either model or the filters change
  useEffect(() => {
    setStatisticalInformationPerMetric(undefined);
  }, [modelA, modelB, selectedFilters]);

  // Step 2.g: Recalculate statistical information, if the metric or range changes
  useEffect(() => {
    if (
      !selectedMetric &&
      statisticalInformationPerMetric &&
      Object.keys(statisticalInformationPerMetric).length === 1
    ) {
      setStatisticalInformationPerMetric(
        runStatisticalSignificanceTest(
          evaluationsPerMetric,
          metrics,
          modelA,
          modelB,
          selectedMetric,
          selectedFilters,
          selectedMetricRange,
        ),
      );
    } else if (
      selectedMetric &&
      selectedMetricRange &&
      statisticalInformationPerMetric &&
      statisticalInformationPerMetric.hasOwnProperty(selectedMetric.name)
    ) {
      setStatisticalInformationPerMetric(
        runStatisticalSignificanceTest(
          evaluationsPerMetric,
          metrics,
          modelA,
          modelB,
          selectedMetric,
          selectedFilters,
          selectedMetricRange,
        ),
      );
    }
  }, [selectedMetric, selectedMetricRange]);
  // Step 2.h: Estimate computation complexity
  const complexity = useMemo(() => {
    let size = 0;
    if (selectedMetric) {
      size = evaluationsPerMetric[selectedMetric.name].length / models.length;
    } else {
      size = Object.values(evaluationsPerMetric)
        .map((evaluations) => evaluations.length / models.length)
        .reduce((a, b) => a + b, 0);
    }
    if (size > 1000) {
      return 'high';
    }
    return 'low';
  }, [evaluationsPerMetric, selectedMetric]);
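
  // Illustrative only: with, say, 2 models and 2,500 evaluations for the
  // selected metric, the heuristic above estimates 2,500 / 2 = 1,250 tasks,
  // which exceeds 1,000, so `complexity` is 'high' and a warning is rendered.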
  // Step 2.i: Add chart event
  useEffect(() => {
    // Step 2.i.*: Local copy of reference and a single shared handler, so the
    // cleanup removes the exact listener instance that was added
    let ref = null;
    const handler = ({ detail }: CustomEvent) => {
      onTaskSelection(detail.datum.taskId);
    };

    // Step 2.i.**: Update reference and add event listener
    if (chartRef && chartRef.current) {
      ref = chartRef.current;
      //@ts-ignore
      ref.chart.services.events.addEventListener('scatter-click', handler);
    }

    // Step 2.i.***: Cleanup function
    return () => {
      if (ref) {
        //@ts-ignore
        ref.chart.services.events.removeEventListener('scatter-click', handler);
      }
    };
  }, [chartRef, selectedMetric, statisticalInformationPerMetric]);
  // Step 3: Render
  return (
    <div className={classes.page}>
      <div className={classes.selectors}>
        <div className={classes.modelSelector}>
          <ModelSelector
            id={'modelA-selector-excluding-model-' + modelB.modelId}
            key={'modelA-selector-excluding-model-' + modelB.modelId}
            models={models}
            defaultValue={modelA}
            onSelect={(modelId: string) => {
              const selectedModel = models.find(
                (model) => model.modelId === modelId,
              );
              if (selectedModel) {
                setModelA(selectedModel);
              }
            }}
            disabledModels={[modelB]}
          />
        </div>
        <div className={classes.modelSelector}>
          <ModelSelector
            id={'modelB-selector-excluding-model-' + modelA.modelId}
            key={'modelB-selector-excluding-model-' + modelA.modelId}
            models={models}
            defaultValue={modelB}
            onSelect={(modelId: string) => {
              const selectedModel = models.find(
                (model) => model.modelId === modelId,
              );
              if (selectedModel) {
                setModelB(selectedModel);
              }
            }}
            disabledModels={[modelA]}
          />
        </div>
        <div className={classes.metricSelector}>
          <MetricSelector
            metrics={metrics}
            onSelect={(metric: Metric | undefined) => {
              setSelectedMetric(metric);
            }}
            warn={!selectedMetric}
            warnText={'You must select a single metric to view tasks.'}
          />
        </div>
        {selectedMetric &&
        selectedMetric.type === 'numerical' &&
        selectedMetric.range ? (
          <div>
            <Slider
              ariaLabelInput="Lower bound"
              unstable_ariaLabelInputUpper="Upper bound"
              labelText={`Choose range`}
              value={
                selectedMetricRange
                  ? selectedMetricRange[0]
                  : selectedMetric.range[0]
              }
              unstable_valueUpper={
                selectedMetricRange
                  ? selectedMetricRange[1]
                  : selectedMetric.range[1]
              }
              min={selectedMetric.range[0]}
              max={selectedMetric.range[1]}
              step={
                selectedMetric.range.length === 3 ? selectedMetric.range[2] : 1
              }
              onChange={({
                value,
                valueUpper,
              }: {
                value: number;
                valueUpper?: number;
              }) => {
                setSelectedMetricRange((prev) => [
                  value,
                  valueUpper
                    ? valueUpper
                    : prev
                      ? prev[1]
                      : selectedMetric.range
                        ? selectedMetric.range[1]
                        : 100,
                ]);
              }}
            />
          </div>
        ) : null}
        <div className={classes.calculateBtn}>
          <Button
            onClick={() => {
              // Run statistical significance calculations
              setStatisticalInformationPerMetric(
                runStatisticalSignificanceTest(
                  evaluationsPerMetric,
                  metrics,
                  modelA,
                  modelB,
                  selectedMetric,
                  selectedFilters,
                  selectedMetricRange,
                ),
              );
            }}
          >
            Calculate
          </Button>
        </div>
      </div>
      {!isEmpty(filters) ? (
        <Filters
          keyPrefix="ModelComparator"
          filters={filters}
          selectedFilters={selectedFilters}
          setSelectedFilters={setSelectedFilters}
        />
      ) : null}
      {statisticalInformationPerMetric ? (
        <div className={classes.row}>
          <div className={classes.hypothesisContainer}>
            <span className={classes.hypothesisStatement}>
              H<sub>0</sub>: {modelA.name} and {modelB.name} scores are derived
              from the same distribution.
            </span>
            <span className={classes.hypothesisValidityCondition}>
              <span>{'Reject the null hypothesis if p < 0.05'}</span>
            </span>
          </div>
          {!selectedMetric && humanMetrics.length ? (
            <div className={classes.row}>
              <h4>Human Evaluations</h4>
              <div
                className={cx(
                  humanMetrics.length > 3
                    ? classes.graphsGrid
                    : classes.graphsFlex,
                )}
              >
                {humanMetrics.map((metric) =>
                  renderResult(
                    statisticalInformationPerMetric,
                    metric,
                    modelA,
                    modelB,
                    evaluationsPerMetric[metric.name].length / models.length,
                    modelColors,
                    modelOrder,
                    theme,
                  ),
                )}
              </div>
            </div>
          ) : null}
          {!selectedMetric && algorithmMetrics.length ? (
            <div className={classes.row}>
              <h4>Algorithmic Evaluations</h4>
              <div
                className={cx(
                  algorithmMetrics.length > 3
                    ? classes.graphsGrid
                    : classes.graphsFlex,
                )}
              >
                {algorithmMetrics.map((metric) =>
                  renderResult(
                    statisticalInformationPerMetric,
                    metric,
                    modelA,
                    modelB,
                    evaluationsPerMetric[metric.name].length / models.length,
                    modelColors,
                    modelOrder,
                    theme,
                  ),
                )}
              </div>
            </div>
          ) : null}
          {selectedMetric &&
          statisticalInformationPerMetric.hasOwnProperty(
            selectedMetric.name,
          ) ? (
            <div className={classes.row}>
              <div
                key={`statisticalInformation-metric-${selectedMetric.name}--${hash(JSON.stringify(statisticalInformationPerMetric[selectedMetric.name]))}`}
                className={classes.performanceInformation}
              >
                <h5>
                  <strong>{extractMetricDisplayName(selectedMetric)}</strong>
                </h5>
                <Tile className={classes.tile}>
                  <div className={classes.tileContent}>
                    <span className={classes.tileContentInformation}>
                      p-value
                    </span>
                    <span
                      className={classes.tileContentValue}
                      suppressHydrationWarning={true}
                    >
                      {statisticalInformationPerMetric[selectedMetric.name][
                        'p'
                      ].toFixed(4)}
                    </span>
                    <span
                      className={classes.tileContentDecision}
                      suppressHydrationWarning={true}
                    >
                      {statisticalInformationPerMetric[selectedMetric.name][
                        'p'
                      ] <= 0.05
                        ? 'Significant'
                        : 'Not significant'}
                    </span>
                  </div>
                </Tile>
                <ScatterChart
                  ref={chartRef}
                  data={prepareScatterPlotData(
                    modelA.name,
                    statisticalInformationPerMetric[selectedMetric.name]
                      .distributionA,
                    modelB.name,
                    statisticalInformationPerMetric[selectedMetric.name]
                      .distributionB,
                    statisticalInformationPerMetric[selectedMetric.name]
                      .taskIds,
                  )}
                  options={{
                    axes: {
                      left: {
                        mapsTo: 'value',
                        ...(selectedMetric.type === 'numerical' &&
                          typeof selectedMetric.minValue === 'number' &&
                          typeof selectedMetric.maxValue === 'number' && {
                            domain: [
                              selectedMetric.minValue,
                              selectedMetric.maxValue,
                            ],
                          }),
                        ...(selectedMetric.type === 'categorical' &&
                          typeof selectedMetric.minValue !== 'number' &&
                          typeof selectedMetric.maxValue !== 'number' && {
                            domain: [
                              castToNumber(
                                selectedMetric.minValue?.value || 0,
                                selectedMetric.values,
                              ),
                              castToNumber(
                                selectedMetric.maxValue?.value || 4,
                                selectedMetric.values,
                              ),
                            ],
                          }),
                        title: extractMetricDisplayName(selectedMetric),
                      },
                      bottom: {
                        mapsTo: 'key',
                        ticks: {
                          values: [],
                        },
                        title: `Tasks (${
                          statisticalInformationPerMetric[selectedMetric.name]
                            .distributionA.length
                        }/${
                          evaluationsPerMetric[selectedMetric.name].length /
                          models.length
                        })`,
                      },
                    },
                    width: `${Math.round(windowWidth * 0.8)}px`,
                    height: '500px',
                    toolbar: {
                      enabled: false,
                    },
                    color: {
                      scale: modelColors,
                    },
                    legend: {
                      order: modelOrder,
                    },
                    theme: theme,
                  }}
                ></ScatterChart>
              </div>
            </div>
          ) : (
            <div className={classes.tasksContainerNotification}>
              <span
                className={classes.tasksContainerNotificationText}
              >{`Press calculate to measure statistical significance ${selectedMetric ? 'for' : 'across'} "${selectedMetric ? extractMetricDisplayName(selectedMetric) : 'all'}" metric${selectedMetric ? '' : 's'}`}</span>
              <span
                className={classes.tasksContainerNotificationText}
              >{`for "${modelA.name}" and "${modelB.name}" models.`}</span>
              {complexity === 'high' ? (
                <div className={classes.tasksContainerWarning}>
                  <WarningAlt
                    height={'24px'}
                    width={'24px'}
                    className={classes.tasksContainerWarningIcon}
                  />
                  <span className={classes.tasksContainerWarningText}>
                    It might take a few minutes to build this view.
                  </span>
                </div>
              ) : null}
            </div>
          )}
        </div>
      ) : (
        <div className={classes.tasksContainerNotification}>
          <span
            className={classes.tasksContainerNotificationText}
          >{`Press calculate to measure statistical significance ${selectedMetric ? 'for' : 'across'} "${selectedMetric ? extractMetricDisplayName(selectedMetric) : 'all'}" metric${selectedMetric ? '' : 's'}`}</span>
          <span
            className={classes.tasksContainerNotificationText}
          >{`for "${modelA.name}" and "${modelB.name}" models.`}</span>
          {complexity === 'high' ? (
            <div className={classes.tasksContainerWarning}>
              <WarningAlt
                height={'24px'}
                width={'24px'}
                className={classes.tasksContainerWarningIcon}
              />
              <span className={classes.tasksContainerWarningText}>
                It might take a few minutes to build this view.
              </span>
            </div>
          ) : null}
        </div>
      )}
      {selectedMetric &&
        statisticalInformationPerMetric &&
        statisticalInformationPerMetric.hasOwnProperty(selectedMetric.name) && (
          <div className={classes.row}>
            <h4>
              Tasks{selectedMetric && filteredEvaluations && <sup>*</sup>}
            </h4>
            {filteredEvaluations ? (
              <>
                <TasksTable
                  metrics={[selectedMetric]}
                  evaluations={filteredEvaluations}
                  models={[modelA, modelB]}
                  filters={filters}
                  onClick={onTaskSelection}
                />
                <span className={classes.tasksTableWarning}>
                  <sup>*</sup> Only tasks with different model aggregate scores
                  are shown in the above table.
                </span>
              </>
            ) : null}
          </div>
        )}
    </div>
  );
}