Spaces:
Running
Running
feat (filters): Enable filtering on model & metric comparison views.
Browse files
src/types.ts
CHANGED
|
@@ -236,20 +236,20 @@ export interface Data extends TileData {
|
|
| 236 |
}
|
| 237 |
|
| 238 |
// ===================================================================================
|
| 239 |
-
//
|
| 240 |
// ===================================================================================
|
| 241 |
-
export interface
|
| 242 |
evaluationsPerMetric: { [key: string]: TaskEvaluation[] };
|
| 243 |
filters: { [key: string]: string[] };
|
| 244 |
-
expression: object;
|
| 245 |
models: Model[];
|
| 246 |
-
|
|
|
|
| 247 |
metric?: Metric;
|
| 248 |
allowedValues?: string[];
|
| 249 |
annotator?: string;
|
| 250 |
}
|
| 251 |
|
| 252 |
-
export interface
|
| 253 |
records: {
|
| 254 |
taskId: string;
|
| 255 |
modelName: string;
|
|
|
|
| 236 |
}
|
| 237 |
|
| 238 |
// ===================================================================================
|
| 239 |
+
// FILTERATION WORKER
|
| 240 |
// ===================================================================================
|
| 241 |
+
export interface FilterationRequest {
|
| 242 |
evaluationsPerMetric: { [key: string]: TaskEvaluation[] };
|
| 243 |
filters: { [key: string]: string[] };
|
|
|
|
| 244 |
models: Model[];
|
| 245 |
+
expression?: object;
|
| 246 |
+
agreementLevels?: { [key: string]: number | string }[];
|
| 247 |
metric?: Metric;
|
| 248 |
allowedValues?: string[];
|
| 249 |
annotator?: string;
|
| 250 |
}
|
| 251 |
|
| 252 |
+
export interface FilterationResponse {
|
| 253 |
records: {
|
| 254 |
taskId: string;
|
| 255 |
modelName: string;
|
src/views/example/Example.tsx
CHANGED
|
@@ -338,7 +338,7 @@ export default memo(function Example({ data }: { data: Data }) {
|
|
| 338 |
evaluationsPerMetric={evaluationsPerMetric}
|
| 339 |
models={data.models}
|
| 340 |
metrics={eligibleMetrics}
|
| 341 |
-
filters={
|
| 342 |
onTaskSelection={(taskId) => {
|
| 343 |
setSelectedTaskId(taskId);
|
| 344 |
}}
|
|
@@ -353,7 +353,7 @@ export default memo(function Example({ data }: { data: Data }) {
|
|
| 353 |
evaluationsPerMetric={evaluationsPerMetric}
|
| 354 |
models={data.models}
|
| 355 |
metrics={eligibleMetrics}
|
| 356 |
-
filters={
|
| 357 |
onTaskSelection={(taskId) => {
|
| 358 |
setSelectedTaskId(taskId);
|
| 359 |
}}
|
|
|
|
| 338 |
evaluationsPerMetric={evaluationsPerMetric}
|
| 339 |
models={data.models}
|
| 340 |
metrics={eligibleMetrics}
|
| 341 |
+
filters={filters}
|
| 342 |
onTaskSelection={(taskId) => {
|
| 343 |
setSelectedTaskId(taskId);
|
| 344 |
}}
|
|
|
|
| 353 |
evaluationsPerMetric={evaluationsPerMetric}
|
| 354 |
models={data.models}
|
| 355 |
metrics={eligibleMetrics}
|
| 356 |
+
filters={filters}
|
| 357 |
onTaskSelection={(taskId) => {
|
| 358 |
setSelectedTaskId(taskId);
|
| 359 |
}}
|
src/views/metric-behavior/MetricBehavior.module.scss
CHANGED
|
@@ -76,7 +76,7 @@
|
|
| 76 |
align-items: center;
|
| 77 |
}
|
| 78 |
|
| 79 |
-
.
|
| 80 |
display: flex;
|
| 81 |
column-gap: $spacing-02;
|
| 82 |
}
|
|
|
|
| 76 |
align-items: center;
|
| 77 |
}
|
| 78 |
|
| 79 |
+
.graphTitle {
|
| 80 |
display: flex;
|
| 81 |
column-gap: $spacing-02;
|
| 82 |
}
|
src/views/metric-behavior/MetricBehavior.tsx
CHANGED
|
@@ -353,7 +353,7 @@ export default memo(function MetricBehavior({
|
|
| 353 |
|
| 354 |
// Step 2.b: Filter evaluations based on selected models
|
| 355 |
const filteredEvaluationsPerMetric = useMemo(() => {
|
| 356 |
-
|
| 357 |
for (const [metric, evals] of Object.entries(evaluationsPerMetric)) {
|
| 358 |
filtered[metric] = evals.filter(
|
| 359 |
(evaluation) =>
|
|
@@ -700,18 +700,24 @@ export default memo(function MetricBehavior({
|
|
| 700 |
</div>
|
| 701 |
) : (
|
| 702 |
<div className={classes.row}>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 703 |
<HeatmapChart
|
| 704 |
data={metricToMetricCorrelation}
|
| 705 |
options={{
|
| 706 |
// @ts-ignore
|
| 707 |
axes: {
|
| 708 |
bottom: {
|
| 709 |
-
title: '
|
| 710 |
mapsTo: 'metricA',
|
| 711 |
scaleType: ScaleTypes.LABELS,
|
| 712 |
},
|
| 713 |
left: {
|
| 714 |
-
title: '
|
| 715 |
mapsTo: 'metricB',
|
| 716 |
scaleType: ScaleTypes.LABELS,
|
| 717 |
},
|
|
@@ -759,10 +765,15 @@ export default memo(function MetricBehavior({
|
|
| 759 |
</div>
|
| 760 |
) : (
|
| 761 |
<div className={classes.row}>
|
| 762 |
-
<h4>
|
| 763 |
-
|
| 764 |
-
|
| 765 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 766 |
</h4>
|
| 767 |
<HeatmapChart
|
| 768 |
ref={chartRef}
|
|
|
|
| 353 |
|
| 354 |
// Step 2.b: Filter evaluations based on selected models
|
| 355 |
const filteredEvaluationsPerMetric = useMemo(() => {
|
| 356 |
+
const filtered: { [key: string]: TaskEvaluation[] } = {};
|
| 357 |
for (const [metric, evals] of Object.entries(evaluationsPerMetric)) {
|
| 358 |
filtered[metric] = evals.filter(
|
| 359 |
(evaluation) =>
|
|
|
|
| 700 |
</div>
|
| 701 |
) : (
|
| 702 |
<div className={classes.row}>
|
| 703 |
+
<h4 className={classes.graphTitle}>
|
| 704 |
+
<strong>Spearman correlation</strong>
|
| 705 |
+
<span>
|
| 706 |
+
{`(${Object.values(filteredEvaluationsPerMetric)[0].length ? Object.values(filteredEvaluationsPerMetric)[0].length / (selectedModels ? selectedModels.length : 1) : 0}/${Object.values(evaluationsPerMetric)[0].length / models.length})`}
|
| 707 |
+
</span>
|
| 708 |
+
</h4>
|
| 709 |
<HeatmapChart
|
| 710 |
data={metricToMetricCorrelation}
|
| 711 |
options={{
|
| 712 |
// @ts-ignore
|
| 713 |
axes: {
|
| 714 |
bottom: {
|
| 715 |
+
title: 'Metrics',
|
| 716 |
mapsTo: 'metricA',
|
| 717 |
scaleType: ScaleTypes.LABELS,
|
| 718 |
},
|
| 719 |
left: {
|
| 720 |
+
title: 'Metrics',
|
| 721 |
mapsTo: 'metricB',
|
| 722 |
scaleType: ScaleTypes.LABELS,
|
| 723 |
},
|
|
|
|
| 765 |
</div>
|
| 766 |
) : (
|
| 767 |
<div className={classes.row}>
|
| 768 |
+
<h4 className={classes.graphTitle}>
|
| 769 |
+
<strong>
|
| 770 |
+
% instances with same scores (
|
| 771 |
+
{extractMetricDisplayName(selectedMetricA)} vs.
|
| 772 |
+
{extractMetricDisplayName(selectedMetricB)})
|
| 773 |
+
</strong>
|
| 774 |
+
<span>
|
| 775 |
+
{`(${Object.values(filteredEvaluationsPerMetric)[0].length ? Object.values(filteredEvaluationsPerMetric)[0].length / (selectedModels ? selectedModels.length : 1) : 0}/${Object.values(evaluationsPerMetric)[0].length / models.length})`}
|
| 776 |
+
</span>
|
| 777 |
</h4>
|
| 778 |
<HeatmapChart
|
| 779 |
ref={chartRef}
|
src/views/model-behavior/ModelBehavior.tsx
CHANGED
|
@@ -38,7 +38,12 @@ import { GroupedBarChart } from '@carbon/charts-react';
|
|
| 38 |
import { ScaleTypes } from '@carbon/charts';
|
| 39 |
|
| 40 |
import { useTheme } from '@/src/theme';
|
| 41 |
-
import {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
import {
|
| 43 |
AgreementLevels,
|
| 44 |
AgreementLevelDefinitions,
|
|
@@ -221,7 +226,7 @@ export default function ModelBehavior({
|
|
| 221 |
);
|
| 222 |
|
| 223 |
// Step 2.c.ii: Set up event listener for messages from the worker
|
| 224 |
-
worker.onmessage = function (event: MessageEvent<
|
| 225 |
// Step 2.c.ii.*: Copy over response data
|
| 226 |
const { records, evaluations } = event.data;
|
| 227 |
|
|
@@ -337,8 +342,8 @@ export default function ModelBehavior({
|
|
| 337 |
filterationWorker.postMessage({
|
| 338 |
evaluationsPerMetric: evaluationsPerMetric,
|
| 339 |
filters: selectedFilters,
|
| 340 |
-
expression: expression,
|
| 341 |
models: selectedModels,
|
|
|
|
| 342 |
agreementLevels: selectedAgreementLevels,
|
| 343 |
metric: selectedMetric,
|
| 344 |
allowedValues: selectedAllowedValues,
|
|
|
|
| 38 |
import { ScaleTypes } from '@carbon/charts';
|
| 39 |
|
| 40 |
import { useTheme } from '@/src/theme';
|
| 41 |
+
import {
|
| 42 |
+
TaskEvaluation,
|
| 43 |
+
Model,
|
| 44 |
+
Metric,
|
| 45 |
+
FilterationResponse,
|
| 46 |
+
} from '@/src/types';
|
| 47 |
import {
|
| 48 |
AgreementLevels,
|
| 49 |
AgreementLevelDefinitions,
|
|
|
|
| 226 |
);
|
| 227 |
|
| 228 |
// Step 2.c.ii: Set up event listener for messages from the worker
|
| 229 |
+
worker.onmessage = function (event: MessageEvent<FilterationResponse>) {
|
| 230 |
// Step 2.c.ii.*: Copy over response data
|
| 231 |
const { records, evaluations } = event.data;
|
| 232 |
|
|
|
|
| 342 |
filterationWorker.postMessage({
|
| 343 |
evaluationsPerMetric: evaluationsPerMetric,
|
| 344 |
filters: selectedFilters,
|
|
|
|
| 345 |
models: selectedModels,
|
| 346 |
+
expression: expression,
|
| 347 |
agreementLevels: selectedAgreementLevels,
|
| 348 |
metric: selectedMetric,
|
| 349 |
allowedValues: selectedAllowedValues,
|
src/workers/filter.ts
CHANGED
|
@@ -18,11 +18,11 @@
|
|
| 18 |
|
| 19 |
import { isEmpty } from 'lodash';
|
| 20 |
|
| 21 |
-
import {
|
| 22 |
import { areObjectsIntersecting } from '@/src/utilities/objects';
|
| 23 |
import { evaluate } from '@/src/utilities/expressions';
|
| 24 |
|
| 25 |
-
onmessage = function (event: MessageEvent<
|
| 26 |
// Step 1: Initialize necessary variables
|
| 27 |
const {
|
| 28 |
evaluationsPerMetric,
|
|
@@ -55,7 +55,7 @@ onmessage = function (event: MessageEvent<RequestMessage>) {
|
|
| 55 |
// Step 3: If a metric is selected
|
| 56 |
if (metric) {
|
| 57 |
// Step 3.a: If an expression is specified
|
| 58 |
-
if (
|
| 59 |
// Step 3.a.ii: Build an object containing evaluations per model for every task
|
| 60 |
const evaluationsPerTaskPerModel: {
|
| 61 |
[key: string]: { [key: string]: TaskEvaluation };
|
|
@@ -121,9 +121,10 @@ onmessage = function (event: MessageEvent<RequestMessage>) {
|
|
| 121 |
// Step 3.b.ii: Verify against aggregate value
|
| 122 |
if (
|
| 123 |
evaluation.modelId in models &&
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
|
|
|
| 127 |
(!allowedValues ||
|
| 128 |
isEmpty(allowedValues) ||
|
| 129 |
allowedValues.includes(evaluation[`${metric.name}_agg`].value))
|
|
@@ -173,9 +174,10 @@ onmessage = function (event: MessageEvent<RequestMessage>) {
|
|
| 173 |
// Step 3.a: Verify against aggregate value
|
| 174 |
if (
|
| 175 |
evaluation.modelId in models &&
|
| 176 |
-
agreementLevels
|
| 177 |
-
|
| 178 |
-
|
|
|
|
| 179 |
(!allowedValues ||
|
| 180 |
isEmpty(allowedValues) ||
|
| 181 |
allowedValues.includes(evaluation[`${metric}_agg`].value))
|
|
|
|
| 18 |
|
| 19 |
import { isEmpty } from 'lodash';
|
| 20 |
|
| 21 |
+
import { FilterationRequest, TaskEvaluation } from '@/src/types';
|
| 22 |
import { areObjectsIntersecting } from '@/src/utilities/objects';
|
| 23 |
import { evaluate } from '@/src/utilities/expressions';
|
| 24 |
|
| 25 |
+
onmessage = function (event: MessageEvent<FilterationRequest>) {
|
| 26 |
// Step 1: Initialize necessary variables
|
| 27 |
const {
|
| 28 |
evaluationsPerMetric,
|
|
|
|
| 55 |
// Step 3: If a metric is selected
|
| 56 |
if (metric) {
|
| 57 |
// Step 3.a: If an expression is specified
|
| 58 |
+
if (expression && !isEmpty(expression)) {
|
| 59 |
// Step 3.a.ii: Build an object containing evaluations per model for every task
|
| 60 |
const evaluationsPerTaskPerModel: {
|
| 61 |
[key: string]: { [key: string]: TaskEvaluation };
|
|
|
|
| 121 |
// Step 3.b.ii: Verify against aggregate value
|
| 122 |
if (
|
| 123 |
evaluation.modelId in models &&
|
| 124 |
+
(!agreementLevels ||
|
| 125 |
+
agreementLevels
|
| 126 |
+
.map((level) => level.value)
|
| 127 |
+
.includes(evaluation[`${metric.name}_agg`].level)) &&
|
| 128 |
(!allowedValues ||
|
| 129 |
isEmpty(allowedValues) ||
|
| 130 |
allowedValues.includes(evaluation[`${metric.name}_agg`].value))
|
|
|
|
| 174 |
// Step 3.a: Verify against aggregate value
|
| 175 |
if (
|
| 176 |
evaluation.modelId in models &&
|
| 177 |
+
(!agreementLevels ||
|
| 178 |
+
agreementLevels
|
| 179 |
+
.map((level) => level.value)
|
| 180 |
+
.includes(evaluation[`${metric}_agg`].level)) &&
|
| 181 |
(!allowedValues ||
|
| 182 |
isEmpty(allowedValues) ||
|
| 183 |
allowedValues.includes(evaluation[`${metric}_agg`].value))
|