/**
 *
 * Copyright 2023-2025 InspectorRAGet Team
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 **/

'use client';

import { isEmpty, find } from 'lodash';
import cx from 'classnames';
import { useState, useMemo, useEffect } from 'react';
import {
  SkeletonText,
  Select,
  SelectItem,
  Button,
  ButtonSkeleton,
  Tooltip,
} from '@carbon/react';
import { ChevronDown, ChevronUp } from '@carbon/icons-react';
import {
  HistogramChart,
  GroupedBarChart,
  DonutChart,
  LineChart,
  StackedBarChart,
} from '@carbon/charts-react';
import { ScaleTypes } from '@carbon/charts';

import { useTheme } from '@/src/theme';
import { Task, Aggregator } from '@/src/types';
import {
  unionAggregator,
  intersectionAggregator,
  majorityUnionAggregator,
} from '@/src/utilities/aggregators';

import '@carbon/charts-react/styles.css';
import classes from './DataCharacteristics.module.scss';

// ===================================================================================
//                               TYPES
// ===================================================================================
interface Props {
  tasks: Task[];
  filters: { [key: string]: string[] };
}

// ===================================================================================
//                               HELPER FUNCTIONS
// ===================================================================================
/**
 * Converts raw counts into percentages of their sum, rounded to two decimals.
 *
 * @param data map from category name to its raw count
 * @returns map with the same keys whose values are percentages (0-100);
 * every value is 0 when the counts sum to zero
 */
function normalize(data: { [key: string]: number }) {
  const total = Object.values(data).reduce((a, b) => a + b, 0);

  return Object.fromEntries(
    Object.entries(data).map(([key, count]) => {
      // Guard against 0 / 0, which would otherwise surface as NaN in the charts
      return [
        key,
        total === 0 ? 0 : Math.round((count / total) * 100 * 100) / 100,
      ];
    }),
  );
}

// ===================================================================================
//                               COMPUTE FUNCTIONS
// ===================================================================================
/**
 * Builds one record per task holding the number of whitespace-separated words
 * in the task input, plus the task's value for every requested filter.
 *
 * String inputs are counted directly. Array inputs contribute the `text` of
 * each turn, or its `content` when that is a string; other turns are ignored.
 *
 * @param tasks tasks to inspect
 * @param filters names of task properties to copy onto each record
 * @returns one `{ count, ...filterValues }` record per task
 */
function computeWordCount(tasks: Task[], filters: string[]) {
  // Step 1: Initialize necessary variable
  const wordCountInInputPerTask: { [key: string]: string | number }[] = [];

  // Step 2: Iterate over each task
  tasks.forEach((task) => {
    // Step 2.a: Identify text
    let text = '';
    if (typeof task.input === 'string') {
      text = task.input;
    } else if (Array.isArray(task.input)) {
      const segments: string[] = [];
      task.input.forEach((turn) => {
        if (turn.hasOwnProperty('text') && turn.text) {
          segments.push(turn.text.trim());
        } else if (
          turn.hasOwnProperty('content') &&
          typeof turn.content === 'string'
        ) {
          segments.push(turn.content.trim());
        }
      });
      // Join with a space so that the last word of one turn is not fused
      // with the first word of the next turn
      text = segments.join(' ');
    }

    // Step 2.b: Build record
    // Splitting an empty string yields [''], so empty text is counted as 0 words explicitly
    const trimmedText = text.trim();
    const record = {
      count: trimmedText === '' ? 0 : trimmedText.split(/\s+/).length,
    };
    filters.forEach((filter) => {
      if (task.hasOwnProperty(filter) && task[filter]) {
        record[filter] = task[filter];
      }
    });

    // Step 2.c: Add record
    wordCountInInputPerTask.push(record);
  });

  // Step 3: Return
  return wordCountInInputPerTask;
}

/**
 * Builds one record per task whose input is an array, holding the number of
 * entries in that array plus the task's value for every requested filter.
 * Tasks whose input is not an array produce no record.
 *
 * @param tasks tasks to inspect
 * @param filters names of task properties to copy onto each record
 * @returns one `{ count, ...filterValues }` record per array-input task
 */
function computeUtterances(tasks: Task[], filters: string[]) {
  // Step 1: Initialize necessary variable
  const utterancesInInputPerTask: { [key: string]: string | number }[] = [];

  // Step 2: Iterate over each task
  tasks.forEach((task) => {
    // Step 2.a: Identify input is array
    if (Array.isArray(task.input)) {
      // Step 2.b: Build record
      const record = {
        count: task.input.length,
      };
      filters.forEach((filter) => {
        if (task.hasOwnProperty(filter) && task[filter]) {
          record[filter] = task[filter];
        }
      });

      // Step 2.c: Add record
      utterancesInInputPerTask.push(record);
    }
  });

  // Step 3: Return
  return utterancesInInputPerTask;
}

/**
 * Counts how often each context position is reported as relevant, optionally
 * broken down by filter value.
 *
 * For every task with `annotations.context_relevance`, the annotation values
 * are passed to `aggregator.apply`, and each item it yields is tallied under
 * the key `item + 1`.
 * NOTE(review): presumably `aggregator.apply` yields zero-based context
 * indexes, hence the `+ 1` — confirm against the aggregator implementations.
 *
 * @param tasks tasks to inspect
 * @param filters names of task properties to group by; when empty, a single
 * ungrouped tally is produced
 * @param aggregator strategy used to combine the per-annotator relevance values
 * @returns records of shape `{ key, value, [filter]: group }` where `value` is the tally
 */
function computeContextRelevance(
  tasks: Task[],
  filters: string[],
  aggregator: Aggregator,
) {
  // Step 0: Helper functions
  function add(
    records: {
      key: string;
      value: number;
      [key: string]: string | number;
    }[],
    recordToAdd: {
      key: string;
      [key: string]: string | number;
    },
  ) {
    // Step 1: Find existing records
    // (lodash `find` with an object matches records containing the same key/group pair)
    const existingRecord = find(records, recordToAdd);

    // Step 2: Add record
    if (existingRecord) {
      existingRecord.value += 1;
    } else {
      records.push({ ...recordToAdd, value: 1 });
    }
  }

  // Step 1: Initialize necessary variable
  const relevantContextIndexes: {
    key: string;
    value: number;
    [key: string]: string | number;
  }[] = [];

  // Step 2: Iterate over each task
  tasks.forEach((task) => {
    // Step 2.a: Fetch context relevance annotations, if applicable
    if (
      task.annotations &&
      !isEmpty(task.annotations) &&
      task.annotations.context_relevance &&
      !isEmpty(task.annotations.context_relevance)
    ) {
      const context_relevances = Object.values(
        task.annotations.context_relevance,
      );

      // Step 2.b: For each relevant context post aggregation
      for (const relevantContextIdx of aggregator.apply(context_relevances)) {
        if (!isEmpty(filters)) {
          filters.forEach((filter) => {
            if (task.hasOwnProperty(filter) && task[filter]) {
              if (Array.isArray(task[filter])) {
                // A task belonging to several groups is tallied once per group
                task[filter].forEach((group) => {
                  add(relevantContextIndexes, {
                    key: `${relevantContextIdx + 1}`,
                    [filter]: group,
                  });
                });
              } else {
                add(relevantContextIndexes, {
                  key: `${relevantContextIdx + 1}`,
                  [filter]: task[filter],
                });
              }
            }
          });
        } else {
          add(relevantContextIndexes, {
            key: `${relevantContextIdx + 1}`,
          });
        }
      }
    }
  });

  // Step 3: Return
  return relevantContextIndexes;
}

/**
 * Computes the data-characteristics statistics for the given tasks and hands
 * them to the supplied setters.
 *
 * The resulting object contains:
 * - `input.word_count`: output of `computeWordCount`
 * - `input.utterance_count`: output of `computeUtterances`
 * - `tasks_distribution[filter]`: percentage of tasks per filter value
 * - `contexts.relevance`: output of `computeContextRelevance`, only when non-empty
 *
 * @param tasks tasks to summarize
 * @param filters map from filter name to the known values of that filter
 * @param selectedAggregator aggregator forwarded to `computeContextRelevance`
 * @param setStatistics called once with the computed statistics
 * @param setLoading called with `false` after the statistics are set
 */
async function computeStatistics(
  tasks: Task[],
  filters: { [key: string]: string[] },
  selectedAggregator,
  setStatistics,
  setLoading: Function,
) {
  const statistics = {
    input: {},
    tasks_distribution: {},
    contexts: {},
  };

  // Resolve filter names once so that every step tolerates missing filters
  const filterNames = filters ? Object.keys(filters) : [];

  // Step 1: Calculate length of input in words
  statistics['input']['word_count'] = computeWordCount(tasks, filterNames);

  // Step 2: Calculate number of utterance in input, if applicable
  statistics['input']['utterance_count'] = computeUtterances(
    tasks,
    filterNames,
  );

  // Step 3: Calculate tasks per filter
  if (!isEmpty(filters)) {
    // Step 3.a: Initialize counter
    const taskDistributionPerFilter: {
      [key: string]: { [key: string]: number };
    } = Object.fromEntries(
      Object.entries(filters).map(([filterName, filterValues]) => [
        filterName,
        Object.fromEntries(filterValues.map((value) => [value, 0])),
      ]),
    );

    // Step 3.b: Iterate over tasks
    tasks.forEach((task) => {
      for (const filter of Object.keys(taskDistributionPerFilter)) {
        // Skip absent values instead of counting them under "undefined"/"null"
        if (task.hasOwnProperty(filter) && task[filter] != null) {
          if (Array.isArray(task[filter])) {
            task[filter].forEach((value) => {
              // Start from 0 for values missing from `filters`; `undefined + 1`
              // would be NaN and corrupt the normalized distribution
              taskDistributionPerFilter[filter][value] =
                (taskDistributionPerFilter[filter][value] || 0) + 1;
            });
          } else {
            taskDistributionPerFilter[filter][task[filter]] =
              (taskDistributionPerFilter[filter][task[filter]] || 0) + 1;
          }
        }
      }
    });

    // Step 3.c: Normalize and add to statistics
    Object.keys(taskDistributionPerFilter).forEach((filter) => {
      statistics['tasks_distribution'][filter] = normalize(
        taskDistributionPerFilter[filter],
      );
    });
  }

  // Step 4: Calculate context relevance
  const contextRelevance = computeContextRelevance(
    tasks,
    filterNames,
    selectedAggregator,
  );
  if (!isEmpty(contextRelevance)) {
    statistics['contexts']['relevance'] = contextRelevance;
  }

  // Step 5: Set statistics and set loading to "false"
  setStatistics(statistics);
  setLoading(false);
}

// ===================================================================================
//                               RENDER FUNCTIONS
// ===================================================================================
function SkeletonGraphs({ keyValue }: { keyValue: string }) {
  return (