{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import re"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "import requests\n",
    "\n",
    "# Download the accuracy file from the IPFS gateway.\n",
    "# NOTE: `hash` shadows the Python builtin of the same name; the name is kept\n",
    "# because cells outside this view may still reference it.\n",
    "hash = \"QmR8etyW3TPFadNtNrW54vfnFqmh8vBrMARWV76EmxCZyk\"\n",
    "ipfs_address = \"https://gateway.autonolas.tech/ipfs/\"\n",
    "\n",
    "accuracy_link = ipfs_address + hash\n",
    "# requests has no default timeout: without one an unresponsive gateway\n",
    "# would block this cell (and the kernel) indefinitely.\n",
    "response = requests.get(accuracy_link, timeout=30)\n",
    "# Stop here on a 4xx/5xx reply instead of handing an error page to the\n",
    "# CSV parsing that follows.\n",
    "response.raise_for_status()\n",
    "print(response)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "headers = ['tool', 'tool_accuracy', 'total_requests', 'min', 'max']\n"
     ]
    }
   ],
   "source": [
    "from io import StringIO\n",
    "\n",
    "# Parse the downloaded CSV text into {tool: [total_requests, tool_accuracy]}.\n",
    "# Both values are stored as the raw strings read from the file.\n",
    "accuracy_store = {}\n",
    "data = StringIO(response.text)\n",
    "csv_reader = csv.reader(data, delimiter=',')\n",
    "for row in csv_reader:\n",
    "    if not row:\n",
    "        # csv.reader yields [] for blank lines; indexing row[0] on such a\n",
    "        # row would raise IndexError, so skip it.\n",
    "        continue\n",
    "    if row[0] == \"tool\":\n",
    "        # Header row: report it and move on to the data rows.\n",
    "        print(f\"headers = {row}\")\n",
    "        continue\n",
    "    accuracy_store[row[0]] = [\n",
    "        row[2],  # total_requests, per the printed header order\n",
    "        row[1],  # tool_accuracy, per the printed header order\n",
    "    ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'claude-prediction-offline': ['481', '57.380457380457386'], 'claude-prediction-online': ['1055', '61.137440758293835'], 'prediction-offline': ['4465', '67.41321388577828'], 'prediction-offline-sme': ['61', '70.49180327868852'], 'prediction-online': ['9490', '66.00632244467862'], 'prediction-online-sme': ['14642', '65.67408823931157'], 'prediction-request-rag': ['2691', '63.58231140839836'], 'prediction-request-rag-claude': ['7428', '65.64351103931072'], 'prediction-request-reasoning': ['17372', '67.11374625834677'], 'prediction-request-reasoning-claude': ['2470', '66.72064777327935'], 'prediction-url-cot-claude': ['1596', '61.904761904761905']}\n"
     ]
    }
   ],
   "source": [
    "# Inspect the parsed mapping: one entry per tool, each holding two strings.\n",
    "print(accuracy_store)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the three parquet inputs; the path order matches the target names.\n",
    "fpmms, tools, trades = (\n",
    "    pd.read_parquet(parquet_path)\n",
    "    for parquet_path in (\n",
    "        '../data/fpmms.parquet',\n",
    "        '../data/tools.parquet',\n",
    "        '../data/all_trades_profitability.parquet',\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tool names selected for this notebook, in a fixed order.\n",
    "# NOTE(review): presumably used to filter the tools data in later cells;\n",
    "# the usage is outside this view, so confirm before reordering.\n",
    "INC_TOOLS = [\n",
    "    # prediction-online / prediction-offline\n",
    "    \"prediction-online\",\n",
    "    \"prediction-offline\",\n",
    "    # claude-prediction-*\n",
    "    \"claude-prediction-online\",\n",
    "    \"claude-prediction-offline\",\n",
    "    # *-sme\n",
    "    \"prediction-offline-sme\",\n",
    "    \"prediction-online-sme\",\n",
    "    # prediction-request-* and prediction-url-*\n",
    "    \"prediction-request-rag\",\n",
    "    \"prediction-request-reasoning\",\n",
    "    \"prediction-url-cot-claude\",\n",
    "    \"prediction-request-rag-claude\",\n",
    "    \"prediction-request-reasoning-claude\",\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n",
       "\n",
       "
\n",
       "  \n",
       "    \n",
       "      | win\n",
       " | tool\n",
       " | tool_accuracy\n",
       " | total_requests\n",
       " | 
\n",
       "  \n",
       "  \n",
       "    \n",
       "      | 0\n",
       " | claude-prediction-offline\n",
       " | 66.308244\n",
       " | 279\n",
       " | 
\n",
       "    \n",
       "      | 1\n",
       " | claude-prediction-online\n",
       " | 58.914027\n",
       " | 1105\n",
       " | 
\n",
       "    \n",
       "      | 2\n",
       " | prediction-offline\n",
       " | 67.717915\n",
       " | 2283\n",
       " | 
\n",
       "    \n",
       "      | 3\n",
       " | prediction-offline-sme\n",
       " | 55.555556\n",
       " | 18\n",
       " | 
\n",
       "    \n",
       "      | 4\n",
       " | prediction-online\n",
       " | 65.459066\n",
       " | 5631\n",
       " | 
\n",
       "    \n",
       "      | 5\n",
       " | prediction-online-sme\n",
       " | 67.417656\n",
       " | 8167\n",
       " | 
\n",
       "    \n",
       "      | 6\n",
       " | prediction-request-rag\n",
       " | 64.217072\n",
       " | 1769\n",
       " | 
\n",
       "    \n",
       "      | 7\n",
       " | prediction-request-rag-claude\n",
       " | 69.554566\n",
       " | 4490\n",
       " | 
\n",
       "    \n",
       "      | 8\n",
       " | prediction-request-reasoning\n",
       " | 68.813594\n",
       " | 9828\n",
       " | 
\n",
       "    \n",
       "      | 9\n",
       " | prediction-request-reasoning-claude\n",
       " | 68.910256\n",
       " | 2184\n",
       " | 
\n",
       "    \n",
       "      | 10\n",
       " | prediction-url-cot-claude\n",
       " | 64.584980\n",
       " | 1265\n",
       " | 
\n",
       "  \n",
       "
\n",
       "
\n",
       "\n",
       "
\n",
       "  \n",
       "    \n",
       "      | \n",
       " | min\n",
       " | max\n",
       " | 
\n",
       "    \n",
       "      | tool\n",
       " | \n",
       " | \n",
       " | 
\n",
       "  \n",
       "  \n",
       "    \n",
       "      | claude-prediction-offline\n",
       " | 2024-04-23 13:09:30\n",
       " | 2024-06-10 00:31:30\n",
       " | 
\n",
       "    \n",
       "      | claude-prediction-online\n",
       " | 2024-04-12 12:24:20\n",
       " | 2024-06-09 21:41:20\n",
       " | 
\n",
       "    \n",
       "      | prediction-offline\n",
       " | 2024-04-12 12:20:10\n",
       " | 2024-06-08 23:45:00\n",
       " | 
\n",
       "    \n",
       "      | prediction-offline-sme\n",
       " | 2024-04-16 07:58:45\n",
       " | 2024-04-29 20:45:15\n",
       " | 
\n",
       "    \n",
       "      | prediction-online\n",
       " | 2024-04-16 05:52:40\n",
       " | 2024-06-09 21:47:20\n",
       " | 
\n",
       "    \n",
       "      | prediction-online-sme\n",
       " | 2024-04-12 11:51:30\n",
       " | 2024-06-10 00:06:00\n",
       " | 
\n",
       "    \n",
       "      | prediction-request-rag\n",
       " | 2024-04-12 11:39:40\n",
       " | 2024-06-09 21:17:45\n",
       " | 
\n",
       "    \n",
       "      | prediction-request-rag-claude\n",
       " | 2024-04-12 11:14:30\n",
       " | 2024-06-07 11:42:30\n",
       " | 
\n",
       "    \n",
       "      | prediction-request-reasoning\n",
       " | 2024-04-12 11:57:05\n",
       " | 2024-06-09 21:50:45\n",
       " | 
\n",
       "    \n",
       "      | prediction-request-reasoning-claude\n",
       " | 2024-04-12 11:53:55\n",
       " | 2024-06-05 05:00:10\n",
       " | 
\n",
       "    \n",
       "      | prediction-url-cot-claude\n",
       " | 2024-04-12 11:37:15\n",
       " | 2024-06-05 05:21:10\n",
       " | 
\n",
       "  \n",
       "
\n",
       "
\n",
       "\n",
       "
\n",
       "  \n",
       "    \n",
       "      | \n",
       " | tool\n",
       " | tool_accuracy\n",
       " | total_requests\n",
       " | min\n",
       " | max\n",
       " | 
\n",
       "  \n",
       "  \n",
       "    \n",
       "      | 0\n",
       " | claude-prediction-offline\n",
       " | 66.308244\n",
       " | 279\n",
       " | 2024-04-23 13:09:30\n",
       " | 2024-06-10 00:31:30\n",
       " | 
\n",
       "    \n",
       "      | 1\n",
       " | claude-prediction-online\n",
       " | 58.914027\n",
       " | 1105\n",
       " | 2024-04-12 12:24:20\n",
       " | 2024-06-09 21:41:20\n",
       " | 
\n",
       "    \n",
       "      | 2\n",
       " | prediction-offline\n",
       " | 67.717915\n",
       " | 2283\n",
       " | 2024-04-12 12:20:10\n",
       " | 2024-06-08 23:45:00\n",
       " | 
\n",
       "    \n",
       "      | 3\n",
       " | prediction-offline-sme\n",
       " | 55.555556\n",
       " | 18\n",
       " | 2024-04-16 07:58:45\n",
       " | 2024-04-29 20:45:15\n",
       " | 
\n",
       "    \n",
       "      | 4\n",
       " | prediction-online\n",
       " | 65.459066\n",
       " | 5631\n",
       " | 2024-04-16 05:52:40\n",
       " | 2024-06-09 21:47:20\n",
       " | 
\n",
       "    \n",
       "      | 5\n",
       " | prediction-online-sme\n",
       " | 67.417656\n",
       " | 8167\n",
       " | 2024-04-12 11:51:30\n",
       " | 2024-06-10 00:06:00\n",
       " | 
\n",
       "    \n",
       "      | 6\n",
       " | prediction-request-rag\n",
       " | 64.217072\n",
       " | 1769\n",
       " | 2024-04-12 11:39:40\n",
       " | 2024-06-09 21:17:45\n",
       " | 
\n",
       "    \n",
       "      | 7\n",
       " | prediction-request-rag-claude\n",
       " | 69.554566\n",
       " | 4490\n",
       " | 2024-04-12 11:14:30\n",
       " | 2024-06-07 11:42:30\n",
       " | 
\n",
       "    \n",
       "      | 8\n",
       " | prediction-request-reasoning\n",
       " | 68.813594\n",
       " | 9828\n",
       " | 2024-04-12 11:57:05\n",
       " | 2024-06-09 21:50:45\n",
       " | 
\n",
       "    \n",
       "      | 9\n",
       " | prediction-request-reasoning-claude\n",
       " | 68.910256\n",
       " | 2184\n",
       " | 2024-04-12 11:53:55\n",
       " | 2024-06-05 05:00:10\n",
       " | 
\n",
       "    \n",
       "      | 10\n",
       " | prediction-url-cot-claude\n",
       " | 64.584980\n",
       " | 1265\n",
       " | 2024-04-12 11:37:15\n",
       " | 2024-06-05 05:21:10\n",
       " | 
\n",
       "  \n",
       "
\n",
       "
\n",
       "\n",
       "
\n",
       "  \n",
       "    \n",
       "      | \n",
       " | tool\n",
       " | losing_percentage\n",
       " | num_calls\n",
       " | 
\n",
       "  \n",
       "  \n",
       "    \n",
       "      | 0\n",
       " | prediction-offline\n",
       " | 1.000000\n",
       " | 40.0\n",
       " | 
\n",
       "    \n",
       "      | 4\n",
       " | prediction-request-rag-claude\n",
       " | 1.000000\n",
       " | 17.0\n",
       " | 
\n",
       "    \n",
       "      | 7\n",
       " | prediction-url-cot-claude\n",
       " | 1.000000\n",
       " | 2.0\n",
       " | 
\n",
       "    \n",
       "      | 2\n",
       " | prediction-online-sme\n",
       " | 0.656716\n",
       " | 67.0\n",
       " | 
\n",
       "    \n",
       "      | 6\n",
       " | prediction-request-reasoning-claude\n",
       " | 0.571429\n",
       " | 7.0\n",
       " | 
\n",
       "    \n",
       "      | 5\n",
       " | prediction-request-reasoning\n",
       " | 0.538462\n",
       " | 52.0\n",
       " | 
\n",
       "    \n",
       "      | 3\n",
       " | prediction-request-rag\n",
       " | 0.250000\n",
       " | 4.0\n",
       " | 
\n",
       "    \n",
       "      | 1\n",
       " | prediction-online\n",
       " | 0.185185\n",
       " | 27.0\n",
       " | 
\n",
       "  \n",
       "
\n",
       "
\n",
       "\n",
       "
\n",
       "  \n",
       "    \n",
       "      | \n",
       " | tool\n",
       " | losing_percentage\n",
       " | num_calls\n",
       " | 
\n",
       "  \n",
       "  \n",
       "    \n",
       "      | 0\n",
       " | prediction-offline\n",
       " | 1.000000\n",
       " | 40.0\n",
       " | 
\n",
       "    \n",
       "      | 4\n",
       " | prediction-request-rag-claude\n",
       " | 1.000000\n",
       " | 17.0\n",
       " | 
\n",
       "    \n",
       "      | 7\n",
       " | prediction-url-cot-claude\n",
       " | 1.000000\n",
       " | 2.0\n",
       " | 
\n",
       "    \n",
       "      | 2\n",
       " | prediction-online-sme\n",
       " | 0.656716\n",
       " | 67.0\n",
       " | 
\n",
       "    \n",
       "      | 6\n",
       " | prediction-request-reasoning-claude\n",
       " | 0.571429\n",
       " | 7.0\n",
       " | 
\n",
       "    \n",
       "      | 5\n",
       " | prediction-request-reasoning\n",
       " | 0.538462\n",
       " | 52.0\n",
       " | 
\n",
       "    \n",
       "      | 3\n",
       " | prediction-request-rag\n",
       " | 0.250000\n",
       " | 4.0\n",
       " | 
\n",
       "    \n",
       "      | 1\n",
       " | prediction-online\n",
       " | 0.185185\n",
       " | 27.0\n",
       " | 
\n",
       "  \n",
       "
\n",
       "
\n",
       "\n",
       "
\n",
       "  \n",
       "    \n",
       "      | \n",
       " | tool\n",
       " | losing_percentage\n",
       " | num_calls\n",
       " | 
\n",
       "  \n",
       "  \n",
       "    \n",
       "      | 7\n",
       " | prediction-url-cot-claude\n",
       " | 1.000000\n",
       " | 1.0\n",
       " | 
\n",
       "    \n",
       "      | 2\n",
       " | prediction-online-sme\n",
       " | 0.977273\n",
       " | 44.0\n",
       " | 
\n",
       "    \n",
       "      | 1\n",
       " | prediction-online\n",
       " | 0.975000\n",
       " | 40.0\n",
       " | 
\n",
       "    \n",
       "      | 0\n",
       " | prediction-offline\n",
       " | 0.677419\n",
       " | 31.0\n",
       " | 
\n",
       "    \n",
       "      | 5\n",
       " | prediction-request-reasoning\n",
       " | 0.534483\n",
       " | 58.0\n",
       " | 
\n",
       "    \n",
       "      | 4\n",
       " | prediction-request-rag-claude\n",
       " | 0.223881\n",
       " | 67.0\n",
       " | 
\n",
       "    \n",
       "      | 6\n",
       " | prediction-request-reasoning-claude\n",
       " | 0.200000\n",
       " | 5.0\n",
       " | 
\n",
       "    \n",
       "      | 3\n",
       " | prediction-request-rag\n",
       " | 0.000000\n",
       " | 8.0\n",
       " | 
\n",
       "  \n",
       "
\n",
       "
\n",
       "\n",
       "
\n",
       "  \n",
       "    \n",
       "      | \n",
       " | tool\n",
       " | losing_percentage\n",
       " | num_calls\n",
       " | 
\n",
       "  \n",
       "  \n",
       "    \n",
       "      | 0\n",
       " | claude-prediction-offline\n",
       " | 1.000000\n",
       " | 5.0\n",
       " | 
\n",
       "    \n",
       "      | 1\n",
       " | claude-prediction-online\n",
       " | 1.000000\n",
       " | 1.0\n",
       " | 
\n",
       "    \n",
       "      | 2\n",
       " | prediction-offline\n",
       " | 1.000000\n",
       " | 87.0\n",
       " | 
\n",
       "    \n",
       "      | 6\n",
       " | prediction-request-rag-claude\n",
       " | 1.000000\n",
       " | 25.0\n",
       " | 
\n",
       "    \n",
       "      | 9\n",
       " | prediction-url-cot-claude\n",
       " | 1.000000\n",
       " | 1.0\n",
       " | 
\n",
       "    \n",
       "      | 3\n",
       " | prediction-online\n",
       " | 0.951220\n",
       " | 41.0\n",
       " | 
\n",
       "    \n",
       "      | 8\n",
       " | prediction-request-reasoning-claude\n",
       " | 0.833333\n",
       " | 6.0\n",
       " | 
\n",
       "    \n",
       "      | 5\n",
       " | prediction-request-rag\n",
       " | 0.714286\n",
       " | 7.0\n",
       " | 
\n",
       "    \n",
       "      | 7\n",
       " | prediction-request-reasoning\n",
       " | 0.437500\n",
       " | 48.0\n",
       " | 
\n",
       "    \n",
       "      | 4\n",
       " | prediction-online-sme\n",
       " | 0.394366\n",
       " | 71.0\n",
       " | 
\n",
       "  \n",
       "
\n",
       "
\n",
       "\n",
       "
\n",
       "  \n",
       "    \n",
       "      | \n",
       " | tool\n",
       " | losing_percentage\n",
       " | num_calls\n",
       " | 
\n",
       "  \n",
       "  \n",
       "    \n",
       "      | 0\n",
       " | claude-prediction-offline\n",
       " | 1.0\n",
       " | 2.0\n",
       " | 
\n",
       "    \n",
       "      | 1\n",
       " | prediction-offline\n",
       " | 1.0\n",
       " | 23.0\n",
       " | 
\n",
       "    \n",
       "      | 2\n",
       " | prediction-online\n",
       " | 1.0\n",
       " | 14.0\n",
       " | 
\n",
       "    \n",
       "      | 3\n",
       " | prediction-online-sme\n",
       " | 1.0\n",
       " | 18.0\n",
       " | 
\n",
       "    \n",
       "      | 4\n",
       " | prediction-request-rag\n",
       " | 1.0\n",
       " | 5.0\n",
       " | 
\n",
       "    \n",
       "      | 5\n",
       " | prediction-request-rag-claude\n",
       " | 1.0\n",
       " | 8.0\n",
       " | 
\n",
       "    \n",
       "      | 8\n",
       " | prediction-url-cot-claude\n",
       " | 1.0\n",
       " | 6.0\n",
       " | 
\n",
       "    \n",
       "      | 6\n",
       " | prediction-request-reasoning\n",
       " | 0.0\n",
       " | 18.0\n",
       " | 
\n",
       "    \n",
       "      | 7\n",
       " | prediction-request-reasoning-claude\n",
       " | 0.0\n",
       " | 3.0\n",
       " | 
\n",
       "  \n",
       "
\n",
       "
\n",
       "\n",
       "
\n",
       "  \n",
       "    \n",
       "      | \n",
       " | tool\n",
       " | losing_percentage\n",
       " | num_calls\n",
       " | 
\n",
       "  \n",
       "  \n",
       "    \n",
       "      | 0\n",
       " | claude-prediction-offline\n",
       " | 1.000000\n",
       " | 2.0\n",
       " | 
\n",
       "    \n",
       "      | 1\n",
       " | claude-prediction-online\n",
       " | 1.000000\n",
       " | 6.0\n",
       " | 
\n",
       "    \n",
       "      | 2\n",
       " | prediction-offline\n",
       " | 1.000000\n",
       " | 58.0\n",
       " | 
\n",
       "    \n",
       "      | 4\n",
       " | prediction-online-sme\n",
       " | 1.000000\n",
       " | 39.0\n",
       " | 
\n",
       "    \n",
       "      | 5\n",
       " | prediction-request-rag\n",
       " | 1.000000\n",
       " | 4.0\n",
       " | 
\n",
       "    \n",
       "      | 8\n",
       " | prediction-request-reasoning-claude\n",
       " | 1.000000\n",
       " | 8.0\n",
       " | 
\n",
       "    \n",
       "      | 9\n",
       " | prediction-url-cot-claude\n",
       " | 1.000000\n",
       " | 5.0\n",
       " | 
\n",
       "    \n",
       "      | 6\n",
       " | prediction-request-rag-claude\n",
       " | 0.754717\n",
       " | 53.0\n",
       " | 
\n",
       "    \n",
       "      | 7\n",
       " | prediction-request-reasoning\n",
       " | 0.369048\n",
       " | 84.0\n",
       " | 
\n",
       "    \n",
       "      | 3\n",
       " | prediction-online\n",
       " | 0.166667\n",
       " | 72.0\n",
       " | 
\n",
       "  \n",
       "
\n",
       "
\n",
       "\n",
       "
\n",
       "  \n",
       "    \n",
       "      | \n",
       " | tool\n",
       " | losing_percentage\n",
       " | num_calls\n",
       " | 
\n",
       "  \n",
       "  \n",
       "    \n",
       "      | 1\n",
       " | prediction-online-sme\n",
       " | 0.750000\n",
       " | 4.0\n",
       " | 
\n",
       "    \n",
       "      | 5\n",
       " | prediction-request-reasoning-claude\n",
       " | 0.750000\n",
       " | 4.0\n",
       " | 
\n",
       "    \n",
       "      | 2\n",
       " | prediction-request-rag\n",
       " | 0.666667\n",
       " | 6.0\n",
       " | 
\n",
       "    \n",
       "      | 3\n",
       " | prediction-request-rag-claude\n",
       " | 0.500000\n",
       " | 2.0\n",
       " | 
\n",
       "    \n",
       "      | 4\n",
       " | prediction-request-reasoning\n",
       " | 0.400000\n",
       " | 5.0\n",
       " | 
\n",
       "    \n",
       "      | 0\n",
       " | claude-prediction-online\n",
       " | 0.000000\n",
       " | 1.0\n",
       " | 
\n",
       "  \n",
       "
\n",
       "
\n",
       "\n",
       "
\n",
       "  \n",
       "    \n",
       "      | \n",
       " | tool\n",
       " | losing_percentage\n",
       " | num_calls\n",
       " | 
\n",
       "  \n",
       "  \n",
       "    \n",
       "      | 0\n",
       " | prediction-offline\n",
       " | 1.000000\n",
       " | 11.0\n",
       " | 
\n",
       "    \n",
       "      | 1\n",
       " | prediction-online\n",
       " | 1.000000\n",
       " | 17.0\n",
       " | 
\n",
       "    \n",
       "      | 2\n",
       " | prediction-online-sme\n",
       " | 1.000000\n",
       " | 30.0\n",
       " | 
\n",
       "    \n",
       "      | 4\n",
       " | prediction-request-rag-claude\n",
       " | 1.000000\n",
       " | 45.0\n",
       " | 
\n",
       "    \n",
       "      | 5\n",
       " | prediction-request-reasoning\n",
       " | 0.874016\n",
       " | 127.0\n",
       " | 
\n",
       "    \n",
       "      | 3\n",
       " | prediction-request-rag\n",
       " | 0.250000\n",
       " | 4.0\n",
       " | 
\n",
       "    \n",
       "      | 6\n",
       " | prediction-request-reasoning-claude\n",
       " | 0.000000\n",
       " | 2.0\n",
       " | 
\n",
       "  \n",
       "
\n",
       "
\n",
       "\n",
       "
\n",
       "  \n",
       "    \n",
       "      | \n",
       " | tool\n",
       " | losing_percentage\n",
       " | num_calls\n",
       " | 
\n",
       "  \n",
       "  \n",
       "    \n",
       "      | 0\n",
       " | claude-prediction-offline\n",
       " | 1.000000\n",
       " | 7.0\n",
       " | 
\n",
       "    \n",
       "      | 1\n",
       " | prediction-offline\n",
       " | 1.000000\n",
       " | 1.0\n",
       " | 
\n",
       "    \n",
       "      | 3\n",
       " | prediction-online-sme\n",
       " | 1.000000\n",
       " | 19.0\n",
       " | 
\n",
       "    \n",
       "      | 5\n",
       " | prediction-request-rag-claude\n",
       " | 1.000000\n",
       " | 15.0\n",
       " | 
\n",
       "    \n",
       "      | 4\n",
       " | prediction-request-rag\n",
       " | 0.941176\n",
       " | 17.0\n",
       " | 
\n",
       "    \n",
       "      | 2\n",
       " | prediction-online\n",
       " | 0.800000\n",
       " | 5.0\n",
       " | 
\n",
       "    \n",
       "      | 7\n",
       " | prediction-request-reasoning-claude\n",
       " | 0.666667\n",
       " | 15.0\n",
       " | 
\n",
       "    \n",
       "      | 6\n",
       " | prediction-request-reasoning\n",
       " | 0.652174\n",
       " | 23.0\n",
       " | 
\n",
       "    \n",
       "      | 8\n",
       " | prediction-url-cot-claude\n",
       " | 0.333333\n",
       " | 3.0\n",
       " | 
\n",
       "  \n",
       "
\n",
       "
\n",
       "\n",
       "
\n",
       "  \n",
       "    \n",
       "      | \n",
       " | tool\n",
       " | losing_percentage\n",
       " | num_calls\n",
       " | 
\n",
       "  \n",
       "  \n",
       "    \n",
       "      | 0\n",
       " | claude-prediction-offline\n",
       " | 1.000000\n",
       " | 4.0\n",
       " | 
\n",
       "    \n",
       "      | 1\n",
       " | prediction-offline\n",
       " | 1.000000\n",
       " | 2.0\n",
       " | 
\n",
       "    \n",
       "      | 8\n",
       " | prediction-url-cot-claude\n",
       " | 1.000000\n",
       " | 2.0\n",
       " | 
\n",
       "    \n",
       "      | 6\n",
       " | prediction-request-reasoning\n",
       " | 0.916667\n",
       " | 12.0\n",
       " | 
\n",
       "    \n",
       "      | 7\n",
       " | prediction-request-reasoning-claude\n",
       " | 0.900000\n",
       " | 10.0\n",
       " | 
\n",
       "    \n",
       "      | 4\n",
       " | prediction-request-rag\n",
       " | 0.714286\n",
       " | 14.0\n",
       " | 
\n",
       "    \n",
       "      | 3\n",
       " | prediction-online-sme\n",
       " | 0.666667\n",
       " | 9.0\n",
       " | 
\n",
       "    \n",
       "      | 2\n",
       " | prediction-online\n",
       " | 0.500000\n",
       " | 2.0\n",
       " | 
\n",
       "    \n",
       "      | 5\n",
       " | prediction-request-rag-claude\n",
       " | 0.454545\n",
       " | 11.0\n",
       " | 
\n",
       "  \n",
       "
\n",
       "
\n",
       "\n",
       "
\n",
       "  \n",
       "    \n",
       "      | \n",
       " | tool\n",
       " | losing_percentage\n",
       " | num_calls\n",
       " | 
\n",
       "  \n",
       "  \n",
       "    \n",
       "      | 0\n",
       " | claude-prediction-offline\n",
       " | 1.000000\n",
       " | 6.0\n",
       " | 
\n",
       "    \n",
       "      | 1\n",
       " | claude-prediction-online\n",
       " | 1.000000\n",
       " | 3.0\n",
       " | 
\n",
       "    \n",
       "      | 2\n",
       " | prediction-offline\n",
       " | 1.000000\n",
       " | 36.0\n",
       " | 
\n",
       "    \n",
       "      | 6\n",
       " | prediction-request-rag-claude\n",
       " | 1.000000\n",
       " | 50.0\n",
       " | 
\n",
       "    \n",
       "      | 4\n",
       " | prediction-online-sme\n",
       " | 0.986486\n",
       " | 74.0\n",
       " | 
\n",
       "    \n",
       "      | 5\n",
       " | prediction-request-rag\n",
       " | 0.947368\n",
       " | 19.0\n",
       " | 
\n",
       "    \n",
       "      | 3\n",
       " | prediction-online\n",
       " | 0.910714\n",
       " | 56.0\n",
       " | 
\n",
       "    \n",
       "      | 9\n",
       " | prediction-url-cot-claude\n",
       " | 0.777778\n",
       " | 9.0\n",
       " | 
\n",
       "    \n",
       "      | 7\n",
       " | prediction-request-reasoning\n",
       " | 0.465753\n",
       " | 73.0\n",
       " | 
\n",
       "    \n",
       "      | 8\n",
       " | prediction-request-reasoning-claude\n",
       " | 0.071429\n",
       " | 14.0\n",
       " | 
\n",
       "  \n",
       "
\n",
       "
\n",
       "\n",
       "
\n",
       "  \n",
       "    \n",
       "      | \n",
       " | tool\n",
       " | losing_percentage\n",
       " | num_calls\n",
       " | 
\n",
       "  \n",
       "  \n",
       "    \n",
       "      | 3\n",
       " | prediction-offline-sme\n",
       " | 1.000000\n",
       " | 2.0\n",
       " | 
\n",
       "    \n",
       "      | 7\n",
       " | prediction-request-rag-claude\n",
       " | 0.913007\n",
       " | 1184.0\n",
       " | 
\n",
       "    \n",
       "      | 2\n",
       " | prediction-offline\n",
       " | 0.893281\n",
       " | 1012.0\n",
       " | 
\n",
       "    \n",
       "      | 6\n",
       " | prediction-request-rag\n",
       " | 0.889881\n",
       " | 336.0\n",
       " | 
\n",
       "    \n",
       "      | 5\n",
       " | prediction-online-sme\n",
       " | 0.857143\n",
       " | 1722.0\n",
       " | 
\n",
       "    \n",
       "      | 4\n",
       " | prediction-online\n",
       " | 0.853553\n",
       " | 1154.0\n",
       " | 
\n",
       "    \n",
       "      | 8\n",
       " | prediction-request-reasoning\n",
       " | 0.847451\n",
       " | 2727.0\n",
       " | 
\n",
       "    \n",
       "      | 10\n",
       " | prediction-url-cot-claude\n",
       " | 0.846154\n",
       " | 130.0\n",
       " | 
\n",
       "    \n",
       "      | 1\n",
       " | claude-prediction-online\n",
       " | 0.735849\n",
       " | 53.0\n",
       " | 
\n",
       "    \n",
       "      | 9\n",
       " | prediction-request-reasoning-claude\n",
       " | 0.659664\n",
       " | 238.0\n",
       " | 
\n",
       "    \n",
       "      | 0\n",
       " | claude-prediction-offline\n",
       " | 0.591549\n",
       " | 142.0\n",
       " | 
\n",
       "  \n",
       "
\n",
       "
\n",
       "\n",
       "
\n",
       "  \n",
       "    \n",
       "      | confidence\n",
       " | 0.00\n",
       " | 0.10\n",
       " | 0.20\n",
       " | 0.30\n",
       " | 0.40\n",
       " | 0.50\n",
       " | 0.55\n",
       " | 0.60\n",
       " | 0.65\n",
       " | 0.70\n",
       " | 0.75\n",
       " | 0.80\n",
       " | 0.85\n",
       " | 0.90\n",
       " | 0.95\n",
       " | 0.99\n",
       " | 1.00\n",
       " | 
\n",
       "    \n",
       "      | tool\n",
       " | \n",
       " | \n",
       " | \n",
       " | \n",
       " | \n",
       " | \n",
       " | \n",
       " | \n",
       " | \n",
       " | \n",
       " | \n",
       " | \n",
       " | \n",
       " | \n",
       " | \n",
       " | \n",
       " | \n",
       " | 
\n",
       "  \n",
       "  \n",
       "    \n",
       "      | claude-prediction-offline\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 5.0\n",
       " | 46.0\n",
       " | 4.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 87.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 
\n",
       "    \n",
       "      | claude-prediction-online\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 2.0\n",
       " | 10.0\n",
       " | 7.0\n",
       " | 3.0\n",
       " | 0.0\n",
       " | 30.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 1.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 
\n",
       "    \n",
       "      | prediction-offline\n",
       " | 0.0\n",
       " | 267.0\n",
       " | 2.0\n",
       " | 13.0\n",
       " | 302.0\n",
       " | 189.0\n",
       " | 0.0\n",
       " | 231.0\n",
       " | 3.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 1.0\n",
       " | 2.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 1.0\n",
       " | 
\n",
       "    \n",
       "      | prediction-offline-sme\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 2.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 
\n",
       "    \n",
       "      | prediction-online\n",
       " | 0.0\n",
       " | 22.0\n",
       " | 4.0\n",
       " | 5.0\n",
       " | 43.0\n",
       " | 23.0\n",
       " | 8.0\n",
       " | 670.0\n",
       " | 99.0\n",
       " | 2.0\n",
       " | 76.0\n",
       " | 28.0\n",
       " | 55.0\n",
       " | 25.0\n",
       " | 11.0\n",
       " | 0.0\n",
       " | 20.0\n",
       " | 
\n",
       "    \n",
       "      | prediction-online-sme\n",
       " | 1.0\n",
       " | 27.0\n",
       " | 10.0\n",
       " | 0.0\n",
       " | 71.0\n",
       " | 2.0\n",
       " | 0.0\n",
       " | 679.0\n",
       " | 234.0\n",
       " | 39.0\n",
       " | 149.0\n",
       " | 76.0\n",
       " | 109.0\n",
       " | 80.0\n",
       " | 6.0\n",
       " | 0.0\n",
       " | 39.0\n",
       " | 
\n",
       "    \n",
       "      | prediction-request-rag\n",
       " | 0.0\n",
       " | 3.0\n",
       " | 2.0\n",
       " | 0.0\n",
       " | 4.0\n",
       " | 4.0\n",
       " | 0.0\n",
       " | 25.0\n",
       " | 5.0\n",
       " | 48.0\n",
       " | 11.0\n",
       " | 36.0\n",
       " | 57.0\n",
       " | 16.0\n",
       " | 11.0\n",
       " | 1.0\n",
       " | 20.0\n",
       " | 
\n",
       "    \n",
       "      | prediction-request-rag-claude\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 1.0\n",
       " | 32.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 175.0\n",
       " | 0.0\n",
       " | 513.0\n",
       " | 0.0\n",
       " | 209.0\n",
       " | 3.0\n",
       " | 40.0\n",
       " | 3.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 
\n",
       "    \n",
       "      | prediction-request-reasoning\n",
       " | 0.0\n",
       " | 3.0\n",
       " | 103.0\n",
       " | 1.0\n",
       " | 58.0\n",
       " | 97.0\n",
       " | 0.0\n",
       " | 315.0\n",
       " | 176.0\n",
       " | 441.0\n",
       " | 317.0\n",
       " | 339.0\n",
       " | 159.0\n",
       " | 44.0\n",
       " | 58.0\n",
       " | 0.0\n",
       " | 97.0\n",
       " | 
\n",
       "    \n",
       "      | prediction-request-reasoning-claude\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 3.0\n",
       " | 4.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 27.0\n",
       " | 0.0\n",
       " | 38.0\n",
       " | 4.0\n",
       " | 76.0\n",
       " | 0.0\n",
       " | 8.0\n",
       " | 1.0\n",
       " | 0.0\n",
       " | 2.0\n",
       " | 
\n",
       "    \n",
       "      | prediction-url-cot-claude\n",
       " | 0.0\n",
       " | 2.0\n",
       " | 1.0\n",
       " | 2.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 40.0\n",
       " | 0.0\n",
       " | 60.0\n",
       " | 0.0\n",
       " | 22.0\n",
       " | 0.0\n",
       " | 3.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 0.0\n",
       " | 
\n",
       "  \n",
       "
\n",
       "