ketanmore
/

surya-layout-fine-tune-script

Model card Files Files and versions Community

ketanmore committed on Sep 2, 2024

Commit

e831476

verified ·

1 Parent(s): d80f76d

Upload temp_test.ipynb

Browse files

Files changed (1) hide show

temp_test.ipynb +1642 -0

temp_test.ipynb ADDED Viewed

	@@ -0,0 +1,1642 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "os.environ['HF_HOME'] = '/data2/ketan/orc/HF_Cache'\n",
+    "\n",
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import torch.optim as optim\n",
+    "from torch.utils.data import DataLoader\n",
+    "from transformers import SegformerConfig\n",
+    "from surya.model.detection.segformer import SegformerForRegressionMask\n",
+    "from surya.input.processing import prepare_image_detection\n",
+    "from surya.model.detection.segformer import load_processor , load_model\n",
+    "from datasets import load_dataset\n",
+    "from tqdm import tqdm\n",
+    "from torch.utils.tensorboard import SummaryWriter\n",
+    "\n",
+    "import torch.nn.functional as F\n",
+    "import numpy as np \n",
+    "from PIL import ImageDraw, ImageFont\n",
+    "from surya.layout import parallel_get_regions\n",
+    "import cv2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Loaded detection model vikp/surya_layout2 on device cuda with dtype torch.float16\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Prefer the GPU when PyTorch can see one, otherwise stay on the CPU.\n",
+    "device = torch.device(\"cuda\") if torch.cuda.is_available() else torch.device(\"cpu\")\n",
+    "\n",
+    "# First 100 rows of the train split of vikp/publaynet_bench. The identity\n",
+    "# collate_fn hands every batch over as a plain list of raw dataset rows.\n",
+    "dataset = load_dataset(\"vikp/publaynet_bench\", split=\"train[:100]\")\n",
+    "train_loader = DataLoader(\n",
+    "    dataset,\n",
+    "    batch_size=1,\n",
+    "    shuffle=True,\n",
+    "    collate_fn=lambda batch: batch,\n",
+    ")\n",
+    "\n",
+    "# Pretrained vikp/surya_layout2 checkpoint used by the rest of the notebook.\n",
+    "model = load_model(\"vikp/surya_layout2\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "# Adam over all model parameters with a small (1e-5) learning rate.\n",
+    "optimizer = optim.Adam(model.parameters(), lr=1e-5)\n",
+    "\n",
+    "# Output folders for TensorBoard logs and (presumably) saved checkpoints.\n",
+    "log_dir, checkpoint_dir = \"logs\", \"checkpoints\"\n",
+    "os.makedirs(checkpoint_dir, exist_ok=True)\n",
+    "os.makedirs(log_dir, exist_ok=True)\n",
+    "writer = SummaryWriter(log_dir=log_dir)\n",
+    "\n",
+    "def calculate_iou(box1, box2):\n",
+    "    \"\"\"Compute the intersection-over-union of two [x1, y1, x2, y2] boxes.\n",
+    "\n",
+    "    Inputs that are not tensors are converted to float32 tensors created\n",
+    "    with requires_grad=True; tensors are used as given. When the union\n",
+    "    area is not positive, a fresh zero tensor is returned instead.\n",
+    "    \"\"\"\n",
+    "    if not isinstance(box1, torch.Tensor):\n",
+    "        box1 = torch.tensor(box1, dtype=torch.float32, requires_grad=True)\n",
+    "    if not isinstance(box2, torch.Tensor):\n",
+    "        box2 = torch.tensor(box2, dtype=torch.float32, requires_grad=True)\n",
+    "\n",
+    "    # Corners of the rectangle shared by both boxes.\n",
+    "    left = torch.max(box1[0], box2[0])\n",
+    "    top = torch.max(box1[1], box2[1])\n",
+    "    right = torch.min(box1[2], box2[2])\n",
+    "    bottom = torch.min(box1[3], box2[3])\n",
+    "\n",
+    "    # Clamping at zero makes disjoint boxes contribute no overlap.\n",
+    "    overlap_w = torch.clamp(right - left, min=0)\n",
+    "    overlap_h = torch.clamp(bottom - top, min=0)\n",
+    "    intersection = overlap_w * overlap_h\n",
+    "\n",
+    "    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])\n",
+    "    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])\n",
+    "    union = area1 + area2 - intersection\n",
+    "\n",
+    "    if union > 0:\n",
+    "        return intersection / union\n",
+    "    return torch.tensor(0.0, requires_grad=True)\n",
+    "\n",
+    "def pair_boxes(pred_boxes, target_boxes):\n",
+    "    \"\"\"Greedily pair every target box with its best-overlapping prediction.\n",
+    "\n",
+    "    Both inputs are rebuilt as float32 tensors created with\n",
+    "    requires_grad=True. Targets are visited in order; each one takes the\n",
+    "    remaining prediction with the highest IoU, provided that IoU is above\n",
+    "    zero. A chosen prediction (and any remaining prediction with identical\n",
+    "    coordinates) is then removed from the candidate pool.\n",
+    "\n",
+    "    Returns two equally long lists: matched predictions and matched targets.\n",
+    "    \"\"\"\n",
+    "    candidates = [torch.tensor(b, dtype=torch.float32, requires_grad=True) for b in pred_boxes]\n",
+    "    targets = [torch.tensor(b, dtype=torch.float32, requires_grad=True) for b in target_boxes]\n",
+    "\n",
+    "    matched_pred_boxes, matched_target_boxes = [], []\n",
+    "\n",
+    "    for target in targets:\n",
+    "        top_iou, top_pred = 0, None\n",
+    "        for candidate in candidates:\n",
+    "            overlap = calculate_iou(candidate, target)\n",
+    "            if overlap > top_iou:\n",
+    "                top_iou, top_pred = overlap, candidate\n",
+    "\n",
+    "        # No prediction overlaps this target at all: leave it unmatched.\n",
+    "        if top_pred is None:\n",
+    "            continue\n",
+    "\n",
+    "        matched_pred_boxes.append(top_pred)\n",
+    "        matched_target_boxes.append(target)\n",
+    "        # Value-based removal: drops every candidate equal to the winner.\n",
+    "        candidates = [c for c in candidates if not torch.equal(c, top_pred)]\n",
+    "\n",
+    "    return matched_pred_boxes, matched_target_boxes\n",
+    "\n",
+    "def smooth_l1_loss(pred_boxes, target_boxes, beta=1.0):\n",
+    "    \"\"\"Mean smooth-L1 distance over the coordinates of matched box pairs.\n",
+    "\n",
+    "    Predictions and targets are first paired by pair_boxes; a zero tensor\n",
+    "    is returned when no pair is found. beta is the absolute-error threshold\n",
+    "    below which the penalty is quadratic and above which it is linear.\n",
+    "    \"\"\"\n",
+    "    preds, targets = pair_boxes(pred_boxes, target_boxes)\n",
+    "    if len(preds) == 0:\n",
+    "        return torch.tensor(0.0, requires_grad=True)\n",
+    "\n",
+    "    abs_err = torch.abs(torch.stack(preds) - torch.stack(targets))\n",
+    "    quadratic = 0.5 * (abs_err ** 2) / beta\n",
+    "    linear = abs_err - 0.5 * beta\n",
+    "    return torch.where(abs_err < beta, quadratic, linear).mean()\n",
+    "\n",
+    "def iou_loss(pred_boxes, target_boxes):\n",
+    "    \"\"\"Return 1 - mean IoU over greedily matched prediction/target pairs.\n",
+    "\n",
+    "    Boxes are paired by pair_boxes. When nothing matches, the maximum loss\n",
+    "    of 1.0 is returned as a fresh tensor created with requires_grad=True.\n",
+    "    \"\"\"\n",
+    "    matched_pred_boxes, matched_target_boxes = pair_boxes(pred_boxes, target_boxes)\n",
+    "\n",
+    "    if not matched_pred_boxes:\n",
+    "        return torch.tensor(1.0, requires_grad=True)\n",
+    "\n",
+    "    # Stack the per-pair IoU tensors rather than copying their values into a\n",
+    "    # new leaf via torch.tensor(...): the copy severed the result from the\n",
+    "    # tensors it was computed from, so backward() could never reach them.\n",
+    "    ious = torch.stack([\n",
+    "        calculate_iou(pred, target)\n",
+    "        for pred, target in zip(matched_pred_boxes, matched_target_boxes)\n",
+    "    ])\n",
+    "    return 1 - ious.mean()\n",
+    "\n",
+    "\n",
+    "def logits_to_bboxes(logits, image):\n",
+    "    \"\"\"Turn model logits into a list of bounding boxes for one image.\n",
+    "\n",
+    "    The logits are bilinearly resized to 300x300, detached, moved to the\n",
+    "    CPU and converted to a float32 NumPy array, so the returned boxes are\n",
+    "    not connected to the autograd graph. Every (sample, channel) map is\n",
+    "    passed to parallel_get_regions together with image.size and the\n",
+    "    id2label mapping of the module-level model.\n",
+    "    \"\"\"\n",
+    "    target_hw = (300, 300)\n",
+    "    resized = F.interpolate(logits, size=target_hw, mode='bilinear', align_corners=False)\n",
+    "    maps = resized.cpu().detach().numpy().astype(np.float32)\n",
+    "\n",
+    "    # Flatten (sample, channel, H, W) into one list of 2-D heatmaps.\n",
+    "    heatmaps = [channel_map for sample in maps for channel_map in sample]\n",
+    "    regions = parallel_get_regions(heatmaps=heatmaps, orig_size=image.size, id2label=model.config.id2label)\n",
+    "\n",
+    "    return [region.bbox for region in regions.bboxes]\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:   1%|          | 1/100 [00:00<00:41,  2.41it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [1/100], Moving Avg Loss: 16.4575\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:   2%|▏         | 2/100 [00:00<00:41,  2.34it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [2/100], Moving Avg Loss: 15.4938\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:   3%|▎         | 3/100 [00:01<00:40,  2.42it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [3/100], Moving Avg Loss: 18.9512\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:   4%|▍         | 4/100 [00:01<00:39,  2.40it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [4/100], Moving Avg Loss: 18.3995\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:   5%|▌         | 5/100 [00:02<00:39,  2.43it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [5/100], Moving Avg Loss: 20.1250\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:   6%|▌         | 6/100 [00:02<00:39,  2.38it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [6/100], Moving Avg Loss: 18.9854\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:   7%|▋         | 7/100 [00:02<00:38,  2.40it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [7/100], Moving Avg Loss: 18.4753\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:   8%|▊         | 8/100 [00:03<00:40,  2.27it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [8/100], Moving Avg Loss: 17.0382\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:   9%|▉         | 9/100 [00:03<00:41,  2.20it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [9/100], Moving Avg Loss: 17.7276\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  10%|█         | 10/100 [00:04<00:40,  2.23it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [10/100], Moving Avg Loss: 19.5423\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  11%|█         | 11/100 [00:04<00:39,  2.27it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [11/100], Moving Avg Loss: 18.4347\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  12%|█▏        | 12/100 [00:05<00:40,  2.20it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [12/100], Moving Avg Loss: 17.3114\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  13%|█▎        | 13/100 [00:05<00:39,  2.21it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [13/100], Moving Avg Loss: 16.2870\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  14%|█▍        | 14/100 [00:06<00:37,  2.29it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [14/100], Moving Avg Loss: 21.5170\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  15%|█▌        | 15/100 [00:06<00:36,  2.30it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [15/100], Moving Avg Loss: 21.3559\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  16%|█▌        | 16/100 [00:06<00:36,  2.29it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [16/100], Moving Avg Loss: 19.8276\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  17%|█▋        | 17/100 [00:07<00:35,  2.36it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [17/100], Moving Avg Loss: 18.9123\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  18%|█▊        | 18/100 [00:07<00:34,  2.38it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [18/100], Moving Avg Loss: 23.7418\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  19%|█▉        | 19/100 [00:08<00:34,  2.36it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [19/100], Moving Avg Loss: 22.2312\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  20%|██        | 20/100 [00:08<00:34,  2.35it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [20/100], Moving Avg Loss: 21.1758\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  21%|██        | 21/100 [00:09<00:32,  2.40it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [21/100], Moving Avg Loss: 24.8048\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  22%|██▏       | 22/100 [00:09<00:32,  2.40it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [22/100], Moving Avg Loss: 27.5316\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  23%|██▎       | 23/100 [00:09<00:32,  2.39it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [23/100], Moving Avg Loss: 27.4807\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  24%|██▍       | 24/100 [00:10<00:31,  2.41it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [24/100], Moving Avg Loss: 25.2076\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  25%|██▌       | 25/100 [00:10<00:30,  2.42it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [25/100], Moving Avg Loss: 23.2897\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  26%|██▌       | 26/100 [00:11<00:30,  2.40it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [26/100], Moving Avg Loss: 22.1549\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  27%|██▋       | 27/100 [00:11<00:29,  2.43it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [27/100], Moving Avg Loss: 21.9602\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  28%|██▊       | 28/100 [00:11<00:29,  2.43it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [28/100], Moving Avg Loss: 23.2106\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  29%|██▉       | 29/100 [00:12<00:30,  2.33it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [29/100], Moving Avg Loss: 21.3036\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  30%|███       | 30/100 [00:12<00:30,  2.31it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [30/100], Moving Avg Loss: 22.1421\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  31%|███       | 31/100 [00:13<00:29,  2.35it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [31/100], Moving Avg Loss: 27.1543\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  32%|███▏      | 32/100 [00:13<00:29,  2.34it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [32/100], Moving Avg Loss: 27.6630\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  33%|███▎      | 33/100 [00:14<00:28,  2.39it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [33/100], Moving Avg Loss: 25.8453\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  34%|███▍      | 34/100 [00:14<00:27,  2.41it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [34/100], Moving Avg Loss: 27.6460\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  35%|███▌      | 35/100 [00:14<00:26,  2.44it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [35/100], Moving Avg Loss: 25.1319\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  36%|███▌      | 36/100 [00:15<00:26,  2.45it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [36/100], Moving Avg Loss: 25.8555\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  37%|███▋      | 37/100 [00:15<00:25,  2.45it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [37/100], Moving Avg Loss: 28.9348\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  38%|███▊      | 38/100 [00:16<00:25,  2.42it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [38/100], Moving Avg Loss: 28.3364\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  39%|███▉      | 39/100 [00:16<00:24,  2.45it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [39/100], Moving Avg Loss: 26.0808\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  40%|████      | 40/100 [00:16<00:24,  2.43it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [40/100], Moving Avg Loss: 26.2237\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  41%|████      | 41/100 [00:17<00:24,  2.43it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [41/100], Moving Avg Loss: 26.2728\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  42%|████▏     | 42/100 [00:17<00:23,  2.45it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [42/100], Moving Avg Loss: 26.7065\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  43%|████▎     | 43/100 [00:18<00:23,  2.39it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [43/100], Moving Avg Loss: 24.7438\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  44%|████▍     | 44/100 [00:18<00:23,  2.41it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [44/100], Moving Avg Loss: 26.6885\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  45%|████▌     | 45/100 [00:18<00:22,  2.41it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [45/100], Moving Avg Loss: 27.7764\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  46%|████▌     | 46/100 [00:19<00:22,  2.44it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [46/100], Moving Avg Loss: 25.7708\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  47%|████▋     | 47/100 [00:19<00:22,  2.36it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [47/100], Moving Avg Loss: 23.6295\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  48%|████▊     | 48/100 [00:20<00:21,  2.40it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [48/100], Moving Avg Loss: 23.5793\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  49%|████▉     | 49/100 [00:20<00:20,  2.44it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [49/100], Moving Avg Loss: 22.1319\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  50%|█████     | 50/100 [00:21<00:20,  2.46it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [50/100], Moving Avg Loss: 21.5178\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  51%|█████     | 51/100 [00:21<00:19,  2.48it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [51/100], Moving Avg Loss: 22.2565\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  52%|█████▏    | 52/100 [00:21<00:19,  2.45it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [52/100], Moving Avg Loss: 24.8366\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  53%|█████▎    | 53/100 [00:22<00:19,  2.35it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [53/100], Moving Avg Loss: 23.3091\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  54%|█████▍    | 54/100 [00:22<00:19,  2.35it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [54/100], Moving Avg Loss: 22.1764\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  55%|█████▌    | 55/100 [00:23<00:19,  2.32it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [55/100], Moving Avg Loss: 22.5117\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  56%|█████▌    | 56/100 [00:23<00:18,  2.34it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [56/100], Moving Avg Loss: 23.7047\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  57%|█████▋    | 57/100 [00:24<00:18,  2.38it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [57/100], Moving Avg Loss: 24.7985\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  58%|█████▊    | 58/100 [00:24<00:17,  2.37it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [58/100], Moving Avg Loss: 25.7531\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  59%|█████▉    | 59/100 [00:24<00:17,  2.36it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [59/100], Moving Avg Loss: 24.8322\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  60%|██████    | 60/100 [00:25<00:17,  2.32it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [60/100], Moving Avg Loss: 24.5820\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  61%|██████    | 61/100 [00:25<00:16,  2.37it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [61/100], Moving Avg Loss: 29.7474\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  62%|██████▏   | 62/100 [00:26<00:15,  2.39it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [62/100], Moving Avg Loss: 30.3602\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  63%|██████▎   | 63/100 [00:26<00:15,  2.42it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [63/100], Moving Avg Loss: 29.7396\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  64%|██████▍   | 64/100 [00:26<00:14,  2.44it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [64/100], Moving Avg Loss: 27.3900\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  65%|██████▌   | 65/100 [00:27<00:14,  2.44it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [65/100], Moving Avg Loss: 25.9465\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  66%|██████▌   | 66/100 [00:27<00:13,  2.44it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [66/100], Moving Avg Loss: 24.0045\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  67%|██████▋   | 67/100 [00:28<00:13,  2.42it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [67/100], Moving Avg Loss: 22.4123\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  68%|██████▊   | 68/100 [00:28<00:13,  2.39it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [68/100], Moving Avg Loss: 21.4466\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  69%|██████▉   | 69/100 [00:28<00:12,  2.44it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [69/100], Moving Avg Loss: 21.5559\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  70%|███████   | 70/100 [00:29<00:12,  2.47it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [70/100], Moving Avg Loss: 20.5609\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  71%|███████   | 71/100 [00:29<00:11,  2.48it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [71/100], Moving Avg Loss: 19.9970\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  72%|███████▏  | 72/100 [00:30<00:11,  2.39it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [72/100], Moving Avg Loss: 20.6782\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  73%|███████▎  | 73/100 [00:30<00:11,  2.41it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [73/100], Moving Avg Loss: 23.7722\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  74%|███████▍  | 74/100 [00:31<00:10,  2.40it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [74/100], Moving Avg Loss: 22.6156\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  75%|███████▌  | 75/100 [00:31<00:10,  2.44it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [75/100], Moving Avg Loss: 27.7204\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  76%|███████▌  | 76/100 [00:31<00:09,  2.42it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [76/100], Moving Avg Loss: 27.3355\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  77%|███████▋  | 77/100 [00:32<00:09,  2.43it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [77/100], Moving Avg Loss: 26.1804\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  78%|███████▊  | 78/100 [00:32<00:09,  2.39it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [78/100], Moving Avg Loss: 25.3216\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  79%|███████▉  | 79/100 [00:33<00:08,  2.43it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [79/100], Moving Avg Loss: 27.5742\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  80%|████████  | 80/100 [00:33<00:08,  2.41it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [80/100], Moving Avg Loss: 27.5931\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  81%|████████  | 81/100 [00:33<00:08,  2.34it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [81/100], Moving Avg Loss: 25.5491\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  82%|████████▏ | 82/100 [00:34<00:07,  2.33it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [82/100], Moving Avg Loss: 24.0114\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  83%|████████▎ | 83/100 [00:34<00:07,  2.33it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [83/100], Moving Avg Loss: 22.3863\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  84%|████████▍ | 84/100 [00:35<00:06,  2.36it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [84/100], Moving Avg Loss: 23.4298\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  85%|████████▌ | 85/100 [00:35<00:06,  2.34it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [85/100], Moving Avg Loss: 21.6505\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  86%|████████▌ | 86/100 [00:36<00:06,  2.28it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [86/100], Moving Avg Loss: 22.6546\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  87%|████████▋ | 87/100 [00:36<00:05,  2.35it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [87/100], Moving Avg Loss: 21.1156\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  88%|████████▊ | 88/100 [00:36<00:05,  2.38it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [88/100], Moving Avg Loss: 23.9993\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  89%|████████▉ | 89/100 [00:37<00:04,  2.39it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [89/100], Moving Avg Loss: 24.9765\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  90%|█████████ | 90/100 [00:37<00:04,  2.42it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [90/100], Moving Avg Loss: 28.3430\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  91%|█████████ | 91/100 [00:38<00:03,  2.39it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [91/100], Moving Avg Loss: 28.5874\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  92%|█████████▏| 92/100 [00:38<00:03,  2.33it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [92/100], Moving Avg Loss: 27.0662\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  93%|█████████▎| 93/100 [00:39<00:03,  2.32it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [93/100], Moving Avg Loss: 29.0707\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  94%|█████████▍| 94/100 [00:39<00:02,  2.23it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [94/100], Moving Avg Loss: 26.7228\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  95%|█████████▌| 95/100 [00:40<00:02,  2.24it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [95/100], Moving Avg Loss: 24.9785\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  96%|█████████▌| 96/100 [00:40<00:01,  2.28it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [96/100], Moving Avg Loss: 28.0284\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  97%|█████████▋| 97/100 [00:40<00:01,  2.32it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [97/100], Moving Avg Loss: 25.9050\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  98%|█████████▊| 98/100 [00:41<00:00,  2.36it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [98/100], Moving Avg Loss: 26.5735\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1:  99%|█████████▉| 99/100 [00:41<00:00,  2.39it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [99/100], Moving Avg Loss: 24.9826\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/1: 100%|██████████| 100/100 [00:42<00:00,  2.38it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch [1/1], Step [100/100], Moving Avg Loss: 23.1838\n",
+      "Average Loss for Epoch 1: 24.4323\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Fine-tune the layout model, taking one optimizer step per dataset item.\n",
+    "num_epochs = 1\n",
+    "# load_processor() takes no per-item input, so build the processor once\n",
+    "# instead of on every step.\n",
+    "processor = load_processor()\n",
+    "for epoch in range(num_epochs):\n",
+    "    model.train()\n",
+    "    running_loss = 0.0  # sum of per-step losses, used for the epoch mean\n",
+    "    moving_avg_loss = 0.0  # exponential moving average shown in the step log\n",
+    "\n",
+    "    # NOTE(review): iterates `dataset` directly, so train_loader is not used here -- confirm intended.\n",
+    "    for idx, item in enumerate(tqdm(dataset, desc=f\"Epoch {epoch + 1}/{num_epochs}\")):\n",
+    "\n",
+    "        # Preprocess the single image and stack it into a batch of one,\n",
+    "        # cast to the model's dtype and moved to the model's device.\n",
+    "        images = [prepare_image_detection(img=item['image'], processor=processor)]\n",
+    "        images = torch.stack(images, dim=0).to(model.dtype).to(model.device)\n",
+    "        optimizer.zero_grad()\n",
+    "        outputs = model(pixel_values=images)\n",
+    "        predicted_boxes = logits_to_bboxes(outputs.logits, item['image'])\n",
+    "        target_boxes = item['bboxes']\n",
+    "\n",
+    "        # Equal-weight blend of the smooth-L1 and IoU box losses.\n",
+    "        smooth_l1 = smooth_l1_loss(predicted_boxes, target_boxes)\n",
+    "        iou = iou_loss(predicted_boxes, target_boxes)\n",
+    "        loss = 0.5 * smooth_l1 + 0.5 * iou\n",
+    "\n",
+    "        loss.backward()\n",
+    "        optimizer.step()\n",
+    "        step_loss = loss.item()  # read the scalar once and reuse it below\n",
+    "        running_loss += step_loss\n",
+    "\n",
+    "        # Update moving average of the loss (seeded with the first step's loss)\n",
+    "        moving_avg_loss = 0.9 * moving_avg_loss + 0.1 * step_loss if idx > 0 else step_loss\n",
+    "\n",
+    "        # Print moving average loss\n",
+    "        print(f\"Epoch [{epoch + 1}/{num_epochs}], Step [{idx + 1}/{len(dataset)}], Moving Avg Loss: {moving_avg_loss:.4f}\")\n",
+    "\n",
+    "    # Epoch mean of the per-step losses; kept under the name avg_loss so the\n",
+    "    # value left behind by this cell is the same as before.\n",
+    "    avg_loss = running_loss / len(dataset)\n",
+    "    writer.add_scalar('Training Loss', avg_loss, epoch + 1)\n",
+    "    print(f\"Average Loss for Epoch {epoch + 1}: {avg_loss:.4f}\")\n",
+    "\n",
+    "    # Save the weights after every epoch.\n",
+    "    torch.save(model.state_dict(), os.path.join(checkpoint_dir, f\"model_epoch_{epoch + 1}.pth\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Restore fine-tuned weights from a saved checkpoint into the current model.\n",
+    "# NOTE(review): this names model_epoch_3.pth, while the training cell above runs\n",
+    "# num_epochs = 1 and therefore only writes model_epoch_1.pth -- confirm the\n",
+    "# epoch-3 file exists from an earlier run.\n",
+    "checkpoint_path = '/data2/ketan/orc/surya-layout-fine-tune/checkpoints/model_epoch_3.pth'\n",
+    "# map_location remaps the stored tensors onto the model's current device, so\n",
+    "# loading does not depend on the device the checkpoint was saved from.\n",
+    "state_dict = torch.load(checkpoint_path, map_location=model.device, weights_only=True)\n",
+    "\n",
+    "model.load_state_dict(state_dict)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Export: move the model to the CPU, then write it out with save_pretrained.\n",
+    "model.to('cpu').save_pretrained(\"fine-tuned-surya-model-layout\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.14"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}