{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "959444f6-bb64-49f4-b537-d764680219ca", "metadata": {}, "outputs": [], "source": [ "#!pip install -U bitsandbytes\n", "#!pip install -U transformers\n", "#!pip install -U accelerate\n", "#!pip install -U peft\n", "#!pip install -U trl" ] }, { "cell_type": "code", "execution_count": 2, "id": "74ca2d22-ee78-4f31-8769-efd3dae9c46c", "metadata": {}, "outputs": [], "source": [ "#!huggingface-cli whoami" ] }, { "cell_type": "code", "execution_count": 3, "id": "682a7a96-c5b8-4595-b495-99e13b88a844", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import os\n", "from tqdm import tqdm\n", "import bitsandbytes as bnb\n", "import torch\n", "import torch.nn as nn\n", "import transformers\n", "from datasets import Dataset\n", "from peft import LoraConfig, PeftConfig\n", "from trl import SFTTrainer\n", "from trl import setup_chat_format\n", "from transformers import (AutoModelForCausalLM, \n", " AutoTokenizer, \n", " BitsAndBytesConfig, \n", " TrainingArguments, \n", " pipeline, \n", " logging)\n", "from sklearn.metrics import (accuracy_score, \n", " classification_report, \n", " confusion_matrix)\n", "from sklearn.model_selection import train_test_split" ] }, { "cell_type": "code", "execution_count": 4, "id": "5cbaba37-4deb-4a60-b4a7-dacd3c75c62b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
counthate_speech_countoffensive_language_countneither_countclasstweet
030032!!! RT @mayasolovely: As a woman you shouldn't...
130301!!!!! RT @mleew17: boy dats cold...tyga dwn ba...
230301!!!!!!! RT @UrKindOfBrand Dawg!!!! RT @80sbaby...
330211!!!!!!!!! RT @C_G_Anderson: @viva_based she lo...
460601!!!!!!!!!!!!! RT @ShenikaRoberts: The shit you...
\n", "
" ], "text/plain": [ " count hate_speech_count offensive_language_count neither_count class \\\n", "0 3 0 0 3 2 \n", "1 3 0 3 0 1 \n", "2 3 0 3 0 1 \n", "3 3 0 2 1 1 \n", "4 6 0 6 0 1 \n", "\n", " tweet \n", "0 !!! RT @mayasolovely: As a woman you shouldn't... \n", "1 !!!!! RT @mleew17: boy dats cold...tyga dwn ba... \n", "2 !!!!!!! RT @UrKindOfBrand Dawg!!!! RT @80sbaby... \n", "3 !!!!!!!!! RT @C_G_Anderson: @viva_based she lo... \n", "4 !!!!!!!!!!!!! RT @ShenikaRoberts: The shit you... " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from datasets import load_dataset\n", "\n", "df = pd.read_parquet(\"hf://datasets/tdavidson/hate_speech_offensive/data/train-00000-of-00001.parquet\")\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 5, "id": "90b89c75-ab77-42ee-b50c-466d6cc9de96", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_11705/3716630465.py:2: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[1 1 1 ... 1 1 2]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.\n", " df.loc[:,'label'] = df.loc[:,'label'].replace(0,'Hate')\n" ] } ], "source": [ "df = df.rename(columns={\"class\": \"label\",\"tweet\": \"text\"}).sample(frac=1, random_state=85).reset_index(drop=True).head(3000)\n", "df.loc[:,'label'] = df.loc[:,'label'].replace(0,'Hate')\n", "df.loc[:,'label'] = df.loc[:,'label'].replace(1,'Offensive')\n", "df.loc[:,'label'] = df.loc[:,'label'].replace(2,'Normal')\n", "# Split the DataFrame\n", "train_size = 0.8\n", "eval_size = 0.1\n", "\n", "# Calculate sizes\n", "train_end = int(train_size * len(df))\n", "eval_end = train_end + int(eval_size * len(df))\n", "\n", "# Split the data\n", "X_train = df[:train_end]\n", "X_eval = df[train_end:eval_end]\n", "X_test = df[eval_end:]\n", "# Define the prompt generation functions\n", "def generate_prompt(data_point):\n", " return f\"\"\"\n", " Classify the text into Hatespeech, Offensive, Normal and return the answer as the corresponding label.\n", "text: {data_point[\"text\"]}\n", "label: {data_point[\"label\"]}\"\"\".strip()\n", "\n", "def generate_test_prompt(data_point):\n", " return f\"\"\"\n", " Classify the text into Hatespeech, Offensive, Normal and return the answer as the corresponding label.\n", " text: {data_point[\"text\"]}\n", " label: \"\"\".strip()\n", "\n", "# Generate prompts for training and evaluation data\n", "X_train.loc[:,'text'] = X_train.apply(generate_prompt, axis=1)\n", "X_eval.loc[:,'text'] = X_eval.apply(generate_prompt, axis=1)\n", "\n", "# Generate test prompts and extract true labels\n", "y_true = X_test.loc[:,'label']\n", "X_test = pd.DataFrame(X_test.apply(generate_test_prompt, axis=1), columns=[\"text\"])" ] }, { "cell_type": "code", "execution_count": 6, "id": "25859362-af09-43fd-93ef-b981159a7dba", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "label\n", "Offensive 1877\n", "Normal 391\n", "Hate 132\n", "Name: count, dtype: int64" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_train.label.value_counts()" ] }, { "cell_type": "code", "execution_count": 7, "id": "12010dd7-0885-4296-9d8e-dae9d2addc30", "metadata": {}, "outputs": [], "source": [ "train_data = Dataset.from_pandas(X_train[[\"text\"]])\n", "eval_data = Dataset.from_pandas(X_eval[[\"text\"]])" ] }, { "cell_type": "code", "execution_count": 8, "id": 
"e8d7feef-d881-4e90-86d6-a4b30ac040f8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Classify the text into Hatespeech, Offensive, Normal and return the answer as the corresponding label.\\ntext: @kieffer_jason bitch u a thot oh fake ass nigga box up hoe u not bout nothing\\nlabel: Offensive'" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_data['text'][2000]" ] }, { "cell_type": "code", "execution_count": 9, "id": "6e8f818f-2c29-4637-93d0-750144b3f599", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b36e9ec950eb41909ae7159673c5a774", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Loading checkpoint shards: 0%| | 0/2 [00:00" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Run data is saved locally in /home/marco/wandb/run-20250112_154819-ixq2tt7q" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Syncing run /home/marco/llama-3.2-3B-instruct-offensive-classification-2 to Weights & Biases (docs)
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View project at https://wandb.ai/marcoor-universit-t-klagenfurt/huggingface" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View run at https://wandb.ai/marcoor-universit-t-klagenfurt/huggingface/runs/ixq2tt7q" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "/home/marco/.config/jupyterlab-desktop/jlab_server/lib/python3.12/site-packages/torch/_dynamo/eval_frame.py:632: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.5 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.\n", " return fn(*args, **kwargs)\n" ] }, { "data": { "text/html": [ "\n", "
\n", " \n", " \n", " [300/300 09:53, Epoch 1/1]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining LossValidation Loss
602.0116002.002830
1201.8539001.961909
1802.0888001.939240
2401.9231001.927367
3002.0890001.924164

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "/home/marco/.config/jupyterlab-desktop/jlab_server/lib/python3.12/site-packages/peft/utils/other.py:716: UserWarning: Unable to fetch remote file due to the following error (ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')), '(Request ID: ebbfa21b-df77-43ef-bddd-b95ec07a63f8)') - silently ignoring the lookup for the file config.json in meta-llama/Llama-3.2-3B-Instruct.\n", " warnings.warn(\n", "/home/marco/.config/jupyterlab-desktop/jlab_server/lib/python3.12/site-packages/peft/utils/save_and_load.py:246: UserWarning: Could not find a config file in meta-llama/Llama-3.2-3B-Instruct - will assume that the vocabulary was not modified.\n", " warnings.warn(\n" ] }, { "data": { "text/plain": [ "TrainOutput(global_step=300, training_loss=2.0926066251595814, metrics={'train_runtime': 596.9063, 'train_samples_per_second': 4.021, 'train_steps_per_second': 0.503, 'total_flos': 2216727844706304.0, 'train_loss': 2.0926066251595814, 'epoch': 1.0})" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "trainer.train()" ] }, { "cell_type": "code", "execution_count": 16, "id": "d50bf803-dedd-44bf-8016-6d80b5726803", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('/home/marco/llama-3.2-3B-instruct-offensive-classification-2/tokenizer_config.json',\n", " '/home/marco/llama-3.2-3B-instruct-offensive-classification-2/special_tokens_map.json',\n", " '/home/marco/llama-3.2-3B-instruct-offensive-classification-2/tokenizer.json')" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "trainer.save_model(output_dir)\n", "tokenizer.save_pretrained(output_dir)" ] }, { "cell_type": "code", "execution_count": 17, "id": "37e30c0e-dd90-4400-aaab-b3526ac4a7ab", "metadata": { "scrolled": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ " 0%| | 0/300 [00:00