"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/marco/.config/jupyterlab-desktop/jlab_server/lib/python3.12/site-packages/torch/_dynamo/eval_frame.py:632: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.5 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.\n",
" return fn(*args, **kwargs)\n"
]
},
{
"data": {
"text/html": [
"\n",
" \n",
" \n",
"
\n",
" [300/300 09:53, Epoch 1/1]\n",
"
\n",
" \n",
" \n",
" \n",
" Step | \n",
" Training Loss | \n",
" Validation Loss | \n",
"
\n",
" \n",
" \n",
" \n",
" 60 | \n",
" 2.011600 | \n",
" 2.002830 | \n",
"
\n",
" \n",
" 120 | \n",
" 1.853900 | \n",
" 1.961909 | \n",
"
\n",
" \n",
" 180 | \n",
" 2.088800 | \n",
" 1.939240 | \n",
"
\n",
" \n",
" 240 | \n",
" 1.923100 | \n",
" 1.927367 | \n",
"
\n",
" \n",
" 300 | \n",
" 2.089000 | \n",
" 1.924164 | \n",
"
\n",
" \n",
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/marco/.config/jupyterlab-desktop/jlab_server/lib/python3.12/site-packages/peft/utils/other.py:716: UserWarning: Unable to fetch remote file due to the following error (ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')), '(Request ID: ebbfa21b-df77-43ef-bddd-b95ec07a63f8)') - silently ignoring the lookup for the file config.json in meta-llama/Llama-3.2-3B-Instruct.\n",
" warnings.warn(\n",
"/home/marco/.config/jupyterlab-desktop/jlab_server/lib/python3.12/site-packages/peft/utils/save_and_load.py:246: UserWarning: Could not find a config file in meta-llama/Llama-3.2-3B-Instruct - will assume that the vocabulary was not modified.\n",
" warnings.warn(\n"
]
},
{
"data": {
"text/plain": [
"TrainOutput(global_step=300, training_loss=2.0926066251595814, metrics={'train_runtime': 596.9063, 'train_samples_per_second': 4.021, 'train_steps_per_second': 0.503, 'total_flos': 2216727844706304.0, 'train_loss': 2.0926066251595814, 'epoch': 1.0})"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Run fine-tuning; returns a TrainOutput with global_step, training_loss and runtime metrics\n",
"trainer.train()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "d50bf803-dedd-44bf-8016-6d80b5726803",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"('/home/marco/llama-3.2-3B-instruct-offensive-classification-2/tokenizer_config.json',\n",
" '/home/marco/llama-3.2-3B-instruct-offensive-classification-2/special_tokens_map.json',\n",
" '/home/marco/llama-3.2-3B-instruct-offensive-classification-2/tokenizer.json')"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Persist the trained model (adapter weights) and the tokenizer to output_dir\n",
"# NOTE(review): output_dir is defined in an earlier cell — confirm it is set on a fresh run\n",
"trainer.save_model(output_dir)\n",
"tokenizer.save_pretrained(output_dir)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "37e30c0e-dd90-4400-aaab-b3526ac4a7ab",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" 0%| | 0/300 [00:00, ?it/s]Device set to use cuda:0\n",
" 0%| | 1/300 [00:00<00:32, 9.15it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 1%| | 3/300 [00:00<00:30, 9.80it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 2%|▏ | 5/300 [00:00<00:28, 10.20it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 2%|▏ | 7/300 [00:00<00:27, 10.48it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 3%|▎ | 9/300 [00:00<00:27, 10.58it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 4%|▎ | 11/300 [00:01<00:27, 10.46it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 4%|▍ | 13/300 [00:01<00:26, 10.69it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 5%|▌ | 15/300 [00:01<00:26, 10.76it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 6%|▌ | 17/300 [00:01<00:26, 10.83it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 6%|▋ | 19/300 [00:01<00:25, 10.93it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 7%|▋ | 21/300 [00:01<00:25, 10.78it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 8%|▊ | 23/300 [00:02<00:25, 10.87it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 8%|▊ | 25/300 [00:02<00:25, 10.92it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 9%|▉ | 27/300 [00:02<00:24, 10.98it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 10%|▉ | 29/300 [00:02<00:24, 11.04it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 10%|█ | 31/300 [00:02<00:24, 11.06it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 11%|█ | 33/300 [00:03<00:24, 10.87it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 12%|█▏ | 35/300 [00:03<00:24, 10.94it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 12%|█▏ | 37/300 [00:03<00:24, 10.68it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 13%|█▎ | 39/300 [00:03<00:24, 10.77it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 14%|█▎ | 41/300 [00:03<00:23, 10.87it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 14%|█▍ | 43/300 [00:03<00:23, 10.77it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 15%|█▌ | 45/300 [00:04<00:23, 10.76it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 16%|█▌ | 47/300 [00:04<00:23, 10.71it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 16%|█▋ | 49/300 [00:04<00:23, 10.67it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 17%|█▋ | 51/300 [00:04<00:23, 10.69it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 18%|█▊ | 53/300 [00:04<00:23, 10.70it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 18%|█▊ | 55/300 [00:05<00:22, 10.83it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 19%|█▉ | 57/300 [00:05<00:22, 10.70it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 20%|█▉ | 59/300 [00:05<00:22, 10.65it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 20%|██ | 61/300 [00:05<00:22, 10.57it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 21%|██ | 63/300 [00:05<00:23, 10.26it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 22%|██▏ | 65/300 [00:06<00:22, 10.33it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 22%|██▏ | 67/300 [00:06<00:22, 10.39it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 23%|██▎ | 69/300 [00:06<00:21, 10.57it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 24%|██▎ | 71/300 [00:06<00:21, 10.58it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 24%|██▍ | 73/300 [00:06<00:21, 10.46it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 25%|██▌ | 75/300 [00:07<00:21, 10.63it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 26%|██▌ | 77/300 [00:07<00:20, 10.77it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 26%|██▋ | 79/300 [00:07<00:21, 10.52it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 27%|██▋ | 81/300 [00:07<00:20, 10.69it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 28%|██▊ | 83/300 [00:07<00:20, 10.74it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 28%|██▊ | 85/300 [00:07<00:19, 10.76it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 29%|██▉ | 87/300 [00:08<00:20, 10.65it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 30%|██▉ | 89/300 [00:08<00:19, 10.69it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 30%|███ | 91/300 [00:08<00:19, 10.70it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 31%|███ | 93/300 [00:08<00:19, 10.46it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 32%|███▏ | 95/300 [00:08<00:19, 10.59it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 32%|███▏ | 97/300 [00:09<00:18, 10.72it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 33%|███▎ | 99/300 [00:09<00:18, 10.78it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 34%|███▎ | 101/300 [00:09<00:18, 10.51it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 34%|███▍ | 103/300 [00:09<00:18, 10.37it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 35%|███▌ | 105/300 [00:09<00:18, 10.56it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 36%|███▌ | 107/300 [00:10<00:18, 10.69it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 36%|███▋ | 109/300 [00:10<00:17, 10.70it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 37%|███▋ | 111/300 [00:10<00:17, 10.67it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 38%|███▊ | 113/300 [00:10<00:17, 10.55it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 38%|███▊ | 115/300 [00:10<00:17, 10.51it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 39%|███▉ | 117/300 [00:10<00:17, 10.65it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 40%|███▉ | 119/300 [00:11<00:16, 10.74it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 40%|████ | 121/300 [00:11<00:16, 10.84it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 41%|████ | 123/300 [00:11<00:16, 10.91it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 42%|████▏ | 125/300 [00:11<00:16, 10.73it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 42%|████▏ | 127/300 [00:11<00:15, 10.85it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 43%|████▎ | 129/300 [00:12<00:15, 10.93it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 44%|████▎ | 131/300 [00:12<00:15, 11.01it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 44%|████▍ | 133/300 [00:12<00:15, 11.04it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 45%|████▌ | 135/300 [00:12<00:15, 10.86it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 46%|████▌ | 137/300 [00:12<00:14, 10.88it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 46%|████▋ | 139/300 [00:12<00:14, 10.96it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 47%|████▋ | 141/300 [00:13<00:14, 10.81it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 48%|████▊ | 143/300 [00:13<00:14, 10.87it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 48%|████▊ | 145/300 [00:13<00:14, 10.84it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 49%|████▉ | 147/300 [00:13<00:14, 10.90it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 50%|████▉ | 149/300 [00:13<00:13, 10.94it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 50%|█████ | 151/300 [00:14<00:13, 10.96it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 51%|█████ | 153/300 [00:14<00:13, 10.79it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 52%|█████▏ | 155/300 [00:14<00:13, 10.80it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 52%|█████▏ | 157/300 [00:14<00:13, 10.65it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 53%|█████▎ | 159/300 [00:14<00:13, 10.74it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 54%|█████▎ | 161/300 [00:15<00:12, 10.74it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 54%|█████▍ | 163/300 [00:15<00:12, 10.78it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 55%|█████▌ | 165/300 [00:15<00:12, 10.63it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 56%|█████▌ | 167/300 [00:15<00:12, 10.36it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 56%|█████▋ | 169/300 [00:15<00:12, 10.25it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 57%|█████▋ | 171/300 [00:16<00:12, 10.27it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 58%|█████▊ | 173/300 [00:16<00:12, 10.48it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 58%|█████▊ | 175/300 [00:16<00:11, 10.61it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 59%|█████▉ | 177/300 [00:16<00:11, 10.54it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 60%|█████▉ | 179/300 [00:16<00:11, 10.68it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 60%|██████ | 181/300 [00:16<00:11, 10.76it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 61%|██████ | 183/300 [00:17<00:10, 10.82it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 62%|██████▏ | 185/300 [00:17<00:10, 10.81it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 62%|██████▏ | 187/300 [00:17<00:10, 10.71it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 63%|██████▎ | 189/300 [00:17<00:10, 10.63it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 64%|██████▎ | 191/300 [00:17<00:10, 10.54it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 64%|██████▍ | 193/300 [00:18<00:10, 10.67it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 65%|██████▌ | 195/300 [00:18<00:09, 10.76it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 66%|██████▌ | 197/300 [00:18<00:09, 10.86it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 66%|██████▋ | 199/300 [00:18<00:09, 10.75it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 67%|██████▋ | 201/300 [00:18<00:09, 10.82it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 68%|██████▊ | 203/300 [00:18<00:08, 10.87it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 68%|██████▊ | 205/300 [00:19<00:08, 10.88it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 69%|██████▉ | 207/300 [00:19<00:08, 10.73it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 70%|██████▉ | 209/300 [00:19<00:08, 10.65it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 70%|███████ | 211/300 [00:19<00:08, 10.76it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 71%|███████ | 213/300 [00:19<00:08, 10.79it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 72%|███████▏ | 215/300 [00:20<00:07, 10.75it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 72%|███████▏ | 217/300 [00:20<00:07, 10.76it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 73%|███████▎ | 219/300 [00:20<00:07, 10.79it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 74%|███████▎ | 221/300 [00:20<00:07, 10.67it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 74%|███████▍ | 223/300 [00:20<00:07, 10.58it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 75%|███████▌ | 225/300 [00:21<00:07, 10.36it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 76%|███████▌ | 227/300 [00:21<00:06, 10.54it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 76%|███████▋ | 229/300 [00:21<00:06, 10.41it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 77%|███████▋ | 231/300 [00:21<00:06, 10.22it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 78%|███████▊ | 233/300 [00:21<00:06, 10.40it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 78%|███████▊ | 235/300 [00:21<00:06, 10.56it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 79%|███████▉ | 237/300 [00:22<00:05, 10.65it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 80%|███████▉ | 239/300 [00:22<00:05, 10.72it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 80%|████████ | 241/300 [00:22<00:05, 10.75it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 81%|████████ | 243/300 [00:22<00:05, 10.85it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 82%|████████▏ | 245/300 [00:22<00:05, 10.90it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 82%|████████▏ | 247/300 [00:23<00:04, 10.69it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 83%|████████▎ | 249/300 [00:23<00:04, 10.53it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 84%|████████▎ | 251/300 [00:23<00:04, 10.45it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 84%|████████▍ | 253/300 [00:23<00:04, 10.36it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 85%|████████▌ | 255/300 [00:23<00:04, 10.52it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 86%|████████▌ | 257/300 [00:24<00:04, 10.18it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 86%|████████▋ | 259/300 [00:24<00:04, 10.05it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 87%|████████▋ | 261/300 [00:24<00:03, 10.24it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 88%|████████▊ | 263/300 [00:24<00:03, 10.28it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 88%|████████▊ | 265/300 [00:24<00:03, 10.21it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 89%|████████▉ | 267/300 [00:25<00:03, 10.18it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 90%|████████▉ | 269/300 [00:25<00:02, 10.35it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 90%|█████████ | 271/300 [00:25<00:02, 10.28it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 91%|█████████ | 273/300 [00:25<00:02, 10.36it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 92%|█████████▏| 275/300 [00:25<00:02, 10.37it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 92%|█████████▏| 277/300 [00:26<00:02, 10.51it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 93%|█████████▎| 279/300 [00:26<00:01, 10.59it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 94%|█████████▎| 281/300 [00:26<00:01, 10.53it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 94%|█████████▍| 283/300 [00:26<00:01, 10.46it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 95%|█████████▌| 285/300 [00:26<00:01, 10.43it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 96%|█████████▌| 287/300 [00:26<00:01, 10.44it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 96%|█████████▋| 289/300 [00:27<00:01, 10.35it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 97%|█████████▋| 291/300 [00:27<00:00, 10.49it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 98%|█████████▊| 293/300 [00:27<00:00, 10.44it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 98%|█████████▊| 295/300 [00:27<00:00, 10.52it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
" 99%|█████████▉| 297/300 [00:27<00:00, 10.53it/s]Device set to use cuda:0\n",
"Device set to use cuda:0\n",
"100%|█████████▉| 299/300 [00:28<00:00, 10.48it/s]Device set to use cuda:0\n",
"100%|██████████| 300/300 [00:28<00:00, 10.63it/s]\n"
]
}
],
"source": [
"# Run inference over the held-out test set with the fine-tuned model.\n",
"# NOTE(review): `predict` is defined in an earlier cell — presumably returns one label per example\n",
"y_pred = predict(X_test, model, tokenizer)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "3449c759-b590-4455-80d7-a1535e1b8f9f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy: 0.903\n",
"Accuracy for label Hate: 0.176\n",
"Accuracy for label Offensive: 0.961\n",
"Accuracy for label Normal: 0.880\n",
"\n",
"Classification Report:\n",
" precision recall f1-score support\n",
"\n",
" Hate 0.43 0.18 0.25 17\n",
" Offensive 0.92 0.96 0.94 233\n",
" Normal 0.88 0.88 0.88 50\n",
"\n",
" accuracy 0.90 300\n",
" macro avg 0.74 0.67 0.69 300\n",
"weighted avg 0.89 0.90 0.89 300\n",
"\n",
"\n",
"Confusion Matrix:\n",
"[[ 3 13 1]\n",
" [ 4 224 5]\n",
" [ 0 6 44]]\n"
]
}
],
"source": [
"# Report overall and per-label accuracy, a classification report, and a confusion matrix\n",
"# NOTE(review): y_true comes from an earlier cell — verify its ordering matches X_test\n",
"evaluate(y_true, y_pred)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "591f2b8c-3cb1-448d-b70a-e26b0dcfbafd",
"metadata": {},
"outputs": [],
"source": [
"# Model identifiers for the inference/evaluation section below\n",
"# NOTE(review): fine_tuned_model is a hardcoded absolute local path — prefer a configurable directory\n",
"base_model = \"meta-llama/Llama-3.2-3B-Instruct\"\n",
"fine_tuned_model = \"/home/marco/llama-3.2-3B-instruct-offensive-classification-2\""
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "64a3591f-8c98-41f9-8326-a9ed7d0da461",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "60c6531a470347dfb3702157f8c92ee7",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Loading checkpoint shards: 0%| | 0/2 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Fresh-start inference section: execution counts restart at 1 here, so this part\n",
"# is intended to run after a kernel restart — hence imports appear again mid-notebook.\n",
"from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline\n",
"from peft import PeftModel\n",
"import torch\n",
"\n",
"\n",
"# Reload tokenizer and model\n",
"tokenizer = AutoTokenizer.from_pretrained(base_model)\n",
"\n",
"# Load the base model in fp16 with automatic device placement across available devices\n",
"base_model_reload = AutoModelForCausalLM.from_pretrained(\n",
" base_model,\n",
" return_dict=True,\n",
" low_cpu_mem_usage=True,\n",
" torch_dtype=torch.float16,\n",
" device_map=\"auto\",\n",
" trust_remote_code=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "37e19428-9786-429f-b119-61ae7b7355ab",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/marco/.config/jupyterlab-desktop/jlab_server/lib/python3.12/site-packages/accelerate/utils/modeling.py:1593: UserWarning: Current model requires 7424 bytes of buffer for offloaded layers, which seems does not fit any GPU's remaining memory. If you are experiencing a OOM later, please consider using offload_buffers=True.\n",
" warnings.warn(\n"
]
}
],
"source": [
"# Attach the saved LoRA adapter to the reloaded base model, then fold the adapter\n",
"# weights into the base weights so `model` becomes a plain standalone model\n",
"model = PeftModel.from_pretrained(base_model_reload, fine_tuned_model)\n",
"model = model.merge_and_unload()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "9e448406-60a1-4c76-a827-0284f859843e",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Device set to use cpu\n",
"Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Offensive\n"
]
}
],
"source": [
"# Smoke-test the merged model on an offensive example.\n",
"# NOTE(review): prompt says \"Hatespeech\" while the evaluation labels use \"Hate\" —\n",
"# confirm this wording matches the prompt used during fine-tuning.\n",
"text = \"You are an asshole!\"\n",
"prompt = f\"\"\"Classify the text into Hatespeech, Offensive, Normal and return the answer as the corresponding label.\n",
"text: {text}\n",
"label: \"\"\".strip()\n",
"\n",
"# Build a text-generation pipeline once; reused by the following spot-check cells\n",
"pipe = pipeline(\n",
" \"text-generation\",\n",
" model=model,\n",
" tokenizer=tokenizer,\n",
" torch_dtype=torch.float16,\n",
" device_map=\"auto\",\n",
")\n",
"\n",
"# NOTE(review): do_sample=True makes output stochastic even at temperature=0.1 —\n",
"# consider do_sample=False for reproducible classification.\n",
"outputs = pipe(prompt, max_new_tokens=2, do_sample=True, temperature=0.1)\n",
"print(outputs[0][\"generated_text\"].split(\"label: \")[-1].strip())"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "8f479001-338c-4a06-b4e5-6d874a6d9b30",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Device set to use cpu\n",
"Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Normal\n"
]
}
],
"source": [
"# Spot-check a benign sentence with the merged model.\n",
"# Reuse the `pipe` text-generation pipeline built in the previous cell instead of\n",
"# re-instantiating it per example (the original copy-pasted the pipeline setup,\n",
"# which is redundant and slow; the configuration was identical).\n",
"text = \"I walk in the park!\"\n",
"prompt = f\"\"\"Classify the text into Hatespeech, Offensive, Normal and return the answer as the corresponding label.\n",
"text: {text}\n",
"label: \"\"\".strip()\n",
"\n",
"outputs = pipe(prompt, max_new_tokens=2, do_sample=True, temperature=0.1)\n",
"print(outputs[0][\"generated_text\"].split(\"label: \")[-1].strip())"
]
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "b182f963-ce02-465a-87dd-312fe782451d",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Device set to use cpu\n",
"Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Hate Speech\n"
]
}
],
"source": [
"# Spot-check a hate-speech example with the merged model.\n",
"# Reuse the `pipe` pipeline created in the earlier inference cell rather than\n",
"# rebuilding it for every example (removes the copy-pasted pipeline setup;\n",
"# the configuration was identical).\n",
"text = \"Jews are conspiratorial, devious, treacherous, sadistic, child killers, and subversive\"\n",
"prompt = f\"\"\"Classify the text into Hatespeech, Offensive, Normal and return the answer as the corresponding label.\n",
"text: {text}\n",
"label: \"\"\".strip()\n",
"\n",
"outputs = pipe(prompt, max_new_tokens=2, do_sample=True, temperature=0.1)\n",
"print(outputs[0][\"generated_text\"].split(\"label: \")[-1].strip())"
]
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "dfb5394b-8be3-4e25-9417-165f08ff0fad",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"('/home/marco/llama-3.2-3B-instruct-offensive-classification-2/tokenizer_config.json',\n",
" '/home/marco/llama-3.2-3B-instruct-offensive-classification-2/special_tokens_map.json',\n",
" '/home/marco/llama-3.2-3B-instruct-offensive-classification-2/tokenizer.json')"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Save the merged (adapter-fused) model and tokenizer to disk.\n",
"# NOTE(review): model_dir is the same path as fine_tuned_model above, so this\n",
"# overwrites the saved LoRA adapter directory with the full merged model — confirm intended.\n",
"model_dir = \"/home/marco/llama-3.2-3B-instruct-offensive-classification-2\"\n",
"model.save_pretrained(model_dir)\n",
"tokenizer.save_pretrained(model_dir)"
]
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "aa0ffb80-4d69-4099-b279-712c3dfac2fb",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"CommitInfo(commit_url='https://huggingface.co/marcoorasch/llama-3.2-3B-instruct-hatespeech-offensive-classification/commit/8437c36e0434971361a08296877c986cc68ee524', commit_message='Upload LlamaForCausalLM', commit_description='', oid='8437c36e0434971361a08296877c986cc68ee524', pr_url=None, repo_url=RepoUrl('https://huggingface.co/marcoorasch/llama-3.2-3B-instruct-hatespeech-offensive-classification', endpoint='https://huggingface.co', repo_type='model', repo_id='marcoorasch/llama-3.2-3B-instruct-hatespeech-offensive-classification'), pr_revision=None, pr_num=None)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Publish the merged model to the Hugging Face Hub (staged via a temporary directory)\n",
"model.push_to_hub(\"marcoorasch/llama-3.2-3B-instruct-hatespeech-offensive-classification\",use_temp_dir=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3d94aa30-c59d-4575-a0b6-1d6c87f1087e",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}