In [1]:
#!pip install -U bitsandbytes
#!pip install -U transformers
#!pip install -U accelerate
#!pip install -U peft
#!pip install -U trl

In [2]:
#!huggingface-cli whoami

In [3]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import Dataset
from peft import LoraConfig, PeftConfig
from trl import SFTTrainer
from trl import setup_chat_format
from transformers import (AutoModelForCausalLM, 
                          AutoTokenizer, 
                          BitsAndBytesConfig, 
                          TrainingArguments, 
                          pipeline, 
                          logging)
from sklearn.metrics import (accuracy_score, 
                             classification_report, 
                             confusion_matrix)
from sklearn.model_selection import train_test_split

In [4]:
from datasets import load_dataset

df = pd.read_parquet("hf://datasets/tdavidson/hate_speech_offensive/data/train-00000-of-00001.parquet")
df.head()

Unnamed: 0,count,hate_speech_count,offensive_language_count,neither_count,class,tweet
0,3,0,0,3,2,!!! RT @mayasolovely: As a woman you shouldn't...
1,3,0,3,0,1,!!!!! RT @mleew17: boy dats cold...tyga dwn ba...
2,3,0,3,0,1,!!!!!!! RT @UrKindOfBrand Dawg!!!! RT @80sbaby...
3,3,0,2,1,1,!!!!!!!!! RT @C_G_Anderson: @viva_based she lo...
4,6,0,6,0,1,!!!!!!!!!!!!! RT @ShenikaRoberts: The shit you...


In [5]:
df = df.rename(columns={"class": "label","tweet": "text"}).sample(frac=1, random_state=85).reset_index(drop=True).head(3000)
df.loc[:,'label'] = df.loc[:,'label'].replace(0,'Hate')
df.loc[:,'label'] = df.loc[:,'label'].replace(1,'Offensive')
df.loc[:,'label'] = df.loc[:,'label'].replace(2,'Normal')
# Split the DataFrame
train_size = 0.8
eval_size = 0.1

# Calculate sizes
train_end = int(train_size * len(df))
eval_end = train_end + int(eval_size * len(df))

# Split the data
X_train = df[:train_end]
X_eval = df[train_end:eval_end]
X_test = df[eval_end:]
# Define the prompt generation functions
def generate_prompt(data_point):
    return f"""
            Classify the text into Hatespeech, Offensive, Normal and return the answer as the corresponding label.
text: {data_point["text"]}
label: {data_point["label"]}""".strip()

def generate_test_prompt(data_point):
    return f"""
            Classify the text into Hatespeech, Offensive, Normal and return the answer as the corresponding label.
            text: {data_point["text"]}
            label: """.strip()

# Generate prompts for training and evaluation data
X_train.loc[:,'text'] = X_train.apply(generate_prompt, axis=1)
X_eval.loc[:,'text'] = X_eval.apply(generate_prompt, axis=1)

# Generate test prompts and extract true labels
y_true = X_test.loc[:,'label']
X_test = pd.DataFrame(X_test.apply(generate_test_prompt, axis=1), columns=["text"])

  df.loc[:,'label'] = df.loc[:,'label'].replace(0,'Hate')


In [6]:
X_train.label.value_counts()

label
Offensive    1877
Normal        391
Hate          132
Name: count, dtype: int64

In [7]:
train_data = Dataset.from_pandas(X_train[["text"]])
eval_data = Dataset.from_pandas(X_eval[["text"]])

In [8]:
train_data['text'][2000]

'Classify the text into Hatespeech, Offensive, Normal and return the answer as the corresponding label.\ntext: @kieffer_jason bitch u a thot oh fake ass nigga box up hoe u not bout nothing\nlabel: Offensive'

In [9]:
base_model_name = "meta-llama/Llama-3.2-3B-Instruct"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
)

model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    torch_dtype="float16",
    quantization_config=bnb_config, 
)

model.config.use_cache = False
model.config.pretraining_tp = 1

tokenizer = AutoTokenizer.from_pretrained(base_model_name)

tokenizer.pad_token_id = tokenizer.eos_token_id

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [10]:
def predict(test, model, tokenizer):
    y_pred = []
    labels = ["Hate", "Offensive", "Normal"]
    
    for i in tqdm(range(len(test))):
        prompt = test.iloc[i]["text"]
        pipe = pipeline(task="text-generation", 
                        model=model, 
                        tokenizer=tokenizer, 
                        max_new_tokens=2, 
                        temperature=0.1)
        
        result = pipe(prompt)
        answer = result[0]['generated_text'].split("label:")[-1].strip()
        
        # Determine the predicted category
        for label in labels:
            if label.lower() in answer.lower():
                y_pred.append(label)
                break
        else:
            y_pred.append("none")
    
    return y_pred

y_pred = predict(X_test, model, tokenizer)

  0%|          | 0/300 [00:00<?, ?it/s]Device set to use cuda:0
  0%|          | 1/300 [00:00<01:37,  3.06it/s]Device set to use cuda:0
Device set to use cuda:0
  1%|          | 3/300 [00:00<00:41,  7.20it/s]Device set to use cuda:0
Device set to use cuda:0
  2%|▏         | 5/300 [00:00<00:30,  9.76it/s]Device set to use cuda:0
Device set to use cuda:0
  2%|▏         | 7/300 [00:00<00:25, 11.41it/s]Device set to use cuda:0
Device set to use cuda:0
  3%|▎         | 9/300 [00:00<00:23, 12.39it/s]Device set to use cuda:0
Device set to use cuda:0
  4%|▎         | 11/300 [00:01<00:22, 12.92it/s]Device set to use cuda:0
Device set to use cuda:0
  4%|▍         | 13/300 [00:01<00:21, 13.45it/s]Device set to use cuda:0
Device set to use cuda:0
  5%|▌         | 15/300 [00:01<00:20, 13.64it/s]Device set to use cuda:0
Device set to use cuda:0
  6%|▌         | 17/300 [00:01<00:20, 13.89it/s]Device set to use cuda:0
Device set to use cuda:0
  6%|▋         | 19/300 [00:01<00:19, 14.21it/s]Device set 

In [12]:
def evaluate(y_true, y_pred):
    labels = ["Hate", "Offensive", "Normal"]
    mapping = {label: idx for idx, label in enumerate(labels)}
    
    def map_func(x):
        return mapping.get(x, -1)  # Map to -1 if not found, but should not occur with correct data
    
    y_true_mapped = np.vectorize(map_func)(y_true)
    y_pred_mapped = np.vectorize(map_func)(y_pred)
    
    # Calculate accuracy
    accuracy = accuracy_score(y_true=y_true_mapped, y_pred=y_pred_mapped)
    print(f'Accuracy: {accuracy:.3f}')
    
    # Generate accuracy report
    unique_labels = set(y_true_mapped)  # Get unique labels
    
    for label in unique_labels:
        label_indices = [i for i in range(len(y_true_mapped)) if y_true_mapped[i] == label]
        label_y_true = [y_true_mapped[i] for i in label_indices]
        label_y_pred = [y_pred_mapped[i] for i in label_indices]
        label_accuracy = accuracy_score(label_y_true, label_y_pred)
        print(f'Accuracy for label {labels[label]}: {label_accuracy:.3f}')
        
    # Generate classification report
    class_report = classification_report(y_true=y_true_mapped, y_pred=y_pred_mapped, target_names=labels, labels=list(range(len(labels))))
    print('\nClassification Report:')
    print(class_report)
    
    # Generate confusion matrix
    conf_matrix = confusion_matrix(y_true=y_true_mapped, y_pred=y_pred_mapped, labels=list(range(len(labels))))
    print('\nConfusion Matrix:')
    print(conf_matrix)

evaluate(y_true, y_pred)

Accuracy: 0.220
Accuracy for label Hate: 0.588
Accuracy for label Offensive: 0.112
Accuracy for label Normal: 0.600

Classification Report:
              precision    recall  f1-score   support

        Hate       0.07      0.59      0.12        17
   Offensive       0.76      0.11      0.19       233
      Normal       0.65      0.60      0.62        50

   micro avg       0.29      0.22      0.25       300
   macro avg       0.49      0.43      0.31       300
weighted avg       0.71      0.22      0.26       300


Confusion Matrix:
[[ 10   1   1]
 [133  26  15]
 [  8   7  30]]


In [13]:
import bitsandbytes as bnb

def find_all_linear_names(model):
    cls = bnb.nn.Linear4bit
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, cls):
            names = name.split('.')
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])
    if 'lm_head' in lora_module_names:  # needed for 16 bit
        lora_module_names.remove('lm_head')
    return list(lora_module_names)
modules = find_all_linear_names(model)
modules

['up_proj', 'q_proj', 'gate_proj', 'o_proj', 'down_proj', 'k_proj', 'v_proj']

In [14]:
#!pip install wandb
output_dir="/home/marco/llama-3.2-3B-instruct-offensive-classification-2"

peft_config = LoraConfig(
   lora_alpha=16,
    lora_dropout=0,
    r=64,
    bias="none",
   task_type="CAUSAL_LM",
    target_modules=modules,
)

training_arguments = TrainingArguments(
    output_dir=output_dir,                    # directory to save and repository id
    num_train_epochs=1,                       # number of training epochs
    per_device_train_batch_size=1,            # batch size per device during training
    gradient_accumulation_steps=8,            # number of steps before performing a backward/update pass
    gradient_checkpointing=True,              # use gradient checkpointing to save memory
    optim="paged_adamw_32bit",
    logging_steps=1,                         
   learning_rate=2e-4,                       # learning rate, based on QLoRA paper
    weight_decay=0.001,
    fp16=True,
    bf16=False,
      max_grad_norm=0.3,                        # max gradient norm based on QLoRA paper
    max_steps=-1,
    warmup_ratio=0.03,                        # warmup ratio based on QLoRA paper
    group_by_length=False,
    lr_scheduler_type="cosine",               # use cosine learning rate scheduler
    report_to="wandb",                  # report metrics to w&b
    eval_strategy="steps",              # save checkpoint every epoch
    eval_steps = 0.2
)

trainer = SFTTrainer(
   model=model,
    args=training_arguments,
    train_dataset=train_data,
    eval_dataset=eval_data,
    peft_config=peft_config,
)

Map:   0%|          | 0/2400 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

In [15]:
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mmarcoor[0m ([33mmarcoor-universit-t-klagenfurt[0m). Use [1m`wandb login --relogin`[0m to force relogin


  return fn(*args, **kwargs)


Step,Training Loss,Validation Loss
60,2.0116,2.00283
120,1.8539,1.961909
180,2.0888,1.93924
240,1.9231,1.927367
300,2.089,1.924164




TrainOutput(global_step=300, training_loss=2.0926066251595814, metrics={'train_runtime': 596.9063, 'train_samples_per_second': 4.021, 'train_steps_per_second': 0.503, 'total_flos': 2216727844706304.0, 'train_loss': 2.0926066251595814, 'epoch': 1.0})

In [16]:
trainer.save_model(output_dir)
tokenizer.save_pretrained(output_dir)

('/home/marco/llama-3.2-3B-instruct-offensive-classification-2/tokenizer_config.json',
 '/home/marco/llama-3.2-3B-instruct-offensive-classification-2/special_tokens_map.json',
 '/home/marco/llama-3.2-3B-instruct-offensive-classification-2/tokenizer.json')

In [17]:
y_pred = predict(X_test, model, tokenizer)

  0%|          | 0/300 [00:00<?, ?it/s]Device set to use cuda:0
  0%|          | 1/300 [00:00<00:32,  9.15it/s]Device set to use cuda:0
Device set to use cuda:0
  1%|          | 3/300 [00:00<00:30,  9.80it/s]Device set to use cuda:0
Device set to use cuda:0
  2%|▏         | 5/300 [00:00<00:28, 10.20it/s]Device set to use cuda:0
Device set to use cuda:0
  2%|▏         | 7/300 [00:00<00:27, 10.48it/s]Device set to use cuda:0
Device set to use cuda:0
  3%|▎         | 9/300 [00:00<00:27, 10.58it/s]Device set to use cuda:0
Device set to use cuda:0
  4%|▎         | 11/300 [00:01<00:27, 10.46it/s]Device set to use cuda:0
Device set to use cuda:0
  4%|▍         | 13/300 [00:01<00:26, 10.69it/s]Device set to use cuda:0
Device set to use cuda:0
  5%|▌         | 15/300 [00:01<00:26, 10.76it/s]Device set to use cuda:0
Device set to use cuda:0
  6%|▌         | 17/300 [00:01<00:26, 10.83it/s]Device set to use cuda:0
Device set to use cuda:0
  6%|▋         | 19/300 [00:01<00:25, 10.93it/s]Device set 

In [18]:
evaluate(y_true, y_pred)

Accuracy: 0.903
Accuracy for label Hate: 0.176
Accuracy for label Offensive: 0.961
Accuracy for label Normal: 0.880

Classification Report:
              precision    recall  f1-score   support

        Hate       0.43      0.18      0.25        17
   Offensive       0.92      0.96      0.94       233
      Normal       0.88      0.88      0.88        50

    accuracy                           0.90       300
   macro avg       0.74      0.67      0.69       300
weighted avg       0.89      0.90      0.89       300


Confusion Matrix:
[[  3  13   1]
 [  4 224   5]
 [  0   6  44]]


In [1]:
base_model = "meta-llama/Llama-3.2-3B-Instruct"
fine_tuned_model = "/home/marco/llama-3.2-3B-instruct-offensive-classification-2"

In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft import PeftModel
import torch


# Reload tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(base_model)

base_model_reload = AutoModelForCausalLM.from_pretrained(
        base_model,
        return_dict=True,
        low_cpu_mem_usage=True,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
model = PeftModel.from_pretrained(base_model_reload, fine_tuned_model)
model = model.merge_and_unload()



In [22]:
text = "You are an asshole!"
prompt = f"""Classify the text into Hatespeech, Offensive, Normal and return the answer as the corresponding label.
text: {text}
label: """.strip()

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
)

outputs = pipe(prompt, max_new_tokens=2, do_sample=True, temperature=0.1)
print(outputs[0]["generated_text"].split("label: ")[-1].strip())

Device set to use cpu
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Offensive


In [23]:
text = "I walk in the park!"
prompt = f"""Classify the text into Hatespeech, Offensive, Normal and return the answer as the corresponding label.
text: {text}
label: """.strip()

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
)

outputs = pipe(prompt, max_new_tokens=2, do_sample=True, temperature=0.1)
print(outputs[0]["generated_text"].split("label: ")[-1].strip())

Device set to use cpu
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Normal


In [27]:
text = "Jews are conspiratorial, devious, treacherous, sadistic, child killers, and subversive"
prompt = f"""Classify the text into Hatespeech, Offensive, Normal and return the answer as the corresponding label.
text: {text}
label: """.strip()
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
)

outputs = pipe(prompt, max_new_tokens=2, do_sample=True, temperature=0.1)
print(outputs[0]["generated_text"].split("label: ")[-1].strip())

Device set to use cpu
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Hate Speech


In [28]:
model_dir = "/home/marco/llama-3.2-3B-instruct-offensive-classification-2"
model.save_pretrained(model_dir)
tokenizer.save_pretrained(model_dir)

('/home/marco/llama-3.2-3B-instruct-offensive-classification-2/tokenizer_config.json',
 '/home/marco/llama-3.2-3B-instruct-offensive-classification-2/special_tokens_map.json',
 '/home/marco/llama-3.2-3B-instruct-offensive-classification-2/tokenizer.json')

In [6]:
model.push_to_hub("marcoorasch/llama-3.2-3B-instruct-hatespeech-offensive-classification",use_temp_dir=True)

CommitInfo(commit_url='https://huggingface.co/marcoorasch/llama-3.2-3B-instruct-hatespeech-offensive-classification/commit/8437c36e0434971361a08296877c986cc68ee524', commit_message='Upload LlamaForCausalLM', commit_description='', oid='8437c36e0434971361a08296877c986cc68ee524', pr_url=None, repo_url=RepoUrl('https://huggingface.co/marcoorasch/llama-3.2-3B-instruct-hatespeech-offensive-classification', endpoint='https://huggingface.co', repo_type='model', repo_id='marcoorasch/llama-3.2-3B-instruct-hatespeech-offensive-classification'), pr_revision=None, pr_num=None)