# EmotionExtracter / train / transfer_learning.py
# Emotion-extraction model built with transfer learning on a pretrained RoBERTa checkpoint.
import torch
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from datasets import Dataset
from transformers import AutoTokenizer, TrainingArguments, Trainer
from transformers import RobertaConfig, RobertaForSequenceClassification
from torch.optim import AdamW  # transformers' AdamW was deprecated and later removed; use the torch implementation
from newhead import NewClassificationHead
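# `NewClassificationHead` comes from the project-local newhead.py (not shown here).
# RobertaForSequenceClassification calls the classifier with the full hidden-state
# tensor of shape (batch, seq_len, hidden_size), so a drop-in head is expected to
# pool the first (<s>) token and return logits of shape (batch, num_labels).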
def preprocess_data(df):
"""
Preprocess the data by renaming columns, removing rows with missing values, and removing extra spaces.
"""
df = df.rename(columns={'Comment': 'text', 'Emotion': 'label'})
df = df.dropna()
df['text'] = df['text'].str.replace('\t', ' ').str.replace(' +', ' ', regex=True).str.strip()
df['label'] = df['label'].str.replace('\t', ' ').str.replace(' +', ' ', regex=True).str.strip()
return df
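# Example of the cleanup above: "I\tfeel   great " -> "I feel great".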
def encode_label(df):
    """
    Encode the string labels as integers with LabelEncoder, and return the fitted
    encoder so integer predictions can be mapped back to emotion names.
    """
    label_encoder = LabelEncoder()
    df['label'] = label_encoder.fit_transform(df['label'])
    return df, label_encoder
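# LabelEncoder assigns integer codes in sorted order of the class names, so the
# mapping is deterministic; inspect it via label_encoder.classes_ after fitting.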
def generate_dataset(df, test_size=0.2):
    """
    Convert the DataFrame into a train/test DatasetDict that can be used with transformers.
    """
    ds = Dataset.from_pandas(df, preserve_index=False)
    return ds.train_test_split(test_size=test_size)
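# The resulting DatasetDict exposes the 'train' and 'test' keys that the Trainer
# setup below relies on.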
def tokenize(batch):
return tokenizer(batch['text'], padding='max_length', truncation=True)
def compute_metrics(pred):
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
labels = pred.label_ids
preds = pred.predictions.argmax(-1)
precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted')
acc = accuracy_score(labels, preds)
return {
'accuracy': acc,
'f1': f1,
'precision': precision,
'recall': recall
}
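# 'weighted' averaging weights each class's precision/recall/F1 by its support,
# which keeps the metrics meaningful if the emotion classes are imbalanced.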
# Define model and training arguments
model_name = "cardiffnlp/twitter-roberta-base-emotion"
tokenizer = AutoTokenizer.from_pretrained(model_name)
config = RobertaConfig.from_pretrained(model_name, num_labels=3)  # our dataset has three emotion classes
model = RobertaForSequenceClassification.from_pretrained(model_name, config=config, ignore_mismatched_sizes=True)  # the checkpoint's own head targets a different label set, so its size mismatch is ignored
model.classifier = NewClassificationHead(config)
df = pd.read_csv('Emotion_classify_Data.csv')
df = preprocess_data(df)
df, label_encoder = encode_label(df)
ds = generate_dataset(df)
ds = ds.map(tokenize, batched=True)
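# After mapping, each split gains 'input_ids' and 'attention_mask' columns next to
# 'text' and 'label'; the Trainer drops columns the model's forward() does not
# accept (remove_unused_columns=True by default).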
### Transfer learning, phase 1: train only the new classification head
# Freeze all layers first
for param in model.parameters():
param.requires_grad = False
# Unfreeze the classifier layer
for param in model.classifier.parameters():
param.requires_grad = True
# Define different learning rates
head_lr = 3e-4 # Higher learning rate for the head
base_lr = head_lr/5 # Lower learning rate for the base layers
# Group parameters and set learning rates
optimizer_grouped_parameters = [
{'params': model.classifier.parameters(), 'lr': head_lr},
{'params': [p for n, p in model.named_parameters() if 'classifier' not in n], 'lr': base_lr}
]
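# The base parameters are still frozen (requires_grad=False) here, so the second
# group receives no updates during phase 1; AdamW simply skips parameters that
# never get a gradient.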
optimizer = AdamW(optimizer_grouped_parameters)
## Training arguments
training_args = TrainingArguments(
output_dir='./results',
num_train_epochs=10,
per_device_train_batch_size=16,
per_device_eval_batch_size=64,
warmup_steps=500,
weight_decay=0.01,
logging_dir='./logs',
save_strategy="no",
)
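# save_strategy="no" skips intermediate checkpoints; the final model and tokenizer
# are saved explicitly with save_pretrained at the end of the script.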
trainer = Trainer(
model=model,
args=training_args,
train_dataset=ds['train'],
eval_dataset=ds['test'],
tokenizer=tokenizer,
    optimizers=(optimizer, None),  # with None, the Trainer creates its default warmup/decay scheduler on top of the per-group learning rates
compute_metrics=compute_metrics
)
## Train the head of the model (phase 1)
trainer.train()
## Unfreeze all layers
for param in model.parameters():
param.requires_grad = True
head_lr = 1e-4 # Slightly lower learning rate for the head
base_lr = 5e-6 # Much lower learning rate for the base layers
optimizer_grouped_parameters = [
{'params': model.classifier.parameters(), 'lr': head_lr},
{'params': [p for n, p in model.named_parameters() if 'classifier' not in n], 'lr': base_lr}
]
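# Lower learning rates in phase 2 guard against catastrophic forgetting: the
# pretrained encoder should move slowly while the head keeps adapting.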
## Phase 2: fine-tune the entire model
optimizer = AdamW(optimizer_grouped_parameters)
trainer.optimizer = optimizer  # hand the new optimizer to the Trainer; otherwise it would keep using the phase-1 optimizer
trainer.lr_scheduler = None  # let the Trainer build a fresh scheduler for this run
training_args.num_train_epochs = 5  # number of additional epochs for full fine-tuning
trainer.train()
model.save_pretrained('transferLearningResults')
tokenizer.save_pretrained('transferLearningResults')
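## Optional sanity check: a minimal sketch that runs one prediction with the
## in-memory model and maps the class index back to an emotion name (the example
## sentence is arbitrary).
model.eval()
inputs = tokenizer("I can't believe how well this turned out!", return_tensors="pt").to(model.device)
with torch.no_grad():
    logits = model(**inputs).logits
pred_id = logits.argmax(-1).item()
print("Predicted emotion:", label_encoder.classes_[pred_id])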