SivaMallikarjun committed on
Commit
1a60d1d
·
verified ·
1 Parent(s): ff2f662

Delete train.py

Browse files
Files changed (1) hide show
  1. train.py +0 -61
train.py DELETED
@@ -1,61 +0,0 @@
1
- from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
2
- from datasets import load_dataset
3
- import torch
4
- import os
5
-
6
- # Load Dataset
7
- dataset = load_dataset('csv', data_files={'train': './data/raw_data.csv'}, delimiter=",")
8
-
9
- # Load Pretrained Tokenizer and Model
10
- model_name = "xlm-roberta-base"
11
- tokenizer = AutoTokenizer.from_pretrained(model_name)
12
- model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
13
-
14
- # Tokenization
15
- def preprocess_function(examples):
16
- return tokenizer(examples['text'], truncation=True, padding=True)
17
-
18
- encoded_dataset = dataset.map(preprocess_function, batched=True)
19
-
20
- # Training Arguments
21
- training_args = TrainingArguments(
22
- output_dir="./checkpoints",
23
- num_train_epochs=3,
24
- per_device_train_batch_size=8,
25
- save_steps=100,
26
- save_total_limit=1,
27
- logging_dir="./logs",
28
- logging_steps=10,
29
- evaluation_strategy="no",
30
- push_to_hub=False,
31
- load_best_model_at_end=False
32
- )
33
-
34
- # Trainer Setup
35
- trainer = Trainer(
36
- model=model,
37
- args=training_args,
38
- train_dataset=encoded_dataset['train']
39
- )
40
-
41
- # Start Training
42
- trainer.train()
43
-
44
- # Save Final Fine-tuned Model
45
- save_directory = "./models/fine_tuned_xlm_roberta"
46
- os.makedirs(save_directory, exist_ok=True)
47
- model.save_pretrained(save_directory)
48
- tokenizer.save_pretrained(save_directory)
49
-
50
- # Quantize Model (Make Lightweight)
51
- def quantize_model(model_path):
52
- model = AutoModelForSequenceClassification.from_pretrained(model_path)
53
- model.to(torch.device('cpu'))
54
- model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)
55
- quantized_model_path = model_path + "_quantized"
56
- os.makedirs(quantized_model_path, exist_ok=True)
57
- model.save_pretrained(quantized_model_path)
58
- tokenizer.save_pretrained(quantized_model_path)
59
- print(f"Quantized model saved to {quantized_model_path}")
60
-
61
- quantize_model(save_directory)