SivaMallikarjun committed on
Commit
979d940
·
verified ·
1 Parent(s): a2b8c9c

multi_language_rl_model

Browse files
multi_language_rl_model/README.md DELETED
@@ -1,10 +0,0 @@
1
- ---
2
- title: Multi-Language RL Model
3
- emoji: 🌍
4
- colorFrom: blue
5
- colorTo: green
6
- sdk: gradio
7
- sdk_version: "3.50.2"
8
- app_file: app.py
9
- pinned: false
10
- ---
 
 
 
 
 
 
 
 
 
 
 
multi_language_rl_model/app.py DELETED
@@ -1,22 +0,0 @@
1
- import gradio as gr
2
- from transformers import AutoModelForSequenceClassification, AutoTokenizer
3
- import torch
4
-
5
- model_path = "./models/fine_tuned_xlm_roberta_quantized"
6
- model = AutoModelForSequenceClassification.from_pretrained(model_path)
7
- tokenizer = AutoTokenizer.from_pretrained(model_path)
8
-
9
- def classify_text(text):
10
- inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
11
- outputs = model(**inputs)
12
- prediction = torch.argmax(outputs.logits, dim=1).item()
13
- label = "Correct" if prediction == 1 else "Incorrect"
14
- return label
15
-
16
- iface = gr.Interface(fn=classify_text,
17
- inputs="text",
18
- outputs="text",
19
- title="Multi-Language RL Text Classifier")
20
-
21
- if __name__ == "__main__":
22
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
multi_language_rl_model/checkpoints/checkpoint_episode_100/pytorch_model.bin DELETED
File without changes
multi_language_rl_model/data/raw_data.csv DELETED
@@ -1,6 +0,0 @@
1
- text,label
2
- "Bonjour tout le monde",1
3
- "Hola mundo",1
4
- "Hello world",1
5
- "Das ist falsch",0
6
- "यह गलत है",0
 
 
 
 
 
 
 
multi_language_rl_model/logs/Untitledtraining_log.txt DELETED
@@ -1,4 +0,0 @@
1
- Epoch 1/3 - Loss: 0.456 - Accuracy: 88%
2
- Epoch 2/3 - Loss: 0.320 - Accuracy: 91%
3
- Epoch 3/3 - Loss: 0.278 - Accuracy: 93%
4
- Final Model saved to ./models/fine_tuned_xlm_roberta_quantized/
 
 
 
 
 
multi_language_rl_model/requirements.txt DELETED
@@ -1,5 +0,0 @@
1
- transformers
2
- torch
3
- gradio
4
- datasets
5
- huggingface_hub
 
 
 
 
 
 
multi_language_rl_model/train.py DELETED
@@ -1,61 +0,0 @@
1
- from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
2
- from datasets import load_dataset
3
- import torch
4
- import os
5
-
6
- # Load Dataset
7
- dataset = load_dataset('csv', data_files={'train': './data/raw_data.csv'}, delimiter=",")
8
-
9
- # Load Pretrained Tokenizer and Model
10
- model_name = "xlm-roberta-base"
11
- tokenizer = AutoTokenizer.from_pretrained(model_name)
12
- model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
13
-
14
- # Tokenization
15
- def preprocess_function(examples):
16
- return tokenizer(examples['text'], truncation=True, padding=True)
17
-
18
- encoded_dataset = dataset.map(preprocess_function, batched=True)
19
-
20
- # Training Arguments
21
- training_args = TrainingArguments(
22
- output_dir="./checkpoints",
23
- num_train_epochs=3,
24
- per_device_train_batch_size=8,
25
- save_steps=100,
26
- save_total_limit=1,
27
- logging_dir="./logs",
28
- logging_steps=10,
29
- evaluation_strategy="no",
30
- push_to_hub=False,
31
- load_best_model_at_end=False
32
- )
33
-
34
- # Trainer Setup
35
- trainer = Trainer(
36
- model=model,
37
- args=training_args,
38
- train_dataset=encoded_dataset['train']
39
- )
40
-
41
- # Start Training
42
- trainer.train()
43
-
44
- # Save Final Fine-tuned Model
45
- save_directory = "./models/fine_tuned_xlm_roberta"
46
- os.makedirs(save_directory, exist_ok=True)
47
- model.save_pretrained(save_directory)
48
- tokenizer.save_pretrained(save_directory)
49
-
50
- # Quantize Model (Make Lightweight)
51
- def quantize_model(model_path):
52
- model = AutoModelForSequenceClassification.from_pretrained(model_path)
53
- model.to(torch.device('cpu'))
54
- model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)
55
- quantized_model_path = model_path + "_quantized"
56
- os.makedirs(quantized_model_path, exist_ok=True)
57
- model.save_pretrained(quantized_model_path)
58
- tokenizer.save_pretrained(quantized_model_path)
59
- print(f"Quantized model saved to {quantized_model_path}")
60
-
61
- quantize_model(save_directory)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
multi_language_rl_model/utils/data_preprocessing.py DELETED
File without changes