SivaMallikarjun committed
Commit 6281c34 · verified · 1 Parent(s): 90b8d44

Update app.py

Files changed (1)
  1. app.py +72 -5
app.py CHANGED
@@ -1,15 +1,83 @@
 import gradio as gr
 import torch
-from transformers import AutoModelForSequenceClassification, AutoTokenizer
+from torch.utils.data import DataLoader, Dataset
+from transformers import AutoModelForSequenceClassification, AutoTokenizer, AdamW
 
-# Load pre-trained model & tokenizer (Example: XLM-R for multilingual text classification)
+# Load model and tokenizer
 model_name = "xlm-roberta-base"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
 
+# Prepare a custom dataset
+train_texts = [
+    "Water freezes at 0 degrees Celsius.",
+    "The sun rises in the west.",
+    "Dogs can fly in the sky.",
+    "Birds lay eggs.",
+    "The earth is flat.",
+    "Fish can swim in water.",
+    "Humans can live without oxygen.",
+    "Plants need sunlight to grow.",
+    "Cars run on milk.",
+    "The moon orbits the earth."
+]
+train_labels = [
+    1,  # Correct
+    0,  # Incorrect
+    0,  # Incorrect
+    1,  # Correct
+    0,  # Incorrect
+    1,  # Correct
+    0,  # Incorrect
+    1,  # Correct
+    0,  # Incorrect
+    1   # Correct
+]
+
+# Create Dataset class
+class TextDataset(Dataset):
+    def __init__(self, texts, labels, tokenizer):
+        self.texts = texts
+        self.labels = labels
+        self.tokenizer = tokenizer
+
+    def __len__(self):
+        return len(self.texts)
+
+    def __getitem__(self, idx):
+        encodings = self.tokenizer(self.texts[idx], truncation=True, padding="max_length", max_length=128, return_tensors="pt")
+        item = {key: val.squeeze(0) for key, val in encodings.items()}
+        item['labels'] = torch.tensor(self.labels[idx])
+        return item
+
+# Load Dataset
+train_dataset = TextDataset(train_texts, train_labels, tokenizer)
+train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)
+
+# Define optimizer
+optimizer = AdamW(model.parameters(), lr=5e-5)
+
+# Fine-tuning loop
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+model.train()
+
+for epoch in range(5):  # Train for 5 epochs
+    for batch in train_loader:
+        batch = {k: v.to(device) for k, v in batch.items()}
+        outputs = model(**batch)
+        loss = outputs.loss
+        loss.backward()
+        optimizer.step()
+        optimizer.zero_grad()
+    print(f"Epoch {epoch+1} completed")
+
+# Now model is fine-tuned!
+
 # Define prediction function
 def classify_text(text):
-    inputs = tokenizer(text, return_tensors="pt")
+    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=128)
+    inputs = {k: v.to(device) for k, v in inputs.items()}
     with torch.no_grad():
         output = model(**inputs)
         label = torch.argmax(output.logits, dim=1).item()
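A note on the optimizer used in the training block added above: the AdamW class imported from transformers has been deprecated in favor of PyTorch's own implementation and may be missing from newer transformers releases. A minimal sketch of the substitution, assuming the same model object and the 5e-5 learning rate used in this commit:

# Hypothetical substitution: PyTorch's AdamW instead of the deprecated
# transformers.AdamW; same learning rate as the committed code.
from torch.optim import AdamW

optimizer = AdamW(model.parameters(), lr=5e-5)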
@@ -20,8 +88,7 @@ gradio_app = gr.Interface(
     fn=classify_text,
     inputs=gr.Textbox(label="Enter Text"),
     outputs="text",
-    title="Multi-Language RL Model"
+    title="Multi-Language RL Model (Trained)"
 )
 
 gradio_app.launch()
-#run it
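For completeness, the gr.Interface created by this version of app.py can also be exercised from Python rather than the browser. A minimal sketch using the gradio_client package; the local URL and the /predict endpoint name are assumptions based on Gradio's defaults for gr.Interface, not something specified in this commit:

# Sketch: call the running Gradio app programmatically.
# Assumes the default local address and the default /predict endpoint.
from gradio_client import Client

client = Client("http://127.0.0.1:7860/")  # or the public Space URL
result = client.predict("Birds lay eggs.", api_name="/predict")
print(result)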