SivaMallikarjun committed
Commit 6281c34 · verified · 1 Parent(s): 90b8d44

Update app.py

Files changed (1)
  1. app.py +72 -5
app.py CHANGED
@@ -1,15 +1,83 @@
 import gradio as gr
 import torch
-from transformers import AutoModelForSequenceClassification, AutoTokenizer
+from torch.utils.data import DataLoader, Dataset
+from transformers import AutoModelForSequenceClassification, AutoTokenizer, AdamW
 
-# Load pre-trained model & tokenizer (Example: XLM-R for multilingual text classification)
+# Load model and tokenizer
 model_name = "xlm-roberta-base"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
 
+# Prepare a custom dataset
+train_texts = [
+    "Water freezes at 0 degrees Celsius.",
+    "The sun rises in the west.",
+    "Dogs can fly in the sky.",
+    "Birds lay eggs.",
+    "The earth is flat.",
+    "Fish can swim in water.",
+    "Humans can live without oxygen.",
+    "Plants need sunlight to grow.",
+    "Cars run on milk.",
+    "The moon orbits the earth."
+]
+train_labels = [
+    1,  # Correct
+    0,  # Incorrect
+    0,  # Incorrect
+    1,  # Correct
+    0,  # Incorrect
+    1,  # Correct
+    0,  # Incorrect
+    1,  # Correct
+    0,  # Incorrect
+    1   # Correct
+]
+
+# Create Dataset class
+class TextDataset(Dataset):
+    def __init__(self, texts, labels, tokenizer):
+        self.texts = texts
+        self.labels = labels
+        self.tokenizer = tokenizer
+
+    def __len__(self):
+        return len(self.texts)
+
+    def __getitem__(self, idx):
+        encodings = self.tokenizer(self.texts[idx], truncation=True, padding="max_length", max_length=128, return_tensors="pt")
+        item = {key: val.squeeze(0) for key, val in encodings.items()}
+        item['labels'] = torch.tensor(self.labels[idx])
+        return item
+
+# Load Dataset
+train_dataset = TextDataset(train_texts, train_labels, tokenizer)
+train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)
+
+# Define optimizer
+optimizer = AdamW(model.parameters(), lr=5e-5)
+
+# Fine-tuning loop
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+model.train()
+
+for epoch in range(5):  # Train for 5 epochs
+    for batch in train_loader:
+        batch = {k: v.to(device) for k, v in batch.items()}
+        outputs = model(**batch)
+        loss = outputs.loss
+        loss.backward()
+        optimizer.step()
+        optimizer.zero_grad()
+    print(f"Epoch {epoch+1} completed")
+
+# Now model is fine-tuned!
+
 # Define prediction function
 def classify_text(text):
-    inputs = tokenizer(text, return_tensors="pt")
+    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=128)
+    inputs = {k: v.to(device) for k, v in inputs.items()}
     with torch.no_grad():
         output = model(**inputs)
         label = torch.argmax(output.logits, dim=1).item()
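A note on the optimizer used in the training block added above: the AdamW class imported from transformers has been deprecated in favor of PyTorch's own implementation and may be missing from newer transformers releases. A minimal sketch of the substitution, assuming the same model object and the 5e-5 learning rate used in this commit:

# Hypothetical substitution: PyTorch's AdamW instead of the deprecated
# transformers.AdamW; same learning rate as the committed code.
from torch.optim import AdamW

optimizer = AdamW(model.parameters(), lr=5e-5)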
@@ -20,8 +88,7 @@ gradio_app = gr.Interface(
     fn=classify_text,
     inputs=gr.Textbox(label="Enter Text"),
     outputs="text",
-    title="Multi-Language RL Model"
+    title="Multi-Language RL Model (Trained)"
 )
 
 gradio_app.launch()
-#run it
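For completeness, the gr.Interface created by this version of app.py can also be exercised from Python rather than the browser. A minimal sketch using the gradio_client package; the local URL and the /predict endpoint name are assumptions based on Gradio's defaults for gr.Interface, not something specified in this commit:

# Sketch: call the running Gradio app programmatically.
# Assumes the default local address and the default /predict endpoint.
from gradio_client import Client

client = Client("http://127.0.0.1:7860/")  # or the public Space URL
result = client.predict("Birds lay eggs.", api_name="/predict")
print(result)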