Update app.py
app.py CHANGED
@@ -1,94 +1,67 @@
-import …
 import torch
-from …
-from …

-# …
 model_name = "xlm-roberta-base"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

-# …
-…
-    "Water freezes at 0 degrees Celsius.",
-    "The sun rises in the west.",
-    "Dogs can fly in the sky.",
-    "Birds lay eggs.",
-    "The earth is flat.",
-    "Fish can swim in water.",
-    "Humans can live without oxygen.",
-    "Plants need sunlight to grow.",
-    "Cars run on milk.",
-    "The moon orbits the earth."
-]
-train_labels = [
-    1,  # Correct
-    0,  # Incorrect
-    0,  # Incorrect
-    1,  # Correct
-    0,  # Incorrect
-    1,  # Correct
-    0,  # Incorrect
-    1,  # Correct
-    0,  # Incorrect
-    1   # Correct
-]
-
-# Create Dataset class
-class TextDataset(Dataset):
-    def __init__(self, texts, labels, tokenizer):
-        self.texts = texts
-        self.labels = labels
-        self.tokenizer = tokenizer
-
-    def __len__(self):
-        return len(self.texts)

-    …
-
-# …
-…
-train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)

-# …
-…

-# …
-…

-…
-    loss = outputs.loss
-    loss.backward()
-    optimizer.step()
-    optimizer.zero_grad()
-    print(f"Epoch {epoch+1} completed")

-# …

-# …
-…
-    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=128)
-    inputs = {k: v.to(device) for k, v in inputs.items()}
     with torch.no_grad():
         output = model(**inputs)
-    …

-…
-    fn=classify_text,
-    inputs=gr.Textbox(label="Enter Text"),
-    outputs="text",
-    title="Multi-Language RL Model (Trained)"
-)

-…
+import random
+import numpy as np
 import torch
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+from sklearn.preprocessing import LabelEncoder

+# Initialize the model and tokenizer
 model_name = "xlm-roberta-base"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

+# Define action space: [0 = Incorrect, 1 = Correct]
+action_space = [0, 1]

+# Define environment (simplified): classify text
+text_data = [
+    ("Water freezes at 0°C.", 1),       # Correct
+    ("The sun rises in the west.", 0),  # Incorrect
+    ("Dogs can fly in the sky.", 0),    # Incorrect
+    ("Birds lay eggs.", 1),             # Correct
+    ("The Earth is flat.", 0),          # Incorrect
+]

+# Q-learning parameters
+alpha = 0.1        # Learning rate
+gamma = 0.9        # Discount factor
+epsilon = 0.1      # Exploration rate
+n_episodes = 1000  # Number of episodes for training
+q_table = np.zeros((len(text_data), len(action_space)))  # Q-table (state x action)

+# Reward system (simplified)
+def get_reward(text, predicted_label):
+    correct_label = next((label for t, label in text_data if t == text), None)
+    if predicted_label == correct_label:
+        return 1   # Reward for correct classification
+    return -1      # Penalty for incorrect classification

+# Q-learning training loop
+for episode in range(n_episodes):
+    state = random.choice(text_data)  # Random initial text input
+    state_idx = text_data.index(state)

+    # Exploration vs Exploitation
+    if random.uniform(0, 1) < epsilon:
+        action = random.choice(action_space)    # Exploration: random action
+    else:
+        action = np.argmax(q_table[state_idx])  # Exploitation: choose best action based on Q-table

+    # Get the model's prediction for the current text
+    inputs = tokenizer(state[0], return_tensors="pt")
     with torch.no_grad():
         output = model(**inputs)
+    predicted_label = torch.argmax(output.logits, dim=1).item()
+
+    # Get reward from environment
+    reward = get_reward(state[0], predicted_label)
+
+    # Update Q-table using Q-learning update rule
+    next_state_idx = random.choice(range(len(text_data)))  # Random next state (simplified)
+    best_next_action = np.argmax(q_table[next_state_idx])
+    q_table[state_idx, action] = (1 - alpha) * q_table[state_idx, action] + alpha * (reward + gamma * q_table[next_state_idx, best_next_action])

+    if episode % 100 == 0:
+        print(f"Episode {episode + 1}/{n_episodes}, Reward: {reward}")

+# After training, the Q-table records which action (label) scored best for each text state
+print("Training complete.")
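A minimal sketch (not part of this commit) of how the learned Q-table could be queried once training finishes, assuming the q_table, text_data, and action_space variables defined in the new app.py; the predict_label helper is hypothetical:

import numpy as np

# Hypothetical helper: greedy lookup in the learned Q-table.
# Only texts present in text_data have a state, since the agent is tabular.
def predict_label(text):
    for state_idx, (known_text, _) in enumerate(text_data):
        if known_text == text:
            # Pick the action (0 = Incorrect, 1 = Correct) with the highest Q-value
            return action_space[int(np.argmax(q_table[state_idx]))]
    return None  # Unseen text: no corresponding row in the Q-table

print(predict_label("Birds lay eggs."))  # Prints whichever label currently has the higher Q-value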