SivaMallikarjun committed
Commit dfd8ba5 · verified · 1 Parent(s): 6281c34

Update app.py

Files changed (1)
  1. app.py +51 -78
app.py CHANGED
@@ -1,94 +1,67 @@
- import gradio as gr
  import torch
- from torch.utils.data import DataLoader, Dataset
- from transformers import AutoModelForSequenceClassification, AutoTokenizer, AdamW

- # Load model and tokenizer
  model_name = "xlm-roberta-base"
  tokenizer = AutoTokenizer.from_pretrained(model_name)
  model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

- # Prepare a custom dataset
- train_texts = [
-     "Water freezes at 0 degrees Celsius.",
-     "The sun rises in the west.",
-     "Dogs can fly in the sky.",
-     "Birds lay eggs.",
-     "The earth is flat.",
-     "Fish can swim in water.",
-     "Humans can live without oxygen.",
-     "Plants need sunlight to grow.",
-     "Cars run on milk.",
-     "The moon orbits the earth."
- ]
- train_labels = [
-     1,  # Correct
-     0,  # Incorrect
-     0,  # Incorrect
-     1,  # Correct
-     0,  # Incorrect
-     1,  # Correct
-     0,  # Incorrect
-     1,  # Correct
-     0,  # Incorrect
-     1   # Correct
- ]
-
- # Create Dataset class
- class TextDataset(Dataset):
-     def __init__(self, texts, labels, tokenizer):
-         self.texts = texts
-         self.labels = labels
-         self.tokenizer = tokenizer
-
-     def __len__(self):
-         return len(self.texts)
-
-     def __getitem__(self, idx):
-         encodings = self.tokenizer(self.texts[idx], truncation=True, padding="max_length", max_length=128, return_tensors="pt")
-         item = {key: val.squeeze(0) for key, val in encodings.items()}
-         item['labels'] = torch.tensor(self.labels[idx])
-         return item
-
- # Load Dataset
- train_dataset = TextDataset(train_texts, train_labels, tokenizer)
- train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)
-
- # Define optimizer
- optimizer = AdamW(model.parameters(), lr=5e-5)
-
- # Fine-tuning loop
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
- model.to(device)
- model.train()
-
- for epoch in range(5):  # Train for 5 epochs
-     for batch in train_loader:
-         batch = {k: v.to(device) for k, v in batch.items()}
-         outputs = model(**batch)
-         loss = outputs.loss
-         loss.backward()
-         optimizer.step()
-         optimizer.zero_grad()
-     print(f"Epoch {epoch+1} completed")
-
- # Now model is fine-tuned!
-
- # Define prediction function
- def classify_text(text):
-     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=128)
-     inputs = {k: v.to(device) for k, v in inputs.items()}
      with torch.no_grad():
          output = model(**inputs)
-     label = torch.argmax(output.logits, dim=1).item()
-     return "Correct" if label == 1 else "Incorrect"
-
- # Gradio UI
- gradio_app = gr.Interface(
-     fn=classify_text,
-     inputs=gr.Textbox(label="Enter Text"),
-     outputs="text",
-     title="Multi-Language RL Model (Trained)"
- )
-
- gradio_app.launch()
+ import random
+ import numpy as np
  import torch
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
+ from sklearn.preprocessing import LabelEncoder

+ # Initialize the model and tokenizer
  model_name = "xlm-roberta-base"
  tokenizer = AutoTokenizer.from_pretrained(model_name)
  model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

+ # Define action space: [0 = Incorrect, 1 = Correct]
+ action_space = [0, 1]
+
+ # Define environment (simplified): classify text
+ text_data = [
+     ("Water freezes at 0°C.", 1),  # Correct
+     ("The sun rises in the west.", 0),  # Incorrect
+     ("Dogs can fly in the sky.", 0),  # Incorrect
+     ("Birds lay eggs.", 1),  # Correct
+     ("The Earth is flat.", 0),  # Incorrect
+ ]
+
+ # Q-learning parameters
+ alpha = 0.1  # Learning rate
+ gamma = 0.9  # Discount factor
+ epsilon = 0.1  # Exploration rate
+ n_episodes = 1000  # Number of episodes for training
+ q_table = np.zeros((len(text_data), len(action_space)))  # Q-table (state x action)
+
+ # Reward system (simplified)
+ def get_reward(text, predicted_label):
+     correct_label = next((label for t, label in text_data if t == text), None)
+     if predicted_label == correct_label:
+         return 1  # Reward for correct classification
+     return -1  # Penalty for incorrect classification
+
+ # Q-learning training loop
+ for episode in range(n_episodes):
+     state = random.choice(text_data)  # Random initial text input
+     state_idx = text_data.index(state)
+
+     # Exploration vs Exploitation
+     if random.uniform(0, 1) < epsilon:
+         action = random.choice(action_space)  # Exploration: random action
+     else:
+         action = np.argmax(q_table[state_idx])  # Exploitation: choose best action based on Q-table
+
+     # Make a prediction based on action
+     inputs = tokenizer(state[0], return_tensors="pt")
      with torch.no_grad():
          output = model(**inputs)
+     predicted_label = torch.argmax(output.logits, dim=1).item()
+
+     # Get reward from environment
+     reward = get_reward(state[0], predicted_label)
+
+     # Update Q-table using Q-learning update rule
+     next_state_idx = random.choice(range(len(text_data)))  # Random next state (simplified)
+     best_next_action = np.argmax(q_table[next_state_idx])
+     q_table[state_idx, action] = (1 - alpha) * q_table[state_idx, action] + alpha * (reward + gamma * q_table[next_state_idx, best_next_action])
+
+     if episode % 100 == 0:
+         print(f"Episode {episode + 1}/{n_episodes}, Reward: {reward}")
+
+ # After training, the model should have learned which actions (labels) to take
+ print("Training complete.")
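
This revision drops the classify_text prediction helper and the Gradio interface that the previous version launched. Below is a minimal sketch of how the learned Q-table could be served through the same kind of gr.Interface as before; it assumes the text_data, q_table, tokenizer, and model objects defined in the new app.py, and the interface title here is only a placeholder.

import gradio as gr
import numpy as np
import torch

# Assumes text_data, q_table, tokenizer, and model from the app.py above.
def classify_text(text):
    # Greedy policy: if the text is one of the known states, act on its Q-values.
    for idx, (known_text, _) in enumerate(text_data):
        if known_text == text:
            action = int(np.argmax(q_table[idx]))
            return "Correct" if action == 1 else "Incorrect"
    # Unseen text: fall back to the classifier head (not fine-tuned in this script).
    inputs = tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits
    return "Correct" if torch.argmax(logits, dim=1).item() == 1 else "Incorrect"

gradio_app = gr.Interface(
    fn=classify_text,
    inputs=gr.Textbox(label="Enter Text"),
    outputs="text",
    title="Multi-Language RL Model (Q-learning)",
)
gradio_app.launch()

The fallback path mirrors the previous revision's classify_text, so inputs outside the five training sentences still receive a label even though the Q-table only covers those states.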