Update app.py
app.py CHANGED
@@ -1,94 +1,67 @@
-import …
 import torch
-from …
-from …

-# …
 model_name = "xlm-roberta-base"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

-# …
-…
-    "Water freezes at 0 degrees Celsius.",
-    "The sun rises in the west.",
-    "Dogs can fly in the sky.",
-    "Birds lay eggs.",
-    "The earth is flat.",
-    "Fish can swim in water.",
-    "Humans can live without oxygen.",
-    "Plants need sunlight to grow.",
-    "Cars run on milk.",
-    "The moon orbits the earth."
-]
-train_labels = [
-    1,  # Correct
-    0,  # Incorrect
-    0,  # Incorrect
-    1,  # Correct
-    0,  # Incorrect
-    1,  # Correct
-    0,  # Incorrect
-    1,  # Correct
-    0,  # Incorrect
-    1   # Correct
-]
-
-# Create Dataset class
-class TextDataset(Dataset):
-    def __init__(self, texts, labels, tokenizer):
-        self.texts = texts
-        self.labels = labels
-        self.tokenizer = tokenizer
-
-    def __len__(self):
-        return len(self.texts)

-    …
-
-# …
-…
-train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)

-# …
-…

-# …
-…

-…
-    loss = outputs.loss
-    loss.backward()
-    optimizer.step()
-    optimizer.zero_grad()
-    print(f"Epoch {epoch+1} completed")

-# …

-# …
-…
-    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=128)
-    inputs = {k: v.to(device) for k, v in inputs.items()}
     with torch.no_grad():
         output = model(**inputs)
-    …

-…
-    fn=classify_text,
-    inputs=gr.Textbox(label="Enter Text"),
-    outputs="text",
-    title="Multi-Language RL Model (Trained)"
-)

-…
+import random
+import numpy as np
 import torch
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+from sklearn.preprocessing import LabelEncoder

+# Initialize the model and tokenizer
 model_name = "xlm-roberta-base"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

+# Define action space: [0 = Incorrect, 1 = Correct]
+action_space = [0, 1]

+# Define environment (simplified): classify text
+text_data = [
+    ("Water freezes at 0°C.", 1),       # Correct
+    ("The sun rises in the west.", 0),  # Incorrect
+    ("Dogs can fly in the sky.", 0),    # Incorrect
+    ("Birds lay eggs.", 1),             # Correct
+    ("The Earth is flat.", 0),          # Incorrect
+]

+# Q-learning parameters
+alpha = 0.1        # Learning rate
+gamma = 0.9        # Discount factor
+epsilon = 0.1      # Exploration rate
+n_episodes = 1000  # Number of episodes for training
+q_table = np.zeros((len(text_data), len(action_space)))  # Q-table (state x action)

+# Reward system (simplified)
+def get_reward(text, predicted_label):
+    correct_label = next((label for t, label in text_data if t == text), None)
+    if predicted_label == correct_label:
+        return 1   # Reward for correct classification
+    return -1      # Penalty for incorrect classification

+# Q-learning training loop
+for episode in range(n_episodes):
+    state = random.choice(text_data)  # Random initial text input
+    state_idx = text_data.index(state)

+    # Exploration vs Exploitation
+    if random.uniform(0, 1) < epsilon:
+        action = random.choice(action_space)    # Exploration: random action
+    else:
+        action = np.argmax(q_table[state_idx])  # Exploitation: choose best action based on Q-table

+    # Get the model's prediction for the current text
+    inputs = tokenizer(state[0], return_tensors="pt")
     with torch.no_grad():
         output = model(**inputs)
+    predicted_label = torch.argmax(output.logits, dim=1).item()
+
+    # Get reward from environment
+    reward = get_reward(state[0], predicted_label)
+
+    # Update Q-table using Q-learning update rule
+    next_state_idx = random.choice(range(len(text_data)))  # Random next state (simplified)
+    best_next_action = np.argmax(q_table[next_state_idx])
+    q_table[state_idx, action] = (1 - alpha) * q_table[state_idx, action] + alpha * (reward + gamma * q_table[next_state_idx, best_next_action])

+    if episode % 100 == 0:
+        print(f"Episode {episode + 1}/{n_episodes}, Reward: {reward}")

+# After training, the Q-table records which action (label) scored best for each text state
+print("Training complete.")
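A minimal sketch (not part of this commit) of how the learned Q-table could be queried once training finishes, assuming the q_table, text_data, and action_space variables defined in the new app.py; the predict_label helper is hypothetical:

import numpy as np

# Hypothetical helper: greedy lookup in the learned Q-table.
# Only texts present in text_data have a state, since the agent is tabular.
def predict_label(text):
    for state_idx, (known_text, _) in enumerate(text_data):
        if known_text == text:
            # Pick the action (0 = Incorrect, 1 = Correct) with the highest Q-value
            return action_space[int(np.argmax(q_table[state_idx]))]
    return None  # Unseen text: no corresponding row in the Q-table

print(predict_label("Birds lay eggs."))  # Prints whichever label currently has the higher Q-value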