Spaces:
Sleeping
Update tasks/text.py
Browse files
- tasks/text.py +6 -48
tasks/text.py
CHANGED
@@ -6,17 +6,17 @@ import random
|
|
6 |
|
7 |
from .utils.evaluation import TextEvaluationRequest
|
8 |
from .utils.emissions import tracker, clean_emissions_data, get_space_info
|
|
|
|
|
9 |
|
10 |
#packages needed for inference
|
11 |
-
from sentence_transformers import SentenceTransformer
|
12 |
-
from xgboost import XGBClassifier
|
13 |
import pickle
|
14 |
import torch
|
15 |
import os
|
16 |
|
17 |
router = APIRouter()
|
18 |
|
19 |
-
DESCRIPTION = "
|
20 |
ROUTE = "/text"
|
21 |
|
22 |
@router.post(ROUTE, tags=["Text Task"],
|
@@ -62,59 +62,17 @@ async def evaluate_text(request: TextEvaluationRequest):
|
|
62 |
#--------------------------------------------------------------------------------------------
|
63 |
# YOUR MODEL INFERENCE CODE HERE
|
64 |
|
65 |
-
# Set the device to MPS (if available)
|
66 |
-
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
|
67 |
-
print(f"Using device: {device}")
|
68 |
-
|
69 |
-
model_name = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2" # You can use other Sentence Transformers models as needed
|
70 |
-
sentence_model = SentenceTransformer(model_name)
|
71 |
|
72 |
-
# Convert each sentence into a vector representation (embedding)
|
73 |
-
embeddings = sentence_model.encode(test_dataset['quote'], convert_to_tensor=True)
|
74 |
|
75 |
# Make random predictions (placeholder for actual model inference)
|
76 |
true_labels = test_dataset["label"]
|
77 |
|
78 |
-
"""
|
79 |
-
from torch import nn, optim
|
80 |
-
|
81 |
-
class SimpleNN2(nn.Module):
|
82 |
-
def __init__(self, input_dim, output_dim):
|
83 |
-
super(SimpleNN2, self).__init__()
|
84 |
-
self.fc1 = nn.Linear(input_dim, 128) # Reduce hidden units
|
85 |
-
self.fc2 = nn.Linear(128, 64) # Further reduce units
|
86 |
-
self.fc3 = nn.Linear(64, output_dim)
|
87 |
-
self.relu = nn.ReLU()
|
88 |
-
self.dropout = nn.Dropout(0.3) # Add dropout
|
89 |
-
self.batch_norm1 = nn.BatchNorm1d(128)
|
90 |
-
self.batch_norm2 = nn.BatchNorm1d(64)
|
91 |
-
|
92 |
-
def forward(self, x):
|
93 |
-
x = self.relu(self.batch_norm1(self.fc1(x)))
|
94 |
-
x = self.dropout(x) # Apply dropout
|
95 |
-
x = self.relu(self.batch_norm2(self.fc2(x)))
|
96 |
-
x = self.dropout(x) # Apply dropout
|
97 |
-
x = self.fc3(x) # Output raw logits
|
98 |
-
return x
|
99 |
-
"""
|
100 |
-
|
101 |
current_file_path = os.path.abspath(__file__)
|
102 |
current_dir = os.path.dirname(current_file_path)
|
103 |
|
104 |
-
#
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
# Set the model to evaluation mode
|
109 |
-
model_nn.eval()
|
110 |
-
|
111 |
-
# Make predictions
|
112 |
-
with torch.no_grad():
|
113 |
-
outputs = model_nn(embeddings)
|
114 |
-
_, predicted = torch.max(outputs, 1) # Get the class with the highest score
|
115 |
-
|
116 |
-
# Decode the predictions back to original labels using label_encoder
|
117 |
-
predictions = predicted.cpu().numpy()
|
118 |
|
119 |
#--------------------------------------------------------------------------------------------
|
120 |
# YOUR MODEL INFERENCE STOPS HERE
|
|
|
6 |
|
7 |
from .utils.evaluation import TextEvaluationRequest
|
8 |
from .utils.emissions import tracker, clean_emissions_data, get_space_info
|
9 |
+
from .utils.preprocessing import process_text,predict
|
10 |
+
|
11 |
|
12 |
#packages needed for inference
|
|
|
|
|
13 |
import pickle
|
14 |
import torch
|
15 |
import os
|
16 |
|
17 |
router = APIRouter()
|
18 |
|
19 |
+
DESCRIPTION = "TF-IDF + RF"
|
20 |
ROUTE = "/text"
|
21 |
|
22 |
@router.post(ROUTE, tags=["Text Task"],
|
|
|
62 |
#--------------------------------------------------------------------------------------------
|
63 |
# YOUR MODEL INFERENCE CODE HERE
|
64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
|
|
|
|
|
66 |
|
67 |
# Make random predictions (placeholder for actual model inference)
|
68 |
true_labels = test_dataset["label"]
|
69 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
current_file_path = os.path.abspath(__file__)
|
71 |
current_dir = os.path.dirname(current_file_path)
|
72 |
|
73 |
+
# Make predictions using the loaded model
|
74 |
+
predictions = predict(test_dataset, os.path.join(current_dir,"tf-idf_vectorizer.pkl") ,os.path.join(current_dir,"random_forest_model.pkl"))
|
75 |
+
predictions = [LABEL_MAPPING[label] for label in predictions]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
|
77 |
#--------------------------------------------------------------------------------------------
|
78 |
# YOUR MODEL INFERENCE STOPS HERE
|