Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -18,13 +18,23 @@ import gradio as gr
|
|
| 18 |
from transformers import AutoModel, AutoTokenizer
|
| 19 |
import torch
|
| 20 |
from sklearn.model_selection import train_test_split
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
# Load your dataset
|
| 23 |
def load_dataset():
|
| 24 |
-
file_path =
|
| 25 |
-
|
|
|
|
| 26 |
if not os.path.exists(file_path):
|
| 27 |
-
|
|
|
|
| 28 |
|
| 29 |
try:
|
| 30 |
df = pd.read_excel(file_path)
|
|
@@ -36,8 +46,8 @@ def load_dataset():
|
|
| 36 |
|
| 37 |
# Preprocess the data
|
| 38 |
def preprocess_data(df):
|
| 39 |
-
#
|
| 40 |
-
# For
|
| 41 |
return df
|
| 42 |
|
| 43 |
# Train your model
|
|
@@ -49,8 +59,8 @@ def train_model(df):
|
|
| 49 |
tokenizer = AutoTokenizer.from_pretrained("Alibaba-NLP/gte-multilingual-base")
|
| 50 |
model = AutoModel.from_pretrained("Alibaba-NLP/gte-multilingual-base")
|
| 51 |
|
| 52 |
-
#
|
| 53 |
-
#
|
| 54 |
return model
|
| 55 |
|
| 56 |
# Define the Gradio interface function
|
|
@@ -92,3 +102,4 @@ if __name__ == "__main__":
|
|
| 92 |
print("Failed to build the Gradio interface. Please check the dataset and model.")
|
| 93 |
|
| 94 |
|
|
|
|
|
|
| 18 |
from transformers import AutoModel, AutoTokenizer
|
| 19 |
import torch
|
| 20 |
from sklearn.model_selection import train_test_split
|
| 21 |
+
from google.colab import files
|
| 22 |
+
|
| 23 |
+
# Upload the dataset if running in Google Colab
|
| 24 |
+
def upload_dataset():
|
| 25 |
+
uploaded = files.upload() # This will prompt the file upload
|
| 26 |
+
file_name = list(uploaded.keys())[0]
|
| 27 |
+
file_path = f'/content/{file_name}'
|
| 28 |
+
return file_path
|
| 29 |
|
| 30 |
# Load your dataset
|
| 31 |
def load_dataset():
|
| 32 |
+
file_path = '/content/Valid-part-2.xlsx' # Default path if the file is uploaded manually to Colab
|
| 33 |
+
|
| 34 |
+
# Check if the file exists
|
| 35 |
if not os.path.exists(file_path):
|
| 36 |
+
print(f"File not found at '{file_path}', prompting file upload...")
|
| 37 |
+
file_path = upload_dataset() # Upload if not found
|
| 38 |
|
| 39 |
try:
|
| 40 |
df = pd.read_excel(file_path)
|
|
|
|
| 46 |
|
| 47 |
# Preprocess the data
|
| 48 |
def preprocess_data(df):
|
| 49 |
+
# Add your preprocessing steps here
|
| 50 |
+
# For example: cleaning, tokenization, etc.
|
| 51 |
return df
|
| 52 |
|
| 53 |
# Train your model
|
|
|
|
| 59 |
tokenizer = AutoTokenizer.from_pretrained("Alibaba-NLP/gte-multilingual-base")
|
| 60 |
model = AutoModel.from_pretrained("Alibaba-NLP/gte-multilingual-base")
|
| 61 |
|
| 62 |
+
# Add your training code here
|
| 63 |
+
# This may involve tokenizing the data and feeding it into the model
|
| 64 |
return model
|
| 65 |
|
| 66 |
# Define the Gradio interface function
|
|
|
|
| 102 |
print("Failed to build the Gradio interface. Please check the dataset and model.")
|
| 103 |
|
| 104 |
|
| 105 |
+
|