# Install necessary libraries at runtime
import subprocess
import sys

# Install a package via pip using the current Python interpreter
def install(package):
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])

# Ensure the necessary packages are installed
install("transformers")
install("torch")
install("pandas")
install("scikit-learn")
install("gradio")
import os
import pandas as pd
import gradio as gr
from transformers import AutoModel, AutoTokenizer
import torch
from sklearn.model_selection import train_test_split
# Load your dataset
def load_dataset():
    file_path = "Valid-part-2.xlsx"
    print(f"Current working directory: {os.getcwd()}")
    if not os.path.exists(file_path):
        # Return None so the caller can report the failure instead of crashing
        print(f"Dataset not found. Please ensure that '{file_path}' exists.")
        return None
    try:
        df = pd.read_excel(file_path)
        print("Columns in the dataset:", df.columns.tolist())
        return df
    except Exception as e:
        print(f"Error loading dataset: {e}")
        return None
# Preprocess the data
def preprocess_data(df):
    # Example preprocessing: add more steps as needed.
    # For now, the dataframe is returned as is.
    return df
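
# A minimal sketch of what preprocess_data could do instead of a pass-through.
# The column names in "Valid-part-2.xlsx" are unknown here, so only generic,
# column-agnostic steps are shown; example_preprocess is an illustrative helper,
# not something the app calls.
def example_preprocess(df):
    df = df.dropna(how="all")      # drop completely empty rows
    df = df.drop_duplicates()      # drop exact duplicate rows
    # Trim surrounding whitespace in text-like columns
    for col in df.select_dtypes(include="object").columns:
        df[col] = df[col].astype(str).str.strip()
    return df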
# Train your model
def train_model(df):
    # Split the dataset into training and testing sets
    train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
    # Load the pre-trained model and tokenizer from Hugging Face.
    # This checkpoint ships custom modelling code, so trust_remote_code is required.
    tokenizer = AutoTokenizer.from_pretrained("Alibaba-NLP/gte-multilingual-base")
    model = AutoModel.from_pretrained("Alibaba-NLP/gte-multilingual-base", trust_remote_code=True)
    # Normally, you would fine-tune the model here with train_df and evaluate on test_df.
    # Since this is an example, the pre-trained model is returned as is.
    return model
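
# A minimal sketch of one way to train something on top of the frozen encoder:
# embed each row with mean pooling and fit a scikit-learn classifier on the
# embeddings. The "text" and "label" column names are assumptions (the real
# dataset columns are unknown), and embed_texts / train_classifier_sketch are
# illustrative helpers only.
from sklearn.linear_model import LogisticRegression

def embed_texts(texts, tokenizer, model):
    # Mean-pool the last hidden state over non-padding tokens
    inputs = tokenizer(list(texts), return_tensors="pt",
                       padding=True, truncation=True, max_length=128)
    with torch.no_grad():
        hidden = model(**inputs).last_hidden_state
    mask = inputs["attention_mask"].unsqueeze(-1)
    return ((hidden * mask).sum(1) / mask.sum(1)).numpy()

def train_classifier_sketch(train_df, tokenizer, model):
    X = embed_texts(train_df["text"], tokenizer, model)
    y = train_df["label"]
    clf = LogisticRegression(max_iter=1000)
    clf.fit(X, y)
    return clf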
# Define the Gradio interface function
def predict(input_text):
    # Load the model and tokenizer (for simplicity; a real app would load these
    # once at module level instead of on every request)
    tokenizer = AutoTokenizer.from_pretrained("Alibaba-NLP/gte-multilingual-base")
    model = AutoModel.from_pretrained("Alibaba-NLP/gte-multilingual-base", trust_remote_code=True)
    # Tokenize the input and run the encoder
    inputs = tokenizer(input_text, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    # Process the outputs as needed (e.g., extracting relevant information).
    # The Gradio output component is plain text, so return a string rather than a raw tensor.
    embeddings = outputs.last_hidden_state
    return f"Token embeddings with shape {tuple(embeddings.shape)}"
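
# A sketch of an alternative predict function that returns a preview of a single
# pooled sentence embedding instead of the embedding shape. It reuses the
# embed_texts helper sketched above; predict_embedding_preview is illustrative
# and is not wired into the interface below.
def predict_embedding_preview(input_text):
    tokenizer = AutoTokenizer.from_pretrained("Alibaba-NLP/gte-multilingual-base")
    model = AutoModel.from_pretrained("Alibaba-NLP/gte-multilingual-base", trust_remote_code=True)
    vector = embed_texts([input_text], tokenizer, model)[0]
    # Show only the first few dimensions so the output stays readable in a textbox
    preview = ", ".join(f"{v:.4f}" for v in vector[:8])
    return f"{len(vector)}-dim embedding, first values: [{preview}, ...]"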
# Build the Gradio interface
def build_interface():
    df = load_dataset()  # Load your dataset
    if df is None:
        return None
    df = preprocess_data(df)  # Preprocess the dataset
    model = train_model(df)  # Train your model (not yet wired into predict)
    iface = gr.Interface(
        fn=predict,
        inputs=gr.Textbox(lines=2, placeholder="Enter text here..."),
        outputs="text"
    )
    return iface
# Run the Gradio interface
if __name__ == "__main__":
    iface = build_interface()
    if iface:
        iface.launch()
    else:
        print("Failed to build the Gradio interface. Please check the dataset and model.")