Spaces:

atifsial123
/

Engineer

Sleeping

App Files Files Community

Engineer / app.py

atifsial123

Update app.py

99fbda0 verified 10 months ago

raw

history blame

2.95 kB

	# Install necessary libraries
	import os
	import subprocess

	def install(package):
	    """Install ``package`` with pip unless it is already installed.

	    Presence is checked through distribution metadata rather than by trying
	    to import the package, because the pip name and the import name can
	    differ (``scikit-learn`` is imported as ``sklearn``).

	    Args:
	        package: Distribution name exactly as it is passed to ``pip install``.

	    Raises:
	        subprocess.CalledProcessError: If the pip command exits non-zero.
	    """
	    # Imported here so this function does not rely on ``os.sys``, an
	    # undocumented alias, or on changes to the file's import header.
	    import sys
	    from importlib import metadata

	    try:
	        metadata.distribution(package)
	    except metadata.PackageNotFoundError:
	        # Not installed: ask the pip that belongs to the running interpreter.
	        subprocess.check_call([sys.executable, "-m", "pip", "install", package])

	# Install every runtime dependency, in the same order as before.
	for _package in ("transformers", "torch", "pandas", "scikit-learn", "gradio"):
	    install(_package)
	import os
	import pandas as pd
	import gradio as gr
	from transformers import AutoModel, AutoTokenizer
	import torch
	from sklearn.model_selection import train_test_split

	def load_dataset(file_path="Valid-part-2.xlsx"):
	    """Load the Excel dataset into a DataFrame.

	    Args:
	        file_path: Path of the workbook to read. Defaults to the file the app
	            was written for, so existing no-argument calls behave as before.

	    Returns:
	        The ``pandas.DataFrame`` read from ``file_path``, or ``None`` when the
	        file exists but could not be read (the error is printed).

	    Raises:
	        FileNotFoundError: If ``file_path`` does not exist.
	    """
	    # Printed to help diagnose relative-path problems in the deployment.
	    print(f"Current working directory: {os.getcwd()}")
	    if not os.path.exists(file_path):
	        raise FileNotFoundError(f"Dataset not found. Please ensure that '{file_path}' exists.")

	    # NOTE(review): pandas needs the optional ``openpyxl`` package to read
	    # .xlsx files, and it is not in this file's install list - confirm it is
	    # available where the app runs.
	    try:
	        df = pd.read_excel(file_path)
	    except Exception as e:
	        # Deliberately best-effort: callers treat None as "dataset unavailable".
	        print(f"Error loading dataset: {e}")
	        return None

	    print("Columns in the dataset:", df.columns.tolist())
	    return df

	def preprocess_data(df):
	    """Return the dataset prepared for training.

	    No preprocessing steps exist yet, so the very same object that was passed
	    in is handed back; cleaning or feature steps can be added here later.
	    """
	    prepared = df
	    return prepared

	def train_model(df):
	    """Split the dataset and load the pre-trained base model.

	    No fine-tuning is performed yet: the train/test split is a placeholder
	    for a future training step and the base model is returned unchanged.

	    Args:
	        df: Dataset handed to ``train_test_split`` (80/20 split, fixed seed).

	    Returns:
	        The model loaded from "Alibaba-NLP/gte-multilingual-base".
	    """
	    # Kept for the future fine-tuning step; the underscore prefix marks the
	    # two partitions as intentionally unused for now.
	    _train_df, _test_df = train_test_split(df, test_size=0.2, random_state=42)

	    # NOTE: per the checkpoint's model card it relies on custom modeling code
	    # hosted on the Hub, which AutoModel only loads when trust_remote_code=True.
	    # The tokenizer is no longer loaded here because nothing in this function
	    # used it.
	    model = AutoModel.from_pretrained(
	        "Alibaba-NLP/gte-multilingual-base",
	        trust_remote_code=True,
	    )

	    # Normally the model would be fine-tuned on the training split here.
	    return model

	# Checkpoint used for inference, and a cache so it is only loaded once.
	_PREDICT_MODEL_NAME = "Alibaba-NLP/gte-multilingual-base"
	_PREDICT_CACHE = {}


	def _load_predict_resources():
	    """Return ``(tokenizer, model)``, loading them on the first call only."""
	    resources = _PREDICT_CACHE.get("resources")
	    if resources is None:
	        tokenizer = AutoTokenizer.from_pretrained(_PREDICT_MODEL_NAME)
	        # NOTE: per the checkpoint's model card it relies on custom modeling
	        # code hosted on the Hub, hence trust_remote_code=True.
	        model = AutoModel.from_pretrained(_PREDICT_MODEL_NAME, trust_remote_code=True)
	        # Stored as one tuple so a failed load never leaves a half-filled cache.
	        resources = _PREDICT_CACHE["resources"] = (tokenizer, model)
	    return resources


	def predict(input_text):
	    """Run the base model on ``input_text`` (the Gradio callback).

	    The tokenizer and model are loaded on the first call and reused
	    afterwards instead of being reloaded for every request.

	    Args:
	        input_text: Text entered by the user.

	    Returns:
	        ``outputs.last_hidden_state`` as produced by the model.
	    """
	    tokenizer, model = _load_predict_resources()

	    # Tokenize into PyTorch tensors and run a forward pass without gradients.
	    inputs = tokenizer(input_text, return_tensors="pt")
	    with torch.no_grad():
	        outputs = model(**inputs)

	    return outputs.last_hidden_state

	def build_interface():
	    """Load the dataset, run the placeholder training step and build the UI.

	    Returns:
	        A ``gr.Interface`` whose callback is ``predict``, or ``None`` when
	        ``load_dataset`` returned ``None``.

	    Raises:
	        FileNotFoundError: Propagated from ``load_dataset`` when the dataset
	            file is missing.
	    """
	    df = load_dataset()
	    if df is None:
	        return None

	    df = preprocess_data(df)
	    # The returned model is not kept: predict() loads the model it uses itself.
	    train_model(df)

	    return gr.Interface(
	        fn=predict,
	        # The gr.inputs namespace was removed in Gradio 4; the top-level
	        # component accepts the same arguments.
	        inputs=gr.Textbox(lines=2, placeholder="Enter text here..."),
	        outputs="text",
	    )

	# Script entry point: build the interface and serve it, or report the failure.
	if __name__ == "__main__":
	    iface = build_interface()
	    if not iface:
	        print("Failed to build the Gradio interface. Please check the dataset and model.")
	    else:
	        iface.launch()