# Spaces: Sleeping  (page-header residue from the hosting site, kept only as a comment)
import os | |
import pandas as pd | |
import gradio as gr | |
from transformers import AutoModel, AutoTokenizer | |
import torch | |
# Load the dataset containing PEC numbers and names | |
def load_dataset(file_path='PEC_Numbers_and_Names.xlsx'):
    """Read the PEC lookup workbook and return it as a DataFrame.

    Args:
        file_path: Path of the Excel file to read. A relative path is
            resolved by pandas against the current working directory.

    Returns:
        The DataFrame produced by ``pd.read_excel`` for ``file_path``.
        The rest of this file expects it to have ``'PEC No.'`` and
        ``'Name'`` columns.
    """
    return pd.read_excel(file_path)
# Load the model and tokenizer from Hugging Face | |
# Single source of truth for the checkpoint so tokenizer and model cannot drift apart.
_MODEL_ID = "Alibaba-NLP/gte-multilingual-base"

# trust_remote_code=True lets transformers run the custom code shipped in the
# model repository; both objects are created once at import time and reused
# by process_with_model().
tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID, trust_remote_code=True)
model = AutoModel.from_pretrained(_MODEL_ID, trust_remote_code=True)
# Define the function to get the name based on the PEC number | |
def get_name(pec_number, df):
    """Return the name registered for ``pec_number`` in ``df``.

    A row matches when its ``'PEC No.'`` cell equals ``pec_number`` exactly,
    or when the cell and ``pec_number`` are equal after both are converted to
    text and stripped of surrounding whitespace. The second rule matters
    because the UI always supplies text, while a spreadsheet column of
    numbers is loaded with a numeric dtype and would never compare equal to
    a string.

    Args:
        pec_number: The PEC number to look up (text or number).
        df: DataFrame with ``'PEC No.'`` and ``'Name'`` columns.

    Returns:
        The ``'Name'`` value of the first matching row, or the string
        ``"PEC Number not found."`` when no row matches.

    Raises:
        KeyError: If ``df`` lacks the ``'PEC No.'`` or ``'Name'`` column.
    """
    column = df['PEC No.']
    wanted = str(pec_number).strip()

    # Original behaviour: exact, type-sensitive equality.
    exact = column == pec_number
    # Type-insensitive fallback; empty cells are excluded so that NaN
    # (rendered as "nan") can never match user input.
    as_text = column.notna() & (column.astype(str).str.strip() == wanted)

    result = df[exact | as_text]
    if result.empty:
        return "PEC Number not found."
    return result.iloc[0]['Name']
# Function to process the PEC number using the Hugging Face model | |
def process_with_model(pec_number):
    """Encode ``pec_number`` with the module-level tokenizer and model.

    The text is tokenized into PyTorch tensors, passed through the model
    with gradient tracking disabled, and the model's ``last_hidden_state``
    is averaged over dimension 1 (presumably the token axis - confirm
    against the model's output layout).

    Args:
        pec_number: The text to encode.

    Returns:
        The averaged hidden state after ``squeeze()`` and ``tolist()``,
        i.e. plain Python numbers rather than a tensor or a string.
    """
    encoded = tokenizer(pec_number, return_tensors="pt")
    # Inference only: no autograd graph is needed for a forward pass.
    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state
    pooled = hidden_states.mean(dim=1)
    return pooled.squeeze().tolist()
# Combine both functions to create a prediction | |
def predict(pec_number):
    """Build the text shown in the UI for one PEC number.

    Looks the number up in the module-level ``df`` via ``get_name`` and then
    runs it through ``process_with_model``.

    Args:
        pec_number: The PEC number entered by the user.

    Returns:
        A two-line string: ``"Name: <name>"`` followed by
        ``"Model Output: <model output>"``.
    """
    holder_name = get_name(pec_number, df)
    embedding = process_with_model(pec_number)
    return f"Name: {holder_name}\nModel Output: {embedding}"
# Load the dataset | |
# Read the lookup table once at import time; predict() reads this global.
df = load_dataset()

# Single-line text field in which the user types the PEC number.
_pec_input = gr.Textbox(lines=1, placeholder="Enter PEC Number...")

# Wire predict() to a one-input, plain-text-output Gradio interface.
iface = gr.Interface(
    fn=predict,
    inputs=_pec_input,
    outputs="text",
    title="PEC Number Lookup with Model Integration",
    description="Enter a PEC number to retrieve the corresponding name and process it with a Hugging Face model.",
)

# Start the web UI only when executed as a script, not when imported.
if __name__ == "__main__":
    iface.launch()