import os
import pandas as pd
import gradio as gr
from transformers import AutoModel, AutoTokenizer
import torch

# Load the dataset containing PEC numbers and names
def load_dataset(file_path='PEC_Numbers_and_Names.xlsx'):
    df = pd.read_excel(file_path)
    return df

# Load the model and tokenizer from Hugging Face
tokenizer = AutoTokenizer.from_pretrained("Alibaba-NLP/gte-multilingual-base", trust_remote_code=True)
model = AutoModel.from_pretrained("Alibaba-NLP/gte-multilingual-base", trust_remote_code=True)

# Define the function to get the name based on the PEC number
def get_name(pec_number, df):
    # Compare as strings so numeric PEC numbers in the spreadsheet still match the textbox input
    result = df[df['PEC No.'].astype(str).str.strip() == str(pec_number).strip()]
    if not result.empty:
        return result.iloc[0]['Name']
    else:
        return "PEC Number not found."

# Function to process the PEC number using the Hugging Face model
def process_with_model(pec_number):
    inputs = tokenizer(pec_number, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    # Here, we simply return the mean-pooled last hidden state as a list of floats.
    # In a real application, you might want to use this embedding in a more meaningful way.
    return outputs.last_hidden_state.mean(dim=1).squeeze().tolist()

# Combine both functions to create a prediction
def predict(pec_number):
    name = get_name(pec_number, df)
    model_output = process_with_model(pec_number)
    return f"Name: {name}\nModel Output: {model_output}"

# Load the dataset
df = load_dataset()

# Build the Gradio interface
iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=1, placeholder="Enter PEC Number..."),
    outputs="text",
    title="PEC Number Lookup with Model Integration",
    description="Enter a PEC number to retrieve the corresponding name and process it with a Hugging Face model."
)

# Run the Gradio interface
if __name__ == "__main__":
    iface.launch()
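
# --- Optional sketch (not used by the app above) ---
# The comment in process_with_model notes that the embedding could be put to a
# more meaningful use. One assumed possibility is comparing two inputs by the
# cosine similarity of their embeddings; embedding_similarity below is a
# hypothetical helper added for illustration, not part of the original app.
def embedding_similarity(text_a, text_b):
    # Encode both strings with the same model and compare the pooled embeddings.
    vec_a = torch.tensor(process_with_model(text_a))
    vec_b = torch.tensor(process_with_model(text_b))
    return torch.nn.functional.cosine_similarity(vec_a, vec_b, dim=0).item()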