File size: 2,846 Bytes
823ded0
6feb7ec
 
 
 
 
 
 
 
 
 
 
 
f13a3ca
 
a78f83f
c12ca9b
e403126
c68cde2
 
b9b4dd3
 
c68cde2
c12ca9b
 
0314451
 
 
 
 
 
 
 
 
 
 
c68cde2
0314451
 
c68cde2
0314451
 
c12ca9b
0314451
 
 
c12ca9b
 
c68cde2
9e57aa8
c68cde2
0314451
b9b4dd3
77603ce
 
 
 
 
 
0314451
 
 
b9b4dd3
 
c12ca9b
0314451
 
 
 
77603ce
c68cde2
 
77603ce
0314451
77603ce
 
c68cde2
0314451
 
c68cde2
e403126
c12ca9b
e403126
c68cde2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import os
import subprocess

# Function to install a package if it is not already installed
def install(package):
    """Install *package* with pip unless it is already importable.

    The original implementation invoked pip unconditionally, which is slow
    and fails without network access even when the package is present.

    Args:
        package: Name handed to ``pip install``. It is also used as the
            module name for the "already installed" probe; when the two
            differ (or the value is a requirement specifier) the probe
            simply misses and pip is run as before.

    Raises:
        subprocess.CalledProcessError: If the pip invocation fails.
    """
    import importlib.util
    import sys

    try:
        already_present = importlib.util.find_spec(package) is not None
    except (ImportError, ValueError):
        # find_spec raises for names it cannot resolve as modules
        # (e.g. "pkg==1.0" or an empty string); fall back to pip.
        already_present = False

    if already_present:
        return

    # Use the running interpreter so the package lands in its environment.
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])

# Make sure every runtime dependency is available before it is imported.
for _package in ("transformers", "torch", "pandas", "gradio"):
    install(_package)

import pandas as pd
import gradio as gr
from transformers import AutoModel, AutoTokenizer
import torch

# Load the dataset containing PEC numbers and names
def load_dataset(file_path='PEC_Numbers_and_Names.xlsx'):
    """Read the Excel workbook at *file_path* into a DataFrame.

    Args:
        file_path: Path of the workbook to read.

    Returns:
        The DataFrame produced by ``pandas.read_excel``.

    Raises:
        FileNotFoundError: If nothing exists at *file_path*.
    """
    if os.path.exists(file_path):
        return pd.read_excel(file_path)
    raise FileNotFoundError(f"File not found: {file_path}")

# Debugging function to get PEC number based on the name
def get_pec_number(name, df):
    """Return the PEC number of the first row whose ``Name`` matches *name*.

    Matching ignores surrounding whitespace and letter case. Debug
    information is printed to stdout.

    Args:
        name: Name to look up; ``None`` is treated as an empty string.
        df: DataFrame with a ``Name`` and a ``PEC No.`` column. It is left
            unmodified.

    Returns:
        The ``PEC No.`` value of the first matching row, or the string
        ``"Name not found."`` when no row matches.

    Raises:
        KeyError: If *df* lacks the ``Name`` or ``PEC No.`` column.
    """
    print("Column names in DataFrame:", df.columns.tolist())  # Print the column names
    print(f"Looking for Name: '{name}'")

    # Plain str methods here: the pandas ``.str`` accessor does not exist on
    # a Python string.
    target = (name or "").strip().lower()

    # Normalize into a temporary Series so the caller's DataFrame keeps its
    # original values; astype(str) tolerates non-string cells, and the
    # notna() mask keeps missing cells from matching their "nan" rendering.
    names = df['Name']
    normalized = names.astype(str).str.strip().str.lower()
    result = df[names.notna() & (normalized == target)]

    if result.empty:
        print("Name not found.")
        return "Name not found."

    pec_number = result.iloc[0]['PEC No.']
    print(f"Found PEC Number: {pec_number}")
    return pec_number

# Function to process the name using the Hugging Face model
def process_with_model(name):
    """Run *name* through the module-level tokenizer and model.

    The model's ``last_hidden_state`` is averaged over dimension 1
    (presumably the token axis; confirm against the model's output shape),
    squeezed, and converted to plain Python lists.

    Args:
        name: Text to encode.

    Returns:
        The pooled hidden state as returned by ``Tensor.tolist()``.
    """
    encoded = tokenizer(name, return_tensors="pt")
    # Inference only, so skip gradient tracking.
    with torch.no_grad():
        result = model(**encoded)
    pooled = result.last_hidden_state.mean(dim=1)
    return pooled.squeeze().tolist()

# Combine both functions to create a prediction
def predict(name, file):
    """Look up the PEC number for *name* and attach the model output.

    Args:
        name: Name entered by the user.
        file: Optional uploaded workbook. May be a filesystem path
            (``str`` / ``os.PathLike``) or an object exposing the path as
            ``.name``; which one Gradio passes depends on its version and
            component configuration. ``None`` falls back to the default
            dataset loaded by ``load_dataset``.

    Returns:
        A two-line string with the PEC number and the model output, or the
        error message when the workbook cannot be found.
    """
    try:
        # Load the dataset from the uploaded file if provided
        if file is not None:
            # Path objects also have a ``.name`` (the basename), so test for
            # path types explicitly instead of probing the attribute.
            if isinstance(file, (str, os.PathLike)):
                path = file
            else:
                path = file.name
            df = pd.read_excel(path)
        else:
            df = load_dataset()
    except FileNotFoundError as e:
        return str(e)

    pec_number = get_pec_number(name, df)
    model_output = process_with_model(name)
    return f"PEC Number: {pec_number}\nModel Output: {model_output}"

# Load the tokenizer and model from Hugging Face using one shared checkpoint id.
# NOTE(review): trust_remote_code=True allows code shipped with the checkpoint
# to run locally; keep it only for checkpoints you trust.
_MODEL_ID = "Alibaba-NLP/gte-multilingual-base"
tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID, trust_remote_code=True)
model = AutoModel.from_pretrained(_MODEL_ID, trust_remote_code=True)

# Assemble the Gradio UI: a name textbox plus an optional workbook upload,
# both fed to predict(), with the result shown as plain text.
_name_input = gr.Textbox(lines=1, placeholder="Enter Name...")
_file_input = gr.File(label="Upload PEC Numbers and Names file (optional)")

iface = gr.Interface(
    fn=predict,
    inputs=[_name_input, _file_input],
    outputs="text",
    title="Name to PEC Number Lookup with Model Integration",
    description="Enter a name to retrieve the corresponding PEC number and process it with a Hugging Face model. Optionally, upload the Excel file if not found.",
)

# Run the Gradio interface.
# The guard starts the web UI only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    iface.launch()