File size: 2,373 Bytes
823ded0
6feb7ec
 
 
 
 
 
 
 
 
 
 
 
f13a3ca
 
a78f83f
c12ca9b
e403126
a89484e
 
 
 
c68cde2
f27514f
 
a89484e
 
b9b4dd3
c12ca9b
 
a89484e
 
 
f27514f
0314451
a89484e
0314451
c68cde2
a89484e
c68cde2
a89484e
c12ca9b
a89484e
 
 
c12ca9b
 
c68cde2
9e57aa8
c68cde2
f27514f
b9b4dd3
f27514f
 
77603ce
a89484e
 
 
b9b4dd3
 
c12ca9b
f27514f
c68cde2
 
f27514f
c68cde2
a89484e
f27514f
c68cde2
e403126
c12ca9b
e403126
c68cde2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import os
import subprocess

# Install a package with pip; pip is invoked on every call and decides itself whether anything needs installing
def install(package):
    """Install *package* with pip, using the interpreter running this script.

    Runs ``<python> -m pip install <package>`` as a child process and waits
    for it to finish.

    Args:
        package: Requirement handed to ``pip install`` as a single argument.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    # Function-scope import: the file header only imports ``os`` and
    # ``subprocess``. The previous ``os.sys`` spelling worked only because
    # ``os`` happens to import ``sys`` internally, which is not a documented
    # attribute of the ``os`` module.
    import sys

    subprocess.check_call([sys.executable, "-m", "pip", "install", package])

# Ensure the necessary packages are installed
# Installed one after another, in this order, before the third-party imports below.
for _requirement in ("transformers", "torch", "pandas", "gradio"):
    install(_requirement)

import pandas as pd
import gradio as gr
from transformers import AutoModel, AutoTokenizer
import torch

# Load the model and tokenizer from Hugging Face
# Both objects come from the same Hub repository with the same options.
_MODEL_ID = "Alibaba-NLP/gte-multilingual-base"
# NOTE(review): trust_remote_code=True allows code shipped in the model
# repository to run locally — confirm this is acceptable for the deployment.
_HUB_OPTIONS = {"trust_remote_code": True}

tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID, **_HUB_OPTIONS)
model = AutoModel.from_pretrained(_MODEL_ID, **_HUB_OPTIONS)

# Load the dataset containing PEC numbers and names
def load_dataset(file_path='PEC_Numbers_and_Names.xlsx'):
    """Read the Excel workbook at *file_path* into a DataFrame.

    Args:
        file_path: Path of the workbook; defaults to
            'PEC_Numbers_and_Names.xlsx' relative to the working directory.

    Returns:
        The DataFrame produced by ``pd.read_excel``.

    Raises:
        FileNotFoundError: If nothing exists at *file_path*.
    """
    # Guard first so the caller gets a message that names the missing path.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")
    return pd.read_excel(file_path)

# Function to get the name based on the PEC number
def get_name(pec_number, df):
    """Look up the name registered under a PEC number.

    Matching ignores surrounding whitespace and letter case, both in the
    query and in the 'PEC No.' column.

    Args:
        pec_number: PEC number to search for, as text.
        df: DataFrame with 'PEC No.' and 'Name' columns. It is not modified.

    Returns:
        The 'Name' value of the first matching row, or the string
        "PEC Number not found." when no row matches.
    """
    # Plain str methods here: ``.str`` is a pandas Series accessor and does
    # not exist on a Python string.
    wanted = pec_number.strip().upper()

    pec_column = df['PEC No.']
    # Normalise into a temporary Series instead of writing back into the
    # caller's frame; astype(str) keeps ``.str`` usable when the column was
    # read as numbers rather than text.
    normalized = pec_column.astype(str).str.strip().str.upper()
    # Missing cells can stringify to text such as "nan"; mask them out so a
    # query can never match an empty cell.
    matches = df[pec_column.notna() & (normalized == wanted)]

    if matches.empty:
        return "PEC Number not found."
    return matches.iloc[0]['Name']

# Function to process the PEC number using the Hugging Face model
def process_with_model(pec_number):
    """Run *pec_number* through the module-level tokenizer and model.

    Args:
        pec_number: Text passed to the tokenizer.

    Returns:
        The model's ``last_hidden_state`` averaged over dim 1 (presumably the
        token axis — confirm against the model), squeezed and converted to a
        plain Python list.
    """
    encoded = tokenizer(pec_number, return_tensors="pt")
    # Inference only, so gradient tracking is switched off for the forward pass.
    with torch.no_grad():
        result = model(**encoded)
    pooled = result.last_hidden_state.mean(dim=1)
    return pooled.squeeze().tolist()

# Combine both functions to create a prediction
def predict(pec_number):
    """Produce the text shown for a submitted PEC number.

    Loads the dataset from its default path, looks up the name and runs the
    number through the model.

    Args:
        pec_number: PEC number text to look up.

    Returns:
        "Name: <name>\\nModel Output: <values>" on success, or the message of
        the FileNotFoundError if one is raised along the way.
    """
    try:
        # The workbook is re-read from the default location on every call.
        records = load_dataset()
        holder = get_name(pec_number, records)
        embedding = process_with_model(pec_number)
    except FileNotFoundError as missing:
        return str(missing)
    return f"Name: {holder}\nModel Output: {embedding}"

# Build the Gradio interface without the file upload option
# Single-line text box is the only input; the result is rendered as plain text.
_pec_textbox = gr.Textbox(lines=1, placeholder="Enter PEC Number...")
iface = gr.Interface(
    fn=predict,
    inputs=_pec_textbox,
    outputs="text",
    title="PEC Number to Name Lookup",
    description="Enter a PEC number to retrieve the corresponding name and process it with a Hugging Face model.",
)

# Start the interface only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()