from datetime import datetime
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_parse import LlamaParse
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
import os
from dotenv import load_dotenv
import gradio as gr

# Load environment variables
load_dotenv()
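
# Note: the os.getenv() calls below assume a local .env file that defines the two
# keys used in this script (key names taken from the code itself), e.g.:
#   LLAMA_INDEX_API=<LlamaParse / LlamaCloud API key>
#   TOKEN=<Hugging Face Inference API token>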

models = [
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "meta-llama/Meta-Llama-3-8B-Instruct",
    # "NousResearch/Yarn-Mistral-7b-64k",  ## 14GB>10GB
    # "impira/layoutlm-document-qa",  ## ERR
    # "Qwen/Qwen1.5-7B",      ## 15GB
    # "Qwen/Qwen2.5-3B",      ## high response time
    # "google/gemma-2-2b-jpn-it",   ## high response time
    # "impira/layoutlm-invoices",   ## bad req
    # "google/pix2struct-docvqa-large",  ## bad req
    "mistralai/Mistral-7B-Instruct-v0.2",
    # "google/gemma-7b-it", ## 17GB > 10GB
    # "google/gemma-2b-it",  ## high response time
    # "HuggingFaceH4/zephyr-7b-beta",   ## high response time
    # "HuggingFaceH4/zephyr-7b-gemma-v0.1",     ## bad req
    # "microsoft/phi-2",    ## high response time
    # "TinyLlama/TinyLlama-1.1B-Chat-v1.0",     ## high response time
    # "mosaicml/mpt-7b-instruct",     ## 13GB>10GB
    "tiiuae/falcon-7b-instruct",
    "google/flan-t5-xxl"
    # "NousResearch/Yarn-Mistral-7b-128k",  ## 14GB>10GB
    # "Qwen/Qwen2.5-7B-Instruct",     ## 15GB>10GB
]

# Global variable for selected model
selected_model_name = models[0]  # Default to the first model in the list

# Initialize the parser
parser = LlamaParse(api_key=os.getenv("LLAMA_INDEX_API"), result_type='markdown')
file_extractor = {'.pdf': parser, '.docx': parser, '.doc': parser}

# Embedding model used to build the vector index over uploaded files
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Global variable holding the vector index built from user-uploaded files
vector_index = None


# File processing function
def load_files(file_path: str):
    try:
        global vector_index
        document = SimpleDirectoryReader(input_files=[file_path], file_extractor=file_extractor).load_data()
        vector_index = VectorStoreIndex.from_documents(document, embed_model=embed_model)
        print(f"Parsing done for {file_path}")
        filename = os.path.basename(file_path)
        return f"Ready to give response on {filename}"
    except Exception as e:
        return f"An error occurred: {e}"


# Function to handle the selected model from dropdown
def set_model(selected_model):
    global selected_model_name
    selected_model_name = selected_model  # Update the global variable
    # print(f"Model selected: {selected_model_name}")
    # return f"Model set to: {selected_model_name}"


# Respond function that uses the globally set selected model
def respond(message, history):
    try:
        # Guard against querying before a document has been indexed
        if vector_index is None:
            return "Please upload a file."

        # Initialize the LLM with the selected model
        llm = HuggingFaceInferenceAPI(
            model_name=selected_model_name,
            token=os.getenv("TOKEN")
        )

        # Check selected model
        # print(f"Using model: {selected_model_name}")

        # Set up the query engine with the selected LLM
        query_engine = vector_index.as_query_engine(llm=llm)
        bot_message = query_engine.query(message)

        print(f"\n{datetime.now()}:{selected_model_name}:: {message} --> {str(bot_message)}\n")
        return f"{selected_model_name}:\n{str(bot_message)}"
    except Exception as e:
        return f"An error occurred: {e}"


# UI Setup
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=1):
            file_input = gr.File(file_count="single", type='filepath')
            with gr.Row():
                clear = gr.ClearButton()
                btn = gr.Button("Submit", variant='primary')
            output = gr.Text(label='Vector Index')
            model_dropdown = gr.Dropdown(models, label="Select Model", interactive=True)

        with gr.Column(scale=3):
            gr.ChatInterface(
                fn=respond,
                chatbot=gr.Chatbot(height=500),
                textbox=gr.Textbox(placeholder="Ask me questions on the uploaded document!", container=False, scale=7)
            )

    # Set up Gradio interactions
    model_dropdown.change(fn=set_model, inputs=model_dropdown)
    btn.click(fn=load_files, inputs=file_input, outputs=output)
    clear.click(lambda: [None] * 2, outputs=[file_input, output])

# Launch the demo (pass share=True to demo.launch() to expose a public link)
if __name__ == "__main__":
    demo.launch()