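# Gradio chat interface over a LlamaIndex RAG pipeline backed by a Qdrant vector store.
# Approximate dependencies (assumed, not pinned): gradio, llama-index, qdrant-client,
# llama-index-vector-stores-qdrant, plus fastembed for the hybrid retrieval mode used below.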
import os
import shutil

import gradio as gr
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex, StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core.memory import ChatMemoryBuffer
import qdrant_client

# Set your OpenAI API key from environment variable
openai_api_key = os.getenv('OPENAI_API_KEY')
if not openai_api_key:
    raise ValueError("Please set your OPENAI_API_KEY environment variable.")

# Define a system prompt as a global constant
SYSTEM_PROMPT = (
    "You are an AI assistant who answers the user's questions. "
    "Use the schema fields to generate appropriate and valid JSON queries."
)

# Configure the LLM and embedding models
Settings.llm = OpenAI(model="gpt-3.5-turbo", temperature=0.4)
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")

# Load initial documents from a directory called "new_file"
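# (Assumes the "new_file" directory exists and contains at least one readable document;
# SimpleDirectoryReader raises an error otherwise.)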
documents = SimpleDirectoryReader("new_file").load_data()

# Set up the Qdrant vector store (using an in-memory collection for simplicity)
client = qdrant_client.QdrantClient(location=":memory:")
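# Note: enable_hybrid=True adds a sparse, keyword-style representation alongside the dense
# embeddings; in LlamaIndex this hybrid mode typically requires the optional fastembed package.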
vector_store = QdrantVectorStore(
    collection_name="paper",
    client=client,
    enable_hybrid=True,
    batch_size=20,
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the initial index and query/chat engines
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
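# The memory buffer caps how much prior conversation (in tokens) is replayed to the LLM on each turn.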
chat_memory = ChatMemoryBuffer.from_defaults(token_limit=3000)
chat_engine = index.as_chat_engine(
    chat_mode="context",
    memory=chat_memory,
    system_prompt=SYSTEM_PROMPT,
)

def process_uploaded_file(uploaded_file):
    """
    Process the uploaded file:
      1. Save the file to an "uploads" folder.
      2. Copy it to a temporary folder ("temp_upload") to load using SimpleDirectoryReader.
      3. Extend the global documents list and rebuild the index and chat engine.
    """
    if uploaded_file is None:
        return "No file uploaded."

    # 'uploaded_file' is a temporary file path provided by Gradio.
    file_name = os.path.basename(uploaded_file)
    uploads_dir = "uploads"
    os.makedirs(uploads_dir, exist_ok=True)
    dest_path = os.path.join(uploads_dir, file_name)
    shutil.copy(uploaded_file, dest_path)

    # Prepare a temporary directory to read the file
    temp_dir = "temp_upload"
    os.makedirs(temp_dir, exist_ok=True)
    # Clear previous files in temp_dir (optional, to avoid mixing files)
    for f in os.listdir(temp_dir):
        os.remove(os.path.join(temp_dir, f))
    shutil.copy(dest_path, temp_dir)

    # Load the new document(s) from the temporary folder
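    # (The file type is inferred from the extension; PDFs and Office documents may need
    # extra parser dependencies such as pypdf or docx2txt.)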
    new_docs = SimpleDirectoryReader(temp_dir).load_data()

    # Update the global documents list and rebuild the index and chat engine
    global documents, index, chat_engine
    documents.extend(new_docs)
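    # Rebuilding from scratch keeps the example simple; for a large corpus, inserting only the
    # new documents into the existing index (e.g. index.insert(doc)) would avoid re-embedding everything.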
    index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
    chat_engine = index.as_chat_engine(
        chat_mode="context",
        memory=chat_memory,
        system_prompt=SYSTEM_PROMPT,
    )

    return f"File '{file_name}' processed and added to index."

def chat_with_ai(user_input, chat_history):
    """
    Send the user input to the chat engine and update the conversation history.
    """
    response = chat_engine.chat(user_input)
    # Collect reference filenames from the response (if available)
    references = response.source_nodes
    ref = []
    for node in references:
        if "file_name" in node.metadata and node.metadata["file_name"] not in ref:
            ref.append(node.metadata["file_name"])
    # Create a complete response string with references if present
    complete_response = str(response)
    if ref:
        complete_response += "\n\nReferences: " + ", ".join(ref)
    chat_history.append((user_input, complete_response))
    return chat_history, ""

def clear_history():
    """Clear the visible chat, the stored history, and the chat engine's memory."""
    chat_engine.reset()
    return [], [], ""

def gradio_chatbot():
    """
    Create a Gradio interface with two tabs:
      - "Chat" for interacting with the chat engine.
      - "Upload" for uploading new files to update the index.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# Chat Interface for LlamaIndex with File Upload")
        
        with gr.Tab("Chat"):
            chatbot = gr.Chatbot(label="LlamaIndex Chatbot")
            user_input = gr.Textbox(
                placeholder="Ask a question...", label="Enter your question"
            )
            submit_button = gr.Button("Send")
            btn_clear = gr.Button("Delete Context")
            chat_history = gr.State([])
            submit_button.click(chat_with_ai, inputs=[user_input, chat_history],
                                  outputs=[chatbot, user_input])
            user_input.submit(chat_with_ai, inputs=[user_input, chat_history],
                              outputs=[chatbot, user_input])
            btn_clear.click(fn=clear_history, outputs=[chatbot, chat_history, user_input])
        
        with gr.Tab("Upload"):
            gr.Markdown("### Upload a file to add its content to the index")
            file_upload = gr.File(label="Choose a file")
            upload_button = gr.Button("Upload and Process")
            upload_status = gr.Textbox(label="Upload Status")
            upload_button.click(process_uploaded_file, inputs=[file_upload], outputs=[upload_status])
    
    return demo

if __name__ == "__main__":
    gradio_chatbot().launch(debug=True)