import os
import shutil
from getpass import getpass

import gradio as gr
import qdrant_client
from llama_index.core import (
    Settings,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
)
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.qdrant import QdrantVectorStore
from qdrant_client.http import models

# Read the OpenAI API key from the environment.
# NOTE(review): this value is not passed to anything in this file; presumably
# the OpenAI clients below pick up the same environment variable themselves.
openai_api_key = os.getenv('OPENAI_API_KEY')

# -------------------------------------------------------
# Configure LlamaIndex with OpenAI LLM and Embeddings
# -------------------------------------------------------
Settings.llm = OpenAI(model="gpt-3.5-turbo", temperature=0.4)
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")

# Global variables to hold persistent objects. They are created lazily by
# process_upload() and reused across uploads and chat turns.
chat_engine = None
index = None
query_engine = None
memory = None
client = None
vector_store = None
storage_context = None

# Define a global collection name (you can change this as needed)
collection_name = "paper"


def _copy_new_files(files, upload_dir):
    """Copy files not yet present in *upload_dir* and return the new paths."""
    os.makedirs(upload_dir, exist_ok=True)

    copied = []
    for file_path in files:
        dest = os.path.join(upload_dir, os.path.basename(file_path))
        # A file whose name already exists in the folder is treated as
        # already uploaded and is skipped.
        if not os.path.exists(dest):
            shutil.copy(file_path, dest)
            copied.append(dest)
    return copied


def _ensure_storage():
    """Create the Qdrant client, collection, vector store and storage context
    on first use; later calls reuse the existing module-level objects."""
    global client, vector_store, storage_context

    if client is None:
        client = qdrant_client.QdrantClient(
            path="./qdrant_db",
            prefer_grpc=True
        )

    # Ensure the collection exists.
    # NOTE(review): the collection is created here with a single unnamed dense
    # vector while the vector store below is configured with
    # enable_hybrid=True -- confirm the store accepts this layout, or let
    # QdrantVectorStore create the collection itself.
    existing_collections = {col.name for col in client.get_collections().collections}
    if collection_name not in existing_collections:
        client.create_collection(
            collection_name=collection_name,
            vectors_config=models.VectorParams(
                size=1536,  # OpenAI's text-embedding-ada-002 produces 1536-d vectors.
                distance=models.Distance.COSINE
            )
        )

    if vector_store is None:
        vector_store = QdrantVectorStore(
            collection_name=collection_name,
            client=client,
            enable_hybrid=True,
            batch_size=20,
        )

    if storage_context is None:
        storage_context = StorageContext.from_defaults(vector_store=vector_store)


def process_upload(files):
    """
    Process newly uploaded files by copying them into a persistent folder,
    loading their content, and then either building a new index or inserting
    the new documents into the existing index.

    Args:
        files: List of file paths handed over by the Gradio file component,
            or None/empty when nothing was selected.

    Returns:
        A status message for the "Upload Status" textbox.
    """
    global index, query_engine, memory, chat_engine

    # Gradio passes None when the button is clicked with no file selected.
    if not files:
        return "No files were provided. Please select at least one file."

    new_file_paths = _copy_new_files(files, "uploaded_files")

    # SimpleDirectoryReader cannot be built from an empty input_files list,
    # so stop here when every selected file had already been uploaded.
    if not new_file_paths:
        return "No new files to process; the selected files were already uploaded."

    # Load only the newly uploaded documents.
    documents = SimpleDirectoryReader(input_files=new_file_paths).load_data()

    _ensure_storage()

    if index is None:
        # First upload: build the index from the documents.
        index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
    else:
        # Later uploads: the index API inserts one document at a time.
        for document in documents:
            index.insert(document)

    # Reinitialize the query and chat engines so they reflect the updated
    # index. This also starts a fresh chat memory buffer.
    query_engine = index.as_query_engine(vector_store_query_mode="hybrid")
    memory = ChatMemoryBuffer.from_defaults(token_limit=3000)
    chat_engine = index.as_chat_engine(
        chat_mode="context",
        memory=memory,
        system_prompt="You are an AI assistant who answers the user questions,"
    )

    return "Documents uploaded and index updated successfully!"


def chat_with_ai(user_input, chat_history):
    """
    Send one user message to the chat engine and record the exchange.

    Args:
        user_input: Text typed by the user.
        chat_history: List of (user, assistant) tuples held in the Gradio
            state; it is extended in place.

    Returns:
        A tuple (chat_history, textbox_value). The second element is written
        back to the input textbox: empty after a successful turn, or a hint
        when no documents have been uploaded yet.
    """
    if chat_engine is None:
        return chat_history, "Please upload documents first."

    # Do not spend an LLM call on an empty or whitespace-only message.
    if not user_input or not user_input.strip():
        return chat_history, ""

    response = chat_engine.chat(user_input)

    # Collect the distinct source file names in first-seen order.
    file_names = (node.metadata.get('file_name') for node in response.source_nodes)
    ref = list(dict.fromkeys(name for name in file_names if name))

    answer = str(response)
    if ref:
        # Show the user which files the answer was drawn from.
        answer = f"{answer}\n\nReferences: {', '.join(ref)}"

    chat_history.append((user_input, answer))
    return chat_history, ""


def clear_history():
    """
    Clear the conversation.

    Resets the chat engine's memory (when an engine exists) so the model does
    not keep answering in the context of the cleared conversation.

    Returns:
        A tuple ([], "") used to empty the chatbot display and the input box.
    """
    if chat_engine is not None:
        chat_engine.reset()
    return [], ""


def gradio_interface():
    """Build and return the Gradio Blocks app with an upload tab and a chat tab."""
    with gr.Blocks() as demo:
        gr.Markdown("# AI Assistant")

        with gr.Tab("Upload Documents"):
            gr.Markdown("Upload PDF, Excel, CSV, DOC/DOCX, or TXT files below:")
            file_upload = gr.File(
                label="Upload Files",
                file_count="multiple",
                file_types=[".pdf", ".csv", ".txt", ".xlsx", ".xls", ".doc", ".docx"],
                type="filepath"  # Returns file paths.
            )
            upload_status = gr.Textbox(label="Upload Status", interactive=False)
            upload_button = gr.Button("Process Upload")
            upload_button.click(process_upload, inputs=file_upload, outputs=upload_status)

        with gr.Tab("Chat"):
            chatbot = gr.Chatbot(label="AI Assistant Chat Interface")
            user_input = gr.Textbox(
                placeholder="Ask a question...",
                label="Enter your question"
            )
            submit_button = gr.Button("Send")
            btn_clear = gr.Button("Clear History")

            # A State to hold the chat history.
            chat_history = gr.State([])

            submit_button.click(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
            user_input.submit(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
            btn_clear.click(clear_history, outputs=[chatbot, user_input])
            # Also empty the stored history; otherwise the next message would
            # bring the whole cleared transcript back into the chatbot.
            btn_clear.click(lambda: [], outputs=chat_history)

    return demo


gradio_interface().launch(debug=True)