anasmkh's picture
Update app.py
c9eadbe verified
raw
history blame
6.56 kB
import os
import shutil
import gradio as gr
import qdrant_client
from getpass import getpass
# Read the OpenAI API key from the environment; the value is None when the
# OPENAI_API_KEY variable is not set.
openai_api_key = os.environ.get('OPENAI_API_KEY')
# -------------------------------------------------------
# Configure LlamaIndex with OpenAI LLM and Embeddings
# -------------------------------------------------------
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings
# Default chat model used by every LlamaIndex component created below.
Settings.llm = OpenAI(
    model="gpt-3.5-turbo",
    temperature=0.4,
)
# Default embedding model used when documents and queries are vectorised.
Settings.embed_model = OpenAIEmbedding(
    model="text-embedding-ada-002",
)
# -------------------------------------------------------
# Import document readers, index, vector store, memory, etc.
# -------------------------------------------------------
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core.memory import ChatMemoryBuffer
# Module-level state shared between the upload and chat callbacks. Everything
# starts as None and is populated lazily by process_upload().
client = vector_store = storage_context = None
index = query_engine = chat_engine = memory = None
# Name of the Qdrant collection that stores the embeddings (change as needed).
collection_name = "paper"
def process_upload(files):
    """Copy newly uploaded files into ``uploaded_files`` and index them.

    Files whose base name already exists in the upload folder are skipped.
    The remaining files are loaded with ``SimpleDirectoryReader`` and either
    used to build the index (first call) or inserted into the existing index.
    The module-level query engine, chat memory and chat engine are rebuilt
    afterwards so they reflect the updated index.

    Args:
        files: A list of file paths (as produced by ``gr.File`` with
            ``type="filepath"``), a single path string, or ``None`` when
            nothing was selected.

    Returns:
        A human-readable status message for the "Upload Status" textbox.
    """
    global client, vector_store, storage_context, index, query_engine, memory, chat_engine

    # Gradio passes None when the button is clicked with no file selected.
    if not files:
        return "No files selected. Please choose at least one file to upload."
    # A bare string would otherwise be iterated character by character.
    if isinstance(files, str):
        files = [files]

    upload_dir = "uploaded_files"
    os.makedirs(upload_dir, exist_ok=True)

    # Copy only files that are not already present in the upload directory.
    new_file_paths = []
    for file_path in files:
        dest = os.path.join(upload_dir, os.path.basename(file_path))
        if not os.path.exists(dest):
            shutil.copy(file_path, dest)
            new_file_paths.append(dest)

    # SimpleDirectoryReader rejects an empty ``input_files`` list, so stop
    # here when every selected file had been uploaded before.
    if not new_file_paths:
        return "No new files to process; the selected files were already uploaded."

    # Load only the newly uploaded documents.
    documents = SimpleDirectoryReader(input_files=new_file_paths).load_data()

    # Initialize the Qdrant client if not already done.
    if client is None:
        client = qdrant_client.QdrantClient(
            path="./qdrant_db",
            prefer_grpc=True
        )

    # Ensure the collection exists.
    # NOTE(review): the collection is created with a single unnamed dense
    # vector while the vector store below is configured with
    # enable_hybrid=True -- confirm that the installed Qdrant integration
    # accepts this layout for hybrid (dense + sparse) storage.
    from qdrant_client.http import models
    existing_collections = {col.name for col in client.get_collections().collections}
    if collection_name not in existing_collections:
        client.create_collection(
            collection_name=collection_name,
            vectors_config=models.VectorParams(
                size=1536,  # OpenAI's text-embedding-ada-002 produces 1536-d vectors.
                distance=models.Distance.COSINE
            )
        )

    # Initialize the vector store if not already done.
    if vector_store is None:
        vector_store = QdrantVectorStore(
            collection_name=collection_name,
            client=client,
            enable_hybrid=True,
            batch_size=20,
        )

    # Initialize the storage context if not already done.
    if storage_context is None:
        storage_context = StorageContext.from_defaults(vector_store=vector_store)

    if index is None:
        # First upload: build the index from the documents.
        index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
    else:
        # Later uploads: the index API inserts one document at a time.
        for document in documents:
            index.insert(document)

    # Rebuild the query and chat engines so they reflect the updated index.
    # This also starts a fresh chat memory buffer.
    query_engine = index.as_query_engine(vector_store_query_mode="hybrid")
    memory = ChatMemoryBuffer.from_defaults(token_limit=3000)
    chat_engine = index.as_chat_engine(
        chat_mode="context",
        memory=memory,
        system_prompt="You are an AI assistant who answers the user questions,"
    )
    return "Documents uploaded and index updated successfully!"
def chat_with_ai(user_input, chat_history):
    """Answer one user message with the chat engine and record the exchange.

    Args:
        user_input: The question typed by the user.
        chat_history: List of ``(user_message, assistant_message)`` tuples;
            it is extended in place with the new exchange.

    Returns:
        A ``(chat_history, textbox_value)`` tuple. ``textbox_value`` is an
        empty string after a normal exchange, or a prompt to upload
        documents when the module-level chat engine has not been created yet.
    """
    if chat_engine is None:
        return chat_history, "Please upload documents first."
    # Do not send blank messages to the LLM.
    if not user_input or not user_input.strip():
        return chat_history, ""

    response = chat_engine.chat(user_input)

    # Collect the distinct source file names, keeping first-seen order.
    referenced_files = []
    for node in response.source_nodes:
        file_name = node.metadata.get('file_name')
        if file_name and file_name not in referenced_files:
            referenced_files.append(file_name)

    # Show the sources under the answer instead of discarding them.
    answer = str(response)
    if referenced_files:
        answer += "\n\nReferences: " + ", ".join(referenced_files)

    chat_history.append((user_input, answer))
    return chat_history, ""
def clear_history():
    """Return blank values for the chat display and the question textbox."""
    empty_conversation = []
    empty_question = ""
    return empty_conversation, empty_question
def gradio_interface():
    """Build the Gradio UI and return the ``gr.Blocks`` app.

    The UI has two tabs: "Upload Documents", which feeds the selected files
    to ``process_upload``, and "Chat", which sends questions to
    ``chat_with_ai`` and keeps the conversation in a ``gr.State`` list.

    Returns:
        The assembled ``gr.Blocks`` object; the caller is responsible for
        launching it.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# AI Assistant")

        with gr.Tab("Upload Documents"):
            gr.Markdown("Upload PDF, Excel, CSV, DOC/DOCX, or TXT files below:")
            file_upload = gr.File(
                label="Upload Files",
                file_count="multiple",
                file_types=[".pdf", ".csv", ".txt", ".xlsx", ".xls", ".doc", ".docx"],
                type="filepath"  # Returns file paths.
            )
            upload_status = gr.Textbox(label="Upload Status", interactive=False)
            upload_button = gr.Button("Process Upload")
            upload_button.click(process_upload, inputs=file_upload, outputs=upload_status)

        with gr.Tab("Chat"):
            chatbot = gr.Chatbot(label="AI Assistant Chat Interface")
            user_input = gr.Textbox(
                placeholder="Ask a question...", label="Enter your question"
            )
            submit_button = gr.Button("Send")
            btn_clear = gr.Button("Clear History")

            # A State to hold the chat history. chat_with_ai appends to this
            # list in place, which is how the history persists between sends.
            chat_history = gr.State([])

            # The button and the Enter key trigger the same handler.
            submit_button.click(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
            user_input.submit(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])

            btn_clear.click(clear_history, outputs=[chatbot, user_input])
            # Also reset the stored history; otherwise the cleared messages
            # would reappear in the chatbot on the next question.
            btn_clear.click(lambda: [], outputs=chat_history)

    return demo
# Build the UI and start the Gradio app with debug mode enabled.
gradio_interface().launch(debug=True)