Spaces:

anasmkh
/

QdrantVectorStore_Llamaindex

Sleeping

App Files Files Community

QdrantVectorStore_Llamaindex / app.py

anasmkh

Update app.py

c9eadbe verified 6 months ago

raw

history blame

6.56 kB

	import os
	import shutil
	import gradio as gr
	import qdrant_client
	from getpass import getpass

	# Read the OpenAI API key from the process environment; the value is None
	# when OPENAI_API_KEY is not set. (The variable is not referenced again in
	# the visible code.)
	openai_api_key = os.environ.get("OPENAI_API_KEY")

	# -------------------------------------------------------
	# Configure LlamaIndex with OpenAI LLM and Embeddings
	# -------------------------------------------------------
	from llama_index.llms.openai import OpenAI
	from llama_index.embeddings.openai import OpenAIEmbedding
	from llama_index.core import Settings

	# Register the chat model on the LlamaIndex `Settings` object imported above.
	Settings.llm = OpenAI(model="gpt-3.5-turbo", temperature=0.4)
	# Register the embedding model the same way.
	# NOTE(review): process_upload creates its Qdrant collection with size=1536
	# to match this model; change both together if the model is swapped.
	Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")

	# -------------------------------------------------------
	# Import document readers, index, vector store, memory, etc.
	# -------------------------------------------------------
	from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext
	from llama_index.vector_stores.qdrant import QdrantVectorStore
	from llama_index.core.memory import ChatMemoryBuffer

	# Module-level state shared by the upload and chat handlers. Everything
	# starts as None and is populated by process_upload().
	client = vector_store = storage_context = None
	index = query_engine = chat_engine = memory = None

	# Name of the Qdrant collection that stores the embeddings (change as needed).
	collection_name = "paper"

	def process_upload(files):
	    """
	    Index newly uploaded files and rebuild the query and chat engines.

	    Each file is copied into the persistent ``uploaded_files`` folder
	    (files whose base name is already present there are skipped), the new
	    copies are loaded as documents, and those documents are either used to
	    build the index (first call) or inserted into the existing index.

	    Args:
	        files: List of file paths to ingest, or None / empty when nothing
	            was selected in the UI.

	    Returns:
	        A status message for display in the UI.
	    """
	    global client, vector_store, storage_context, index, query_engine, memory, chat_engine

	    # Nothing selected: report it instead of failing while iterating None.
	    if not files:
	        return "No files were provided. Please select at least one file to upload."

	    upload_dir = "uploaded_files"
	    os.makedirs(upload_dir, exist_ok=True)

	    # Copy only files that are not already stored; those are the ones to index.
	    new_file_paths = []
	    for file_path in files:
	        dest = os.path.join(upload_dir, os.path.basename(file_path))
	        if not os.path.exists(dest):
	            shutil.copy(file_path, dest)
	            new_file_paths.append(dest)

	    # The reader is only given the new copies; with none there is nothing
	    # to load, so stop here rather than pass it an empty file list.
	    if not new_file_paths:
	        return "No new files to index: every selected file was already uploaded."

	    # Load only the newly uploaded documents.
	    documents = SimpleDirectoryReader(input_files=new_file_paths).load_data()

	    # Initialize the Qdrant client if not already done.
	    if client is None:
	        client = qdrant_client.QdrantClient(
	            path="./qdrant_db",
	            prefer_grpc=True
	        )

	    # Ensure the collection exists.
	    # NOTE(review): the collection is created with a single dense vector
	    # config while the vector store below sets enable_hybrid=True -- confirm
	    # that the installed QdrantVectorStore accepts this layout for hybrid search.
	    from qdrant_client.http import models
	    existing_collections = {col.name for col in client.get_collections().collections}
	    if collection_name not in existing_collections:
	        client.create_collection(
	            collection_name=collection_name,
	            vectors_config=models.VectorParams(
	                size=1536,  # OpenAI's text-embedding-ada-002 produces 1536-d vectors.
	                distance=models.Distance.COSINE
	            )
	        )

	    # Initialize the vector store if not already done.
	    if vector_store is None:
	        vector_store = QdrantVectorStore(
	            collection_name=collection_name,
	            client=client,
	            enable_hybrid=True,
	            batch_size=20,
	        )

	    # Initialize the storage context if not already done.
	    if storage_context is None:
	        storage_context = StorageContext.from_defaults(vector_store=vector_store)

	    if index is None:
	        # First upload: build the index from the documents.
	        index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
	    else:
	        # Later uploads: add documents one at a time with index.insert().
	        for document in documents:
	            index.insert(document)

	    # Recreate the engines so they reflect the updated index. This also
	    # starts a fresh chat memory buffer.
	    query_engine = index.as_query_engine(vector_store_query_mode="hybrid")
	    memory = ChatMemoryBuffer.from_defaults(token_limit=3000)
	    chat_engine = index.as_chat_engine(
	        chat_mode="context",
	        memory=memory,
	        system_prompt="You are an AI assistant who answers the user questions,"
	    )

	    return "Documents uploaded and index updated successfully!"

	def chat_with_ai(user_input, chat_history):
	    """
	    Send one user message to the chat engine and record the exchange.

	    Args:
	        user_input: The question typed by the user.
	        chat_history: List of (user message, assistant reply) tuples; the
	            new exchange is appended to it in place.

	    Returns:
	        A ``(chat_history, textbox_value)`` pair. The second item is an
	        empty string (clearing the input box) except when no documents
	        have been indexed yet, in which case it carries a hint.
	    """
	    global chat_engine
	    if chat_engine is None:
	        return chat_history, "Please upload documents first."

	    # A blank question has nothing to answer; skip the chat call.
	    if not user_input or not user_input.strip():
	        return chat_history, ""

	    response = chat_engine.chat(user_input)

	    # Collect the distinct source file names, in first-seen order.
	    ref = []
	    for node in response.source_nodes:
	        file_name = node.metadata.get('file_name')
	        if file_name and file_name not in ref:
	            ref.append(file_name)

	    # Show the collected sources under the answer when there are any.
	    answer = str(response)
	    if ref:
	        answer += "\n\nReferences: " + ", ".join(ref)

	    chat_history.append((user_input, answer))
	    return chat_history, ""

	def clear_history():
	    """
	    Clear the conversation.

	    Resets the chat engine's conversation state, when an engine exists, so
	    the assistant does not keep using turns the user just cleared.

	    Returns:
	        ``([], "")``: an empty chat transcript and an empty input box.
	    """
	    global chat_engine
	    if chat_engine is not None:
	        chat_engine.reset()
	    return [], ""

	def gradio_interface():
	    """
	    Build the Gradio UI: an upload tab and a chat tab.

	    Returns:
	        The ``gr.Blocks`` application, not yet launched.
	    """

	    def _clear_all():
	        # Also return a fresh list for the chat-history State. Without it
	        # the State keeps the old turns and they reappear on the next message.
	        cleared_chat, cleared_input = clear_history()
	        return cleared_chat, cleared_input, []

	    with gr.Blocks() as demo:
	        gr.Markdown("# AI Assistant")

	        with gr.Tab("Upload Documents"):
	            gr.Markdown("Upload PDF, Excel, CSV, DOC/DOCX, or TXT files below:")
	            file_upload = gr.File(
	                label="Upload Files",
	                file_count="multiple",
	                file_types=[".pdf", ".csv", ".txt", ".xlsx", ".xls", ".doc", ".docx"],
	                type="filepath"  # Returns file paths.
	            )
	            upload_status = gr.Textbox(label="Upload Status", interactive=False)
	            upload_button = gr.Button("Process Upload")

	            upload_button.click(process_upload, inputs=file_upload, outputs=upload_status)

	        with gr.Tab("Chat"):
	            chatbot = gr.Chatbot(label="AI Assistant Chat Interface")
	            user_input = gr.Textbox(
	                placeholder="Ask a question...", label="Enter your question"
	            )
	            submit_button = gr.Button("Send")
	            btn_clear = gr.Button("Clear History")

	            # A State to hold the chat history.
	            chat_history = gr.State([])

	            # The Send button and pressing Enter both run the same handler.
	            submit_button.click(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
	            user_input.submit(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
	            btn_clear.click(_clear_all, outputs=[chatbot, user_input, chat_history])

	    return demo

	# Build the interface, then start the Gradio server in debug mode.
	demo_app = gradio_interface()
	demo_app.launch(debug=True)