import os
from getpass import getpass

import gradio as gr
import PyPDF2
import qdrant_client
from llama_index.core import (
    Settings,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
)
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.qdrant import QdrantVectorStore

# Looked up once at import time; nothing below passes it on explicitly.
openai_api_key = os.getenv('OPENAI_API_KEY')

# Global LlamaIndex defaults used by the index and chat engine built below.
Settings.llm = OpenAI(model="gpt-3.5-turbo", temperature=0.4)
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")

# Load initial documents.
# NOTE(review): presumably this fails when "new_file" is missing or empty;
# confirm and create/seed the directory before first start if so.
documents = SimpleDirectoryReader("new_file").load_data()

# In-memory Qdrant instance: the collection is rebuilt on every start.
client = qdrant_client.QdrantClient(
    location=":memory:",
)

vector_store = QdrantVectorStore(
    collection_name="paper",
    client=client,
    enable_hybrid=True,
    batch_size=20,
)

storage_context = StorageContext.from_defaults(vector_store=vector_store)

index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
)

query_engine = index.as_query_engine(
    vector_store_query_mode="hybrid"
)

memory = ChatMemoryBuffer.from_defaults(token_limit=3000)

chat_engine = index.as_chat_engine(
    chat_mode="context",
    memory=memory,
    system_prompt=(
        "You are an AI assistant who answers the user questions, "
        "use the schema fields to generate appropriate and valid json queries"
    ),
)


def chat_with_ai(user_input, chat_history):
    """Send one user message to the chat engine and record the exchange.

    Args:
        user_input: The text typed by the user.
        chat_history: List of ``(user_message, assistant_message)`` tuples
            shown in the chatbot; ``None`` is treated as an empty history.

    Returns:
        A ``(chat_history, "")`` tuple: the updated history and an empty
        string used to clear the input textbox.
    """
    if chat_history is None:
        chat_history = []

    # Nothing to ask: avoid a pointless LLM round trip.
    if not user_input or not user_input.strip():
        return chat_history, ""

    response = chat_engine.chat(user_input)

    # Collect the distinct source file names in retrieval order. Nodes that
    # carry no "file_name" metadata are skipped instead of raising KeyError.
    source_files = []
    for node in response.source_nodes or []:
        file_name = (node.metadata or {}).get("file_name")
        if file_name and file_name not in source_files:
            source_files.append(file_name)

    answer = str(response)
    if source_files:
        answer += "\n\nSources: " + ", ".join(source_files)

    chat_history.append((user_input, answer))
    return chat_history, ""


def clear_history():
    """Forget the conversation and blank the chat widgets.

    Resets the chat engine so earlier turns no longer influence new
    answers, then returns the values that empty the UI.

    Returns:
        A ``([], "")`` tuple: an empty chatbot history and an empty textbox.
    """
    chat_engine.reset()
    return [], ""
import docx
import pandas as pd

# Folder uploaded files are written to.
UPLOAD_DIR = "new_file"


def extract_text_from_file(file_path):
    """Extract text from a file, choosing the parser by file extension.

    Supports: PDF, DOC/DOCX, TXT, XLS/XLSX.

    Args:
        file_path: Path of the file to read.

    Returns:
        The extracted text. Failures are not raised: a parser error yields
        an ``"Error processing ..."`` message and an unknown extension
        yields ``"Unsupported file type for text extraction."``.
    """
    ext = os.path.splitext(file_path)[1].lower()

    if ext == ".pdf":
        try:
            with open(file_path, "rb") as f:
                pdf_reader = PyPDF2.PdfReader(f)
                page_texts = []
                for page in pdf_reader.pages:
                    page_text = page.extract_text()
                    # Pages without extractable text are skipped.
                    if page_text:
                        page_texts.append(page_text + "\n")
            return "".join(page_texts)
        except Exception as e:
            return f"Error processing PDF: {e}"

    if ext in (".doc", ".docx"):
        try:
            doc = docx.Document(file_path)
            return "\n".join(para.text for para in doc.paragraphs)
        except Exception as e:
            return f"Error processing Word document: {e}"

    if ext == ".txt":
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                return f.read()
        except Exception as e:
            return f"Error processing TXT file: {e}"

    if ext in (".xls", ".xlsx"):
        try:
            # Read the first sheet of the Excel file and render it as CSV.
            df = pd.read_excel(file_path)
            return df.to_csv(index=False)
        except Exception as e:
            return f"Error processing Excel file: {e}"

    return "Unsupported file type for text extraction."


def _read_upload(file):
    """Return ``(name, data)`` for an uploaded object, or ``None`` if unrecognized."""
    # Path-style upload (Gradio's File component can deliver a file path).
    # Checked first because such a value may also expose a ``name``
    # attribute without being readable.
    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
        with open(path, "rb") as src:
            return path, src.read()

    if isinstance(file, dict):
        return file.get("name", "uploaded_file"), file.get("data")

    # File-like upload: needs both a name and a read() method.
    if hasattr(file, "name") and hasattr(file, "read"):
        return file.name, file.read()

    return None


def upload_file(file):
    """Save an uploaded file into ``new_file`` and preview its text.

    Args:
        file: The uploaded object. Accepted forms are a filesystem path, a
            file-like object with ``name`` and ``read()``, a dict with
            ``"name"``/``"data"`` keys, or a list of those (only the first
            entry is used).

    Returns:
        A human-readable status message. On success it includes the first
        200 characters of the extracted text; on failure it describes the
        problem. No exception is raised for the handled error cases.
    """
    if file is None:
        return "No file uploaded!"

    if isinstance(file, list):
        if not file:
            return "No file uploaded!"
        file = file[0]

    try:
        upload = _read_upload(file)
    except OSError as e:
        return f"Error reading uploaded file: {e}"

    if upload is None:
        return "Uploaded file format not recognized."

    raw_name, file_data = upload
    if file_data is None:
        return "Uploaded file data not found!"

    # Text-mode readers return str; the file is written in binary mode.
    if isinstance(file_data, str):
        file_data = file_data.encode("utf-8")

    # Keep only the final path component. The reported name may be a full
    # (temporary) path, and joining an absolute path would discard the
    # upload folder; this also blocks "../" traversal out of it.
    file_name = os.path.basename(str(raw_name).replace("\\", "/"))
    if file_name in ("", ".", ".."):
        file_name = "uploaded_file"

    os.makedirs(UPLOAD_DIR, exist_ok=True)
    file_path = os.path.join(UPLOAD_DIR, file_name)

    try:
        with open(file_path, "wb") as f:
            f.write(file_data)
    except (OSError, TypeError) as e:
        return f"Error saving file: {e}"

    # NOTE: the file is only saved to disk here; this function does not add
    # it to the vector index.
    extracted_text = extract_text_from_file(file_path)
    if len(extracted_text) > 200:
        preview = extracted_text[:200] + "..."
    else:
        preview = extracted_text

    return (
        f"File {file_name} uploaded and processed successfully!\n"
        f"Extracted text preview:\n{preview}"
    )


def gradio_chatbot():
    """Build the Gradio Blocks UI for chatting and uploading files.

    Returns:
        The assembled ``gr.Blocks`` app (not yet launched).
    """
    with gr.Blocks() as demo:
        gr.Markdown("# Chat Interface for LlamaIndex")

        chatbot = gr.Chatbot(label="LlamaIndex Chatbot")
        user_input = gr.Textbox(
            placeholder="Ask a question...", label="Enter your question"
        )

        submit_button = gr.Button("Send")
        btn_clear = gr.Button("Delete Context")

        # File upload component and the button that triggers the upload.
        file_upload = gr.File(label="Upload a file")
        upload_button = gr.Button("Upload File")

        chat_history = gr.State([])

        def _respond(message, history):
            # Write the updated history back to the session state explicitly
            # instead of relying on in-place mutation of the State value.
            updated_history, cleared_input = chat_with_ai(message, history)
            return updated_history, cleared_input, updated_history

        def _reset_chat():
            # Also empty the stored history; otherwise the old turns would
            # reappear in the chatbot on the next message.
            cleared_chat, cleared_input = clear_history()
            return cleared_chat, cleared_input, []

        # NOTE(review): the upload status message is written into the
        # question textbox; a dedicated status field may be what was meant.
        upload_button.click(upload_file, inputs=file_upload, outputs=user_input)

        # Chat interaction: button click and Enter in the textbox.
        chat_io = {
            "inputs": [user_input, chat_history],
            "outputs": [chatbot, user_input, chat_history],
        }
        submit_button.click(_respond, **chat_io)
        user_input.submit(_respond, **chat_io)

        btn_clear.click(
            fn=_reset_chat, outputs=[chatbot, user_input, chat_history]
        )

    return demo


# Launch only when run as a script (or notebook), not when imported.
if __name__ == "__main__":
    gradio_chatbot().launch(debug=True)