Spaces:

Deepakraj2006
/

RAG_GRAD

Sleeping

App Files Files Community

Deepakraj2006 committed on Mar 3

Commit

0ccdb83

verified ·

1 Parent(s): a800118

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -20

app.py CHANGED Viewed

@@ -41,33 +41,53 @@ def init_llm():
     )
 def process_document(file):
-    """Process uploaded PDF and create a retriever"""
     global conversation_retrieval_chain
     if not llm_pipeline or not embeddings:
         init_llm()
-    # Load PDF and split text
-    loader = PyPDFLoader(file.name)
-    documents = loader.load()
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
-    texts = text_splitter.split_documents(documents)
-    # Load or create ChromaDB
-    if os.path.exists(persist_directory):
-        db = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
-    else:
-        db = Chroma.from_documents(texts, embedding=embeddings, persist_directory=persist_directory)
-    retriever = db.as_retriever(search_type="similarity", search_kwargs={'k': 6})
-    # Initialize ConversationalRetrievalChain
-    conversation_retrieval_chain = ConversationalRetrievalChain.from_llm(
-        llm=llm_pipeline, retriever=retriever
-    )
-    return "📄 PDF uploaded and processed successfully! You can now ask questions."
 def process_prompt(prompt, chat_history_display):

     )
+import time
 def process_document(file):
+    """Save an uploaded PDF, index it in ChromaDB and build the QA chain.
+
+    Each stage (save, load, split, index) is timed and logged with print().
+    On success the module-level ``conversation_retrieval_chain`` is replaced
+    by a chain whose retriever is backed by the newly created index.
+
+    Args:
+        file: Uploaded file object. Its ``name`` attribute supplies the file
+            name; presumably this is the path of a temp file on disk (the UI
+            framework is not visible here -- confirm against the caller).
+
+    Returns:
+        A success message, or ``"Error: <details>"`` if any stage raised.
+    """
+    import shutil  # function-scope import keeps this block self-contained
     global conversation_retrieval_chain
     if not llm_pipeline or not embeddings:
         init_llm()
+    start_time = time.time()
+    print(f"📂 Uploading PDF: {file.name}")
+    try:
+        # ✅ Ensure file is saved correctly
+        # os.path.join() discards earlier segments when a later one is
+        # absolute, so joining the raw file.name could resolve to the upload
+        # itself; opening that in "wb" mode would truncate it before it is
+        # read. Use only the base name, and create the target directory
+        # because nothing else guarantees it exists.
+        upload_dir = "/tmp/uploads"
+        os.makedirs(upload_dir, exist_ok=True)
+        file_path = os.path.join(upload_dir, os.path.basename(file.name))
+        if os.path.isfile(file.name):
+            # The upload already lives on disk: copy it unless it is already
+            # the target (copyfile raises SameFileError on identical paths).
+            if os.path.abspath(file.name) != os.path.abspath(file_path):
+                shutil.copyfile(file.name, file_path)
+        else:
+            # No on-disk source: fall back to the object's own byte stream.
+            with open(file_path, "wb") as f:
+                f.write(file.read())
+        print(f"✅ PDF saved at {file_path} in {time.time() - start_time:.2f}s")
+        # ✅ Load PDF
+        start_time = time.time()
+        loader = PyPDFLoader(file_path)
+        documents = loader.load()
+        print(f"✅ PDF loaded in {time.time() - start_time:.2f}s")
+        # ✅ Split text
+        start_time = time.time()
+        text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)
+        texts = text_splitter.split_documents(documents)
+        print(f"✅ Text split in {time.time() - start_time:.2f}s")
+        # ✅ Create ChromaDB
+        # NOTE(review): the persist directory is the same for every upload;
+        # confirm whether chunks from earlier PDFs are meant to stay indexed.
+        start_time = time.time()
+        db = Chroma.from_documents(texts, embedding=embeddings, persist_directory="/tmp/chroma_db")
+        print(f"✅ ChromaDB created in {time.time() - start_time:.2f}s")
+        # ✅ Create retrieval chain
+        conversation_retrieval_chain = ConversationalRetrievalChain.from_llm(
+            llm=llm_pipeline, retriever=db.as_retriever()
+        )
+        print("✅ Document processing complete!")
+        return "📄 PDF uploaded and processed successfully! You can now ask questions."
+    except Exception as e:
+        # UI boundary: report the failure as a message instead of raising.
+        print(f"❌ Error processing PDF: {str(e)}")
+        return f"Error: {str(e)}"
 def process_prompt(prompt, chat_history_display):