Spaces:

Muzammil6376
/

Multimodal

Sleeping

App Files Files Community

Muzammil6376 committed 27 days ago

Commit

ae644bf

verified ·

1 Parent(s): 8a5a609

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -8

app.py CHANGED Viewed

@@ -22,6 +22,7 @@ from langchain_huggingface import HuggingFaceEmbeddings
 retriever = None               # FAISS retriever for multimodal content
 current_pdf_name = None        # Name of the currently loaded PDF
 combined_texts: List[str] = [] # Combined text + image captions corpus
@@ -50,8 +51,7 @@ def generate_caption(image_path: str) -> str:
 def embed_texts(texts: List[str]) -> List[List[float]]:
     """
-    Call the HF embeddings endpoint.
-    Uses `google/Gemma-Embeddings-v1.0` (or any other hosted embeddings model).
     """
     resp = hf.embeddings(
         model="google/Gemma-Embeddings-v1.0",
@@ -62,7 +62,47 @@ def embed_texts(texts: List[str]) -> List[List[float]]:
 def process_pdf(pdf_file) -> str:
     """
-    Parse the PDF, caption its images, combine text+captions, embed remotely,
     build FAISS index, and prepare retriever.
     """
     global current_pdf_name, retriever, combined_texts
@@ -109,12 +149,13 @@ def process_pdf(pdf_file) -> str:
     )
     retriever = index.as_retriever(search_kwargs={"k": 2})
-    return f"✅ Indexed '{current_pdf_name}' — " \
-           f"{len(text_elements)} text blocks + {len(captions)} image captions"
 def ask_question(question: str) -> str:
-    """Retrieve top-k chunks from FAISS and call chat_completions endpoint."""
     global retriever
     if retriever is None:
         return "❌ Please upload and process a PDF first."
@@ -149,7 +190,6 @@ def clear_interface():
     os.makedirs(FIGURES_DIR, exist_ok=True)
     return ""
 # ── Gradio UI ────────────────────────────────────────────────────────────────
 theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="blue")
 with gr.Blocks(theme=theme, css="""
@@ -191,4 +231,4 @@ with gr.Blocks(theme=theme, css="""
                     outputs=[status_box, answer_output])
 if __name__ == "__main__":
-    demo.launch(debug=True, share=True)

+# ── Globals ───────────────────────────────────────────────────────────────────
 retriever = None               # FAISS retriever for multimodal content
 current_pdf_name = None        # Name of the currently loaded PDF
 combined_texts: List[str] = [] # Combined text + image captions corpus
 def embed_texts(texts: List[str]) -> List[List[float]]:
     """
+    Call the HF embeddings endpoint using google/Gemma-Embeddings-v1.0.
     """
     resp = hf.embeddings(
         model="google/Gemma-Embeddings-v1.0",
 def process_pdf(pdf_file) -> str:
     """
+    Parse the PDF, caption images, combine text+captions, embed remotely,
+    build FAISS index, and prepare retriever. Falls back to text-only if poppler is missing.
+    """
+    from pdf2image.exceptions import PDFInfoNotInstalledError
+    global current_pdf_name, retriever, combined_texts
+    if pdf_file is None:
+        return "❌ Please upload a PDF file."
+    pdf_path = pdf_file.name
+    current_pdf_name = os.path.basename(pdf_path)
+    # Try rich parsing; fallback if poppler/pdfinfo is unavailable
+    try:
+        elements = partition_pdf(
+            filename=pdf_path,
+            strategy=PartitionStrategy.HI_RES,
+            extract_image_block_types=["Image", "Table"],
+            extract_image_block_output_dir=FIGURES_DIR,
+        )
+        text_elements = [el.text for el in elements if el.category not in ["Image","Table"] and el.text]
+        image_files = [os.path.join(FIGURES_DIR, f) for f in os.listdir(FIGURES_DIR)
+                       if f.lower().endswith((".png",".jpg",".jpeg"))]
+    except PDFInfoNotInstalledError:
+        # Fallback: text-only extraction
+        from PyPDF2 import PdfReader
+        reader = PdfReader(pdf_path)
+        text_elements = [page.extract_text() or "" for page in reader.pages]
+        image_files = []
+    # Caption images if any
+    captions = [generate_caption(img) for img in image_files]
+    combined_texts = text_elements + captions
+    vectors = embed_texts(combined_texts)
+    index = FAISS.from_embeddings(texts=combined_texts, embeddings=vectors)
+    retriever = index.as_retriever(search_kwargs={"k": 2})
+    return f"✅ Indexed '{current_pdf_name}' — {len(text_elements)} text blocks + {len(captions)} image captions"
+    """
+    Parse the PDF, caption images, combine text+captions, embed remotely,
     build FAISS index, and prepare retriever.
     """
     global current_pdf_name, retriever, combined_texts
     )
     retriever = index.as_retriever(search_kwargs={"k": 2})
+    return f"✅ Indexed '{current_pdf_name}' — {len(text_elements)} text blocks + {len(captions)} image captions"
 def ask_question(question: str) -> str:
+    """
+    Retrieve top-k chunks from FAISS and call chat_completions endpoint.
+    """
     global retriever
     if retriever is None:
         return "❌ Please upload and process a PDF first."
     os.makedirs(FIGURES_DIR, exist_ok=True)
     return ""
 # ── Gradio UI ────────────────────────────────────────────────────────────────
 theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="blue")
 with gr.Blocks(theme=theme, css="""
                     outputs=[status_box, answer_output])
 if __name__ == "__main__":
+    demo.launch(debug=True, share=True)