Spaces:

Muzammil6376
/

Multimodal

Sleeping

App Files Files Community

Muzammil6376 committed on 23 days ago

Commit

2a4ba68

verified ·

1 Parent(s): 7133a05

Update app.py

Browse files

Files changed (1) hide show

app.py +104 -52

app.py CHANGED Viewed

@@ -1,9 +1,10 @@
 import os
 import shutil
-import PyPDF2
 import gradio as gr
 from PIL import Image
-from typing import List
 # Unstructured for rich PDF parsing
 from unstructured.partition.pdf import partition_pdf
 from unstructured.partition.utils.constants import PartitionStrategy
@@ -14,18 +15,14 @@ from transformers import BlipProcessor, BlipForConditionalGeneration
 # Hugging Face Inference client
 from huggingface_hub import InferenceClient
-# LangChain vectorstore and embeddings
-from langchain_community.vectorstores import FAISS
-from langchain_huggingface import HuggingFaceEmbeddings
 # ── Globals ───────────────────────────────────────────────────────────────────
 retriever = None               # FAISS retriever for multimodal content
 current_pdf_name = None        # Name of the currently loaded PDF
 combined_texts: List[str] = [] # Combined text + image captions corpus
 # ── Setup: directories ─────────────────────────────────────────────────────────
 FIGURES_DIR = "figures"
@@ -38,7 +35,7 @@ hf = InferenceClient()  # uses HUGGINGFACEHUB_API_TOKEN env var
 # BLIP captioner
 blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
-blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
 def generate_caption(image_path: str) -> str:
@@ -55,60 +52,68 @@ def embed_texts(texts: List[str]) -> List[List[float]]:
     return resp["embeddings"]
-def process_pdf(pdf_file) -> str:
     """
-    Parse PDF, extract text and images, caption images,
-    embed all chunks remotely, build FAISS index.
     """
-    global retriever, current_pdf_name, combined_texts
     if pdf_file is None:
-        return "❌ Please upload a PDF file."
-    pdf_path = pdf_file.name
-    current_pdf_name = os.path.basename(pdf_path)
-    # Attempt rich parsing
     try:
-        from pdf2image.exceptions import PDFInfoNotInstalledError
         elements = partition_pdf(
-            filename=pdf_path,
             strategy=PartitionStrategy.HI_RES,
-            extract_image_block_types=["Image","Table"],
             extract_image_block_output_dir=FIGURES_DIR,
         )
         text_elements = [el.text for el in elements if el.category not in ["Image","Table"] and el.text]
-        image_files = [os.path.join(FIGURES_DIR, f) for f in os.listdir(FIGURES_DIR)
-                       if f.lower().endswith((".png",".jpg",".jpeg"))]
     except Exception:
-        # Fallback to text-only
-        from pypdf import PdfReader
-        reader = PdfReader(pdf_path)
-        text_elements = [page.extract_text() or "" for page in reader.pages]
         image_files = []
     captions = [generate_caption(img) for img in image_files]
-    combined_texts = text_elements + captions
     vectors = embed_texts(combined_texts)
     index = FAISS.from_embeddings(texts=combined_texts, embeddings=vectors)
     retriever = index.as_retriever(search_kwargs={"k":2})
-    return f"✅ Indexed '{current_pdf_name}' — {len(text_elements)} text blocks + {len(captions)} image captions"
-def ask_question(question: str) -> str:
-    """Retrieve from FAISS and call chat completion."""
     global retriever
     if retriever is None:
-        return "❌ Please process a PDF first."
     if not question.strip():
         return "❌ Please enter a question."
     docs = retriever.get_relevant_documents(question)
     context = "\n\n".join(doc.page_content for doc in docs)
     prompt = (
-        "Use the following excerpts to answer the question:\n\n"
         f"{context}\n\nQuestion: {question}\nAnswer:"
     )
     response = hf.chat_completion(
@@ -120,34 +125,81 @@ def ask_question(question: str) -> str:
     return response["choices"][0]["message"]["content"].strip()
 def clear_interface():
-    """Reset all state and clear extracted images."""
-    global retriever, current_pdf_name, combined_texts
     retriever = None
     current_pdf_name = None
     combined_texts = []
     shutil.rmtree(FIGURES_DIR, ignore_errors=True)
     os.makedirs(FIGURES_DIR, exist_ok=True)
-    return ""
-# Gradio UI
-with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue")) as demo:
-    gr.Markdown("# DocQueryAI (Remote‐RAG)")
     with gr.Row():
         with gr.Column():
-            pdf_file = gr.File(file_types=[".pdf"], type="filepath")
-            process_btn = gr.Button("Process PDF")
-            status_box = gr.Textbox(interactive=False)
         with gr.Column():
-            question_input = gr.Textbox(lines=3)
-            ask_btn = gr.Button("Ask")
-            answer_output = gr.Textbox(interactive=False)
-    clear_btn = gr.Button("Clear All")
-    process_btn.click(fn=process_pdf, inputs=[pdf_file], outputs=[status_box])
-    ask_btn.click(fn=ask_question, inputs=[question_input], outputs=[answer_output])
-    clear_btn.click(fn=clear_interface, outputs=[status_box, answer_output])
 if __name__ == "__main__":
-    demo.launch()

 import os
 import shutil
+from typing import List
 import gradio as gr
 from PIL import Image
 # Unstructured for rich PDF parsing
 from unstructured.partition.pdf import partition_pdf
 from unstructured.partition.utils.constants import PartitionStrategy
 # Hugging Face Inference client
 from huggingface_hub import InferenceClient
+# FAISS vectorstore
+from langchain.vectorstores.faiss import FAISS
 # ── Globals ───────────────────────────────────────────────────────────────────
 retriever = None               # FAISS retriever for multimodal content
 current_pdf_name = None        # Name of the currently loaded PDF
 combined_texts: List[str] = [] # Combined text + image captions corpus
+pdf_text: str = ""           # Full PDF text for summary/keywords
 # ── Setup: directories ─────────────────────────────────────────────────────────
 FIGURES_DIR = "figures"
 # BLIP captioner
 blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+blip_model     = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
 def generate_caption(image_path: str) -> str:
     return resp["embeddings"]
+def process_pdf(pdf_file):
     """
+    Reads & extracts text and images from the PDF, captions images,
+    splits & embeds chunks, builds FAISS index, and stores full text.
+    Returns filename, status, and enables Q&A box.
     """
+    global retriever, current_pdf_name, combined_texts, pdf_text
     if pdf_file is None:
+        return None, "❌ Please upload a PDF file.", gr.update(interactive=False)
+    current_pdf_name = os.path.basename(pdf_file.name)
+    # extract full text for summary/keywords
+    from pypdf import PdfReader
+    reader = PdfReader(pdf_file.name)
+    pages = [page.extract_text() or "" for page in reader.pages]
+    pdf_text = "\n\n".join(pages)
+    # parse with unstructured for images
     try:
         elements = partition_pdf(
+            filename=pdf_file.name,
             strategy=PartitionStrategy.HI_RES,
+            extract_image_block_types=["Image", "Table"],
             extract_image_block_output_dir=FIGURES_DIR,
         )
         text_elements = [el.text for el in elements if el.category not in ["Image","Table"] and el.text]
+        image_files   = [os.path.join(FIGURES_DIR, f) for f in os.listdir(FIGURES_DIR)
+                         if f.lower().endswith((".png",".jpg",".jpeg"))]
     except Exception:
+        text_elements = pages
         image_files = []
     captions = [generate_caption(img) for img in image_files]
+    # split text elements into chunks
+    from langchain.text_splitter import CharacterTextSplitter
+    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
+    chunks = []
+    for t in text_elements:
+        chunks.extend(splitter.split_text(t))
+    combined_texts = chunks + captions
     vectors = embed_texts(combined_texts)
     index = FAISS.from_embeddings(texts=combined_texts, embeddings=vectors)
     retriever = index.as_retriever(search_kwargs={"k":2})
+    status = f"✅ Indexed '{current_pdf_name}' — {len(chunks)} text chunks + {len(captions)} image captions"
+    return current_pdf_name, status, gr.update(interactive=True)
+def ask_question(pdf_name, question):
+    """Retrieve relevant chunks and generate answer via remote LLM."""
     global retriever
     if retriever is None:
+        return "❌ Please upload and index a PDF first."
     if not question.strip():
         return "❌ Please enter a question."
     docs = retriever.get_relevant_documents(question)
     context = "\n\n".join(doc.page_content for doc in docs)
     prompt = (
+        "Use the following document excerpts to answer the question.\n\n"
         f"{context}\n\nQuestion: {question}\nAnswer:"
     )
     response = hf.chat_completion(
     return response["choices"][0]["message"]["content"].strip()
+def generate_summary():
+    """Ask remote LLM for concise summary using full text."""
+    if not pdf_text:
+        return "❌ Please upload and index a PDF first."
+    ctx = pdf_text[:2000]
+    resp = hf.chat_completion(
+        model="google/gemma-3-27b-it",
+        messages=[{"role":"user","content":f"Summarize concisely:\n\n{ctx}..."}],
+        max_tokens=150,
+        temperature=0.5,
+    )
+    return resp["choices"][0]["message"]["content"].strip()
+def extract_keywords():
+    """Ask remote LLM to extract key terms from full text."""
+    if not pdf_text:
+        return "❌ Please upload and index a PDF first."
+    ctx = pdf_text[:2000]
+    resp = hf.chat_completion(
+        model="google/gemma-3-27b-it",
+        messages=[{"role":"user","content":f"Extract 10-15 key terms:\n\n{ctx}..."}],
+        max_tokens=60,
+        temperature=0.5,
+    )
+    return resp["choices"][0]["message"]["content"].strip()
 def clear_interface():
+    """Reset state and clear extracted images."""
+    global retriever, current_pdf_name, combined_texts, pdf_text
     retriever = None
     current_pdf_name = None
     combined_texts = []
+    pdf_text = ""
     shutil.rmtree(FIGURES_DIR, ignore_errors=True)
     os.makedirs(FIGURES_DIR, exist_ok=True)
+    return None, "", gr.update(interactive=False)
+# ── Gradio UI ────────────────────────────────────────────────────────────────
+theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="blue")
+with gr.Blocks(theme=theme, css="""
+    .container { border-radius: 10px; padding: 15px; }
+    .pdf-active { border-left: 3px solid #6366f1; padding-left: 10px; background-color: rgba(99,102,241,0.1); }
+    .footer { text-align: center; margin-top: 30px; font-size: 0.8em; color: #666; }
+    .main-title { text-align: center; font-size: 64px; font-weight: bold; margin-bottom: 20px; }
+""") as demo:
+    gr.Markdown("<div class='main-title'>DocQueryAI (Multimodal RAG)</div>")
     with gr.Row():
         with gr.Column():
+            gr.Markdown("## 📄 Document Input")
+            pdf_display   = gr.Textbox(label="Active Document", interactive=False, elem_classes="pdf-active")
+            pdf_file      = gr.File(file_types=[".pdf"], type="filepath")
+            upload_button = gr.Button("📤 Process Document", variant="primary")
+            status_box    = gr.Textbox(label="Status", interactive=False)
         with gr.Column():
+            gr.Markdown("## ❓ Ask Questions")
+            question_input = gr.Textbox(lines=3, placeholder="Enter your question here…", interactive=False)
+            ask_button     = gr.Button("🔍 Ask Question", variant="primary", interactive=False)
+            answer_output  = gr.Textbox(label="Answer", lines=8, interactive=False)
+    with gr.Row():
+        summary_button  = gr.Button("📋 Generate Summary", variant="secondary", interactive=False)
+        summary_output  = gr.Textbox(label="Summary", lines=4, interactive=False)
+        keywords_button = gr.Button("🏷️ Extract Keywords", variant="secondary", interactive=False)
+        keywords_output = gr.Textbox(label="Keywords", lines=4, interactive=False)
+    clear_button = gr.Button("🗑️ Clear All", variant="secondary")
+    gr.Markdown("<div class='footer'>Powered by HF Inference + FAISS + BLIP | Gradio</div>")
+    upload_button.click(process_pdf, [pdf_file], [pdf_display, status_box, question_input])
+    ask_button.click(ask_question, [pdf_display, question_input], answer_output)
+    summary_button.click(generate_summary, [], summary_output)
+    keywords_button.click(extract_keywords, [], keywords_output)
+    clear_button.click(clear_interface, [], [pdf_display, status_box, question_input])
 if __name__ == "__main__":
+    demo.launch(debug=True)