Muzammil6376 committed on
Commit 3fdd093 · verified · 1 Parent(s): 40696fb

Update app.py

Files changed (1)
  1. app.py +83 -58
app.py CHANGED
```diff
@@ -1,96 +1,121 @@
+# app.py
 import os
+from pathlib import Path
+
 import gradio as gr
-from langchain.embeddings import HuggingFaceEmbeddings
-from langchain.vectorstores import FAISS
+from PIL import Image
+from huggingface_hub import InferenceClient
+
+# ✅ Use community packages to avoid deprecation warnings
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_community.vectorstores import FAISS
+from langchain_community.llms import HuggingFaceHub
+
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.llms import HuggingFaceHub
 from langchain.chains import RetrievalQA
 from langchain.prompts import PromptTemplate
+
 from unstructured.partition.pdf import partition_pdf
 from unstructured.partition.utils.constants import PartitionStrategy
-from huggingface_hub import InferenceClient
-from PIL import Image
 
-# Directories
-PDF_DIR = "pdfs"
-FIGURE_DIR = "figures"
-os.makedirs(PDF_DIR, exist_ok=True)
-os.makedirs(FIGURE_DIR, exist_ok=True)
+# ————— Config & Folders —————
+PDF_DIR = Path("pdfs")
+FIG_DIR = Path("figures")
+PDF_DIR.mkdir(exist_ok=True)
+FIG_DIR.mkdir(exist_ok=True)
 
-# Embeddings and Model Setup
+# ————— Embeddings & LLM Setup —————
 embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-vector_store = FAISS.from_texts([], embedding_model)
 
-llm = HuggingFaceHub(repo_id="google/flan-t5-base", model_kwargs={"temperature": 0.5, "max_length": 512})
+# LLM via Hugging Face Inference API
+llm = HuggingFaceHub(
+    repo_id="google/flan-t5-base",
+    model_kwargs={"temperature": 0.5, "max_length": 512}
+)
 
-template = """
-Use the following context to answer the question. If the answer is unknown, say so.
+# Prompt
+TEMPLATE = """
+Use the following context to answer the question. If unknown, say so.
 Context: {context}
 Question: {question}
-Answer (3 sentences max):
+Answer (up to 3 sentences):
 """
-prompt = PromptTemplate(template=template, input_variables=["context", "question"])
+prompt = PromptTemplate(template=TEMPLATE, input_variables=["context", "question"])
+
+# Inference client for image captioning
+vision_client = InferenceClient("Salesforce/blip-image-captioning-base")
+
+# Globals (will set after processing)
+vector_store = None
+qa_chain = None
 
-qa_chain = RetrievalQA.from_chain_type(
-    llm=llm,
-    retriever=vector_store.as_retriever(),
-    chain_type_kwargs={"prompt": prompt}
-)
 
-# Hugging Face Inference API Client (for image captioning, etc.)
-vision_model = InferenceClient("Salesforce/blip-image-captioning-base")
+def extract_image_caption(path: str) -> str:
+    """Return an autogenerated caption for an image file."""
+    with Image.open(path) as img:
+        return vision_client.image_to_text(img)
 
-def extract_image_text(file_path):
-    with Image.open(file_path) as img:
-        caption = vision_model.image_to_text(img)
-    return caption
 
-def process_pdf(file):
-    pdf_path = os.path.join(PDF_DIR, file.name)
-    with open(pdf_path, "wb") as f:
-        f.write(file.read())
+def process_pdf(pdf_file) -> str:
+    """Save, parse, chunk, embed & index a PDF (text + images)."""
+    global vector_store, qa_chain
 
-    elements = partition_pdf(
-        pdf_path,
+    # 1️⃣ Save PDF
+    out_path = PDF_DIR / pdf_file.name
+    with open(out_path, "wb") as f:
+        f.write(pdf_file.read())
+
+    # 2️⃣ Partition into text + image blocks
+    elems = partition_pdf(
+        str(out_path),
         strategy=PartitionStrategy.HI_RES,
         extract_image_block_types=["Image", "Table"],
-        extract_image_block_output_dir=FIGURE_DIR
+        extract_image_block_output_dir=str(FIG_DIR),
     )
 
-    texts = [el.text for el in elements if el.category not in ["Image", "Table"]]
-
-    for fig_file in os.listdir(FIGURE_DIR):
-        fig_path = os.path.join(FIGURE_DIR, fig_file)
-        caption = extract_image_text(fig_path)
-        texts.append(caption)
+    # 3️⃣ Collect text
+    texts = [el.text for el in elems if el.category not in ("Image", "Table")]
 
-    full_text = "\n\n".join(texts)
+    # 4️⃣ Caption each image
+    for img_file in FIG_DIR.iterdir():
+        texts.append(extract_image_caption(str(img_file)))
 
-    # Chunking
+    # 5️⃣ Split & index
     splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
-    docs = splitter.split_text(full_text)
-    vector_store.add_texts(docs)
+    docs = splitter.split_text("\n\n".join(texts))
 
-    return f"Processed {file.name} with {len(docs)} text chunks."
+    vector_store = FAISS.from_texts(docs, embedding_model)
+    qa_chain = RetrievalQA.from_chain_type(
+        llm=llm,
+        retriever=vector_store.as_retriever(),
+        chain_type_kwargs={"prompt": prompt},
+    )
+
+    return f"✅ Processed `{pdf_file.name}` into {len(docs)} chunks."
 
-def answer_query(question):
+
+def answer_query(question: str) -> str:
+    if qa_chain is None:
+        return "❗ Please upload and process a PDF first."
     return qa_chain.run(question)
 
-# Gradio UI
+
+# ————— Gradio UI —————
 with gr.Blocks() as demo:
-    gr.Markdown("# 📄📷 Multimodal RAG with Hugging Face")
+    gr.Markdown("## 📄📷 Multimodal RAG — Hugging Face Spaces")
 
     with gr.Row():
-        file_input = gr.File(label="Upload PDF", type="file")
-        upload_btn = gr.Button("Process PDF")
-        status = gr.Textbox(label="Processing Status")
+        pdf_in = gr.File(label="Upload PDF", type="file")
+        btn_proc = gr.Button("Process PDF")
+        status = gr.Textbox(label="Status")
 
     with gr.Row():
-        question = gr.Textbox(label="Ask a Question")
-        ask_btn = gr.Button("Get Answer")
-        answer_box = gr.Textbox(label="Answer")
+        q_in = gr.Textbox(label="Your Question")
+        btn_ask = gr.Button("Ask")
+        ans_out = gr.Textbox(label="Answer")
 
-    upload_btn.click(fn=process_pdf, inputs=file_input, outputs=status)
-    ask_btn.click(fn=answer_query, inputs=question, outputs=answer_box)
+    btn_proc.click(fn=process_pdf, inputs=pdf_in, outputs=status)
+    btn_ask.click(fn=answer_query, inputs=q_in, outputs=ans_out)
 
-demo.launch()
+
+if __name__ == "__main__":
+    demo.launch()
```
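A few review notes on the updated file. The core fix is sound: the old version called `FAISS.from_texts([], embedding_model)` at import time, which raises on an empty text list, while the new version defers index construction until the first PDF is processed and guards `answer_query` behind a `qa_chain is None` check.

One carry-over worth flagging: `gr.File(type="file")` was only accepted by Gradio 3.x. In Gradio 4.x the `type` parameter is limited to `"filepath"` (the default) and `"binary"`, and the handler then receives a temp-file path rather than a file object, so `pdf_file.name` and `pdf_file.read()` would fail. A minimal sketch of the upload path under that assumption:

```python
# Sketch, assuming Gradio 4.x: gr.File accepts only type="filepath"
# or type="binary", so the type="file" used above is rejected.
import shutil
from pathlib import Path

import gradio as gr

PDF_DIR = Path("pdfs")
PDF_DIR.mkdir(exist_ok=True)

def process_pdf(pdf_path: str) -> str:
    # The handler now receives a temp-file *path*, not a file object,
    # so copy it instead of calling .read() or .name on it.
    out_path = PDF_DIR / Path(pdf_path).name
    shutil.copy(pdf_path, out_path)
    return f"Saved {out_path}"

with gr.Blocks() as demo:
    pdf_in = gr.File(label="Upload PDF", type="filepath")
    status = gr.Textbox(label="Status")
    pdf_in.upload(fn=process_pdf, inputs=pdf_in, outputs=status)
```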
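Relatedly, `extract_image_caption` hands a PIL `Image` object to `InferenceClient.image_to_text`. As far as I can tell, the client expects a local path, raw bytes, or a URL rather than a PIL object, and recent `huggingface_hub` releases return an `ImageToTextOutput` object instead of a plain string. A hedged version of the helper:

```python
# Sketch, assuming InferenceClient.image_to_text() takes a path, bytes,
# or URL (not a PIL.Image) and may return either a plain str (older
# huggingface_hub releases) or an object exposing .generated_text.
from huggingface_hub import InferenceClient

vision_client = InferenceClient("Salesforce/blip-image-captioning-base")

def extract_image_caption(path: str) -> str:
    """Return an autogenerated caption for an image file."""
    result = vision_client.image_to_text(path)  # the client reads and uploads the bytes
    return getattr(result, "generated_text", result)
```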
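The ✅ comment about avoiding deprecation warnings may not fully hold either: `langchain_community.llms.HuggingFaceHub` is itself deprecated upstream in favor of `HuggingFaceEndpoint`. If the `langchain-huggingface` package is available in the Space, the LLM setup could be swapped like this, with `max_new_tokens` standing in for the `max_length` model kwarg:

```python
# Sketch, assuming langchain-huggingface is installed and an HF token
# is configured in the Space (HuggingFaceEndpoint calls the hosted API).
from langchain_huggingface import HuggingFaceEndpoint

llm = HuggingFaceEndpoint(
    repo_id="google/flan-t5-base",
    temperature=0.5,
    max_new_tokens=512,  # rough stand-in for the max_length kwarg above
)
```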
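Finally, a behavioral wrinkle in `process_pdf`: step 4 iterates over all of `FIG_DIR`, so figures extracted from earlier uploads are re-captioned and indexed along with each new PDF. A hypothetical fix (the `fig_subdir` and helper names are mine, not from the commit) is to give each PDF its own figure folder:

```python
# Hypothetical tweak: per-PDF figure folders, so captions from earlier
# uploads don't leak into later indexes (replaces steps 2 and 4 above).
from pathlib import Path

from unstructured.partition.pdf import partition_pdf
from unstructured.partition.utils.constants import PartitionStrategy

FIG_DIR = Path("figures")

def partition_with_isolated_figures(pdf_path: Path):
    """Partition one PDF, writing its figures to a dedicated subfolder."""
    fig_subdir = FIG_DIR / pdf_path.stem
    fig_subdir.mkdir(parents=True, exist_ok=True)
    elems = partition_pdf(
        str(pdf_path),
        strategy=PartitionStrategy.HI_RES,
        extract_image_block_types=["Image", "Table"],
        extract_image_block_output_dir=str(fig_subdir),
    )
    return elems, fig_subdir  # caption only fig_subdir's contents downstream
```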