Muzammil6376 committed on
Commit 40696fb · verified · 1 Parent(s): ced2810

Update app.py

Files changed (1)
  1. app.py +69 -68
app.py CHANGED
@@ -1,95 +1,96 @@
  import os
- import tempfile
-
  import gradio as gr
  from langchain.embeddings import HuggingFaceEmbeddings
  from langchain.vectorstores import FAISS
  from langchain.text_splitter import RecursiveCharacterTextSplitter
- from langchain.document_loaders import UnstructuredPDFLoader
- from langchain.chains import RetrievalQA
  from langchain.llms import HuggingFaceHub
  from PIL import Image
- from transformers import pipeline
-
- # Directories for temporary storage
- FIGURES_DIR = tempfile.mkdtemp(prefix="figures_")
-
- # Configure Hugging Face
- HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
-
- # Initialize embeddings and vector store
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
- vector_store = None
-
- # Initialize image captioning pipeline
- captioner = pipeline("image-to-text", model="Salesforce/blip2-flan-t5-xl", use_auth_token=HUGGINGFACEHUB_API_TOKEN)

- # Initialize LLM for QA
- llm = HuggingFaceHub(
-     repo_id="google/flan-t5-xxl",
-     model_kwargs={"temperature":0.0, "max_length":256},
-     huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN
  )

- # Helper functions

- def process_pdf(pdf_file):
-     # Load text content
-     loader = UnstructuredPDFLoader(pdf_file.name)
-     docs = loader.load()
-
-     # Basic text from PDF
-     raw_text = "\n".join([d.page_content for d in docs])
-
-     # Optionally extract images and caption them
-     # Here, we simply caption any embedded images
-     captions = []
-     # (In a real pipeline, extract and save images separately)
-     # For demo, we skip actual image files extraction
-
-     # Combine text and captions
-     combined = raw_text + "\n\n" + "\n".join(captions)
-     return combined

- def build_index(text):
-     global vector_store
-     # Split into chunks
-     splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
-     chunks = splitter.split_text(text)
-
-     # Create or update FAISS index
-     vector_store = FAISS.from_texts(chunks, embeddings)

- def answer_query(query):
-     qa = RetrievalQA.from_chain_type(
-         llm=llm,
-         chain_type="stuff",
-         retriever=vector_store.as_retriever()
-     )
-     return qa.run(query)

  # Gradio UI
  with gr.Blocks() as demo:
-     gr.Markdown("# Multimodal RAG QA App")

      with gr.Row():
-         pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
-         question_input = gr.Textbox(label="Ask a question", placeholder="Enter your question here...")
-
-     output = gr.Textbox(label="Answer", interactive=False)

-     def on_submit(pdf, question):
-         if pdf is not None:
-             text = process_pdf(pdf)
-             build_index(text)
-         if not question:
-             return "Please enter a question."
-         return answer_query(question)

-     submit_btn = gr.Button("Get Answer")
-     submit_btn.click(on_submit, inputs=[pdf_input, question_input], outputs=output)

- if __name__ == "__main__":
-     demo.launch()
 
  import os
  import gradio as gr
  from langchain.embeddings import HuggingFaceEmbeddings
  from langchain.vectorstores import FAISS
  from langchain.text_splitter import RecursiveCharacterTextSplitter
  from langchain.llms import HuggingFaceHub
+ from langchain.chains import RetrievalQA
+ from langchain.prompts import PromptTemplate
+ from unstructured.partition.pdf import partition_pdf
+ from unstructured.partition.utils.constants import PartitionStrategy
+ from huggingface_hub import InferenceClient
  from PIL import Image
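+
+ # No token is hard-coded: HuggingFaceHub reads HUGGINGFACEHUB_API_TOKEN from the
+ # environment, and InferenceClient typically picks up HF_TOKEN or a cached login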
+
+ # Directories
+ PDF_DIR = "pdfs"
+ FIGURE_DIR = "figures"
+ os.makedirs(PDF_DIR, exist_ok=True)
+ os.makedirs(FIGURE_DIR, exist_ok=True)
+
+ # Embeddings and Model Setup
+ embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+ # FAISS.from_texts() cannot build an index from an empty list (it has no text
+ # to infer the embedding dimension from), so seed it with a placeholder text
+ vector_store = FAISS.from_texts(["placeholder"], embedding_model)
+
+ llm = HuggingFaceHub(repo_id="google/flan-t5-base", model_kwargs={"temperature": 0.5, "max_length": 512})
+
+ template = """
+ Use the following context to answer the question. If the answer is unknown, say so.
+ Context: {context}
+ Question: {question}
+ Answer (3 sentences max):
+ """
+ prompt = PromptTemplate(template=template, input_variables=["context", "question"])
+
+ qa_chain = RetrievalQA.from_chain_type(
+     llm=llm,
+     retriever=vector_store.as_retriever(),
+     chain_type_kwargs={"prompt": prompt}
  )
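+
+ # The retriever keeps a reference to vector_store, so texts added later via
+ # vector_store.add_texts() are visible to qa_chain without rebuilding the chain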
 
+ # Hugging Face Inference API Client (for image captioning, etc.)
+ vision_model = InferenceClient("Salesforce/blip-image-captioning-base")
+
+ def extract_image_text(file_path):
+     # The Inference API accepts a file path, URL, or raw bytes rather than a
+     # PIL Image object, so send the image bytes directly
+     with open(file_path, "rb") as img_file:
+         result = vision_model.image_to_text(img_file.read())
+     # Older huggingface_hub releases return a plain string; newer ones return
+     # an object with a .generated_text attribute
+     return getattr(result, "generated_text", result)
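+
+ # Captioning could also run locally via transformers.pipeline("image-to-text",
+ # model="Salesforce/blip-image-captioning-base"), trading Inference API latency
+ # for local model memory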
+
+ def process_pdf(file_path):
+     # gr.File(type="filepath") hands us the upload's temp path as a string
+     pdf_path = os.path.join(PDF_DIR, os.path.basename(file_path))
+     with open(file_path, "rb") as src, open(pdf_path, "wb") as dst:
+         dst.write(src.read())
+
+     elements = partition_pdf(
+         pdf_path,
+         strategy=PartitionStrategy.HI_RES,
+         extract_image_block_types=["Image", "Table"],
+         extract_image_block_output_dir=FIGURE_DIR
+     )
+
+     texts = [el.text for el in elements if el.category not in ["Image", "Table"]]
+
+     # FIGURE_DIR accumulates figures across uploads, so images from earlier
+     # PDFs are re-captioned (and re-indexed) here as well
+     for fig_file in os.listdir(FIGURE_DIR):
+         fig_path = os.path.join(FIGURE_DIR, fig_file)
+         caption = extract_image_text(fig_path)
+         texts.append(caption)
+
+     full_text = "\n\n".join(texts)
+
+     # Chunking
+     splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+     docs = splitter.split_text(full_text)
+     vector_store.add_texts(docs)
+
+     return f"Processed {os.path.basename(file_path)} with {len(docs)} text chunks."
+
+ def answer_query(question):
+     return qa_chain.run(question)
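+
+ # qa_chain.run() matches the legacy langchain 0.0.x API imported above; newer
+ # LangChain releases deprecate run() in favor of invoke()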

  # Gradio UI
  with gr.Blocks() as demo:
+     gr.Markdown("# 📄📷 Multimodal RAG with Hugging Face")

      with gr.Row():
+         file_input = gr.File(label="Upload PDF", file_types=[".pdf"], type="filepath")
+         upload_btn = gr.Button("Process PDF")
+         status = gr.Textbox(label="Processing Status")

+     with gr.Row():
+         question = gr.Textbox(label="Ask a Question")
+         ask_btn = gr.Button("Get Answer")
+         answer_box = gr.Textbox(label="Answer")

+     upload_btn.click(fn=process_pdf, inputs=file_input, outputs=status)
+     ask_btn.click(fn=answer_query, inputs=question, outputs=answer_box)

+ demo.launch()