Muzammil6376 committed
Commit cb3c155 Β· verified Β· 1 Parent(s): dcc36ef

Update app.py

Files changed (1): app.py +8 -27
app.py CHANGED
@@ -7,7 +7,6 @@ import gradio as gr
 from PIL import Image
 from huggingface_hub import InferenceClient
 
-# βœ… Community imports
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
 from langchain_community.llms import HuggingFaceEndpoint
@@ -20,20 +19,15 @@ from unstructured.partition.pdf import partition_pdf
 from unstructured.partition.utils.constants import PartitionStrategy
 
 # β€”β€”β€”β€”β€” Config & Folders β€”β€”β€”β€”β€”
-PDF_DIR = Path("pdfs")
-FIG_DIR = Path("figures")
-PDF_DIR.mkdir(exist_ok=True)
-FIG_DIR.mkdir(exist_ok=True)
+PDF_DIR = Path("pdfs"); FIG_DIR = Path("figures")
+PDF_DIR.mkdir(exist_ok=True); FIG_DIR.mkdir(exist_ok=True)
 
 # β€”β€”β€”β€”β€” Read your HF_TOKEN secret β€”β€”β€”β€”β€”
 hf_token = os.environ["HF_TOKEN"]
 
 # β€”β€”β€”β€”β€” Embeddings & LLM Setup β€”β€”β€”β€”β€”
-embedding_model = HuggingFaceEmbeddings(
-    model_name="sentence-transformers/all-MiniLM-L6-v2"
-)
+embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 
-# LLM via HF Inference API endpoint
 llm = HuggingFaceEndpoint(
     endpoint_url="https://api-inference.huggingface.co/models/google/flan-t5-base",
     huggingfacehub_api_token=hf_token,
@@ -41,7 +35,6 @@ llm = HuggingFaceEndpoint(
     max_length=512,
 )
 
-# Prompt
 TEMPLATE = """
 Use the following context to answer the question. If unknown, say so.
 Context: {context}
@@ -50,22 +43,19 @@ Answer (up to 3 sentences):
 """
 prompt = PromptTemplate(template=TEMPLATE, input_variables=["context", "question"])
 
-# Inference client for image captioning
+# β€”β€”β€”β€”β€” FIXED: correct keyword for InferenceClient β€”β€”β€”β€”β€”
 vision_client = InferenceClient(
-    repo_id="Salesforce/blip-image-captioning-base",
+    model="Salesforce/blip-image-captioning-base",
     token=hf_token,
 )
 
-# Globals (will initialize after processing)
 vector_store = None
 qa_chain = None
 
-
 def extract_image_caption(path: str) -> str:
     with Image.open(path) as img:
         return vision_client.image_to_text(img)
 
-
 def process_pdf(pdf_file) -> str:
     global vector_store, qa_chain
 
@@ -81,7 +71,6 @@ def process_pdf(pdf_file) -> str:
     )
 
     texts = [el.text for el in elems if el.category not in ("Image", "Table")]
-
     for img_file in FIG_DIR.iterdir():
         texts.append(extract_image_caption(str(img_file)))
 
@@ -97,27 +86,19 @@ def process_pdf(pdf_file) -> str:
 
     return f"βœ… Processed `{pdf_file.name}` into {len(docs)} chunks."
 
-
 def answer_query(question: str) -> str:
     if qa_chain is None:
         return "❗ Please upload and process a PDF first."
     return qa_chain.run(question)
 
-
-# β€”β€”β€”β€”β€” Gradio UI β€”β€”β€”β€”β€”
 with gr.Blocks() as demo:
-    gr.Markdown("## πŸ“„πŸ“· Multimodal RAG β€” Hugging Face Spaces")
-
+    gr.Markdown("## πŸ“„πŸ“· Multimodal RAG β€” HF Spaces")
     with gr.Row():
         pdf_in = gr.File(label="Upload PDF", type="file")
-        btn_proc = gr.Button("Process PDF")
-        status = gr.Textbox(label="Status")
-
+        btn_proc = gr.Button("Process PDF"); status = gr.Textbox(label="Status")
     with gr.Row():
         q_in = gr.Textbox(label="Your Question")
-        btn_ask = gr.Button("Ask")
-        ans_out = gr.Textbox(label="Answer")
-
+        btn_ask = gr.Button("Ask"); ans_out = gr.Textbox(label="Answer")
     btn_proc.click(fn=process_pdf, inputs=pdf_in, outputs=status)
     btn_ask.click(fn=answer_query, inputs=q_in, outputs=ans_out)
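
Note on the fix above: huggingface_hub's InferenceClient takes the model id through the model= keyword (the client has no repo_id= parameter), which is the one-line correction this commit makes. A minimal sketch of exercising the corrected captioning client outside the app, assuming a figure already extracted to a local path (the file name and the getattr fallback are illustrative, not part of the commit):

    import os
    from huggingface_hub import InferenceClient

    hf_token = os.environ["HF_TOKEN"]
    client = InferenceClient(
        model="Salesforce/blip-image-captioning-base",  # passed via `model=`, the keyword fixed here
        token=hf_token,
    )

    # image_to_text accepts a local file path, a URL, or raw bytes; passing a
    # PIL.Image object directly (as app.py does) is not guaranteed to work.
    out = client.image_to_text("figures/figure-1.png")  # hypothetical path

    # Depending on the huggingface_hub version, the result is either a plain
    # string or an object exposing `generated_text`.
    caption = getattr(out, "generated_text", out)
    print(caption)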
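
The hunks above only show the surroundings of vector_store and qa_chain; the code that actually builds them lies outside this diff. For orientation, a generic LangChain pattern matching the names used in app.py might look like the sketch below (a FAISS index over the extracted chunks plus a RetrievalQA chain using the same prompt). This is a hypothetical reconstruction, not the file's actual implementation; the endpoint's generation parameters are omitted because they vary across langchain_community versions:

    import os
    from langchain.chains import RetrievalQA
    from langchain.prompts import PromptTemplate
    from langchain_community.embeddings import HuggingFaceEmbeddings
    from langchain_community.llms import HuggingFaceEndpoint
    from langchain_community.vectorstores import FAISS

    hf_token = os.environ["HF_TOKEN"]

    embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    llm = HuggingFaceEndpoint(
        endpoint_url="https://api-inference.huggingface.co/models/google/flan-t5-base",
        huggingfacehub_api_token=hf_token,
    )
    prompt = PromptTemplate(
        template=(
            "Use the following context to answer the question. If unknown, say so.\n"
            "Context: {context}\nQuestion: {question}\nAnswer (up to 3 sentences):"
        ),
        input_variables=["context", "question"],
    )

    # Placeholder chunks stand in for the text and captions gathered in process_pdf().
    texts = ["example chunk one", "example chunk two"]
    vector_store = FAISS.from_texts(texts, embedding_model)

    # answer_query() then only needs qa_chain.run(question).
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=vector_store.as_retriever(),
        chain_type="stuff",
        chain_type_kwargs={"prompt": prompt},
    )
    print(qa_chain.run("What does the uploaded document describe?"))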