Update app.py
app.py
CHANGED
@@ -7,7 +7,7 @@ import gradio as gr
 from PIL import Image
 from huggingface_hub import InferenceClient
 
-# ✅ Community imports
+# ✅ Community imports
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
 from langchain_community.llms import HuggingFaceEndpoint
@@ -29,13 +29,16 @@ FIG_DIR.mkdir(exist_ok=True)
 hf_token = os.environ["HF_TOKEN"]
 
 # ───── Embeddings & LLM Setup ─────
-embedding_model = HuggingFaceEmbeddings(
+embedding_model = HuggingFaceEmbeddings(
+    model_name="sentence-transformers/all-MiniLM-L6-v2"
+)
 
 # LLM via HF Inference API endpoint
 llm = HuggingFaceEndpoint(
     endpoint_url="https://api-inference.huggingface.co/models/google/flan-t5-base",
     huggingfacehub_api_token=hf_token,
-
+    temperature=0.5,
+    max_length=512,
 )
 
 # Prompt
@@ -48,7 +51,10 @@ Answer (up to 3 sentences):
 prompt = PromptTemplate(template=TEMPLATE, input_variables=["context", "question"])
 
 # Inference client for image captioning
-vision_client = InferenceClient(
+vision_client = InferenceClient(
+    model="Salesforce/blip-image-captioning-base",
+    token=hf_token,
+)
 
 # Globals (will initialize after processing)
 vector_store = None
@@ -56,21 +62,17 @@ qa_chain = None
 
 
 def extract_image_caption(path: str) -> str:
-    """Return an autogenerated caption for an image file."""
     with Image.open(path) as img:
         return vision_client.image_to_text(img)
 
 
 def process_pdf(pdf_file) -> str:
-    """Save, parse, chunk, embed & index a PDF (text + images)."""
     global vector_store, qa_chain
 
-    # 1️⃣ Save PDF
     out_path = PDF_DIR / pdf_file.name
     with open(out_path, "wb") as f:
         f.write(pdf_file.read())
 
-    # 2️⃣ Partition into text + image blocks
     elems = partition_pdf(
         str(out_path),
         strategy=PartitionStrategy.HI_RES,
@@ -78,14 +80,11 @@ def process_pdf(pdf_file) -> str:
         extract_image_block_output_dir=str(FIG_DIR),
     )
 
-    # 3️⃣ Collect text
     texts = [el.text for el in elems if el.category not in ("Image", "Table")]
 
-    # 4️⃣ Caption each image
     for img_file in FIG_DIR.iterdir():
         texts.append(extract_image_caption(str(img_file)))
 
-    # 5️⃣ Split & index
     splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    docs = splitter.split_text("\n\n".join(texts))
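The globals vector_store and qa_chain are populated further down in process_pdf, outside the hunks shown here. For orientation, a minimal sketch of how the pieces configured in this commit usually come together; the RetrievalQA import and the k=4 retriever setting are assumptions, not part of this diff:

from langchain.chains import RetrievalQA
from langchain_community.vectorstores import FAISS

def build_index_and_chain(docs, embedding_model, llm, prompt):
    # Embed the split chunks and index them in FAISS.
    store = FAISS.from_texts(docs, embedding_model)
    # Wire retriever, endpoint LLM, and the prompt into a "stuff" QA chain.
    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=store.as_retriever(search_kwargs={"k": 4}),  # k=4 is an assumed default
        chain_type_kwargs={"prompt": prompt},
    )
    return store, chain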
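One caveat the diff leaves untouched: in current huggingface_hub releases, InferenceClient.image_to_text expects raw bytes or a file path rather than a PIL Image, so extract_image_caption may still fail at runtime. A hedged sketch of a fix, assuming newer client versions return an object with a generated_text attribute:

def extract_image_caption(path: str) -> str:
    # Pass the file path straight through; the client reads the bytes itself.
    out = vision_client.image_to_text(path)
    # Newer huggingface_hub versions return an ImageToTextOutput object,
    # older ones a plain string; handle both.
    return getattr(out, "generated_text", out)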