Spaces:

ikraamkb
/

Summarization

Running

App Files Files Community

ikraamkb committed on Apr 7

Commit

d1b0e84

verified ·

1 Parent(s): 1e83db4

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -10

app.py CHANGED Viewed

@@ -4,11 +4,15 @@ from PIL import Image
 from fastapi import FastAPI
 from starlette.responses import RedirectResponse
-# Load models
 summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
 image_captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
-# FastAPI app
 app = FastAPI()
 def analyze_input(file, question=None):
@@ -17,22 +21,25 @@ def analyze_input(file, question=None):
     filename = file.name.lower()
     if filename.endswith((".png", ".jpg", ".jpeg")):
         image = Image.open(file)
         caption = image_captioner(image)[0]['generated_text']
         return f"📷 Image Interpretation:\n{caption}"
     elif filename.endswith((".pdf", ".docx", ".pptx", ".xlsx")):
-        from PyPDF2 import PdfReader
         import docx
         import pptx
         import pandas as pd
         try:
             text = ""
             if filename.endswith(".pdf"):
-                reader = PdfReader(file)
-                text = "\n".join([page.extract_text() for page in reader.pages if page.extract_text()])
             elif filename.endswith(".docx"):
                 doc = docx.Document(file)
@@ -50,7 +57,7 @@ def analyze_input(file, question=None):
                 text = "\n".join([df[sheet].to_string() for sheet in df])
             if not text.strip():
-                return "Could not extract meaningful text from the document."
             summary = summarizer(text[:3000], max_length=200, min_length=30, do_sample=False)
             return f"📄 Document Summary:\n{summary[0]['summary_text']}"
@@ -59,9 +66,9 @@ def analyze_input(file, question=None):
             return f"❌ Error processing document: {str(e)}"
     else:
-        return "Unsupported file type. Please upload a valid image or document."
-# Gradio Interface
 iface = gr.Interface(
     fn=analyze_input,
     inputs=gr.File(label="Upload Document or Image"),
@@ -70,12 +77,13 @@ iface = gr.Interface(
     description="Upload a document (PDF, DOCX, PPTX, XLSX) to get a summary or an image to get a caption. Runs fully on CPU."
 )
-# Wrap in TabbedInterface (even if only one for now)
 demo = gr.TabbedInterface([iface], ["Docs and Images"])
-# Mount to FastAPI app
 app = gr.mount_gradio_app(app, demo, path="/")
 @app.get("/")
 def home():
     return RedirectResponse(url="/")

 from fastapi import FastAPI
 from starlette.responses import RedirectResponse
+# ✅ Patch for Pydantic v2 schema compatibility (important for FastAPI + Gradio)
+from pydantic import BaseModel
+BaseModel.model_config = {"arbitrary_types_allowed": True}
+# ✅ Load models
 summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
 image_captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
+# ✅ Initialize FastAPI app
 app = FastAPI()
 def analyze_input(file, question=None):
     filename = file.name.lower()
+    # 📷 Image Processing
     if filename.endswith((".png", ".jpg", ".jpeg")):
         image = Image.open(file)
         caption = image_captioner(image)[0]['generated_text']
         return f"📷 Image Interpretation:\n{caption}"
+    # 📄 Document Processing
     elif filename.endswith((".pdf", ".docx", ".pptx", ".xlsx")):
+        import pdfplumber
         import docx
         import pptx
         import pandas as pd
         try:
             text = ""
             if filename.endswith(".pdf"):
+                with pdfplumber.open(file) as pdf:
+                    text = "\n".join([page.extract_text() for page in pdf.pages if page.extract_text()])
             elif filename.endswith(".docx"):
                 doc = docx.Document(file)
                 text = "\n".join([df[sheet].to_string() for sheet in df])
             if not text.strip():
+                return "❌ Could not extract meaningful text from the document."
             summary = summarizer(text[:3000], max_length=200, min_length=30, do_sample=False)
             return f"📄 Document Summary:\n{summary[0]['summary_text']}"
             return f"❌ Error processing document: {str(e)}"
     else:
+        return "❌ Unsupported file type. Please upload a valid image or document."
+# ✅ Gradio Interface
 iface = gr.Interface(
     fn=analyze_input,
     inputs=gr.File(label="Upload Document or Image"),
     description="Upload a document (PDF, DOCX, PPTX, XLSX) to get a summary or an image to get a caption. Runs fully on CPU."
 )
+# ✅ Wrap in a Tabbed Interface
 demo = gr.TabbedInterface([iface], ["Docs and Images"])
+# ✅ Mount to FastAPI app
 app = gr.mount_gradio_app(app, demo, path="/")
+# ✅ Redirect base URL to Gradio app
 @app.get("/")
 def home():
     return RedirectResponse(url="/")