Spaces:

ikraamkb
/

Summarization

Running

App Files Community

ikraamkb committed 17 days ago

Commit

653c3ae

verified ·

1 Parent(s): 3fb07d9

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -57

app.py CHANGED Viewed

@@ -1,49 +1,47 @@
 from fastapi import FastAPI
 from fastapi.responses import RedirectResponse
-from transformers import pipeline
-from PIL import Image
 import fitz  # PyMuPDF
 import docx
 import pptx
 import openpyxl
 import io
 import gradio as gr
-# Initialize models
 summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
 image_captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
-# FastAPI app
 app = FastAPI()
 # -------------------------
-# Helper Functions
 # -------------------------
-def extract_text_from_pdf(upload):
     try:
-        file_bytes = upload.read()
-        stream = io.BytesIO(file_bytes)
-        with fitz.open(stream=stream, filetype="pdf") as doc:
             return "\n".join([page.get_text() for page in doc])
     except Exception as e:
-        return f"❌ PDF extraction error: {e}"
-def extract_text_from_docx(upload):
     try:
-        file_bytes = upload.read()
-        stream = io.BytesIO(file_bytes)
-        doc = docx.Document(stream)
         return "\n".join(p.text for p in doc.paragraphs if p.text.strip())
     except Exception as e:
-        return f"❌ DOCX extraction error: {e}"
-def extract_text_from_pptx(upload):
     try:
-        file_bytes = upload.read()
-        stream = io.BytesIO(file_bytes)
-        prs = pptx.Presentation(stream)
         text = []
         for slide in prs.slides:
             for shape in slide.shapes:
@@ -51,46 +49,43 @@ def extract_text_from_pptx(upload):
                     text.append(shape.text)
         return "\n".join(text)
     except Exception as e:
-        return f"❌ PPTX extraction error: {e}"
-def extract_text_from_xlsx(upload):
     try:
-        file_bytes = upload.read()
-        stream = io.BytesIO(file_bytes)
-        wb = openpyxl.load_workbook(stream)
         text = []
         for sheet in wb.sheetnames:
             ws = wb[sheet]
             for row in ws.iter_rows(values_only=True):
-                text.append(" ".join(str(cell) for cell in row if cell))
         return "\n".join(text)
     except Exception as e:
-        return f"❌ XLSX extraction error: {e}"
 # -------------------------
-# Core Functions
 # -------------------------
-def summarize_document(upload):
-    if not upload:
-        return "⚠️ No file uploaded."
-    ext = upload.name.lower()
-    upload.seek(0)
-    if ext.endswith(".pdf"):
-        text = extract_text_from_pdf(upload)
-    elif ext.endswith(".docx"):
-        text = extract_text_from_docx(upload)
-    elif ext.endswith(".pptx"):
-        text = extract_text_from_pptx(upload)
-    elif ext.endswith(".xlsx"):
-        text = extract_text_from_xlsx(upload)
     else:
-        return "❌ Unsupported file type."
-    if not text or not text.strip() or text.startswith("❌"):
-        return text if text.startswith("❌") else "❗ No extractable text found."
     try:
         summary = summarizer(text[:3000], max_length=150, min_length=30, do_sample=False)
@@ -99,34 +94,36 @@ def summarize_document(upload):
         return f"⚠️ Summarization error: {e}"
 def interpret_image(image):
-    if not image:
-        return "⚠️ No image uploaded."
     try:
         return f"🖼️ Caption:\n{image_captioner(image)[0]['generated_text']}"
     except Exception as e:
         return f"⚠️ Image captioning error: {e}"
 # -------------------------
-# Gradio Interface
 # -------------------------
-doc_ui = gr.Interface(
     fn=summarize_document,
-    inputs=gr.File(label="Upload a Document (PDF, DOCX, PPTX, XLSX)"),
-    outputs=gr.Textbox(label="Summary"),
     title="📄 Document Summarizer"
 )
-img_ui = gr.Interface(
     fn=interpret_image,
     inputs=gr.Image(type="pil", label="Upload an Image"),
-    outputs=gr.Textbox(label="Caption"),
-    title="🖼️ Image Interpreter"
 )
-demo = gr.TabbedInterface([doc_ui, img_ui], ["Document Summarization", "Image Captioning"])
 app = gr.mount_gradio_app(app, demo, path="/")
 @app.get("/")
-def redirect_to_ui():
     return RedirectResponse(url="/")

 from fastapi import FastAPI
 from fastapi.responses import RedirectResponse
 import fitz  # PyMuPDF
 import docx
 import pptx
 import openpyxl
 import io
+from PIL import Image
 import gradio as gr
+from transformers import pipeline
+# Load models
 summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
 image_captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
 app = FastAPI()
 # -------------------------
+# File Text Extractors
 # -------------------------
def extract_text_from_pdf(file):
    """Return the text of every page of a PDF, joined by newlines.

    Args:
        file: Object exposing ``seek`` and ``read``; it is rewound and
            read in full before parsing.

    Returns:
        The concatenated page text, or a string starting with
        "❌ PDF error:" if anything raised along the way.
    """
    try:
        file.seek(0)
        pdf_bytes = file.read()
        page_texts = []
        with fitz.open(stream=pdf_bytes, filetype="pdf") as pdf:
            for page in pdf:
                page_texts.append(page.get_text())
        return "\n".join(page_texts)
    except Exception as err:
        # Failures are reported to the caller as text rather than raised.
        return f"❌ PDF error: {err}"
def extract_text_from_docx(file):
    """Return the non-blank paragraphs of a DOCX file, one per line.

    Args:
        file: Object exposing ``seek`` and ``read``; it is rewound and
            read in full before parsing.

    Returns:
        The paragraph texts joined by newlines (paragraphs that are empty
        or whitespace-only are skipped), or a string starting with
        "❌ DOCX error:" if anything raised along the way.
    """
    try:
        file.seek(0)
        buffer = io.BytesIO(file.read())
        document = docx.Document(buffer)
        kept = []
        for para in document.paragraphs:
            if para.text.strip():
                kept.append(para.text)
        return "\n".join(kept)
    except Exception as err:
        # Failures are reported to the caller as text rather than raised.
        return f"❌ DOCX error: {err}"
+def extract_text_from_pptx(file):
     try:
+        file.seek(0)
+        data = file.read()
+        prs = pptx.Presentation(io.BytesIO(data))
         text = []
         for slide in prs.slides:
             for shape in slide.shapes:
                     text.append(shape.text)
         return "\n".join(text)
     except Exception as e:
+        return f"❌ PPTX error: {e}"
def extract_text_from_xlsx(file):
    """Return the cell contents of an XLSX workbook as plain text.

    Each non-empty row becomes one line, with its cell values converted to
    strings and separated by single spaces. Rows from all worksheets are
    concatenated in workbook order.

    Args:
        file: Object exposing ``seek`` and ``read``; it is rewound and
            read in full before parsing.

    Returns:
        The extracted text, or a string starting with "❌ XLSX error:" if
        anything raised along the way.
    """
    try:
        file.seek(0)
        data = file.read()
        wb = openpyxl.load_workbook(io.BytesIO(data))
        lines = []
        # Iterate worksheets only: other sheet kinds (e.g. chart sheets)
        # have no cell grid to read.
        for ws in wb.worksheets:
            for row in ws.iter_rows(values_only=True):
                # Compare against None instead of relying on truthiness so
                # that legitimate values such as 0 or False are kept.
                cells = [str(cell) for cell in row if cell is not None]
                line = " ".join(c for c in cells if c.strip())
                if line:
                    # Fully empty rows would only contribute blank lines.
                    lines.append(line)
        return "\n".join(lines)
    except Exception as e:
        # Failures are reported to the caller as text rather than raised.
        return f"❌ XLSX error: {e}"
 # -------------------------
+# Main Logic
 # -------------------------
+def summarize_document(file):
+    filename = file.name.lower()
+    if filename.endswith(".pdf"):
+        text = extract_text_from_pdf(file)
+    elif filename.endswith(".docx"):
+        text = extract_text_from_docx(file)
+    elif filename.endswith(".pptx"):
+        text = extract_text_from_pptx(file)
+    elif filename.endswith(".xlsx"):
+        text = extract_text_from_xlsx(file)
     else:
+        return "❌ Unsupported file format."
+    if not text.strip():
+        return "❗ No extractable text."
     try:
         summary = summarizer(text[:3000], max_length=150, min_length=30, do_sample=False)
         return f"⚠️ Summarization error: {e}"
 def interpret_image(image):
     try:
         return f"🖼️ Caption:\n{image_captioner(image)[0]['generated_text']}"
     except Exception as e:
         return f"⚠️ Image captioning error: {e}"
 # -------------------------
+# Gradio Interfaces
 # -------------------------
# Tab 1: upload a document and show its summary as plain text.
doc_summary = gr.Interface(
    title="📄 Document Summarizer",
    fn=summarize_document,
    inputs=gr.File(label="Upload a Document"),
    outputs="text",
)

# Tab 2: upload an image (handed to the callback as a PIL image) and show
# its caption as plain text.
img_caption = gr.Interface(
    title="🖼️ Image Captioning",
    fn=interpret_image,
    inputs=gr.Image(type="pil", label="Upload an Image"),
    outputs="text",
)

# -------------------------
# Launch via FastAPI
# -------------------------
# Both interfaces are combined into one tabbed UI and mounted on the
# FastAPI application at the site root.
demo = gr.TabbedInterface(
    interface_list=[doc_summary, img_caption],
    tab_names=["Document Summary", "Image Captioning"],
)
app = gr.mount_gradio_app(app, demo, path="/")


# NOTE(review): this route is registered after the Gradio app has been
# mounted at "/", and it redirects "/" to "/" itself. It looks either
# unreachable or self-redirecting - confirm the intent before relying on it.
@app.get("/")
def root():
    return RedirectResponse(url="/")