Spaces:

ikraamkb
/

Summarization

Running

App Files Community

ikraamkb committed on 17 days ago

Commit

af32fa4

verified ·

1 Parent(s): 95c2451

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -28

app.py CHANGED Viewed

@@ -6,49 +6,60 @@ import openpyxl
 import pptx
 import io
 from PIL import Image
 import gradio as gr
 from transformers import pipeline
-# Models
 summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
 image_captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
 app = FastAPI()
 # -------------------------
-# Document Extraction Utils
 # -------------------------
 def extract_text_from_pdf(file_bytes):
-    text = ""
-    with fitz.open(stream=file_bytes, filetype="pdf") as doc:
-        for page in doc:
-            text += page.get_text()
-    return text
 def extract_text_from_docx(file_bytes):
-    doc = docx.Document(io.BytesIO(file_bytes))
-    return "\n".join([para.text for para in doc.paragraphs if para.text.strip()])
 def extract_text_from_pptx(file_bytes):
-    text = []
-    prs = pptx.Presentation(io.BytesIO(file_bytes))
-    for slide in prs.slides:
-        for shape in slide.shapes:
-            if hasattr(shape, "text"):
-                text.append(shape.text)
-    return "\n".join(text)
 def extract_text_from_xlsx(file_bytes):
-    wb = openpyxl.load_workbook(io.BytesIO(file_bytes))
-    text = []
-    for sheet in wb.sheetnames:
-        ws = wb[sheet]
-        for row in ws.iter_rows(values_only=True):
-            line = " ".join(str(cell) for cell in row if cell)
-            text.append(line)
-    return "\n".join(text)
 def summarize_document(file):
     file_bytes = file.read()
     filename = getattr(file, "name", "").lower()
@@ -74,8 +85,6 @@ def summarize_document(file):
         return f"⚠️ Summarization error: {e}"
 def interpret_image(image):
-    if image is None:
-        return "No image uploaded."
     try:
         return f"🖼️ Caption:\n{image_captioner(image)[0]['generated_text']}"
     except Exception as e:
@@ -99,7 +108,7 @@ img_caption = gr.Interface(
 )
 # -------------------------
-# Combine into Gradio + FastAPI
 # -------------------------
 demo = gr.TabbedInterface([doc_summary, img_caption], ["Document Summary", "Image Captioning"])
 app = gr.mount_gradio_app(app, demo, path="/")

 import pptx
 import io
 from PIL import Image
 import gradio as gr
 from transformers import pipeline
+# Load models
 summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
 image_captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
 app = FastAPI()
 # -------------------------
+# Extraction Functions
 # -------------------------
 def extract_text_from_pdf(file_bytes):
+    try:
+        with fitz.open(stream=file_bytes, filetype="pdf") as doc:
+            return "\n".join([page.get_text() for page in doc])
+    except Exception as e:
+        return f"❌ PDF extraction error: {e}"
 def extract_text_from_docx(file_bytes):
+    try:
+        doc = docx.Document(io.BytesIO(file_bytes))
+        return "\n".join(p.text for p in doc.paragraphs if p.text.strip())
+    except Exception as e:
+        return f"❌ DOCX extraction error: {e}"
 def extract_text_from_pptx(file_bytes):
+    try:
+        prs = pptx.Presentation(io.BytesIO(file_bytes))
+        text = []
+        for slide in prs.slides:
+            for shape in slide.shapes:
+                if hasattr(shape, "text"):
+                    text.append(shape.text)
+        return "\n".join(text)
+    except Exception as e:
+        return f"❌ PPTX extraction error: {e}"
 def extract_text_from_xlsx(file_bytes):
+    try:
+        wb = openpyxl.load_workbook(io.BytesIO(file_bytes))
+        text = []
+        for sheet in wb.sheetnames:
+            ws = wb[sheet]
+            for row in ws.iter_rows(values_only=True):
+                line = " ".join(str(cell) for cell in row if cell)
+                text.append(line)
+        return "\n".join(text)
+    except Exception as e:
+        return f"❌ XLSX extraction error: {e}"
+# -------------------------
+# Main Logic
+# -------------------------
 def summarize_document(file):
     file_bytes = file.read()
     filename = getattr(file, "name", "").lower()
         return f"⚠️ Summarization error: {e}"
 def interpret_image(image):
     try:
         return f"🖼️ Caption:\n{image_captioner(image)[0]['generated_text']}"
     except Exception as e:
 )
 # -------------------------
+# Launch with FastAPI
 # -------------------------
 demo = gr.TabbedInterface([doc_summary, img_caption], ["Document Summary", "Image Captioning"])
 app = gr.mount_gradio_app(app, demo, path="/")