Spaces:

ikraamkb
/

Summarization

Sleeping

App Files Files Community

ikraamkb committed on Apr 13

Commit

0d84ecf

verified ·

1 Parent(s): 8b98a2c

Update app.py

Browse files

Files changed (1) hide show

app.py +86 -88

app.py CHANGED Viewed

@@ -1,99 +1,97 @@
-from fastapi import FastAPI, UploadFile, Form
-from fastapi.responses import RedirectResponse, FileResponse, JSONResponse
 import os
-import shutil
 from PIL import Image
-from transformers import ViltProcessor, ViltForQuestionAnswering, pipeline
-from gtts import gTTS
 import easyocr
-import torch
-import tempfile
-import gradio as gr
-import numpy as np
-app = FastAPI()
-# Load VQA Model
-vqa_processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
-vqa_model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
-# Load image captioning model
 captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
-reader = easyocr.Reader(['en', 'fr'])
-def classify_question(question: str):
-    question_lower = question.lower()
-    if any(word in question_lower for word in ["text", "say", "written", "read"]):
-        return "ocr"
-    elif any(word in question_lower for word in ["caption", "describe", "what is in the image"]):
-        return "caption"
-    else:
-        return "vqa"
-def answer_question_from_image(image, question):
-    if image is None or not question.strip():
-        return "Please upload an image and ask a question.", None
-    mode = classify_question(question)
-    if mode == "ocr":
-        try:
-            result = reader.readtext(np.array(image))
-            text = " ".join([entry[1] for entry in result])
-            answer = text.strip() or "No readable text found."
-        except Exception as e:
-            answer = f"OCR Error: {e}"
-    elif mode == "caption":
-        try:
-            answer = captioner(image)[0]['generated_text']
-        except Exception as e:
-            answer = f"Captioning error: {e}"
-    else:
-        try:
-            inputs = vqa_processor(image, question, return_tensors="pt")
-            with torch.no_grad():
-                outputs = vqa_model(**inputs)
-            predicted_id = outputs.logits.argmax(-1).item()
-            answer = vqa_model.config.id2label[predicted_id]
-        except Exception as e:
-            answer = f"VQA error: {e}"
     try:
-        tts = gTTS(text=answer)
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
-            tts.save(tmp.name)
-            audio_path = tmp.name
     except Exception as e:
-        return f"Answer: {answer}\n\n⚠️ Audio generation error: {e}", None
-    return answer, audio_path
-def process_image_question(image: Image.Image, question: str):
-    answer, audio_path = answer_question_from_image(image, question)
-    return answer, audio_path
-gui = gr.Interface(
-    fn=process_image_question,
-    inputs=[
-        gr.Image(type="pil", label="Upload Image"),
-        gr.Textbox(lines=2, placeholder="Ask a question about the image...", label="Question")
-    ],
-    outputs=[
-        gr.Textbox(label="Answer", lines=5),
-        gr.Audio(label="Answer (Audio)", type="filepath")
-    ],
-    title="🧐 Image QA with Voice",
-    description="Upload an image and ask a question. Works for OCR, captioning, and VQA."
-)
-app = gr.mount_gradio_app(app, gui, path="/")
 @app.get("/")
-def home():
-    return RedirectResponse(url="/")

+from fastapi import FastAPI, UploadFile, File, Form
+from fastapi.responses import RedirectResponse
+import gradio as gr
+from transformers import pipeline
+import tempfile
 import os
 from PIL import Image
+import fitz  # PyMuPDF
+import docx
+import openpyxl
+from pptx import Presentation
 import easyocr
+# Initialize models
+# All three models are constructed at import time, before the app is created.
+# Text summarizer: a text2text-generation pipeline over the named checkpoint.
+summarizer = pipeline("text2text-generation", model="FeruzaBoynazarovaas/my_awesome_billsum_model")
+# Image captioner: an image-to-text pipeline.
 captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
+reader = easyocr.Reader(['en'])  # For OCR
+# FastAPI application that the Gradio UI is mounted on further down.
+app = FastAPI()
+def extract_text_from_file(file_path: str, file_type: str) -> str:
+    """Extract the plain text of a document.
+
+    Args:
+        file_path: Path of the document on disk.
+        file_type: Format name: "pdf", "docx", "pptx" or "xlsx". Case and a
+            leading dot are ignored, so ".PDF" is accepted as well.
+
+    Returns:
+        The extracted text joined with newlines, "Unsupported file format"
+        for any other format, or "Error reading file: <reason>" when the
+        parser raises. Failures are reported as text rather than raised.
+    """
     try:
+        file_type = file_type.lower().lstrip(".")
+        if file_type == "pdf":
+            with fitz.open(file_path) as doc:
+                return "\n".join(page.get_text() for page in doc)
+        elif file_type == "docx":
+            doc = docx.Document(file_path)
+            return "\n".join(p.text for p in doc.paragraphs)
+        elif file_type == "pptx":
+            prs = Presentation(file_path)
+            return "\n".join(shape.text for slide in prs.slides for shape in slide.shapes if hasattr(shape, "text"))
+        elif file_type == "xlsx":
+            wb = openpyxl.load_workbook(file_path)
+            values = []
+            for sheet in wb:
+                for row in sheet.iter_rows(values_only=True):
+                    # Empty cells come back as None; str(None) would put
+                    # literal "None" lines into the extracted text.
+                    values.extend(str(value) for value in row if value is not None)
+            return "\n".join(values)
+        else:
+            return "Unsupported file format"
     except Exception as e:
+        return f"Error reading file: {str(e)}"
+def process_document(file):
+    """Summarize an uploaded document.
+
+    Args:
+        file: Upload handed over by the Gradio File component: either a path
+            string or a file-like wrapper whose ``name`` is the path on disk.
+            ``None`` when nothing was uploaded.
+
+    Returns:
+        The generated summary, or a human-readable message when there is no
+        upload, the format is unsupported, reading fails, or the document
+        contains no text.
+    """
+    if file is None:
+        return "Please upload a document."
+    # The upload is already on disk, so read it in place rather than copying
+    # it to a second temp file (a path-string upload has no .read() anyway,
+    # and the copy was leaked whenever summarization raised).
+    file_path = file if isinstance(file, str) else file.name
+    file_ext = os.path.splitext(file_path)[1][1:].lower()
+    text = extract_text_from_file(file_path, file_ext)
+    # extract_text_from_file reports failures as text; pass them through
+    # instead of feeding an error message to the summarizer.
+    if text == "Unsupported file format" or text.startswith("Error reading file:"):
+        return text
+    if not text.strip():
+        return "No text could be extracted from the document."
+    result = summarizer(text, max_length=150, min_length=30, do_sample=False)
+    return result[0]['generated_text']
+def process_image(image):
+    """Caption an image and read any text in it.
+
+    Args:
+        image: Path of the uploaded image (the Image component is declared
+            with type="filepath"); ``None`` when nothing was uploaded.
+
+    Returns:
+        A ``(caption, ocr_text)`` tuple, one value per output textbox bound
+        to the Analyze button. ``ocr_text`` is "No readable text found" when
+        OCR detects nothing.
+    """
+    if image is None:
+        return "Please upload an image.", ""
+    # Get caption
+    with Image.open(image) as img:
+        caption = captioner(img)[0]['generated_text']
+    # Get OCR text. easyocr accepts a file path, bytes or a numpy array, so
+    # hand it the path rather than the PIL image.
+    ocr_result = reader.readtext(image)
+    ocr_text = " ".join(res[1] for res in ocr_result)
+    # Two output components are bound to this handler, so return two
+    # positional values instead of a dict keyed by strings.
+    return caption, (ocr_text or "No readable text found")
+# Gradio Interface
+# Two tabs: one summarizes an uploaded document, the other captions an
+# uploaded image and extracts its text.
+with gr.Blocks() as demo:
+    gr.Markdown("# 📄 Document & Image Analysis Web Service")
+    with gr.Tab("Document Summarization"):
+        doc_input = gr.File(label="Upload Document (PDF, DOCX, PPTX, XLSX)")
+        doc_output = gr.Textbox(label="Summary")
+        doc_button = gr.Button("Summarize")
+    with gr.Tab("Image Analysis"):
+        # type="filepath": the handler receives the image as a path on disk.
+        img_input = gr.Image(type="filepath", label="Upload Image")
+        with gr.Accordion("Results", open=True):
+            caption_output = gr.Textbox(label="Image Caption")
+            ocr_output = gr.Textbox(label="Extracted Text")
+        img_button = gr.Button("Analyze")
+    # Event wiring: each button passes its input component to the handler and
+    # writes the handler's result to the listed output component(s).
+    doc_button.click(process_document, inputs=doc_input, outputs=doc_output)
+    # NOTE(review): two outputs are bound here, so process_image needs to
+    # return one value per textbox — confirm against its return statement.
+    img_button.click(process_image, inputs=img_input, outputs=[caption_output, ocr_output])
+# Mount Gradio app
+# The Gradio UI is served from the root path of the FastAPI app.
+app = gr.mount_gradio_app(app, demo, path="/")
+# NOTE(review): this route is registered on "/" and redirects to "/", the
+# same path the Gradio app is mounted on. It looks either unreachable or
+# self-redirecting — confirm the intended target or remove the route.
 @app.get("/")
+def redirect_to_gradio():
+    """Respond to GET "/" with a redirect to "/"."""
+    return RedirectResponse(url="/")