Spaces:

ikraamkb
/

Summarization

Running

File size: 3,406 Bytes

from fastapi import FastAPI, UploadFile, File, Form
from fastapi.responses import RedirectResponse
import gradio as gr
from transformers import pipeline
import tempfile
import os
from PIL import Image
import fitz  # PyMuPDF
import docx
import openpyxl
from pptx import Presentation
import easyocr

# Shared model instances, constructed once when the module is imported and
# reused by the processing functions defined below.
summarizer = pipeline(
    task="text2text-generation",
    model="FeruzaBoynazarovaas/my_awesome_billsum_model",
)
captioner = pipeline(
    task="image-to-text",
    model="nlpconnect/vit-gpt2-image-captioning",
)
reader = easyocr.Reader(["en"])  # OCR reader configured with the 'en' language list

# FastAPI application object; the Gradio UI is attached to it further down.
app = FastAPI()

def extract_text_from_file(file_path: str, file_type: str):
    """Extract plain text from a PDF, DOCX, PPTX or XLSX file.

    Args:
        file_path: Path of the document on disk.
        file_type: File extension identifying the format. Matching is
            case-insensitive and a leading dot is ignored, so ``"pdf"``,
            ``"PDF"`` and ``".pdf"`` all select the PDF reader.

    Returns:
        The extracted text, with the individual pieces (pages, paragraphs,
        shapes or cells) joined by newlines. This function never raises for
        read problems; instead it returns one of two sentinel strings:
        ``"Unsupported file format"`` when ``file_type`` is not recognised,
        or ``"Error reading file: <message>"`` when the reader failed.
    """
    # Normalise once so callers do not have to pre-clean the extension.
    kind = str(file_type or "").strip().lower().lstrip(".")
    try:
        if kind == "pdf":
            with fitz.open(file_path) as doc:
                return "\n".join(page.get_text() for page in doc)
        if kind == "docx":
            document = docx.Document(file_path)
            return "\n".join(p.text for p in document.paragraphs)
        if kind == "pptx":
            prs = Presentation(file_path)
            return "\n".join(
                shape.text
                for slide in prs.slides
                for shape in slide.shapes
                if hasattr(shape, "text")
            )
        if kind == "xlsx":
            wb = openpyxl.load_workbook(file_path)
            # Empty cells carry the value None; skip them so the literal
            # string "None" does not end up in the extracted text.
            return "\n".join(
                str(cell.value)
                for sheet in wb
                for row in sheet
                for cell in row
                if cell.value is not None
            )
        return "Unsupported file format"
    except Exception as e:
        # Deliberate best-effort boundary: the failure is reported to the
        # caller as text rather than propagated.
        return f"Error reading file: {e}"

def process_document(file):
    """Extract the text of an uploaded document and summarize it.

    Args:
        file: The upload to summarize. Either a filesystem path (``str`` or
            ``os.PathLike``) or an object exposing ``name`` and ``read()``.
            ``None`` means nothing was uploaded.

    Returns:
        The generated summary string. When no summary can be produced a
        short message is returned instead: ``"No file uploaded"``, the
        extractor's ``"Unsupported file format"`` / ``"Error reading
        file: ..."`` text, or ``"No text found in document"``.
    """
    if file is None:
        return "No file uploaded"

    if isinstance(file, (str, os.PathLike)):
        # The upload is already a path on disk (Gradio's File component can
        # deliver a path string rather than a file object): read it in
        # place, so there is no temporary copy to clean up.
        source_path = os.fspath(file)
        file_ext = os.path.splitext(source_path)[1][1:].lower()
        text = extract_text_from_file(source_path, file_ext)
    else:
        file_ext = os.path.splitext(file.name)[1][1:].lower()
        tmp_path = None
        try:
            # Copy the upload to a temp file carrying the right suffix so the
            # format-specific readers can open it by path.
            with tempfile.NamedTemporaryFile(delete=False, suffix=f".{file_ext}") as tmp:
                tmp_path = tmp.name
                tmp.write(file.read())
            text = extract_text_from_file(tmp_path, file_ext)
        finally:
            # Always remove the temp copy, even if reading or extraction failed.
            if tmp_path is not None:
                os.unlink(tmp_path)

    # The extractor reports problems as text; show them to the user as-is
    # instead of feeding an error message to the summarization model.
    if text == "Unsupported file format" or text.startswith("Error reading file:"):
        return text
    if not text.strip():
        return "No text found in document"

    return summarizer(text, max_length=150, min_length=30, do_sample=False)[0]['generated_text']

def process_image(image):
    """Caption an image and extract any text it contains.

    Args:
        image: A path or file object accepted by ``PIL.Image.open``.
            ``None`` means no image was supplied.

    Returns:
        A dict with two string entries: ``"caption"`` (the generated
        caption) and ``"ocr_text"`` (the recognised text fragments joined by
        spaces, or ``"No readable text found"`` when OCR finds nothing).
        For ``None`` input the caption is ``"No image provided"``.
    """
    import io  # local import: only needed for the in-memory PNG below

    if image is None:
        return {
            "caption": "No image provided",
            "ocr_text": "No readable text found",
        }

    # The context manager closes the underlying file handle once both the
    # caption and the OCR input have been produced.
    with Image.open(image) as img:
        caption = captioner(img)[0]['generated_text']

        # EasyOCR's readtext is documented to take a file path, raw bytes or
        # a numpy array, so pass the path straight through when we have one
        # and otherwise hand over PNG-encoded bytes of the image.
        if isinstance(image, str):
            ocr_source = image
        else:
            buffer = io.BytesIO()
            # Convert to RGB first so any source mode can be PNG-encoded.
            img.convert("RGB").save(buffer, format="PNG")
            ocr_source = buffer.getvalue()

    ocr_result = reader.readtext(ocr_source)
    # Each OCR result holds the recognised string at index 1.
    ocr_text = " ".join(res[1] for res in ocr_result)

    return {
        "caption": caption,
        "ocr_text": ocr_text if ocr_text else "No readable text found"
    }

# Gradio Interface


def _analyze_image_for_ui(image):
    """Adapt process_image's dict result to the two image-tab textboxes.

    The "Analyze" button is wired to two output components, so the handler
    must return one value per component, in order: caption, then OCR text.
    """
    result = process_image(image)
    return result["caption"], result["ocr_text"]


with gr.Blocks() as demo:
    gr.Markdown("# 📄 Document & Image Analysis Web Service")

    # Tab 1: upload a document and show its summary.
    with gr.Tab("Document Summarization"):
        doc_input = gr.File(label="Upload Document (PDF, DOCX, PPTX, XLSX)")
        doc_output = gr.Textbox(label="Summary")
        doc_button = gr.Button("Summarize")

    # Tab 2: upload an image and show its caption plus OCR text.
    with gr.Tab("Image Analysis"):
        # type="filepath" makes the handler receive the image as a path.
        img_input = gr.Image(type="filepath", label="Upload Image")
        with gr.Accordion("Results", open=True):
            caption_output = gr.Textbox(label="Image Caption")
            ocr_output = gr.Textbox(label="Extracted Text")
        img_button = gr.Button("Analyze")

    # Event wiring: each button runs its handler and fills the listed outputs.
    doc_button.click(process_document, inputs=doc_input, outputs=doc_output)
    img_button.click(_analyze_image_for_ui, inputs=img_input, outputs=[caption_output, ocr_output])

# Mount Gradio app
# The Blocks UI (`demo`) is attached to the FastAPI instance at the root path,
# and the application returned by mount_gradio_app is rebound to `app`.
app = gr.mount_gradio_app(app, demo, path="/")

# NOTE(review): this handler is registered on "/" -- the same path the Gradio
# UI is mounted on just above -- and it redirects to "/" as well. It looks
# unreachable (shadowed by the mount) and would redirect to itself if it were
# ever hit; confirm whether it is needed.
@app.get("/")
def redirect_to_gradio():
    return RedirectResponse(url="/")