File size: 3,406 Bytes
0d84ecf
 
 
 
 
47942ca
c330600
0d84ecf
 
 
 
bca0a86
c330600
0d84ecf
 
4a81c80
0d84ecf
974f8bb
0d84ecf
461e409
0d84ecf
 
c330600
0d84ecf
 
 
 
 
 
 
 
 
 
 
 
 
 
c330600
0d84ecf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c330600
5b4fc38
0d84ecf
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.responses import RedirectResponse
import gradio as gr
from transformers import pipeline
import tempfile
import os
from PIL import Image
import fitz  # PyMuPDF
import docx
import openpyxl
from pptx import Presentation
import easyocr

# Load every model once at import time; the handlers below reuse these objects.
summarizer = pipeline(
    task="text2text-generation",
    model="FeruzaBoynazarovaas/my_awesome_billsum_model",
)
captioner = pipeline(
    task="image-to-text",
    model="nlpconnect/vit-gpt2-image-captioning",
)
reader = easyocr.Reader(lang_list=["en"])  # OCR reader restricted to English

# FastAPI application that the Gradio UI is mounted onto further down.
app = FastAPI()

def extract_text_from_file(file_path: str, file_type: str) -> str:
    """Extract plain text from a PDF, DOCX, PPTX or XLSX document.

    Args:
        file_path: Location of the document on disk.
        file_type: Document extension ("pdf", "docx", "pptx" or "xlsx").
            Matching ignores case and a leading dot, so ".PDF" is accepted.

    Returns:
        The extracted text, with the individual pieces (pages, paragraphs,
        shapes or cells) joined by newlines. Two sentinel strings are used
        instead of raising:

        * ``"Unsupported file format"`` when ``file_type`` is not recognised.
        * ``"Error reading file: <message>"`` when reading the document fails.
    """
    # Normalise the extension so callers do not have to pre-process it.
    kind = file_type.lower().lstrip(".") if isinstance(file_type, str) else ""

    try:
        if kind == "pdf":
            with fitz.open(file_path) as doc:
                return "\n".join(page.get_text() for page in doc)

        if kind == "docx":
            doc = docx.Document(file_path)
            return "\n".join(p.text for p in doc.paragraphs)

        if kind == "pptx":
            prs = Presentation(file_path)
            # Not every shape exposes a ``text`` attribute, hence the hasattr guard.
            return "\n".join(
                shape.text
                for slide in prs.slides
                for shape in slide.shapes
                if hasattr(shape, "text")
            )

        if kind == "xlsx":
            wb = openpyxl.load_workbook(file_path)
            try:
                # Skip empty cells: str(None) would otherwise inject the
                # literal word "None" into the extracted text.
                return "\n".join(
                    str(cell.value)
                    for sheet in wb
                    for row in sheet
                    for cell in row
                    if cell.value is not None
                )
            finally:
                wb.close()

        return "Unsupported file format"
    except Exception as e:
        # Deliberate best-effort boundary: report the failure as text so the
        # caller can show it to the user instead of crashing.
        return f"Error reading file: {str(e)}"

def process_document(file):
    """Summarise an uploaded document.

    Args:
        file: The upload to process. Either a filesystem path (``str`` or
            ``os.PathLike``) or a file-like object exposing ``name`` and
            ``read()``. ``None`` means nothing was uploaded.
            NOTE(review): which of the two shapes Gradio's ``gr.File`` hands
            over depends on the installed Gradio version and its ``type``
            setting - confirm against the deployed version.

    Returns:
        The generated summary, or a short human-readable message when there
        is nothing to summarise (no upload, unsupported format, read error,
        or no extractable text).
    """
    if file is None:
        return "No document provided"

    if isinstance(file, (str, os.PathLike)):
        # The upload already lives on disk: read it in place, nothing to clean up.
        source_path = os.fspath(file)
        file_ext = os.path.splitext(source_path)[1][1:].lower()
        text = extract_text_from_file(source_path, file_ext)
    else:
        # File-like upload: spool it to a temp file carrying the right suffix.
        file_ext = os.path.splitext(file.name)[1][1:].lower()
        with tempfile.NamedTemporaryFile(delete=False, suffix=f".{file_ext}") as tmp:
            tmp.write(file.read())
            tmp_path = tmp.name
        try:
            text = extract_text_from_file(tmp_path, file_ext)
        finally:
            # Always remove the temp copy, even if extraction raises.
            os.unlink(tmp_path)

    # extract_text_from_file reports problems as text; show them to the user
    # instead of asking the model to "summarise" an error message.
    if text == "Unsupported file format" or text.startswith("Error reading file: "):
        return text
    if not text.strip():
        return "No text could be extracted from the document"

    # truncation=True asks the pipeline to cut the input to the model's
    # maximum length rather than pass an over-long sequence.
    summary = summarizer(
        text,
        max_length=150,
        min_length=30,
        do_sample=False,
        truncation=True,
    )[0]['generated_text']
    return summary

def process_image(image):
    """Caption an image and extract any text it contains.

    Args:
        image: Anything ``PIL.Image.open`` accepts (the UI passes a file
            path), or ``None`` when no image was supplied.

    Returns:
        A dict with two string entries:

        * ``"caption"``: the caption produced by the captioning pipeline.
        * ``"ocr_text"``: the OCR fragments joined by spaces, or
          ``"No readable text found"`` when OCR yields nothing.
    """
    if image is None:
        return {
            "caption": "No image provided",
            "ocr_text": "No readable text found",
        }

    # Local import so the file's import block does not need to change.
    # NOTE(review): numpy is expected to be installed as a dependency of the
    # OCR / model libraries already imported by this file - confirm.
    import numpy as np

    # convert() returns a fully loaded copy, so the file handle can be closed
    # right away; RGB also normalises palette / alpha / greyscale inputs.
    with Image.open(image) as opened:
        rgb = opened.convert("RGB")

    # Get caption
    caption = captioner(rgb)[0]['generated_text']

    # Get OCR text. readtext() is documented to take a path, bytes or a numpy
    # array rather than a PIL image, so hand it an array.
    # NOTE(review): easyocr describes array input as an OpenCV image, which is
    # presumably BGR channel order - hence the channel flip; confirm.
    bgr = np.ascontiguousarray(np.asarray(rgb)[:, :, ::-1])
    ocr_result = reader.readtext(bgr)
    ocr_text = " ".join(res[1] for res in ocr_result)

    return {
        "caption": caption,
        "ocr_text": ocr_text if ocr_text else "No readable text found",
    }

# Gradio Interface
def _image_results_for_ui(image):
    """Unpack process_image's dict into (caption, ocr_text) for the two textboxes.

    The click handler below is wired to two output components, so it must
    return one value per component, in order; a dict keyed by plain strings
    cannot be mapped onto them.
    """
    result = process_image(image)
    return result["caption"], result["ocr_text"]


with gr.Blocks() as demo:
    gr.Markdown("# 📄 Document & Image Analysis Web Service")

    # Tab 1: upload a document and get a summary back.
    with gr.Tab("Document Summarization"):
        doc_input = gr.File(label="Upload Document (PDF, DOCX, PPTX, XLSX)")
        doc_output = gr.Textbox(label="Summary")
        doc_button = gr.Button("Summarize")

    # Tab 2: upload an image and get a caption plus OCR text.
    with gr.Tab("Image Analysis"):
        img_input = gr.Image(type="filepath", label="Upload Image")
        with gr.Accordion("Results", open=True):
            caption_output = gr.Textbox(label="Image Caption")
            ocr_output = gr.Textbox(label="Extracted Text")
        img_button = gr.Button("Analyze")

    # Wire the buttons to their handlers.
    doc_button.click(process_document, inputs=doc_input, outputs=doc_output)
    img_button.click(
        _image_results_for_ui,
        inputs=img_input,
        outputs=[caption_output, ocr_output],
    )

# Mount Gradio app
# The UI lives under its own prefix so that the "/" route below is reachable
# and has somewhere other than itself to redirect to. Mounting at "/" would
# shadow every route registered afterwards, and a "/" -> "/" redirect would
# loop if it were ever served.
_GRADIO_PATH = "/gradio"
app = gr.mount_gradio_app(app, demo, path=_GRADIO_PATH)


@app.get("/")
def redirect_to_gradio():
    """Redirect requests for the site root to the mounted Gradio UI."""
    return RedirectResponse(url=_GRADIO_PATH)