File size: 3,918 Bytes
5e30a65
0d84ecf
 
c3071ac
0d84ecf
47942ca
c330600
0d84ecf
 
bca0a86
c330600
0d84ecf
461e409
c3071ac
 
 
 
 
5e30a65
 
c3071ac
 
 
5e30a65
 
 
c3071ac
 
 
 
 
 
 
5e30a65
c3071ac
5e30a65
0d84ecf
 
c330600
0d84ecf
 
 
 
 
 
 
c3071ac
c330600
0d84ecf
 
 
c3071ac
5e30a65
 
c3071ac
 
 
5e30a65
 
 
 
 
c3071ac
5e30a65
 
 
 
 
0d84ecf
 
c3071ac
5e30a65
 
c3071ac
 
5e30a65
c3071ac
 
5e30a65
 
c3071ac
5e30a65
 
 
 
 
 
0d84ecf
 
c3071ac
 
0d84ecf
 
c3071ac
 
0d84ecf
 
 
 
c3071ac
0d84ecf
c3071ac
 
 
0d84ecf
 
 
 
 
c3071ac
0d84ecf
c330600
5b4fc38
c3071ac
0d84ecf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import RedirectResponse
import gradio as gr
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
import tempfile
import os
from PIL import Image
import fitz  # PyMuPDF
import docx
import easyocr

# FastAPI application object; the Gradio UI is attached to it later in this file.
app = FastAPI()

# Lightweight model choices
# NOTE(review): the size figures below are the original author's estimates and
# have not been verified against the model hub — confirm before relying on them.
SUMMARIZATION_MODEL = "facebook/bart-large-cnn"  # 500MB
IMAGE_CAPTIONING_MODEL = "Salesforce/blip-image-captioning-base"  # 300MB
FALLBACK_SUMMARIZATION_MODEL = "sshleifer/distilbart-cnn-12-6"  # Fallback 250MB model

# Initialize models once at import time so every request reuses the same objects.
try:
    summarizer = pipeline(
        "summarization",
        model=SUMMARIZATION_MODEL,
        device="cpu",
    )
except Exception as e:
    # Best-effort startup: report the failure and fall back to the smaller
    # checkpoint instead of aborting the import.
    print(f"Error loading summarizer: {e}")
    summarizer = pipeline(
        "summarization",
        model=FALLBACK_SUMMARIZATION_MODEL,
        device="cpu",  # same device as the primary model, for consistency
    )

captioner = pipeline(
    "image-to-text",
    model=IMAGE_CAPTIONING_MODEL,
    device="cpu",
)

# English-only OCR reader shared by all image requests.
reader = easyocr.Reader(['en'])  # Lightweight OCR

def extract_text_from_file(file_path: str, file_type: str):
    """Pull the plain text out of a PDF or DOCX file.

    Args:
        file_path: Location of the document on disk.
        file_type: Bare extension selecting the parser ("pdf" or "docx").

    Returns:
        The page texts (PDF) or paragraph texts (DOCX) joined with newlines.
        Any other ``file_type`` yields an "Unsupported file format" message,
        and any exception raised while reading is returned as an
        "Error reading file: ..." string rather than propagated.
    """
    try:
        if file_type == "pdf":
            with fitz.open(file_path) as pdf:
                page_texts = [page.get_text() for page in pdf]
            return "\n".join(page_texts)

        if file_type == "docx":
            paragraphs = docx.Document(file_path).paragraphs
            return "\n".join(paragraph.text for paragraph in paragraphs)

        return "Unsupported file format (only PDF/DOCX supported in lightweight version)"
    except Exception as exc:
        return f"Error reading file: {exc!s}"

def process_document(file):
    """Summarize an uploaded PDF or DOCX document.

    Args:
        file: The upload handed over by the UI. Either a filesystem path
            (``str`` / ``os.PathLike``) or an object exposing ``.name`` and,
            optionally, ``.read()``. ``None`` means nothing was uploaded.

    Returns:
        The summary text on success; otherwise a human-readable message
        explaining why no summary was produced (unsupported extension,
        unreadable file, empty document, or "Processing error: ...").
    """
    if file is None:
        return "Processing error: no document was uploaded"

    tmp_path = None
    try:
        # Path-style uploads are the path itself; file-like uploads carry it in `.name`.
        if isinstance(file, (str, os.PathLike)):
            source_path = os.fspath(file)
        else:
            source_path = file.name

        file_ext = os.path.splitext(source_path)[1][1:].lower()
        if file_ext not in ["pdf", "docx"]:
            return "Lightweight version only supports PDF and DOCX"

        if hasattr(file, "read"):
            # File-like upload: copy its content to a temp file the parsers can open by path.
            with tempfile.NamedTemporaryFile(delete=False, suffix=f".{file_ext}") as tmp:
                tmp_path = tmp.name
                tmp.write(file.read())
            doc_path = tmp_path
        else:
            # Already a path on disk: read it in place, nothing to copy or delete.
            doc_path = source_path

        text = extract_text_from_file(doc_path, file_ext)

        # The extractor reports failures as strings; surface them as-is
        # instead of feeding an error message to the summarizer.
        if text.startswith("Error reading file:"):
            return text
        if not text.strip():
            return "Processing error: no text could be extracted from the document"

        # truncation=True keeps long documents within the model's input limit.
        result = summarizer(
            text,
            max_length=130,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        return result[0]['summary_text']
    except Exception as e:
        return f"Processing error: {str(e)}"
    finally:
        # Remove the temp copy on every exit path, including failures.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Cleanup is best-effort; a leftover temp file must not mask the result.
                pass

def process_image(image):
    """Caption an image and extract any text it contains.

    Args:
        image: Anything ``PIL.Image.open`` accepts — typically the file path
            supplied by the UI. ``None`` means no image was provided.

    Returns:
        dict: ``{"caption": str, "ocr_text": str}`` on success, where
        ``ocr_text`` is "No readable text found" when OCR yields nothing;
        ``{"error": str}`` when the image is missing or processing fails.
    """
    if image is None:
        return {"error": "No image provided"}

    try:
        # The context manager closes the underlying file handle when done.
        with Image.open(image) as img:
            # Get caption
            caption = captioner(img)[0]['generated_text']

            # Get OCR text. EasyOCR's readtext takes a path, bytes or a numpy
            # array, so hand it one of those rather than the PIL object.
            if isinstance(image, (str, os.PathLike)):
                ocr_source = os.fspath(image)
            else:
                import numpy as np  # local import: only needed for non-path inputs

                ocr_source = np.asarray(img.convert("RGB"))
            ocr_result = reader.readtext(ocr_source)

        # Each OCR result carries the recognized string at index 1.
        ocr_text = " ".join(res[1] for res in ocr_result)

        return {
            "caption": caption,
            "ocr_text": ocr_text if ocr_text else "No readable text found"
        }
    except Exception as e:
        return {"error": str(e)}

def _image_results_for_ui(image):
    """Adapt ``process_image``'s dict result to the two image-tab textboxes.

    The click handler below is wired to two output components, so it needs
    one value per component rather than a dict keyed by strings.

    Returns:
        tuple[str, str]: ``(caption, ocr_text)``; on failure the error message
        goes in the caption slot and the OCR slot is left empty.
    """
    result = process_image(image)
    if "error" in result:
        return f"Error: {result['error']}", ""
    return result.get("caption", ""), result.get("ocr_text", "")


# Gradio Interface
with gr.Blocks(title="Lightweight Document & Image Analysis") as demo:
    gr.Markdown("## 📄 Lightweight Document & Image Analysis")

    # Tab 1: upload a document and get a text summary.
    with gr.Tab("Document Summarization"):
        gr.Markdown("Supports PDF and DOCX files (max 10MB)")
        doc_input = gr.File(label="Upload Document", file_types=[".pdf", ".docx"])
        doc_output = gr.Textbox(label="Summary")
        doc_button = gr.Button("Summarize")

    # Tab 2: upload an image and get a caption plus OCR text.
    with gr.Tab("Image Analysis"):
        gr.Markdown("Get captions and extracted text from images")
        img_input = gr.Image(type="filepath", label="Upload Image")
        with gr.Accordion("Results", open=False):
            caption_output = gr.Textbox(label="Image Caption")
            ocr_output = gr.Textbox(label="Extracted Text")
        img_button = gr.Button("Analyze")

    # Wire the buttons to their handlers.
    doc_button.click(process_document, inputs=doc_input, outputs=doc_output)
    img_button.click(_image_results_for_ui, inputs=img_input, outputs=[caption_output, ocr_output])

# Mount Gradio app
# Attaches the `demo` Blocks UI to the FastAPI app at the root path and rebinds
# `app` to whatever `mount_gradio_app` returns.
app = gr.mount_gradio_app(app, demo, path="/")

# NOTE(review): this route is registered on "/" — the same path Gradio was just
# mounted on — and it redirects to "/" again. Presumably the mount answers "/"
# first so this handler never runs; confirm, then either drop the route or
# mount Gradio on a sub-path and redirect there, since a reachable handler
# would redirect to itself.
@app.get("/")
def redirect_to_interface():
    """Return a redirect response pointing at ``/``."""
    return RedirectResponse(url="/")