File size: 3,096 Bytes
e5b6ad2
 
 
1e83db4
 
e5b6ad2
d1b0e84
 
 
 
 
e5b6ad2
 
 
d1b0e84
1e83db4
a74f8b0
e5b6ad2
 
 
 
 
 
d1b0e84
e5b6ad2
 
 
 
 
d1b0e84
e5b6ad2
d1b0e84
e5b6ad2
 
 
 
 
 
d1b0e84
e5b6ad2
d1b0e84
 
e5b6ad2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d1b0e84
e5b6ad2
 
 
 
 
 
 
 
d1b0e84
e5b6ad2
d1b0e84
e5b6ad2
 
 
 
 
 
 
 
d1b0e84
1e83db4
 
d1b0e84
e5b6ad2
 
d1b0e84
e5b6ad2
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import gradio as gr
from transformers import pipeline
from PIL import Image
from fastapi import FastAPI
from starlette.responses import RedirectResponse

# ✅ Pydantic v2 compatibility patch (intended for the FastAPI + Gradio combination):
# assigns a class-level config on pydantic's BaseModel with
# `arbitrary_types_allowed` switched on.
# NOTE(review): fastapi and gradio are imported before this assignment runs —
# confirm the patch still takes effect for the models it was meant to cover.
from pydantic import BaseModel
BaseModel.model_config = dict(arbitrary_types_allowed=True)

# ✅ Build both Hugging Face pipelines once, at import time, so every request
# reuses the same loaded models.
summarizer = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
)
image_captioner = pipeline(
    task="image-to-text",
    model="nlpconnect/vit-gpt2-image-captioning",
)

# ✅ FastAPI application object; the Gradio UI is attached to it further down.
app = FastAPI()

_IMAGE_SUFFIXES = (".png", ".jpg", ".jpeg")


def _upload_path(file):
    """Return the filesystem path of an upload given as a path or an object with ``.name``."""
    import os

    if isinstance(file, (str, os.PathLike)):
        return os.fspath(file)
    return file.name


def _extract_pdf_text(path):
    """Return the text of all PDF pages that produced any, joined by newlines."""
    import pdfplumber

    with pdfplumber.open(path) as pdf:
        # Extract each page exactly once; falsy results are dropped below.
        page_texts = [page.extract_text() for page in pdf.pages]
    return "\n".join(page_text for page_text in page_texts if page_text)


def _extract_docx_text(path):
    """Return the non-blank paragraphs of a DOCX file, joined by newlines."""
    import docx

    document = docx.Document(path)
    return "\n".join(p.text for p in document.paragraphs if p.text.strip())


def _extract_pptx_text(path):
    """Return the text of every shape exposing ``.text``, one shape per line."""
    import pptx

    presentation = pptx.Presentation(path)
    lines = []
    for slide in presentation.slides:
        for shape in slide.shapes:
            if hasattr(shape, "text"):
                lines.append(shape.text + "\n")
    return "".join(lines)


def _extract_xlsx_text(path):
    """Return every worksheet rendered with ``DataFrame.to_string``, joined by newlines."""
    import pandas as pd

    # sheet_name=None makes read_excel return a mapping of sheet name -> DataFrame.
    sheets = pd.read_excel(path, sheet_name=None)
    return "\n".join(frame.to_string() for frame in sheets.values())


# Maps a lowercase file suffix to the helper that extracts its text.
_DOCUMENT_EXTRACTORS = {
    ".pdf": _extract_pdf_text,
    ".docx": _extract_docx_text,
    ".pptx": _extract_pptx_text,
    ".xlsx": _extract_xlsx_text,
}


def analyze_input(file, question=None):
    """Caption an uploaded image or summarize an uploaded document.

    Args:
        file: The upload. Either a filesystem path (``str`` / ``os.PathLike``)
            or an object whose ``.name`` attribute is the path. ``None`` means
            nothing was uploaded.
        question: Currently unused; kept so existing callers that pass it
            keep working.

    Returns:
        A human-readable string: the image caption, the document summary, or
        a message describing why no result could be produced. Processing
        errors are reported in the returned string rather than raised.
    """
    if file is None:
        return "Please upload a document or image."

    # Work from the path rather than the upload object so the parsers behave
    # the same whether the caller hands over a path string or a file wrapper.
    path = _upload_path(file)
    filename = path.lower()

    # 📷 Image processing
    if filename.endswith(_IMAGE_SUFFIXES):
        try:
            # The context manager closes the underlying file once captioned.
            with Image.open(path) as image:
                caption = image_captioner(image)[0]['generated_text']
        except Exception as e:
            return f"❌ Error processing image: {e}"
        return f"📷 Image Interpretation:\n{caption}"

    # 📄 Document processing
    extract = next(
        (fn for suffix, fn in _DOCUMENT_EXTRACTORS.items() if filename.endswith(suffix)),
        None,
    )
    if extract is None:
        return "❌ Unsupported file type. Please upload a valid image or document."

    try:
        # Each extractor imports only the library it needs, inside this try,
        # so a missing optional parser is reported instead of crashing.
        text = extract(path)

        if not text.strip():
            return "❌ Could not extract meaningful text from the document."

        # The 3000-character slice bounds the work; truncation=True additionally
        # lets the pipeline cut token-dense input down to the model's limit.
        summary = summarizer(
            text[:3000],
            max_length=200,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        return f"📄 Document Summary:\n{summary[0]['summary_text']}"

    except Exception as e:
        return f"❌ Error processing document: {e}"

# ✅ Gradio UI: a single file upload goes to analyze_input and the returned
# string is shown in a ten-line text box.
iface = gr.Interface(
    title="Document & Image Analysis Web Service",
    description=(
        "Upload a document (PDF, DOCX, PPTX, XLSX) to get a summary "
        "or an image to get a caption. Runs fully on CPU."
    ),
    fn=analyze_input,
    inputs=gr.File(label="Upload Document or Image"),
    outputs=gr.Textbox(label="Result", lines=10),
)

# ✅ Present that interface as the only tab of a tabbed layout.
demo = gr.TabbedInterface(
    interface_list=[iface],
    tab_names=["Docs and Images"],
)

# ✅ Mount the Gradio UI at the root of the FastAPI app.
# No separate "/" redirect route is registered: Gradio is already served at
# "/", and a handler that redirects "/" to "/" would either be shadowed by
# this mount or send the client into an endless redirect loop.
app = gr.mount_gradio_app(app, demo, path="/")