import logging
import os

import gradio as gr
from transformers import pipeline
from PIL import Image
from fastapi import FastAPI
from starlette.responses import RedirectResponse

# ✅ Patch for Pydantic v2 schema compatibility (important for FastAPI + Gradio)
# NOTE(review): this replaces the config on the shared BaseModel class, so it is
# a process-wide setting — confirm it is still required by the pinned versions.
from pydantic import BaseModel

BaseModel.model_config = {"arbitrary_types_allowed": True}

logger = logging.getLogger(__name__)

# ✅ Load models (done once at import time so every request reuses them)
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
image_captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")

# ✅ Initialize FastAPI app
app = FastAPI()

_IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg")

# Only the first N characters of a document are handed to the summarizer.
_MAX_SUMMARY_INPUT_CHARS = 3000


def _resolve_upload_path(file) -> str:
    """Return the filesystem path behind an uploaded file.

    Accepts either a plain path (``str`` / ``os.PathLike``) or an object that
    exposes the path through a ``name`` attribute.
    """
    # Check for path-likes first: ``pathlib.Path.name`` is only the final
    # component, so reading ``.name`` on a Path would drop the directory.
    if isinstance(file, (str, os.PathLike)):
        return os.fspath(file)
    return os.fspath(file.name)


def _extract_pdf_text(path: str) -> str:
    """Return the text of every PDF page that yields any, joined by newlines."""
    import pdfplumber

    with pdfplumber.open(path) as pdf:
        # Extract each page once; pages without text give a falsy result.
        page_texts = [page.extract_text() for page in pdf.pages]
    return "\n".join(page_text for page_text in page_texts if page_text)


def _extract_docx_text(path: str) -> str:
    """Return the non-blank paragraphs of a DOCX file, joined by newlines."""
    import docx

    document = docx.Document(path)
    return "\n".join(
        paragraph.text for paragraph in document.paragraphs if paragraph.text.strip()
    )


def _extract_pptx_text(path: str) -> str:
    """Return the text of every text-bearing shape in a PPTX, one per line."""
    import pptx

    presentation = pptx.Presentation(path)
    chunks = []
    for slide in presentation.slides:
        chunks.extend(
            shape.text + "\n" for shape in slide.shapes if hasattr(shape, "text")
        )
    return "".join(chunks)


def _extract_xlsx_text(path: str) -> str:
    """Return every sheet of an XLSX workbook rendered as text."""
    import pandas as pd

    # sheet_name=None loads all sheets into a mapping of name -> DataFrame.
    sheets = pd.read_excel(path, sheet_name=None)
    return "\n".join(frame.to_string() for frame in sheets.values())


# Maps a lower-case file extension to the function that extracts its text.
_DOCUMENT_EXTRACTORS = {
    ".pdf": _extract_pdf_text,
    ".docx": _extract_docx_text,
    ".pptx": _extract_pptx_text,
    ".xlsx": _extract_xlsx_text,
}


def analyze_input(file, question=None) -> str:
    """Caption an uploaded image or summarize an uploaded document.

    Args:
        file: The upload to analyze: a path, an object exposing its path as
            ``.name``, or ``None`` when nothing was uploaded.
        question: Accepted for interface compatibility; currently unused.

    Returns:
        A human-readable result string: the caption, the summary, or a
        message describing why the input could not be processed.
    """
    if file is None:
        return "Please upload a document or image."

    path = _resolve_upload_path(file)
    # Lower-case only for extension matching; open the file by its real path.
    lowered = path.lower()

    # 📷 Image Processing
    if lowered.endswith(_IMAGE_EXTENSIONS):
        try:
            # The context manager releases the image's file handle.
            with Image.open(path) as image:
                caption = image_captioner(image)[0]["generated_text"]
        except Exception as e:  # UI boundary: report instead of crashing.
            logger.exception("Image processing failed for %s", path)
            return f"❌ Error processing image: {e}"
        return f"📷 Image Interpretation:\n{caption}"

    # 📄 Document Processing
    extractor = next(
        (
            extract
            for extension, extract in _DOCUMENT_EXTRACTORS.items()
            if lowered.endswith(extension)
        ),
        None,
    )
    if extractor is None:
        return "❌ Unsupported file type. Please upload a valid image or document."

    try:
        # Parser libraries are imported inside the extractors, so a missing
        # optional dependency is reported here rather than raised.
        text = extractor(path)
        if not text.strip():
            return "❌ Could not extract meaningful text from the document."

        summary = summarizer(
            text[:_MAX_SUMMARY_INPUT_CHARS],
            max_length=200,
            min_length=30,
            do_sample=False,
            # Guard against inputs that still exceed the model's token limit.
            truncation=True,
        )
        return f"📄 Document Summary:\n{summary[0]['summary_text']}"
    except Exception as e:  # UI boundary: report instead of crashing.
        logger.exception("Document processing failed for %s", path)
        return f"❌ Error processing document: {str(e)}"
# ✅ Gradio Interface: one file upload in, one text box out.
_upload_input = gr.File(label="Upload Document or Image")
_result_output = gr.Textbox(label="Result", lines=10)

iface = gr.Interface(
    fn=analyze_input,
    inputs=_upload_input,
    outputs=_result_output,
    title="Document & Image Analysis Web Service",
    description=(
        "Upload a document (PDF, DOCX, PPTX, XLSX) to get a summary "
        "or an image to get a caption. Runs fully on CPU."
    ),
)

# ✅ Wrap in a Tabbed Interface (a single tab holding the interface above)
_tab_interfaces = [iface]
_tab_titles = ["Docs and Images"]
demo = gr.TabbedInterface(_tab_interfaces, _tab_titles)

# ✅ Mount to FastAPI app at the site root
app = gr.mount_gradio_app(app, demo, path="/")


# ✅ Redirect base URL to Gradio app
# NOTE(review): the Gradio app is already mounted at "/", so this handler looks
# like it is shadowed by the mount, and if it were reached it would redirect
# "/" to itself. Confirm, then consider mounting Gradio on a sub-path and
# redirecting there, or removing this route.
@app.get("/")
def home():
    """Respond with a redirect to "/"."""
    redirect = RedirectResponse(url="/")
    return redirect