"""Document summarizer and image captioner exposed through a Gradio UI.

Text is extracted from PDF, DOCX, PPTX and XLSX uploads and summarized with a
Hugging Face summarization pipeline; images are captioned with an
image-to-text pipeline. A minimal FastAPI app is also defined.
"""

import io
import os
from contextlib import contextmanager
from typing import Union

import docx
import fitz  # PyMuPDF
import gradio as gr
import openpyxl
import pptx
from fastapi import FastAPI, UploadFile, File
from PIL import Image
from transformers import pipeline

# Models (loaded once at import time)
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
image_captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")

app = FastAPI()

# Character budget applied to extracted text before it is summarized.
MAX_INPUT_CHARS = 3000


# -------------------------
# Document Extraction Utils
# -------------------------
def extract_text_from_pdf(file):
    """Return the concatenated text of every page of a PDF.

    Args:
        file: Binary file-like object whose ``read()`` returns the PDF bytes.
    """
    with fitz.open(stream=file.read(), filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)


def extract_text_from_docx(file):
    """Return the non-blank paragraphs of a DOCX file, one per line.

    Args:
        file: Binary file-like object whose ``read()`` returns the DOCX bytes.
    """
    document = docx.Document(io.BytesIO(file.read()))
    return "\n".join(para.text for para in document.paragraphs if para.text.strip())


def extract_text_from_pptx(file):
    """Return the text of every text-bearing shape on every slide, one per line.

    Args:
        file: Binary file-like object whose ``read()`` returns the PPTX bytes.
    """
    prs = pptx.Presentation(io.BytesIO(file.read()))
    return "\n".join(
        shape.text
        for slide in prs.slides
        for shape in slide.shapes
        if hasattr(shape, "text")
    )


def extract_text_from_xlsx(file):
    """Return the cell values of every sheet, one space-joined row per line.

    Empty cells are skipped, but falsy values such as ``0`` and ``False`` are
    kept. Rows with no values at all are omitted so they do not consume the
    summarizer's character budget.

    Args:
        file: Binary file-like object whose ``read()`` returns the XLSX bytes.
    """
    wb = openpyxl.load_workbook(io.BytesIO(file.read()))
    lines = []
    for sheet in wb.sheetnames:
        for row in wb[sheet].iter_rows(values_only=True):
            # "is not None" rather than truthiness: 0 and False are real data.
            line = " ".join(str(cell) for cell in row if cell is not None)
            if line:
                lines.append(line)
    return "\n".join(lines)


# Maps a lower-cased file extension to the extractor that handles it.
_EXTRACTORS = {
    "pdf": extract_text_from_pdf,
    "docx": extract_text_from_docx,
    "pptx": extract_text_from_pptx,
    "xlsx": extract_text_from_xlsx,
}


def _is_upload_file(file):
    """Return True when *file* looks like a FastAPI/Starlette ``UploadFile``."""
    return hasattr(file, "filename") and hasattr(file, "file")


def _upload_filename(file):
    """Return the file name (or path) used to detect the upload's extension."""
    if isinstance(file, (str, os.PathLike)):
        return os.fspath(file)
    if _is_upload_file(file):
        return file.filename or ""
    # Temp-file style wrapper that only exposes the path as ``.name``.
    return getattr(file, "name", "") or ""


@contextmanager
def _binary_stream(file):
    """Yield a synchronous binary stream for *file*.

    Streams opened here are closed on exit; a stream owned by an
    ``UploadFile`` is left open for its owner to close.
    """
    if _is_upload_file(file):
        # UploadFile.read() is a coroutine; the wrapped .file is synchronous.
        yield file.file
        return
    path = file if isinstance(file, (str, os.PathLike)) else file.name
    # Re-open by path so the data is always read in binary mode from offset 0.
    with open(path, "rb") as handle:
        yield handle


def summarize_document(file: Union[UploadFile, str, os.PathLike, None]):
    """Extract text from an uploaded document and return a short summary.

    Args:
        file: The upload. May be a filesystem path, an object exposing the
            path as ``.name`` (what the Gradio ``File`` component passes,
            depending on version), or a FastAPI ``UploadFile``. ``None`` means
            nothing was uploaded.

    Returns:
        The summary text, or a human-readable message when no file was given,
        the format is unsupported, no text could be extracted, or the
        summarizer raised.
    """
    if file is None:
        return "No file uploaded."

    ext = _upload_filename(file).rsplit(".", 1)[-1].lower()
    extractor = _EXTRACTORS.get(ext)
    if extractor is None:
        return "Unsupported file format."

    with _binary_stream(file) as stream:
        text = extractor(stream)

    if not text.strip():
        return "No extractable text."

    # Trim large docs
    text = text[:MAX_INPUT_CHARS]

    try:
        # truncation=True guards against inputs that still exceed the model's
        # token limit after the character trim.
        summary = summarizer(
            text,
            max_length=150,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        return summary[0]["summary_text"]
    except Exception as e:
        return f"Summarization error: {e}"


def interpret_image(image):
    """Return a generated caption for *image*.

    Args:
        image: The image to caption (the UI passes a PIL image), or ``None``
            when nothing was uploaded.

    Returns:
        The caption text, or a human-readable message when no image was given
        or the captioning model raised.
    """
    if image is None:
        return "No image uploaded."
    try:
        return image_captioner(image)[0]["generated_text"]
    except Exception as e:
        return f"Image captioning error: {e}"


# -------------------------
# Gradio UI
# -------------------------
def run_interface():
    """Build the two-tab Gradio UI and launch it."""
    doc_summary = gr.Interface(
        fn=summarize_document,
        inputs=gr.File(label="Upload a Document"),
        outputs="text",
        title="📄 Document Summarizer",
    )

    img_caption = gr.Interface(
        fn=interpret_image,
        inputs=gr.Image(type="pil", label="Upload an Image"),
        outputs="text",
        title="🖼️ Image Interpreter",
    )

    gr.TabbedInterface(
        [doc_summary, img_caption],
        ["Summarize Document", "Caption Image"],
    ).launch()


# -------------------------
# Run from CLI or FastAPI
# -------------------------
@app.get("/")
def read_root():
    """Return a static informational message for the API root."""
    # NOTE(review): the Gradio UI is only started by run_interface(); nothing
    # in this file mounts it on the FastAPI app, so this message may mislead.
    return {"message": "Gradio running at /docs or use CLI"}


if __name__ == "__main__":
    run_interface()