Spaces:

ikraamkb
/

Summarization

Running

App Files Files Community

Summarization / app.py

ikraamkb

Update app.py

e8ad5ec verified 19 days ago

raw

history blame

3.15 kB

	import gradio as gr
	from transformers import pipeline
	from PIL import Image
	from fastapi import FastAPI
	from starlette.responses import RedirectResponse

	# 🔥 Pydantic v1 compatibility shim: give v1 models the v2-style `model_fields` name
	import gradio.context
	from pydantic import BaseModel

	if not hasattr(BaseModel, "model_fields"):
	    # Pydantic v2 renamed `__fields__` to `model_fields`. When running on v1 the
	    # new name is missing, so alias it. A plain assignment
	    # (`BaseModel.model_fields = BaseModel.__fields__`) would bind the *base*
	    # class's field mapping once, and every subclass would then inherit that
	    # same mapping instead of seeing its own fields. A descriptor resolves the
	    # lookup against whichever class is actually being accessed.
	    class _ModelFieldsAlias:
	        """Read-only alias that returns the accessing class's ``__fields__``."""

	        def __get__(self, instance, owner=None):
	            model_cls = owner if owner is not None else type(instance)
	            return model_cls.__fields__

	    BaseModel.model_fields = _ModelFieldsAlias()

	# 🔁 Hugging Face pipelines, built once at import time and reused by the handler
	summarizer = pipeline(
	    task="summarization",
	    model="sshleifer/distilbart-cnn-12-6",
	)
	image_captioner = pipeline(
	    task="image-to-text",
	    model="nlpconnect/vit-gpt2-image-captioning",
	)

	# 🚀 FastAPI application object
	app = FastAPI()

	def _extract_document_text(path, filename):
	    """Return the text extracted from the document stored at *path*.

	    *filename* is the lower-cased path and is used only to pick the parser by
	    extension. The parser library for a format is imported only when that
	    format is requested, so a missing optional library affects just its own
	    format. Returns an empty string for an extension it does not handle.
	    """
	    if filename.endswith(".pdf"):
	        import pdfplumber

	        with pdfplumber.open(path) as pdf:
	            # Run the text extraction once per page and filter afterwards.
	            page_texts = [page.extract_text() for page in pdf.pages]
	        return "\n".join(page_text for page_text in page_texts if page_text)

	    if filename.endswith(".docx"):
	        import docx

	        document = docx.Document(path)
	        return "\n".join(
	            para.text for para in document.paragraphs if para.text.strip()
	        )

	    if filename.endswith(".pptx"):
	        import pptx

	        deck = pptx.Presentation(path)
	        # Every shape that exposes text contributes one newline-terminated chunk.
	        return "".join(
	            shape.text + "\n"
	            for slide in deck.slides
	            for shape in slide.shapes
	            if hasattr(shape, "text")
	        )

	    if filename.endswith(".xlsx"):
	        import pandas as pd

	        # sheet_name=None loads every sheet into a {sheet name: DataFrame} dict.
	        sheets = pd.read_excel(path, sheet_name=None)
	        return "\n".join(frame.to_string() for frame in sheets.values())

	    return ""


	def analyze_input(file, question=None):
	    """Caption an uploaded image or summarize an uploaded document.

	    Args:
	        file: The upload handed over by the UI. Either an object whose
	            ``name`` attribute is the file's path, or the path itself as a
	            string. ``None`` means nothing was uploaded.
	        question: Accepted for interface compatibility; not used.

	    Returns:
	        A user-facing message: the image caption, the document summary, or
	        a short explanation of why nothing could be produced.
	    """
	    if file is None:
	        return "Please upload a document or image."

	    # Work from the on-disk path instead of the upload object itself.
	    path = file if isinstance(file, str) else file.name
	    filename = path.lower()

	    # 🖼️ Image
	    if filename.endswith((".png", ".jpg", ".jpeg")):
	        # The context manager releases the image's file handle afterwards.
	        with Image.open(path) as image:
	            caption = image_captioner(image)[0]['generated_text']
	        return f"📷 Image Interpretation:\n{caption}"

	    if not filename.endswith((".pdf", ".docx", ".pptx", ".xlsx")):
	        return "❌ Unsupported file type. Please upload a valid image or document."

	    # 📄 Document
	    try:
	        text = _extract_document_text(path, filename)
	        if not text.strip():
	            return "❌ Could not extract meaningful text from the document."

	        # The 3000-character cut bounds the work; truncation=True additionally
	        # asks the pipeline to clip the input if it is still too long.
	        summary = summarizer(
	            text[:3000],
	            max_length=200,
	            min_length=30,
	            do_sample=False,
	            truncation=True,
	        )
	        return f"📄 Document Summary:\n{summary[0]['summary_text']}"
	    except Exception as e:
	        # UI boundary: report the failure as the result text instead of raising.
	        return f"❌ Error processing document: {e}"

	# 🎛️ Gradio UI: a single file upload feeding a single text result
	_upload_input = gr.File(label="Upload Document or Image")
	_result_output = gr.Textbox(label="Result", lines=10)

	iface = gr.Interface(
	    fn=analyze_input,
	    inputs=_upload_input,
	    outputs=_result_output,
	    title="Document & Image Analysis Web Service",
	    description="Upload a document (PDF, DOCX, PPTX, XLSX) or image to get a summary or caption. CPU-friendly.",
	)

	# ⌨️ Put the interface inside a tabbed layout (one tab)
	demo = gr.TabbedInterface(
	    [iface],
	    ["Docs and Images"],
	)

	# 🔗 Attach the Gradio UI to the FastAPI app at the root path
	app = gr.mount_gradio_app(
	    app,
	    demo,
	    path="/",
	)

	# 🏠 Base redirect
	@app.get("/")
	def home():
	    """Answer ``GET /`` with a redirect whose target is ``/``.

	    NOTE(review): the redirect target is the same path this route is
	    registered on, so if this handler is ever reached the client is sent
	    straight back to it. Gradio is mounted at "/" before this route is
	    registered, so the mount presumably serves "/" and this handler is
	    never hit -- confirm, then either drop the route or mount Gradio on a
	    sub-path and redirect there.
	    """
	    root_redirect = RedirectResponse(url="/")
	    return root_redirect