# Hugging Face Space: image analysis service (captioning + OCR + TTS + PDF report)
# (The "Spaces: Sleeping" lines were page-scrape residue, not code.)
# Standard library
import datetime
import os
import tempfile

# Third-party
import easyocr
import gradio as gr
from fastapi import FastAPI
from fastapi.responses import FileResponse, JSONResponse, RedirectResponse
from fpdf import FPDF
from gtts import gTTS
from transformers import pipeline
| # Initialize components | |
| app = FastAPI() | |
| # Load models | |
| captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning") | |
| reader = easyocr.Reader(['en', 'fr']) # English and French OCR | |
def analyze_image(image_path):
    """Run captioning and OCR over the image at *image_path*.

    Returns
    -------
    dict
        ``{"caption": str, "extracted_text": str}`` on success, or
        ``{"error": str}`` if either model call raises.
    """
    try:
        # Vision-to-text caption from the ViT-GPT2 pipeline.
        caption = captioner(image_path)[0]['generated_text']

        # detail=0 makes EasyOCR return bare strings instead of boxes+confidences.
        lines = reader.readtext(image_path, detail=0)
        extracted = "\n".join(lines) if lines else "No text detected"

        return {"caption": caption, "extracted_text": extracted}
    except Exception as exc:
        # Surface the failure to the caller as data rather than raising.
        return {"error": str(exc)}
def text_to_speech(text: str) -> str:
    """Synthesize *text* to an MP3 with gTTS.

    Returns the path of the generated temp file, or "" on any failure
    (gTTS needs network access and raises on empty input).
    """
    try:
        tts = gTTS(text)
        # mkstemp + close instead of NamedTemporaryFile(delete=False):
        # the original kept the fd open while gTTS wrote to the same path,
        # which leaks the descriptor and fails outright on Windows.
        fd, audio_path = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)
        tts.save(audio_path)
        return audio_path
    except Exception as e:
        print(f"Text-to-speech error: {e}")
        return ""
def create_pdf(content: dict, original_filename: str) -> str:
    """Render the analysis *content* into a one-page PDF report.

    Parameters
    ----------
    content : dict with "caption" and "extracted_text" keys.
    original_filename : shown in the report metadata line.

    Returns the temp-file path of the PDF, or "" on failure.
    """
    def _latin1(text: str) -> str:
        # Core FPDF fonts (Arial) are Latin-1 only; un-sanitized caption/OCR
        # text with other glyphs makes pdf.output() raise UnicodeEncodeError.
        return text.encode("latin-1", "replace").decode("latin-1")

    try:
        pdf = FPDF()
        pdf.add_page()

        # Title
        pdf.set_font("Arial", 'B', 16)
        pdf.cell(200, 10, txt="Image Analysis Report", ln=1, align='C')
        pdf.set_font("Arial", size=12)

        # Metadata
        pdf.cell(200, 10, txt=_latin1(f"Original file: {original_filename}"), ln=1)
        pdf.cell(200, 10, txt=f"Generated on: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", ln=1)
        pdf.ln(10)

        # Caption section
        pdf.set_font("Arial", 'B', 12)
        pdf.cell(200, 10, txt="Image Caption:", ln=1)
        pdf.set_font("Arial", size=12)
        pdf.multi_cell(0, 10, txt=_latin1(content['caption']))
        pdf.ln(5)

        # Extracted-text section
        pdf.set_font("Arial", 'B', 12)
        pdf.cell(200, 10, txt="Extracted Text:", ln=1)
        pdf.set_font("Arial", size=12)
        pdf.multi_cell(0, 10, txt=_latin1(content['extracted_text']))

        # mkstemp + close: fpdf reopens the path itself, so holding the
        # NamedTemporaryFile handle open (as before) just leaked an fd.
        fd, pdf_path = tempfile.mkstemp(suffix=".pdf")
        os.close(fd)
        pdf.output(pdf_path)
        return pdf_path
    except Exception as e:
        print(f"PDF creation error: {e}")
        return ""
def process_image(file, enable_tts: bool):
    """Gradio click handler: caption + OCR the upload, optional TTS, PDF report.

    Returns a 4-tuple: (result_text, status, audio_path_or_None, pdf_path_or_None).
    """
    if file is None:
        return "Please upload an image first", "Ready", None, None

    # BUG FIX: gr.Image(type="filepath") delivers a plain str path, so the
    # original `file.name` raised AttributeError. Accept both a str path and
    # legacy file-like objects that carry a .name attribute.
    file_path = file if isinstance(file, str) else file.name
    original_filename = os.path.basename(file_path)

    try:
        result = analyze_image(file_path)
        if "error" in result:
            return result["error"], "Error", None, None

        output_text = f"📷 Image Caption:\n{result['caption']}\n\n✍️ Extracted Text:\n{result['extracted_text']}"

        # Only synthesize audio when the user asked for it.
        audio_path = None
        if enable_tts:
            audio_path = text_to_speech(
                f"Image caption: {result['caption']}. Extracted text: {result['extracted_text']}"
            )

        pdf_path = create_pdf(result, original_filename)
        return output_text, "Analysis complete", audio_path, pdf_path
    except Exception as e:
        return f"Analysis error: {str(e)}", "Error", None, None
# Gradio Interface: two-column layout — inputs on the left, results on the right.
with gr.Blocks(title="Image Analysis Service", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🖼️ Image Analysis Service")
    gr.Markdown("Upload an image to get automatic captioning and text extraction")
    with gr.Row():
        with gr.Column():
            # type="filepath" hands process_image a path string, not pixels.
            image_input = gr.Image(label="Upload Image", type="filepath")
            tts_checkbox = gr.Checkbox(
                label="Enable Text-to-Speech",
                value=False
            )
            analyze_btn = gr.Button("Analyze Image", variant="primary")
        with gr.Column():
            output = gr.Textbox(label="Analysis Results", lines=10)
            status = gr.Textbox(label="Status", interactive=False)
            # Audio/PDF widgets start hidden; they are revealed once a result exists.
            audio_output = gr.Audio(label="Audio Summary", visible=False)
            pdf_download = gr.File(label="Download Report", visible=False)

    def toggle_audio_visibility(enable_tts):
        # Show/hide the audio player as the checkbox is toggled.
        return gr.Audio(visible=enable_tts)

    def update_ui(result, status, audio_path, pdf_path):
        # Second pass over the outputs: flip visibility on based on whether
        # an audio/PDF path was actually produced by process_image.
        return (
            result,
            status,
            gr.Audio(visible=audio_path is not None, value=audio_path),
            gr.File(visible=pdf_path is not None, value=pdf_path)
        )

    tts_checkbox.change(
        fn=toggle_audio_visibility,
        inputs=tts_checkbox,
        outputs=audio_output
    )
    # Chain: run the analysis, then feed its outputs back through update_ui
    # so the audio/file components get their visibility updated.
    analyze_btn.click(
        fn=process_image,
        inputs=[image_input, tts_checkbox],
        outputs=[output, status, audio_output, pdf_download]
    ).then(
        fn=update_ui,
        inputs=[output, status, audio_output, pdf_download],
        outputs=[output, status, audio_output, pdf_download]
    )
# FastAPI setup
async def get_file(file_name: str):
    """Serve a previously generated temp file (audio/PDF) by bare name.

    NOTE(review): this handler has no route decorator (e.g.
    @app.get("/file/{file_name}")), so it is never registered — confirm
    the intended wiring. FileResponse/JSONResponse were also used here
    without being imported (NameError); the imports are now at file top.
    """
    # Restrict to the basename so "../"-style names cannot escape tempdir.
    safe_name = os.path.basename(file_name)
    file_path = os.path.join(tempfile.gettempdir(), safe_name)
    if os.path.exists(file_path):
        return FileResponse(file_path)
    return JSONResponse({"error": "File not found"}, status_code=404)

# Mount the Gradio UI at the application root.
app = gr.mount_gradio_app(app, demo, path="/")

def redirect_to_interface():
    """Redirect to the Gradio UI root (also unregistered — no decorator)."""
    return RedirectResponse(url="/")