Spaces:

ikraamkb
/

Summarization

Sleeping

App Files Files Community

ikraamkb committed on Apr 18

Commit

5a7d5c7

verified ·

1 Parent(s): e9d6531

Update appImage.py

Browse files

Files changed (1) hide show

appImage.py +172 -0

appImage.py CHANGED Viewed

	@@ -0,0 +1,172 @@

import datetime
import os
import tempfile

import easyocr
import gradio as gr
from fastapi import FastAPI
from fastapi.responses import FileResponse, JSONResponse, RedirectResponse
from fpdf import FPDF
from gtts import gTTS
from transformers import pipeline
# Application object: the file route and the Gradio UI are attached to it
# further down in this module.
app = FastAPI()

# Models are loaded once at import time and shared by every request.
# Image-captioning pipeline backed by the checkpoint named below.
captioner = pipeline(
    task="image-to-text",
    model="nlpconnect/vit-gpt2-image-captioning",
)

# OCR reader configured for English and French.
reader = easyocr.Reader(lang_list=["en", "fr"])
def analyze_image(image_path):
    """Caption an image and extract any text found in it.

    Runs the module-level ``captioner`` pipeline first and the EasyOCR
    ``reader`` second, both on the same file.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        ``{"caption": ..., "extracted_text": ...}`` on success, where
        ``extracted_text`` is the OCR fragments joined with newlines, or
        ``"No text detected"`` when OCR returns nothing. If any step raises,
        ``{"error": <exception message>}`` is returned instead.
    """
    try:
        # The pipeline returns a list of predictions; the first one is used.
        caption = captioner(image_path)[0]["generated_text"]

        # detail=0 makes EasyOCR return bare strings rather than boxes.
        fragments = reader.readtext(image_path, detail=0)
        if fragments:
            extracted_text = "\n".join(fragments)
        else:
            extracted_text = "No text detected"
    except Exception as exc:
        return {"error": str(exc)}

    return {"caption": caption, "extracted_text": extracted_text}
def text_to_speech(text: str) -> str:
    """Convert text to speech and save it as a temporary MP3 file.

    Args:
        text: The text to synthesise.

    Returns:
        Path of the generated ``.mp3`` file, or ``""`` when ``text`` is empty
        or whitespace-only, or when synthesis fails (the error is printed).
    """
    # Nothing to speak: skip the synthesis attempt entirely.
    if not text or not text.strip():
        return ""

    audio_path = ""
    try:
        tts = gTTS(text)
        # Reserve a unique file name and close the handle right away, so no
        # descriptor is leaked and the file can be reopened for writing on
        # platforms that forbid a second open of a still-open temp file.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
            audio_path = temp_audio.name
        tts.save(audio_path)
        return audio_path
    except Exception as e:
        print(f"Text-to-speech error: {e}")
        # Do not leave an empty or partial file behind after a failure.
        if audio_path and os.path.exists(audio_path):
            os.remove(audio_path)
        return ""
def _latin1_safe(text: str) -> str:
    """Replace characters the PDF core fonts cannot encode with ``?``."""
    return text.encode("latin-1", "replace").decode("latin-1")


def create_pdf(content: dict, original_filename: str) -> str:
    """Create a PDF report for one image analysis.

    Args:
        content: Analysis result holding the ``"caption"`` and
            ``"extracted_text"`` strings.
        original_filename: Name of the analysed image, printed in the
            report's metadata section.

    Returns:
        Path of the generated temporary ``.pdf`` file, or ``""`` if the
        report could not be created (the error is printed).
    """
    pdf_path = ""
    try:
        pdf = FPDF()
        pdf.add_page()

        # Title
        pdf.set_font("Arial", 'B', 16)
        pdf.cell(200, 10, txt="Image Analysis Report", ln=1, align='C')
        pdf.set_font("Arial", size=12)

        # Metadata
        # The built-in Arial font is limited to Latin-1, so user-derived text
        # is sanitised first; otherwise a single unsupported character from
        # the file name, caption or OCR output would abort the whole report.
        pdf.cell(200, 10, txt=_latin1_safe(f"Original file: {original_filename}"), ln=1)
        generated_on = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        pdf.cell(200, 10, txt=f"Generated on: {generated_on}", ln=1)
        pdf.ln(10)

        # Caption (an empty family name keeps the current font family)
        pdf.set_font("", 'B')
        pdf.cell(200, 10, txt="Image Caption:", ln=1)
        pdf.set_font("")
        pdf.multi_cell(0, 10, txt=_latin1_safe(content['caption']))
        pdf.ln(5)

        # Extracted Text
        pdf.set_font("", 'B')
        pdf.cell(200, 10, txt="Extracted Text:", ln=1)
        pdf.set_font("")
        pdf.multi_cell(0, 10, txt=_latin1_safe(content['extracted_text']))

        # Reserve a unique file name and close the handle before FPDF writes
        # to it, so no descriptor is leaked.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
            pdf_path = temp_pdf.name
        pdf.output(pdf_path)
        return pdf_path
    except Exception as e:
        print(f"PDF creation error: {e}")
        # Do not leave an empty or partial file behind after a failure.
        if pdf_path and os.path.exists(pdf_path):
            os.remove(pdf_path)
        return ""
def process_image(file, enable_tts: bool):
    """Handle image processing for the Gradio interface.

    Args:
        file: The uploaded image. Either a filesystem path (``str`` or
            ``os.PathLike``), which is what ``gr.Image(type="filepath")``
            supplies, or an object exposing the path as ``.name``.
            ``None`` means nothing was uploaded.
        enable_tts: Whether to also produce a spoken summary.

    Returns:
        A 4-tuple ``(result_text, status, audio_path, pdf_path)``.
        ``audio_path`` and ``pdf_path`` are ``None`` when the artefact was not
        requested or could not be generated. On failure ``result_text``
        carries the error message and ``status`` is ``"Error"``.
    """
    if file is None:
        return "Please upload an image first", "Ready", None, None

    try:
        # Resolve the path inside the try block so an unexpected input type
        # is reported as an analysis error instead of escaping to the caller.
        if isinstance(file, (str, os.PathLike)):
            file_path = os.fspath(file)
        else:
            file_path = file.name
        original_filename = os.path.basename(file_path)

        # Analyze image
        result = analyze_image(file_path)
        if "error" in result:
            return result["error"], "Error", None, None

        # Format output
        output_text = f"📷 Image Caption:\n{result['caption']}\n\n✍️ Extracted Text:\n{result['extracted_text']}"

        # Generate audio. The helper signals failure with "", which is
        # normalised to None so the UI treats it as "no audio".
        audio_path = None
        if enable_tts:
            spoken_text = f"Image caption: {result['caption']}. Extracted text: {result['extracted_text']}"
            audio_path = text_to_speech(spoken_text) or None

        # Generate PDF, normalising the "" failure value the same way.
        pdf_path = create_pdf(result, original_filename) or None

        return output_text, "Analysis complete", audio_path, pdf_path
    except Exception as e:
        return f"Analysis error: {str(e)}", "Error", None, None
# Gradio Interface
with gr.Blocks(title="Image Analysis Service", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🖼️ Image Analysis Service")
    gr.Markdown("Upload an image to get automatic captioning and text extraction")

    with gr.Row():
        # Left column: the inputs and the trigger button.
        with gr.Column():
            image_input = gr.Image(label="Upload Image", type="filepath")
            tts_checkbox = gr.Checkbox(label="Enable Text-to-Speech", value=False)
            analyze_btn = gr.Button("Analyze Image", variant="primary")

        # Right column: the results. Audio and PDF widgets start hidden.
        with gr.Column():
            output = gr.Textbox(label="Analysis Results", lines=10)
            status = gr.Textbox(label="Status", interactive=False)
            audio_output = gr.Audio(label="Audio Summary", visible=False)
            pdf_download = gr.File(label="Download Report", visible=False)

    def toggle_audio_visibility(enable_tts):
        """Show the audio widget exactly when the TTS checkbox is ticked."""
        return gr.Audio(visible=enable_tts)

    def update_ui(result, status, audio_path, pdf_path):
        """Pass text and status through; reveal audio/PDF only if present."""
        audio_update = gr.Audio(visible=audio_path is not None, value=audio_path)
        pdf_update = gr.File(visible=pdf_path is not None, value=pdf_path)
        return (result, status, audio_update, pdf_update)

    tts_checkbox.change(
        fn=toggle_audio_visibility,
        inputs=tts_checkbox,
        outputs=audio_output,
    )

    # The same four widgets receive the analysis results and are then fed
    # through update_ui, which adjusts their visibility.
    result_components = (output, status, audio_output, pdf_download)

    analysis_event = analyze_btn.click(
        fn=process_image,
        inputs=[image_input, tts_checkbox],
        outputs=[*result_components],
    )
    analysis_event.then(
        fn=update_ui,
        inputs=[*result_components],
        outputs=[*result_components],
    )
# FastAPI setup
@app.get("/files/{file_name}")
async def get_file(file_name: str):
    """Serve a file from the system temporary directory by its name.

    Args:
        file_name: Bare file name taken from the URL path.

    Returns:
        A ``FileResponse`` for an existing regular file located directly in
        the temp directory; otherwise a JSON 404 ``{"error": "File not found"}``.
    """
    temp_dir = os.path.realpath(tempfile.gettempdir())
    file_path = os.path.realpath(os.path.join(temp_dir, file_name))

    # The name comes straight from the request URL, so only a plain file name
    # is accepted, and the resolved target (after following symlinks) must
    # still sit directly inside the temp directory.
    is_plain_name = (
        file_name not in ("", ".", "..")
        and file_name == os.path.basename(file_name)
    )
    stays_in_temp_dir = os.path.dirname(file_path) == temp_dir

    # isfile (rather than exists) keeps directories from reaching FileResponse.
    if is_plain_name and stays_in_temp_dir and os.path.isfile(file_path):
        # NOTE(review): any regular file in the shared temp directory can be
        # downloaded by name; consider a dedicated output directory.
        return FileResponse(file_path)
    return JSONResponse({"error": "File not found"}, status_code=404)
# Mount the Gradio UI at the site root. Routes registered on `app` before
# this line (such as /files/{file_name}) still take precedence, but a mount
# at "/" matches every remaining path, so any route added afterwards is never
# reached. The former `@app.get("/")` handler was therefore dead code, and it
# redirected "/" to "/" itself, which would have looped had it ever matched.
# It is intentionally not registered; the Gradio app serves "/" directly.
app = gr.mount_gradio_app(app, demo, path="/")