Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
	Update appImage.py
Browse files- appImage.py +172 -0
    	
        appImage.py
    CHANGED
    
    | @@ -0,0 +1,172 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import gradio as gr
         | 
| 2 | 
            +
            from transformers import pipeline
         | 
| 3 | 
            +
            import easyocr
         | 
| 4 | 
            +
            from fastapi import FastAPI
         | 
| 5 | 
            +
            from fastapi.responses import RedirectResponse
         | 
| 6 | 
            +
            import tempfile
         | 
| 7 | 
            +
            import os
         | 
| 8 | 
            +
            from gtts import gTTS
         | 
| 9 | 
            +
            from fpdf import FPDF
         | 
| 10 | 
            +
            import datetime
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            # Initialize components
            # FastAPI application object; routes are registered on it further down
            # and the Gradio UI is mounted onto it near the end of the file.
            app = FastAPI()

            # Load models
            # Both objects are created once, when this module is imported, and are
            # reused by analyze_image() on every call.
            # Image captioning pipeline ("image-to-text" task).
            captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
            reader = easyocr.Reader(['en', 'fr'])  # English and French OCR
         | 
| 18 | 
            +
             | 
| 19 | 
            +
            def analyze_image(image_path):
                """Caption an image and pull any readable text out of it.

                Args:
                    image_path: Location of the image handed to both the captioning
                        pipeline and the OCR reader.

                Returns:
                    ``{"caption": ..., "extracted_text": ...}`` on success, where the
                    extracted text is the OCR fragments joined by newlines (or the
                    literal "No text detected" when OCR finds nothing). If anything
                    raises, ``{"error": <message>}`` is returned instead.
                """
                try:
                    # The pipeline returns a list of candidates; the first one is used.
                    description = captioner(image_path)[0]['generated_text']

                    # detail=0 is passed so the joined result is plain strings.
                    text_fragments = reader.readtext(image_path, detail=0)
                    if text_fragments:
                        ocr_text = "\n".join(text_fragments)
                    else:
                        ocr_text = "No text detected"
                except Exception as exc:
                    return {"error": str(exc)}

                return {"caption": description, "extracted_text": ocr_text}
         | 
| 36 | 
            +
             | 
| 37 | 
            +
            def text_to_speech(text: str) -> str:
                """Convert text to speech and store it as a temporary MP3 file.

                Args:
                    text: The text to be spoken.

                Returns:
                    Path of the generated ``.mp3`` file, or ``""`` when there is nothing
                    to speak or the conversion fails (the error is printed).
                """
                # Nothing to synthesise: report it and return the failure sentinel
                # without creating a temporary file.
                if not text or not text.strip():
                    print("Text-to-speech error: No text to speak")
                    return ""

                audio_path = ""
                try:
                    tts = gTTS(text)
                    # Reserve a unique file name, then close the handle right away so it
                    # is not leaked and is not still open while gTTS writes to the path.
                    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
                        audio_path = temp_audio.name
                    tts.save(audio_path)
                    return audio_path
                except Exception as e:
                    print(f"Text-to-speech error: {e}")
                    # Do not leave an empty or partial temp file behind on failure.
                    if audio_path:
                        try:
                            os.remove(audio_path)
                        except OSError:
                            pass
                    return ""
         | 
| 47 | 
            +
             | 
| 48 | 
            +
            def create_pdf(content: dict, original_filename: str) -> str:
                """Create a PDF report for one analysed image.

                Args:
                    content: Analysis result; must provide the "caption" and
                        "extracted_text" keys.
                    original_filename: Name of the analysed image, printed in the
                        report header.

                Returns:
                    Path of the generated ``.pdf`` file, or ``""`` if the report could
                    not be created (the error is printed).
                """
                def latin1_safe(text: str) -> str:
                    # The built-in "Arial" core font only covers Latin-1. Replace any
                    # other character with "?" so a single stray OCR glyph does not
                    # abort the whole report.
                    return text.encode("latin-1", "replace").decode("latin-1")

                pdf_path = ""
                try:
                    pdf = FPDF()
                    pdf.add_page()

                    # Title
                    pdf.set_font("Arial", 'B', 16)
                    pdf.cell(200, 10, txt="Image Analysis Report", ln=1, align='C')
                    pdf.set_font("Arial", size=12)

                    # Metadata
                    pdf.cell(200, 10, txt=latin1_safe(f"Original file: {original_filename}"), ln=1)
                    generated_on = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                    pdf.cell(200, 10, txt=f"Generated on: {generated_on}", ln=1)
                    pdf.ln(10)

                    # Caption (an empty family keeps the current font, only the style changes)
                    pdf.set_font("", 'B')
                    pdf.cell(200, 10, txt="Image Caption:", ln=1)
                    pdf.set_font("")
                    pdf.multi_cell(0, 10, txt=latin1_safe(content['caption']))
                    pdf.ln(5)

                    # Extracted Text
                    pdf.set_font("", 'B')
                    pdf.cell(200, 10, txt="Extracted Text:", ln=1)
                    pdf.set_font("")
                    pdf.multi_cell(0, 10, txt=latin1_safe(content['extracted_text']))

                    # Reserve a unique file name and close the handle immediately so it
                    # is not leaked and is not open while the PDF is written.
                    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
                        pdf_path = temp_pdf.name
                    pdf.output(pdf_path)
                    return pdf_path
                except Exception as e:
                    print(f"PDF creation error: {e}")
                    # Do not leave an empty or partial temp file behind on failure.
                    if pdf_path:
                        try:
                            os.remove(pdf_path)
                        except OSError:
                            pass
                    return ""
         | 
| 84 | 
            +
             | 
| 85 | 
            +
            def process_image(file, enable_tts: bool):
                """Handle image processing for Gradio interface.

                Args:
                    file: The uploaded image, either as a filesystem path (which is what
                        ``gr.Image(type="filepath")`` supplies) or as an object that
                        exposes its path through a ``name`` attribute. ``None`` means
                        nothing has been uploaded yet.
                    enable_tts: When true, an audio summary of the result is generated.

                Returns:
                    A 4-tuple ``(result_text, status_text, audio_path, pdf_path)``.
                    ``audio_path`` and ``pdf_path`` are ``None`` when the corresponding
                    file was not requested or could not be produced.
                """
                if file is None:
                    return "Please upload an image first", "Ready", None, None

                # A plain path has no ``.name`` attribute, so only fall back to it for
                # file-like upload objects.
                if isinstance(file, (str, os.PathLike)):
                    file_path = os.fspath(file)
                else:
                    file_path = file.name
                original_filename = os.path.basename(file_path)

                try:
                    # Analyze image
                    result = analyze_image(file_path)
                    if "error" in result:
                        return result["error"], "Error", None, None

                    # Format output
                    output_text = f"📷 Image Caption:\n{result['caption']}\n\n✍️ Extracted Text:\n{result['extracted_text']}"

                    # Generate audio; the helper signals failure with "", which must
                    # reach the UI as "no file" rather than as an empty path.
                    audio_path = None
                    if enable_tts:
                        spoken = f"Image caption: {result['caption']}. Extracted text: {result['extracted_text']}"
                        audio_path = text_to_speech(spoken) or None

                    # Generate PDF (same "" -> None normalisation as for the audio)
                    pdf_path = create_pdf(result, original_filename) or None

                    return output_text, "Analysis complete", audio_path, pdf_path
                except Exception as e:
                    return f"Analysis error: {str(e)}", "Error", None, None
         | 
| 111 | 
            +
             | 
| 112 | 
            +
            # Gradio Interface
            # Components are created inside the nested Blocks/Row/Column context
            # managers, so the order of the statements below is part of the layout.
            with gr.Blocks(title="Image Analysis Service", theme=gr.themes.Soft()) as demo:
                gr.Markdown("# 🖼️ Image Analysis Service")
                gr.Markdown("Upload an image to get automatic captioning and text extraction")

                with gr.Row():
                    # Left column: inputs.
                    with gr.Column():
                        image_input = gr.Image(label="Upload Image", type="filepath")
                        tts_checkbox = gr.Checkbox(
                            label="Enable Text-to-Speech",
                            value=False
                        )
                        analyze_btn = gr.Button("Analyze Image", variant="primary")

                    # Right column: outputs. The audio player and the report download
                    # are created hidden.
                    with gr.Column():
                        output = gr.Textbox(label="Analysis Results", lines=10)
                        status = gr.Textbox(label="Status", interactive=False)
                        audio_output = gr.Audio(label="Audio Summary", visible=False)
                        pdf_download = gr.File(label="Download Report", visible=False)

                def toggle_audio_visibility(enable_tts):
                    """Return an Audio update whose visibility follows the checkbox value."""
                    return gr.Audio(visible=enable_tts)

                def update_ui(result, status, audio_path, pdf_path):
                    """Pass result and status through; show audio/file only when they hold a value."""
                    # NOTE(review): returning component instances as updates presumably
                    # relies on a Gradio version that supports it — confirm.
                    return (
                        result,
                        status,
                        gr.Audio(visible=audio_path is not None, value=audio_path),
                        gr.File(visible=pdf_path is not None, value=pdf_path)
                    )

                # Show or hide the audio player whenever the checkbox changes.
                tts_checkbox.change(
                    fn=toggle_audio_visibility,
                    inputs=tts_checkbox,
                    outputs=audio_output
                )

                # Step 1: run the analysis and fill the four output components.
                # Step 2: feed those same component values through update_ui to set
                # the visibility of the audio player and the report download.
                analyze_btn.click(
                    fn=process_image,
                    inputs=[image_input, tts_checkbox],
                    outputs=[output, status, audio_output, pdf_download]
                ).then(
                    fn=update_ui,
                    inputs=[output, status, audio_output, pdf_download],
                    outputs=[output, status, audio_output, pdf_download]
                )
         | 
| 158 | 
            +
             | 
| 159 | 
            +
            # FastAPI setup
         | 
| 160 | 
            +
            @app.get("/files/{file_name}")
            async def get_file(file_name: str):
                """Serve a file from the system temporary directory by its name.

                Args:
                    file_name: Bare name of a file located directly inside the
                        temporary directory.

                Returns:
                    The file as a ``FileResponse`` when it exists, otherwise a JSON
                    ``{"error": "File not found"}`` body with status 404.
                """
                # Imported locally: the module-level imports only bring in
                # RedirectResponse, so these two names would otherwise be undefined.
                from fastapi.responses import FileResponse, JSONResponse

                # Accept only a bare file name so the lookup cannot leave the temp dir.
                is_bare_name = (
                    file_name not in ("", ".", "..")
                    and os.path.basename(file_name) == file_name
                )
                if is_bare_name:
                    file_path = os.path.join(tempfile.gettempdir(), file_name)
                    # isfile (not exists): a directory cannot be served as a file.
                    if os.path.isfile(file_path):
                        return FileResponse(file_path)
                return JSONResponse({"error": "File not found"}, status_code=404)
         | 
| 166 | 
            +
             | 
| 167 | 
            +
            # Mount the Gradio UI on the FastAPI app at the root path; `app` is rebound
            # to the value returned by gr.mount_gradio_app.
            app = gr.mount_gradio_app(app, demo, path="/")

            # NOTE(review): this route is registered after the Gradio app is mounted at
            # "/" and it redirects "/" to "/" itself. Presumably the mount answers "/"
            # first, so this handler never runs (and would redirect in a loop if it
            # did) — confirm and consider removing it.
            @app.get("/")
            def redirect_to_interface():
                return RedirectResponse(url="/")
         | 
| 172 | 
            +
             |