ikraamkb committed on
Commit
5a7d5c7
·
verified ·
1 Parent(s): e9d6531

Update appImage.py

Browse files
Files changed (1) hide show
  1. appImage.py +172 -0
appImage.py CHANGED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ import easyocr
4
+ from fastapi import FastAPI
5
+ from fastapi.responses import RedirectResponse
6
+ import tempfile
7
+ import os
8
+ from gtts import gTTS
9
+ from fpdf import FPDF
10
+ import datetime
11
+
12
+ # Initialize components
13
# FastAPI application object; routes and the Gradio UI attach to it below.
app = FastAPI()

# Both models are instantiated once, at module import.
# Image captioning pipeline (ViT encoder + GPT-2 decoder checkpoint).
captioner = pipeline(
    task="image-to-text",
    model="nlpconnect/vit-gpt2-image-captioning",
)
# OCR reader configured for English and French.
reader = easyocr.Reader(["en", "fr"])
18
+
19
def analyze_image(image_path):
    """Caption an image and extract any text found in it.

    Args:
        image_path: Image location; passed unchanged to both the captioning
            pipeline and the OCR reader.

    Returns:
        On success, a dict with two keys: ``"caption"`` (generated text of
        the first captioning result) and ``"extracted_text"`` (OCR fragments
        joined with newlines, or ``"No text detected"`` when OCR returns
        nothing). If any step raises, ``{"error": <message>}`` is returned
        instead of propagating the exception.
    """
    try:
        # Captioning: only the first candidate is used.
        captions = captioner(image_path)
        best_caption = captions[0]['generated_text']

        # OCR: the fragments returned with detail=0 are joined line by line.
        fragments = reader.readtext(image_path, detail=0)
        if fragments:
            text = "\n".join(fragments)
        else:
            text = "No text detected"
    except Exception as exc:
        # Callers detect failure via the "error" key rather than an exception.
        return {"error": str(exc)}

    return {"caption": best_caption, "extracted_text": text}
36
+
37
def text_to_speech(text: str) -> str:
    """Synthesize *text* to an MP3 file using gTTS.

    Args:
        text: The text to be spoken.

    Returns:
        Path of the generated ``.mp3`` temporary file (created with
        ``delete=False``, so the caller owns its cleanup), or ``""`` if
        synthesis failed for any reason.
    """
    audio_path = ""
    try:
        tts = gTTS(text)
        # Reserve a unique file name, then release our handle before gTTS
        # writes to that path. Previously the handle was never closed, which
        # leaked a file descriptor per call and prevents a second open on
        # platforms that lock open files.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
            audio_path = tmp.name
        tts.save(audio_path)
        return audio_path
    except Exception as e:
        print(f"Text-to-speech error: {e}")
        # Best-effort cleanup so a failed synthesis does not leave an empty
        # or partial file behind in the temp directory.
        if audio_path and os.path.exists(audio_path):
            try:
                os.remove(audio_path)
            except OSError:
                pass
        return ""
47
+
48
def _latin1_safe(text) -> str:
    """Return *text* as a str limited to Latin-1, replacing other characters with '?'."""
    return str(text).encode("latin-1", errors="replace").decode("latin-1")


def create_pdf(content: dict, original_filename: str) -> str:
    """Create a PDF report containing the caption and the extracted text.

    Args:
        content: Analysis result; must provide the ``"caption"`` and
            ``"extracted_text"`` keys.
        original_filename: Name of the analysed image, printed in the
            report's metadata section.

    Returns:
        Path of the generated ``.pdf`` temporary file (created with
        ``delete=False``, so the caller owns its cleanup), or ``""`` if the
        report could not be produced.
    """
    pdf_path = ""
    try:
        # The built-in "Arial" core font only covers Latin-1. OCR output,
        # captions and file names can contain other characters, which used
        # to abort the whole report; substitute them instead.
        caption = _latin1_safe(content['caption'])
        extracted_text = _latin1_safe(content['extracted_text'])
        safe_filename = _latin1_safe(original_filename)

        pdf = FPDF()
        pdf.add_page()
        pdf.set_font("Arial", size=12)

        # Title
        pdf.set_font("Arial", 'B', 16)
        pdf.cell(200, 10, txt="Image Analysis Report", ln=1, align='C')
        pdf.set_font("Arial", size=12)

        # Metadata
        pdf.cell(200, 10, txt=f"Original file: {safe_filename}", ln=1)
        pdf.cell(200, 10, txt=f"Generated on: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", ln=1)
        pdf.ln(10)

        # Caption (an empty family keeps the current font, only the style changes)
        pdf.set_font("", 'B')
        pdf.cell(200, 10, txt="Image Caption:", ln=1)
        pdf.set_font("")
        pdf.multi_cell(0, 10, txt=caption)
        pdf.ln(5)

        # Extracted Text
        pdf.set_font("", 'B')
        pdf.cell(200, 10, txt="Extracted Text:", ln=1)
        pdf.set_font("")
        pdf.multi_cell(0, 10, txt=extracted_text)

        # Reserve a unique file name and close our handle before FPDF writes
        # to the path; the handle was previously left open (descriptor leak).
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            pdf_path = tmp.name
        pdf.output(pdf_path)
        return pdf_path
    except Exception as e:
        print(f"PDF creation error: {e}")
        # Best-effort cleanup of an empty or partial report file.
        if pdf_path and os.path.exists(pdf_path):
            try:
                os.remove(pdf_path)
            except OSError:
                pass
        return ""
84
+
85
def process_image(file, enable_tts: bool):
    """Handle image processing for the Gradio interface.

    Args:
        file: The uploaded image. Either a filesystem path (``str`` or
            ``os.PathLike``), which is what ``gr.Image(type="filepath")``
            supplies, or a file-like object exposing the path as ``.name``.
            ``None`` means nothing was uploaded.
        enable_tts: When true, also synthesize an audio summary.

    Returns:
        A 4-tuple ``(output_text, status, audio_path, pdf_path)``.
        ``audio_path`` and ``pdf_path`` are ``None`` when not requested or
        when their generation failed.
    """
    if file is None:
        return "Please upload an image first", "Ready", None, None

    # The image input is declared with type="filepath", so the value arrives
    # as a plain string; the previous unconditional `file.name` raised
    # AttributeError for it. Objects carrying `.name` are still accepted.
    if isinstance(file, (str, os.PathLike)):
        file_path = os.fspath(file)
    else:
        file_path = getattr(file, "name", None)
    if not file_path:
        return "Analysis error: could not determine the uploaded file path", "Error", None, None

    original_filename = os.path.basename(file_path)

    try:
        # Analyze image
        result = analyze_image(file_path)
        if "error" in result:
            return result["error"], "Error", None, None

        # Format output
        output_text = f"📷 Image Caption:\n{result['caption']}\n\n✍️ Extracted Text:\n{result['extracted_text']}"

        # Generate audio. The helper signals failure with "", which must not
        # be handed to the UI as if it were a real path.
        audio_path = None
        if enable_tts:
            audio_path = text_to_speech(
                f"Image caption: {result['caption']}. Extracted text: {result['extracted_text']}"
            ) or None

        # Generate PDF (same "" -> None normalisation as for the audio)
        pdf_path = create_pdf(result, original_filename) or None

        return output_text, "Analysis complete", audio_path, pdf_path
    except Exception as e:
        return f"Analysis error: {str(e)}", "Error", None, None
111
+
112
+ # Gradio Interface
113
with gr.Blocks(title="Image Analysis Service", theme=gr.themes.Soft()) as demo:
    # Page header.
    gr.Markdown("# 🖼️ Image Analysis Service")
    gr.Markdown("Upload an image to get automatic captioning and text extraction")

    with gr.Row():
        # Left column: user inputs.
        with gr.Column():
            image_input = gr.Image(label="Upload Image", type="filepath")
            tts_checkbox = gr.Checkbox(label="Enable Text-to-Speech", value=False)
            analyze_btn = gr.Button("Analyze Image", variant="primary")

        # Right column: results. The audio player and the report download
        # start hidden and are revealed by the callbacks below.
        with gr.Column():
            output = gr.Textbox(label="Analysis Results", lines=10)
            status = gr.Textbox(label="Status", interactive=False)
            audio_output = gr.Audio(label="Audio Summary", visible=False)
            pdf_download = gr.File(label="Download Report", visible=False)

    def toggle_audio_visibility(enable_tts):
        """Return an Audio update whose visibility follows the TTS checkbox."""
        return gr.Audio(visible=enable_tts)

    def update_ui(result, status, audio_path, pdf_path):
        """Pass the texts through and show each file widget only if it has a value."""
        audio_update = gr.Audio(visible=audio_path is not None, value=audio_path)
        report_update = gr.File(visible=pdf_path is not None, value=pdf_path)
        return result, status, audio_update, report_update

    # Checkbox toggles the audio player's visibility.
    tts_checkbox.change(
        fn=toggle_audio_visibility,
        inputs=tts_checkbox,
        outputs=audio_output,
    )

    # Step 1: run the analysis and fill the four result components.
    analysis_event = analyze_btn.click(
        fn=process_image,
        inputs=[image_input, tts_checkbox],
        outputs=[output, status, audio_output, pdf_download],
    )
    # Step 2: feed the freshly written values back through update_ui so the
    # audio/PDF widgets become visible only when they actually hold a value.
    analysis_event.then(
        fn=update_ui,
        inputs=[output, status, audio_output, pdf_download],
        outputs=[output, status, audio_output, pdf_download],
    )
158
+
159
+ # FastAPI setup
160
@app.get("/files/{file_name}")
async def get_file(file_name: str):
    """Serve a file from the system temporary directory by its name.

    Args:
        file_name: Bare file name (no directory components) to look up in
            ``tempfile.gettempdir()``.

    Returns:
        A ``FileResponse`` for an existing regular file, otherwise a JSON
        ``{"error": "File not found"}`` body with status 404.
    """
    # Imported here because the module's import section only brings in
    # RedirectResponse; without this the handler raised NameError on every
    # request.
    from fastapi.responses import FileResponse, JSONResponse

    not_found = JSONResponse({"error": "File not found"}, status_code=404)

    # The name comes straight from the URL: accept only a bare file name so
    # the lookup cannot leave the temp directory.
    is_bare_name = (
        bool(file_name)
        and file_name not in (".", "..")
        and os.path.basename(file_name) == file_name
    )
    if not is_bare_name:
        return not_found

    file_path = os.path.join(tempfile.gettempdir(), file_name)
    # isfile (not exists) so that directories are never handed to FileResponse.
    if os.path.isfile(file_path):
        return FileResponse(file_path)
    return not_found
    # NOTE(review): this still exposes any file in the shared temp directory
    # to whoever knows its name — consider a dedicated output directory.
166
+
167
# Attach the Gradio UI to the FastAPI application at the site root.
app = gr.mount_gradio_app(app, demo, path="/")


# NOTE(review): this route is registered after Gradio has been mounted at
# "/", and it redirects "/" to "/" itself — it looks either unreachable or
# self-referential. Confirm the intended mount path / redirect target.
@app.get("/")
def redirect_to_interface():
    """Answer a GET on the root path with a redirect to "/"."""
    target_url = "/"
    return RedirectResponse(url=target_url)
172
+