capradeepgujaran committed (verified)
Commit 0b8f58d · 1 Parent(s): 1bd01d6

Update app.py

Files changed (1):
  1. app.py  +81 -111
app.py CHANGED
@@ -70,13 +70,13 @@ def extract_frames_from_video(video, frame_points=[0, 0.5, 1], max_size=(800, 80
     cap.release()
     return frames
 
-def analyze_construction_media(media):
-    if not media:
-        logger.warning("No media provided")
         return [("No input", "Error: Please upload images or a video for analysis.")]
 
     try:
-        logger.info(f"Starting analysis of {len(media)} files")
         results = []
 
         instruction = ("You are an AI assistant specialized in analyzing images for safety issues. "
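Only the tail of extract_frames_from_video (the cap.release() / return frames lines) is visible in this hunk, and its max_size default is cut off by the viewer. For context, a minimal sketch of what such a helper could look like, assuming OpenCV (cv2) for decoding, PIL images as the return type, and an (800, 800) cap; the sampling logic and everything beyond the signature shown above are assumptions, not the repository's actual code:

import cv2
from PIL import Image

def extract_frames_from_video(video, frame_points=[0, 0.5, 1], max_size=(800, 800)):
    # Hypothetical reconstruction: sample frames at relative positions in the
    # clip (start, middle, end by default), convert BGR -> RGB, and downscale
    # so the base64 payload sent to the vision model stays small.
    cap = cv2.VideoCapture(video)
    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frames = []
    for point in frame_points:
        index = max(0, min(total - 1, int(total * point)))
        cap.set(cv2.CAP_PROP_POS_FRAMES, index)
        ret, frame = cap.read()
        if not ret:
            continue
        img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        img.thumbnail(max_size)  # in-place resize that preserves aspect ratio
        frames.append(img)
    cap.release()
    return frames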
@@ -84,114 +84,84 @@ def analyze_construction_media(media):
                        "If it does, identify any safety issues or hazards, categorize them, and provide a detailed description, "
                        "and suggest steps to resolve them. If it's not a construction site, simply state that")
 
-        for i, file in enumerate(media):
-            try:
-                file_path = file.name  # Get the file path
-                logger.info(f"Processing file {i+1}/{len(media)}: {file_path}")
-
-                if not os.path.exists(file_path):
-                    logger.error(f"File does not exist: {file_path}")
-                    results.append((f"File {i+1} analysis", f"Error: File does not exist: {file_path}"))
-                    continue
 
-                file_type = os.path.splitext(file_path)[1][1:].lower()
-
-                if file_type in ['jpg', 'jpeg', 'png', 'gif']:
-                    # Handle image
-                    try:
-                        with Image.open(file_path) as img:
-                            img = img.convert('RGB')  # Convert to RGB to ensure compatibility
-                            image_base64 = encode_image(img)
-                            logger.info(f"Image {i+1} encoded, size: {len(image_base64)} bytes")
-
-                            messages = [
-                                {
-                                    "role": "user",
-                                    "content": [
-                                        {
-                                            "type": "text",
-                                            "text": f"{instruction}\n\nAnalyze this image (File {i+1}/{len(media)}). First, determine if it's a construction site. If it is, explain the image in detail, focusing on safety aspects. If it's not, briefly describe what you see."
-                                        },
-                                        {
-                                            "type": "image_url",
-                                            "image_url": {
-                                                "url": f"data:image/png;base64,{image_base64}"
-                                            }
-                                        }
-                                    ]
                                 }
-                            ]
-
-                            logger.info(f"Sending request to AI model for image {i+1}")
-                            completion = client.chat.completions.create(
-                                model="llama-3.2-90b-vision-preview",
-                                messages=messages,
-                                temperature=0.7,
-                                max_tokens=1000,
-                                top_p=1,
-                                stream=False,
-                                stop=None
-                            )
-                            result = completion.choices[0].message.content
-                            logger.info(f"Received response from AI model for image {i+1}")
-                            results.append((f"Image {i+1} analysis", result))
-                            logger.info(f"Successfully analyzed image {i+1}")
-                    except Exception as img_error:
-                        logger.error(f"Error processing image {i+1}: {str(img_error)}")
-                        logger.error(traceback.format_exc())
-                        results.append((f"Image {i+1} analysis", f"Error processing image: {str(img_error)}"))
-
-                elif file_type in ['mp4', 'avi', 'mov', 'wmv']:
-                    # Handle video
-                    try:
-                        frames = extract_frames_from_video(file_path)
-                        logger.info(f"Extracted {len(frames)} frames from video: {file_path}")
-                        for j, frame in enumerate(frames):
-                            frame_base64 = encode_image(frame)
-                            logger.info(f"Video {i+1}, Frame {j+1} encoded, size: {len(frame_base64)} bytes")
-
-                            messages = [
                                 {
-                                    "role": "user",
-                                    "content": [
-                                        {
-                                            "type": "text",
-                                            "text": f"{instruction}\n\nAnalyze this frame from a video (File {i+1}/{len(media)}, Frame {j+1}/{len(frames)}). First, determine if it's a construction site. If it is, explain what you observe, focusing on safety aspects. If it's not, briefly describe what you see."
-                                        },
-                                        {
-                                            "type": "image_url",
-                                            "image_url": {
-                                                "url": f"data:image/png;base64,{frame_base64}"
-                                            }
-                                        }
-                                    ]
                                 }
                             ]
-                            completion = client.chat.completions.create(
-                                model="llama-3.2-90b-vision-preview",
-                                messages=messages,
-                                temperature=0.7,
-                                max_tokens=1000,
-                                top_p=1,
-                                stream=False,
-                                stop=None
-                            )
-                            result = completion.choices[0].message.content
-                            results.append((f"Video {i+1}, Frame {j+1} analysis", result))
-                            logger.info(f"Successfully analyzed video {i+1}")
-                    except Exception as vid_error:
-                        logger.error(f"Error processing video {i+1}: {str(vid_error)}")
-                        logger.error(traceback.format_exc())
-                        results.append((f"Video {i+1} analysis", f"Error processing video: {str(vid_error)}"))
-
-                else:
-                    logger.warning(f"Unsupported file type: {file_type}")
-                    results.append((f"File {i+1} analysis", f"Unsupported file type: {file_type}"))
-
-            except Exception as file_error:
-                logger.error(f"Error processing file {i+1}: {str(file_error)}")
-                logger.error(traceback.format_exc())
-                results.append((f"File {i+1} analysis", f"Error processing file: {str(file_error)}"))
 
         logger.info("Analysis completed successfully")
        return results
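Both the removed loop above and the new analyze_mixed_input further down build their payloads via encode_image(...) inside a data:image/png;base64 URL, but that helper sits outside this diff. A plausible sketch, assuming it takes a PIL image and returns the base64 text the data URL expects; the implementation details are assumptions:

import base64
import io
from PIL import Image

def encode_image(image: Image.Image) -> str:
    # Hypothetical reconstruction: serialize the PIL image as PNG and return
    # the base64 string that gets embedded in the data URL.
    buffer = io.BytesIO()
    image.save(buffer, format="PNG")
    return base64.b64encode(buffer.getvalue()).decode("utf-8")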
@@ -310,9 +280,9 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as iface:
         """
     )
 
-    # Combined upload for images and videos
     with gr.Row():
-        media_input = gr.File(label="Upload Construction Site Images or Videos", file_count="multiple", type="filepath", elem_classes="image-container")
 
     # Analyze Safety Hazards Button
     with gr.Row():
@@ -346,8 +316,8 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as iface:
         return history
 
     analyze_button.click(
-        analyze_construction_media,
-        inputs=[media_input],
         outputs=[chatbot],
         postprocess=lambda x: update_chat(chatbot.value, x)
     )
 
@@ -70,13 +70,13 @@ def extract_frames_from_video(video, frame_points=[0, 0.5, 1], max_size=(800, 80
     cap.release()
     return frames
 
+def analyze_mixed_input(input_files):
+    if not input_files:
+        logger.warning("No input files provided")
         return [("No input", "Error: Please upload images or a video for analysis.")]
 
     try:
+        logger.info("Starting analysis")
         results = []
 
         instruction = ("You are an AI assistant specialized in analyzing images for safety issues. "
 
@@ -84,114 +84,84 @@ def analyze_construction_media(media):
                        "If it does, identify any safety issues or hazards, categorize them, and provide a detailed description, "
                        "and suggest steps to resolve them. If it's not a construction site, simply state that")
 
+        for i, file in enumerate(input_files):
+            file_type = file.name.split('.')[-1].lower()
+            if file_type in ['jpg', 'jpeg', 'png', 'bmp']:
+                # Process image
+                image = Image.open(file.name)
+                resized_image = resize_image(image)
+                image_data_url = f"data:image/png;base64,{encode_image(resized_image)}"
+                content_type = "image"
+            elif file_type in ['mp4', 'avi', 'mov', 'webm']:
+                # Process video
+                frames = extract_frames_from_video(file.name)
+                image_data_url = f"data:image/png;base64,{encode_image(frames[0])}"  # Use the first frame
+                content_type = "video"
+            else:
+                results.append((f"File {i+1} analysis", f"Unsupported file type: {file_type}"))
+                continue
 
+            messages = [
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": f"{instruction}\n\nAnalyze this {content_type} (File {i+1}/{len(input_files)}). First, determine if it's a construction site. If it is, explain the {content_type} in detail, focusing on safety aspects. If it's not, briefly describe what you see."
+                        },
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": image_data_url
                             }
+                        }
+                    ]
+                }
+            ]
+            completion = client.chat.completions.create(
+                model="llama-3.2-90b-vision-preview",
+                messages=messages,
+                temperature=0.7,
+                max_tokens=1000,
+                top_p=1,
+                stream=False,
+                stop=None
+            )
+            result = completion.choices[0].message.content
+            results.append((f"File {i+1} analysis ({content_type})", result))
+
+            # If it's a video, analyze additional frames
+            if content_type == "video" and len(frames) > 1:
+                for j, frame in enumerate(frames[1:], start=2):
+                    image_data_url = f"data:image/png;base64,{encode_image(frame)}"
+                    messages = [
+                        {
+                            "role": "user",
+                            "content": [
                                 {
+                                    "type": "text",
+                                    "text": f"{instruction}\n\nAnalyze this additional frame from the video (File {i+1}, Frame {j}/{len(frames)}). Focus on any new or changed safety aspects compared to the previous frame."
+                                },
+                                {
+                                    "type": "image_url",
+                                    "image_url": {
+                                        "url": image_data_url
+                                    }
                                 }
                             ]
+                        }
+                    ]
+                    completion = client.chat.completions.create(
+                        model="llama-3.2-90b-vision-preview",
+                        messages=messages,
+                        temperature=0.7,
+                        max_tokens=1000,
+                        top_p=1,
+                        stream=False,
+                        stop=None
+                    )
+                    result = completion.choices[0].message.content
+                    results.append((f"File {i+1} analysis (video frame {j})", result))
 
         logger.info("Analysis completed successfully")
         return results
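The image branch of the new loop calls resize_image(image), which is also defined elsewhere in app.py and not shown in this diff. A minimal sketch of what it presumably does, assuming the same (800, 800) cap as extract_frames_from_video; the default size and the RGB conversion are assumptions:

from PIL import Image

def resize_image(image: Image.Image, max_size=(800, 800)) -> Image.Image:
    # Hypothetical reconstruction: normalize the mode and bound the dimensions
    # before the image is base64-encoded for the vision model.
    img = image.convert("RGB")
    img.thumbnail(max_size)  # preserves aspect ratio
    return img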
 
@@ -310,9 +280,9 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as iface:
         """
     )
 
+    # Single upload window for both images and videos
     with gr.Row():
+        input_files = gr.File(label="Upload Construction Site Images or Videos", file_count="multiple", type="file", elem_classes="image-container")
 
     # Analyze Safety Hazards Button
     with gr.Row():
 
@@ -346,8 +316,8 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as iface:
         return history
 
     analyze_button.click(
+        analyze_mixed_input,
+        inputs=[input_files],
         outputs=[chatbot],
         postprocess=lambda x: update_chat(chatbot.value, x)
     )
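One note on the unchanged wiring above: in current Gradio releases the postprocess argument of event listeners such as click() is a boolean flag, not a callback, so the lambda shown here may never be invoked. A sketch of an alternative that folds the history update into the handler itself; analyze_and_update is a hypothetical wrapper, while analyze_mixed_input, input_files, analyze_button, and chatbot come from app.py, and the snippet assumes it sits inside the existing gr.Blocks context:

# Hypothetical wrapper: append each (title, analysis) pair returned by
# analyze_mixed_input to the running chat history and return the full
# history, which is what a gr.Chatbot output expects.
def analyze_and_update(files, history):
    history = history or []
    for title, analysis in analyze_mixed_input(files):
        history.append((title, analysis))
    return history

# Assumed wiring: the chatbot component doubles as input (current history)
# and output (updated history), so no postprocess hook is needed.
analyze_button.click(
    analyze_and_update,
    inputs=[input_files, chatbot],
    outputs=[chatbot],
)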