capradeepgujaran committed (verified)
Commit 0b8f58d · 1 Parent(s): 1bd01d6

Update app.py

Files changed (1):
  1. app.py  +81 -111
app.py CHANGED
@@ -70,13 +70,13 @@ def extract_frames_from_video(video, frame_points=[0, 0.5, 1], max_size=(800, 80
     cap.release()
     return frames
 
-def analyze_construction_media(media):
-    if not media:
-        logger.warning("No media provided")
         return [("No input", "Error: Please upload images or a video for analysis.")]
 
     try:
-        logger.info(f"Starting analysis of {len(media)} files")
         results = []
 
         instruction = ("You are an AI assistant specialized in analyzing images for safety issues. "
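Only the tail of extract_frames_from_video (the cap.release() / return frames lines) is visible in this hunk, and its max_size default is cut off by the viewer. For context, a minimal sketch of what such a helper could look like, assuming OpenCV (cv2) for decoding, PIL images as the return type, and an (800, 800) cap; the sampling logic and everything beyond the signature shown above are assumptions, not the repository's actual code:

import cv2
from PIL import Image

def extract_frames_from_video(video, frame_points=[0, 0.5, 1], max_size=(800, 800)):
    # Hypothetical reconstruction: sample frames at relative positions in the
    # clip (start, middle, end by default), convert BGR -> RGB, and downscale
    # so the base64 payload sent to the vision model stays small.
    cap = cv2.VideoCapture(video)
    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frames = []
    for point in frame_points:
        index = max(0, min(total - 1, int(total * point)))
        cap.set(cv2.CAP_PROP_POS_FRAMES, index)
        ret, frame = cap.read()
        if not ret:
            continue
        img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        img.thumbnail(max_size)  # in-place resize that preserves aspect ratio
        frames.append(img)
    cap.release()
    return frames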
@@ -84,114 +84,84 @@ def analyze_construction_media(media):
                        "If it does, identify any safety issues or hazards, categorize them, and provide a detailed description, "
                        "and suggest steps to resolve them. If it's not a construction site, simply state that")
 
-        for i, file in enumerate(media):
-            try:
-                file_path = file.name  # Get the file path
-                logger.info(f"Processing file {i+1}/{len(media)}: {file_path}")
-
-                if not os.path.exists(file_path):
-                    logger.error(f"File does not exist: {file_path}")
-                    results.append((f"File {i+1} analysis", f"Error: File does not exist: {file_path}"))
-                    continue
 
-                file_type = os.path.splitext(file_path)[1][1:].lower()
-
-                if file_type in ['jpg', 'jpeg', 'png', 'gif']:
-                    # Handle image
-                    try:
-                        with Image.open(file_path) as img:
-                            img = img.convert('RGB')  # Convert to RGB to ensure compatibility
-                            image_base64 = encode_image(img)
-                            logger.info(f"Image {i+1} encoded, size: {len(image_base64)} bytes")
-
-                            messages = [
-                                {
-                                    "role": "user",
-                                    "content": [
-                                        {
-                                            "type": "text",
-                                            "text": f"{instruction}\n\nAnalyze this image (File {i+1}/{len(media)}). First, determine if it's a construction site. If it is, explain the image in detail, focusing on safety aspects. If it's not, briefly describe what you see."
-                                        },
-                                        {
-                                            "type": "image_url",
-                                            "image_url": {
-                                                "url": f"data:image/png;base64,{image_base64}"
-                                            }
-                                        }
-                                    ]
                                 }
-                            ]
-
-                            logger.info(f"Sending request to AI model for image {i+1}")
-                            completion = client.chat.completions.create(
-                                model="llama-3.2-90b-vision-preview",
-                                messages=messages,
-                                temperature=0.7,
-                                max_tokens=1000,
-                                top_p=1,
-                                stream=False,
-                                stop=None
-                            )
-                            result = completion.choices[0].message.content
-                            logger.info(f"Received response from AI model for image {i+1}")
-                            results.append((f"Image {i+1} analysis", result))
-                            logger.info(f"Successfully analyzed image {i+1}")
-                    except Exception as img_error:
-                        logger.error(f"Error processing image {i+1}: {str(img_error)}")
-                        logger.error(traceback.format_exc())
-                        results.append((f"Image {i+1} analysis", f"Error processing image: {str(img_error)}"))
-
-                elif file_type in ['mp4', 'avi', 'mov', 'wmv']:
-                    # Handle video
-                    try:
-                        frames = extract_frames_from_video(file_path)
-                        logger.info(f"Extracted {len(frames)} frames from video: {file_path}")
-                        for j, frame in enumerate(frames):
-                            frame_base64 = encode_image(frame)
-                            logger.info(f"Video {i+1}, Frame {j+1} encoded, size: {len(frame_base64)} bytes")
-
-                            messages = [
                                 {
-                                    "role": "user",
-                                    "content": [
-                                        {
-                                            "type": "text",
-                                            "text": f"{instruction}\n\nAnalyze this frame from a video (File {i+1}/{len(media)}, Frame {j+1}/{len(frames)}). First, determine if it's a construction site. If it is, explain what you observe, focusing on safety aspects. If it's not, briefly describe what you see."
-                                        },
-                                        {
-                                            "type": "image_url",
-                                            "image_url": {
-                                                "url": f"data:image/png;base64,{frame_base64}"
-                                            }
-                                        }
-                                    ]
                                 }
                             ]
-                            completion = client.chat.completions.create(
-                                model="llama-3.2-90b-vision-preview",
-                                messages=messages,
-                                temperature=0.7,
-                                max_tokens=1000,
-                                top_p=1,
-                                stream=False,
-                                stop=None
-                            )
-                            result = completion.choices[0].message.content
-                            results.append((f"Video {i+1}, Frame {j+1} analysis", result))
-                            logger.info(f"Successfully analyzed video {i+1}")
-                    except Exception as vid_error:
-                        logger.error(f"Error processing video {i+1}: {str(vid_error)}")
-                        logger.error(traceback.format_exc())
-                        results.append((f"Video {i+1} analysis", f"Error processing video: {str(vid_error)}"))
-
-                else:
-                    logger.warning(f"Unsupported file type: {file_type}")
-                    results.append((f"File {i+1} analysis", f"Unsupported file type: {file_type}"))
-
-            except Exception as file_error:
-                logger.error(f"Error processing file {i+1}: {str(file_error)}")
-                logger.error(traceback.format_exc())
-                results.append((f"File {i+1} analysis", f"Error processing file: {str(file_error)}"))
 
         logger.info("Analysis completed successfully")
        return results
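Both the removed loop above and the new analyze_mixed_input further down build their payloads via encode_image(...) inside a data:image/png;base64 URL, but that helper sits outside this diff. A plausible sketch, assuming it takes a PIL image and returns the base64 text the data URL expects; the implementation details are assumptions:

import base64
import io
from PIL import Image

def encode_image(image: Image.Image) -> str:
    # Hypothetical reconstruction: serialize the PIL image as PNG and return
    # the base64 string that gets embedded in the data URL.
    buffer = io.BytesIO()
    image.save(buffer, format="PNG")
    return base64.b64encode(buffer.getvalue()).decode("utf-8")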
@@ -310,9 +280,9 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as iface:
         """
     )
 
-    # Combined upload for images and videos
     with gr.Row():
-        media_input = gr.File(label="Upload Construction Site Images or Videos", file_count="multiple", type="filepath", elem_classes="image-container")
 
     # Analyze Safety Hazards Button
     with gr.Row():
@@ -346,8 +316,8 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as iface:
         return history
 
     analyze_button.click(
-        analyze_construction_media,
-        inputs=[media_input],
         outputs=[chatbot],
         postprocess=lambda x: update_chat(chatbot.value, x)
     )
 
@@ -70,13 +70,13 @@ def extract_frames_from_video(video, frame_points=[0, 0.5, 1], max_size=(800, 80
     cap.release()
     return frames
 
+def analyze_mixed_input(input_files):
+    if not input_files:
+        logger.warning("No input files provided")
         return [("No input", "Error: Please upload images or a video for analysis.")]
 
     try:
+        logger.info("Starting analysis")
         results = []
 
         instruction = ("You are an AI assistant specialized in analyzing images for safety issues. "
 
@@ -84,114 +84,84 @@ def analyze_construction_media(media):
                        "If it does, identify any safety issues or hazards, categorize them, and provide a detailed description, "
                        "and suggest steps to resolve them. If it's not a construction site, simply state that")
 
+        for i, file in enumerate(input_files):
+            file_type = file.name.split('.')[-1].lower()
+            if file_type in ['jpg', 'jpeg', 'png', 'bmp']:
+                # Process image
+                image = Image.open(file.name)
+                resized_image = resize_image(image)
+                image_data_url = f"data:image/png;base64,{encode_image(resized_image)}"
+                content_type = "image"
+            elif file_type in ['mp4', 'avi', 'mov', 'webm']:
+                # Process video
+                frames = extract_frames_from_video(file.name)
+                image_data_url = f"data:image/png;base64,{encode_image(frames[0])}"  # Use the first frame
+                content_type = "video"
+            else:
+                results.append((f"File {i+1} analysis", f"Unsupported file type: {file_type}"))
+                continue
 
+            messages = [
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": f"{instruction}\n\nAnalyze this {content_type} (File {i+1}/{len(input_files)}). First, determine if it's a construction site. If it is, explain the {content_type} in detail, focusing on safety aspects. If it's not, briefly describe what you see."
+                        },
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": image_data_url
                             }
+                        }
+                    ]
+                }
+            ]
+            completion = client.chat.completions.create(
+                model="llama-3.2-90b-vision-preview",
+                messages=messages,
+                temperature=0.7,
+                max_tokens=1000,
+                top_p=1,
+                stream=False,
+                stop=None
+            )
+            result = completion.choices[0].message.content
+            results.append((f"File {i+1} analysis ({content_type})", result))
+
+            # If it's a video, analyze additional frames
+            if content_type == "video" and len(frames) > 1:
+                for j, frame in enumerate(frames[1:], start=2):
+                    image_data_url = f"data:image/png;base64,{encode_image(frame)}"
+                    messages = [
+                        {
+                            "role": "user",
+                            "content": [
                                 {
+                                    "type": "text",
+                                    "text": f"{instruction}\n\nAnalyze this additional frame from the video (File {i+1}, Frame {j}/{len(frames)}). Focus on any new or changed safety aspects compared to the previous frame."
+                                },
+                                {
+                                    "type": "image_url",
+                                    "image_url": {
+                                        "url": image_data_url
+                                    }
                                 }
                             ]
+                        }
+                    ]
+                    completion = client.chat.completions.create(
+                        model="llama-3.2-90b-vision-preview",
+                        messages=messages,
+                        temperature=0.7,
+                        max_tokens=1000,
+                        top_p=1,
+                        stream=False,
+                        stop=None
+                    )
+                    result = completion.choices[0].message.content
+                    results.append((f"File {i+1} analysis (video frame {j})", result))
 
         logger.info("Analysis completed successfully")
         return results
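The image branch of the new loop calls resize_image(image), which is also defined elsewhere in app.py and not shown in this diff. A minimal sketch of what it presumably does, assuming the same (800, 800) cap as extract_frames_from_video; the default size and the RGB conversion are assumptions:

from PIL import Image

def resize_image(image: Image.Image, max_size=(800, 800)) -> Image.Image:
    # Hypothetical reconstruction: normalize the mode and bound the dimensions
    # before the image is base64-encoded for the vision model.
    img = image.convert("RGB")
    img.thumbnail(max_size)  # preserves aspect ratio
    return img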
 
@@ -310,9 +280,9 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as iface:
         """
     )
 
+    # Single upload window for both images and videos
     with gr.Row():
+        input_files = gr.File(label="Upload Construction Site Images or Videos", file_count="multiple", type="file", elem_classes="image-container")
 
     # Analyze Safety Hazards Button
     with gr.Row():
 
@@ -346,8 +316,8 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as iface:
         return history
 
     analyze_button.click(
+        analyze_mixed_input,
+        inputs=[input_files],
         outputs=[chatbot],
         postprocess=lambda x: update_chat(chatbot.value, x)
     )
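One note on the unchanged wiring above: in current Gradio releases the postprocess argument of event listeners such as click() is a boolean flag, not a callback, so the lambda shown here may never be invoked. A sketch of an alternative that folds the history update into the handler itself; analyze_and_update is a hypothetical wrapper, while analyze_mixed_input, input_files, analyze_button, and chatbot come from app.py, and the snippet assumes it sits inside the existing gr.Blocks context:

# Hypothetical wrapper: append each (title, analysis) pair returned by
# analyze_mixed_input to the running chat history and return the full
# history, which is what a gr.Chatbot output expects.
def analyze_and_update(files, history):
    history = history or []
    for title, analysis in analyze_mixed_input(files):
        history.append((title, analysis))
    return history

# Assumed wiring: the chatbot component doubles as input (current history)
# and output (updated history), so no postprocess hook is needed.
analyze_button.click(
    analyze_and_update,
    inputs=[input_files, chatbot],
    outputs=[chatbot],
)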