Construction_Snag_Tool_Llama_3.2_Vision

Running

File size: 3,156 Bytes

import os
import base64
import requests
import gradio as gr
from PIL import Image
import io

# Configuration. The API key is read from the environment so it never lives in
# source control; it is None when the variable is unset.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
# Groq serves its OpenAI-compatible API under the /openai/v1 prefix.
GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"

def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 string.

    The file is read in binary mode and the base64 bytes are decoded to text
    as UTF-8.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

# Upper bound (seconds) on the API call so a stalled connection cannot hang
# the UI handler indefinitely.
_REQUEST_TIMEOUT_SECONDS = 60


def _parse_snag_response(text):
    """Split the model's reply into (category, description, steps).

    Blank lines are ignored so that paragraph spacing in the reply does not
    end up in the description slot. The first non-blank line is the category,
    the second is the description, and everything after is the steps; any
    missing part is reported as "N/A".
    """
    lines = [line.strip() for line in (text or "").splitlines() if line.strip()]
    snag_category = lines[0] if lines else "N/A"
    snag_description = lines[1] if len(lines) > 1 else "N/A"
    desnag_steps = "\n".join(lines[2:]) if len(lines) > 2 else "N/A"
    return snag_category, snag_description, desnag_steps


def analyze_construction_image(image):
    """Send an image to the Groq chat completions API and return the snag analysis.

    Args:
        image: An object with a PIL-style ``save(fp, format=...)`` method, or
            ``None`` when nothing was uploaded.

    Returns:
        A 3-tuple of strings ``(snag_category, snag_description,
        desnag_steps)``. On any failure the first element is an
        ``"Error: ..."`` message and the other two are empty strings; this
        function does not raise, so it can be used directly as a UI callback.
    """
    if image is None:
        return "Error: No image uploaded", "", ""

    # Fail with a clear message instead of sending "Bearer None" to the API.
    if not GROQ_API_KEY:
        return "Error: GROQ_API_KEY environment variable is not set", "", ""

    try:
        # Serialize the image to PNG in memory and base64-encode it.
        buffered = io.BytesIO()
        image.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode()

        # Prepare the message for Groq API
        messages = [
            {
                "role": "system",
                "content": "You are an AI assistant specialized in analyzing construction site images. Identify issues, categorize them, and provide steps to resolve them."
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Analyze this construction image. Identify the snag category, provide a detailed snag description, and list steps to desnag."
                    },
                    {
                        "type": "image_url",
                        # The OpenAI-compatible schema expects an object with
                        # a "url" field here, not a bare string.
                        "image_url": {"url": f"data:image/png;base64,{img_str}"}
                    }
                ]
            }
        ]

        # Make API request to Groq
        headers = {
            "Authorization": f"Bearer {GROQ_API_KEY}",
            "Content-Type": "application/json"
        }
        data = {
            # NOTE(review): confirm this ID against Groq's current model list;
            # adjust model name if necessary.
            "model": "llama3-2-vision-90b",
            "messages": messages,
            "max_tokens": 300,
            "temperature": 0.7
        }

        response = requests.post(
            GROQ_API_URL,
            headers=headers,
            json=data,
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()

        result = response.json()["choices"][0]["message"]["content"]
        return _parse_snag_response(result)
    except Exception as e:
        # UI boundary: surface any failure as text rather than crashing the app.
        return f"Error: {str(e)}", "", ""

# Assemble the Gradio UI: a single image upload feeding three text outputs.
_image_input = gr.Image(type="pil", label="Upload Construction Image")
_result_boxes = [
    gr.Textbox(label=caption)
    for caption in ("Snag Category", "Snag Description", "Steps to Desnag")
]
# Each example row supplies one value per input component.
_example_rows = [
    ["example_image1.jpg"],
    ["example_image2.jpg"],
]

iface = gr.Interface(
    fn=analyze_construction_image,
    inputs=_image_input,
    outputs=_result_boxes,
    title="Construction Image Analyzer (Llama 3.2-Vision via Groq)",
    description="Upload a construction site image to identify issues and get desnag steps using Llama 3.2-Vision technology through Groq API.",
    examples=_example_rows,
    cache_examples=True,
    theme="default",
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()