# Construction_Snag_Tool_Llama_3.2_Vision
#
# Running
#
# File size: 7,661 Bytes

import os
import base64
import gradio as gr
from PIL import Image
import io
import json
from groq import Groq
import logging

# Set up logging: the root logger is configured at DEBUG level, so the
# logger.debug() calls in this module (including the raw API response) are
# emitted.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Load environment variables: the Groq API key is read from the process
# environment. A missing or empty value is logged and then aborts module
# import with ValueError, so nothing below runs without a key.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    logger.error("GROQ_API_KEY is not set in environment variables")
    raise ValueError("GROQ_API_KEY is not set")

# Initialize Groq client: a single module-level client shared by the
# image-analysis and chat functions defined below.
client = Groq(api_key=GROQ_API_KEY)

def encode_image(image):
    """Return the base64 encoding of an image as a UTF-8 string.

    Args:
        image: Either a filesystem path (``str``), whose bytes are read
            unchanged, or a ``PIL.Image.Image``, which is serialised to PNG
            in memory first.

    Returns:
        The base64 text of the image bytes.

    Raises:
        ValueError: If ``image`` is neither a ``str`` nor a PIL image.
        Exception: Any error raised while reading or saving the image is
            logged and re-raised unchanged.
    """
    try:
        if isinstance(image, str):
            # Path input: encode the file's bytes exactly as stored on disk.
            with open(image, "rb") as handle:
                raw_bytes = handle.read()
        elif isinstance(image, Image.Image):
            # PIL input: render to an in-memory PNG and encode that.
            png_buffer = io.BytesIO()
            image.save(png_buffer, format="PNG")
            raw_bytes = png_buffer.getvalue()
        else:
            raise ValueError(f"Unsupported image type: {type(image)}")

        encoded = base64.b64encode(raw_bytes)
        return encoded.decode('utf-8')
    except Exception as e:
        logger.error(f"Error encoding image: {str(e)}")
        raise

def _text_or_na(value):
    """Return ``value`` as text, or 'N/A' when it is None or an empty string."""
    if value is None or value == "":
        return 'N/A'
    return str(value)


def _build_image_data_url(image):
    """Return a base64 ``data:`` URL for ``image`` with a matching MIME type."""
    import mimetypes

    # encode_image() serialises PIL images as PNG, so PNG is the default.
    mime_type = "image/png"
    if isinstance(image, str):
        # Path input is encoded byte-for-byte, so label it with the file's
        # own image type when that can be guessed from the name.
        guessed, _ = mimetypes.guess_type(image)
        if guessed and guessed.startswith("image/"):
            mime_type = guessed
    return f"data:{mime_type};base64,{encode_image(image)}"


def _format_snag_report(parsed_result):
    """Render the parsed JSON object from the model as the chat message text."""
    snag_category = _text_or_na(parsed_result.get('snag_category'))
    snag_description = _text_or_na(parsed_result.get('snag_description'))

    # Ensure desnag_steps is a non-empty list of strings, whatever the model
    # returned (missing key, null, scalar, or list).
    desnag_steps = parsed_result.get('desnag_steps')
    if not isinstance(desnag_steps, list):
        desnag_steps = [desnag_steps]
    desnag_steps = [_text_or_na(step) for step in desnag_steps] or ['N/A']
    desnag_steps_str = '\n'.join(desnag_steps)

    return f"Image Analysis Results:\n\nSnag Category: {snag_category}\n\nSnag Description: {snag_description}\n\nSteps to Desnag:\n{desnag_steps_str}"


def analyze_construction_image(image):
    """Ask the Groq vision model to find snags in an image and report them.

    Args:
        image: A PIL image or a file path, as accepted by ``encode_image``.
            ``None`` means nothing was uploaded.

    Returns:
        A chat history list containing one ``(None, text)`` pair. ``text`` is
        either the formatted analysis (category, description, steps) or a
        message starting with ``"Error: "``. The function never raises;
        every failure is logged and reported through the returned text.
    """
    if image is None:
        logger.warning("No image provided")
        return [(None, "Error: No image uploaded")]

    try:
        logger.info("Starting image analysis")
        image_data_url = _build_image_data_url(image)

        messages = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Analyze this construction site image. Identify any issues or snags, categorize them, provide a detailed description, and suggest steps to resolve them. Format your response as a JSON object with keys 'snag_category', 'snag_description', and 'desnag_steps' (as an array)."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": image_data_url
                        }
                    }
                ]
            }
        ]

        logger.info("Sending request to Groq API")
        completion = client.chat.completions.create(
            model="llama-3.2-90b-vision-preview",
            messages=messages,
            temperature=0.7,
            max_tokens=1000,
            top_p=1,
            stream=False,
            response_format={"type": "json_object"},
            stop=None
        )

        logger.info("Received response from Groq API")
        result = completion.choices[0].message.content
        logger.debug(f"Raw API response: {result}")

        # Try to parse the result as JSON. TypeError covers a null/absent
        # message content, which json.loads rejects before decoding.
        try:
            parsed_result = json.loads(result)
        except (json.JSONDecodeError, TypeError):
            logger.error("Failed to parse API response as JSON")
            return [(None, "Error: Invalid response format")]

        # Valid JSON that is not an object (e.g. a list or string) has no
        # keys to read, so report it as a format error rather than letting
        # ``.get`` fail with an AttributeError.
        if not isinstance(parsed_result, dict):
            logger.error("API response JSON is not an object")
            return [(None, "Error: Invalid response format")]

        report = _format_snag_report(parsed_result)
        logger.info("Analysis completed successfully")

        # Initialize chat history with analysis results
        return [(None, report)]
    except Exception as e:
        # Boundary handler for the UI callback: log with traceback and show
        # the message in the chat instead of propagating.
        logger.exception("Error during image analysis: %s", e)
        return [(None, f"Error: {str(e)}")]

def chat_about_image(message, chat_history):
    """Answer a follow-up question, using the earlier chat turns as context.

    Args:
        message: The user's new question text.
        chat_history: Prior turns as ``(user_text, assistant_text)`` pairs;
            either element of a pair may be empty/None and is then skipped.
            ``None`` is treated as an empty history.

    Returns:
        A 2-tuple ``("", history)``. The first element is the new value for
        the input textbox; the second is a new list holding the prior turns
        plus ``(message, reply)``, where ``reply`` is the model's answer or
        an ``"Error: ..."`` string if the request failed. A blank message
        returns the history unchanged without calling the API. The caller's
        ``chat_history`` object is never mutated.
    """
    # Copy so that neither the success nor the error path touches the
    # caller's list, and so a None history cannot break the handlers below.
    history = list(chat_history) if chat_history else []

    # Nothing to ask: skip the API round-trip entirely.
    if not message or not message.strip():
        return "", history

    try:
        # Prepare the conversation history for the API
        messages = [
            {"role": "system", "content": "You are an AI assistant specialized in analyzing construction site images and answering questions about them. Use the information from the initial analysis to answer user queries."},
        ]

        # Add chat history to messages, skipping empty sides of a turn
        # (the analysis result is stored with no user text).
        for human, ai in history:
            if human:
                messages.append({"role": "user", "content": human})
            if ai:
                messages.append({"role": "assistant", "content": ai})

        # Add the new user message
        messages.append({"role": "user", "content": message})

        # Make API call
        completion = client.chat.completions.create(
            model="llama-3.2-90b-vision-preview",
            messages=messages,
            temperature=0.7,
            max_tokens=500,
            top_p=1,
            stream=False,
            stop=None
        )

        response = completion.choices[0].message.content
    except Exception as e:
        # Boundary handler for the UI callback: log with traceback and show
        # the failure as the assistant's reply.
        logger.exception("Error during chat: %s", e)
        return "", history + [(message, f"Error: {str(e)}")]

    return "", history + [(message, response)]


# Stylesheet handed to gr.Blocks(css=...) below. The .container, .header and
# .subheader rules style the HTML banner; the remaining classes are attached
# to individual components through their elem_classes arguments.
custom_css = """
.container {
    max-width: 1000px;
    margin: auto;
    padding-top: 1.5rem;
}
.header {
    text-align: center;
    margin-bottom: 2rem;
}
.header h1 {
    color: #2c3e50;
    font-size: 2.5rem;
}
.subheader {
    color: #34495e;
    font-size: 1.2rem;
    margin-bottom: 2rem;
}
.image-container {
    border: 2px dashed #3498db;
    border-radius: 10px;
    padding: 1rem;
    text-align: center;
}
.analyze-button {
    background-color: #2ecc71 !important;
    color: white !important;
}
.clear-button {
    background-color: #e74c3c !important;
    color: white !important;
}
.chatbot {
    border: 1px solid #bdc3c7;
    border-radius: 10px;
    padding: 1rem;
    height: 400px;
    overflow-y: auto;
}
.chat-input {
    border: 1px solid #bdc3c7;
    border-radius: 5px;
    padding: 0.5rem;
}
"""

# Create the Gradio interface: a Blocks app using the custom stylesheet and
# the Soft theme. Components are declared in layout order, so the nesting of
# the `with` blocks below defines the page structure.
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as iface:
    # Static banner: title and one-line usage hint.
    gr.HTML(
        """
        <div class="container">
            <div class="header">
                <h1>🏗️ Construction Image Analyzer with AI Chat</h1>
            </div>
            <p class="subheader">Upload a construction site image, analyze it for issues, and chat with AI about the findings.</p>
        </div>
        """
    )
    
    with gr.Row():
        # Left column: image upload (delivered to callbacks as a PIL image)
        # and the button that starts the analysis.
        with gr.Column(scale=1):
            image_input = gr.Image(type="pil", label="Upload Construction Image", elem_classes="image-container")
            analyze_button = gr.Button("🔍 Analyze Image", elem_classes="analyze-button")
        # Right column (scale=2): chat display, question box and clear button.
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Analysis Results and Chat", elem_classes="chatbot")
            with gr.Row():
                msg = gr.Textbox(
                    label="Ask a question about the image",
                    placeholder="Type your question here and press Enter...",
                    show_label=False,
                    elem_classes="chat-input"
                )
                clear = gr.Button("🗑️ Clear Chat", elem_classes="clear-button")

    # Analyze: pass the uploaded image to analyze_construction_image and
    # replace the chatbot content with the history it returns.
    analyze_button.click(
        analyze_construction_image,
        inputs=[image_input],
        outputs=[chatbot]
    )

    # Submitting the textbox sends the question plus the current history to
    # chat_about_image; its two return values are written back to the
    # textbox and the chatbot respectively.
    msg.submit(chat_about_image, [msg, chatbot], [msg, chatbot])
    # Clear: set the chatbot value to None, bypassing the queue.
    clear.click(lambda: None, None, chatbot, queue=False)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch(debug=True)