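"""Gradio app: analyze construction site images with a Groq vision model.

Upload an image (optionally with a follow-up question) and get back a snag
category, a snag description, and suggested steps to resolve the snag.
Requires the gradio, groq, and pillow packages and a GROQ_API_KEY
environment variable.
"""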
import os
import base64
import gradio as gr
from PIL import Image
import io
import json
from groq import Groq
# Load the Groq API key from the environment
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Initialize the Groq client
client = Groq(api_key=GROQ_API_KEY)


def encode_image(image):
    """Return a base64-encoded PNG string for a file path or a PIL image."""
    if isinstance(image, str):  # Image supplied as a file path
        with open(image, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')
    elif isinstance(image, Image.Image):  # Image supplied as a PIL Image
        buffered = io.BytesIO()
        image.save(buffered, format="PNG")
        return base64.b64encode(buffered.getvalue()).decode('utf-8')
    else:
        raise ValueError("Unsupported image type")

def analyze_construction_image(image, follow_up_question=""):
    """Send the image (and optional follow-up question) to the vision model and parse its JSON reply."""
    if image is None:
        return "Error: No image uploaded", "", ""
    try:
        image_data_url = f"data:image/png;base64,{encode_image(image)}"
        messages = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": (
                            "Analyze this construction site image. Identify any issues or snags, "
                            "categorize them, provide a detailed description, and suggest steps to "
                            "resolve them. Respond with a JSON object containing the keys "
                            "'snag_category', 'snag_description', and 'desnag_steps' (a list of strings)."
                        )
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": image_data_url
                        }
                    }
                ]
            }
        ]
        if follow_up_question:
            messages.append({
                "role": "user",
                "content": follow_up_question
            })
        completion = client.chat.completions.create(
            model="llama-3.2-11b-vision-preview",
            messages=messages,
            temperature=0.7,
            max_tokens=1000,
            top_p=1,
            stream=False,
            response_format={"type": "json_object"},
            stop=None
        )
        # Expected reply shape:
        # {"snag_category": "...", "snag_description": "...", "desnag_steps": ["...", ...]}
        result = json.loads(completion.choices[0].message.content)
        snag_category = result.get('snag_category', 'N/A')
        snag_description = result.get('snag_description', 'N/A')
        steps = result.get('desnag_steps', ['N/A'])
        if isinstance(steps, str):  # Tolerate a single string instead of a list
            steps = [steps]
        desnag_steps = '\n'.join(steps)
        return snag_category, snag_description, desnag_steps
    except Exception as e:
        return f"Error: {str(e)}", "", ""

# Create the Gradio interface
iface = gr.Interface(
    fn=analyze_construction_image,
    inputs=[
        gr.Image(type="pil", label="Upload Construction Image"),
        gr.Textbox(label="Follow-up Question (Optional)")
    ],
    outputs=[
        gr.Textbox(label="Snag Category"),
        gr.Textbox(label="Snag Description"),
        gr.Textbox(label="Steps to Desnag")
    ],
    title="Construction Image Analyzer (Llama 3.2 11B Vision via Groq)",
    description="Upload a construction site image to identify issues and get desnag steps using the Llama 3.2 11B Vision model through the Groq API. You can also ask follow-up questions about the image.",
    examples=[
        ["example_image1.jpg", "What safety concerns do you see?"],
        ["example_image2.jpg", "Is there any visible structural damage?"]
    ],
    cache_examples=False,  # Disable caching to avoid file-moving issues
    theme="default"
)
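
# When the script is run directly, iface.launch() serves the UI locally
# (http://127.0.0.1:7860 by default); pass share=True to launch() for a
# temporary public link.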
# Launch the app
if __name__ == "__main__":
    iface.launch()