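"""Gradio app: analyze construction site images with a Groq vision model.

Upload an image (optionally with a follow-up question) and get back a snag
category, a snag description, and suggested steps to resolve the snag.
Requires the gradio, groq, and pillow packages and a GROQ_API_KEY
environment variable.
"""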
import os
import base64
import gradio as gr
from PIL import Image
import io
import json
from groq import Groq
# Load the Groq API key from the environment
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Initialize the Groq client
client = Groq(api_key=GROQ_API_KEY)


def encode_image(image):
    """Return a base64-encoded PNG string for a file path or a PIL image."""
    if isinstance(image, str):  # Image supplied as a file path
        with open(image, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')
    elif isinstance(image, Image.Image):  # Image supplied as a PIL Image
        buffered = io.BytesIO()
        image.save(buffered, format="PNG")
        return base64.b64encode(buffered.getvalue()).decode('utf-8')
    else:
        raise ValueError("Unsupported image type")

def analyze_construction_image(image, follow_up_question=""):
    """Send the image (and optional follow-up question) to the vision model and parse its JSON reply."""
    if image is None:
        return "Error: No image uploaded", "", ""
    try:
        image_data_url = f"data:image/png;base64,{encode_image(image)}"
        messages = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": (
                            "Analyze this construction site image. Identify any issues or snags, "
                            "categorize them, provide a detailed description, and suggest steps to "
                            "resolve them. Respond with a JSON object containing the keys "
                            "'snag_category', 'snag_description', and 'desnag_steps' (a list of strings)."
                        )
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": image_data_url
                        }
                    }
                ]
            }
        ]
        if follow_up_question:
            messages.append({
                "role": "user",
                "content": follow_up_question
            })
        completion = client.chat.completions.create(
            model="llama-3.2-11b-vision-preview",
            messages=messages,
            temperature=0.7,
            max_tokens=1000,
            top_p=1,
            stream=False,
            response_format={"type": "json_object"},
            stop=None
        )
        # Expected reply shape:
        # {"snag_category": "...", "snag_description": "...", "desnag_steps": ["...", ...]}
        result = json.loads(completion.choices[0].message.content)
        snag_category = result.get('snag_category', 'N/A')
        snag_description = result.get('snag_description', 'N/A')
        steps = result.get('desnag_steps', ['N/A'])
        if isinstance(steps, str):  # Tolerate a single string instead of a list
            steps = [steps]
        desnag_steps = '\n'.join(steps)
        return snag_category, snag_description, desnag_steps
    except Exception as e:
        return f"Error: {str(e)}", "", ""

# Create the Gradio interface
iface = gr.Interface(
    fn=analyze_construction_image,
    inputs=[
        gr.Image(type="pil", label="Upload Construction Image"),
        gr.Textbox(label="Follow-up Question (Optional)")
    ],
    outputs=[
        gr.Textbox(label="Snag Category"),
        gr.Textbox(label="Snag Description"),
        gr.Textbox(label="Steps to Desnag")
    ],
    title="Construction Image Analyzer (Llama 3.2 11B Vision via Groq)",
    description="Upload a construction site image to identify issues and get desnag steps using the Llama 3.2 11B Vision model through the Groq API. You can also ask follow-up questions about the image.",
    examples=[
        ["example_image1.jpg", "What safety concerns do you see?"],
        ["example_image2.jpg", "Is there any visible structural damage?"]
    ],
    cache_examples=False,  # Disable caching to avoid file-moving issues
    theme="default"
)
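
# When the script is run directly, iface.launch() serves the UI locally
# (http://127.0.0.1:7860 by default); pass share=True to launch() for a
# temporary public link.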
# Launch the app
if __name__ == "__main__":
    iface.launch()