"""Gradio app that asks a Groq-hosted vision model to find snags in a photo.

The user uploads a construction-site image (and, optionally, a follow-up
question); the image is sent to the model as a base64 PNG data URL and the
JSON reply is split into three text outputs: category, description and
remediation ("desnag") steps.
"""

import base64
import io
import json
import os

import gradio as gr
from groq import Groq
from PIL import Image

# Load environment variables
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# The model id can be swapped via GROQ_MODEL without editing the code; the
# default is the id this app has always used.
MODEL_NAME = os.environ.get("GROQ_MODEL", "llama-3.2-90b-vision-preview")

# The reply is parsed by key name below, so the prompt has to spell out the
# exact keys; asking only for "JSON format" lets the model pick its own
# schema and every field then falls back to 'N/A'.
ANALYSIS_PROMPT = (
    "Analyze this construction site image. Identify any issues or snags, "
    "categorize them, provide a detailed description, and suggest steps to "
    "resolve them. Output the result in JSON format. "
    "Respond with a single JSON object with exactly these keys: "
    '"snag_category" (string), "snag_description" (string) and '
    '"desnag_steps" (array of strings).'
)

# Initialize Groq client
client = Groq(api_key=GROQ_API_KEY)


def encode_image(image: Image.Image) -> str:
    """Return *image* serialised as PNG and base64-encoded (ASCII text)."""
    buffered = io.BytesIO()
    image.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode('utf-8')


def _as_text(value) -> str:
    """Render a JSON field for a textbox; strings pass through unchanged."""
    if value is None:
        return 'N/A'
    if isinstance(value, str):
        return value
    if isinstance(value, (list, tuple)):
        return ', '.join(str(item) for item in value)
    return str(value)


def _format_steps(steps) -> str:
    """Render the ``desnag_steps`` field as one step per line.

    A bare string is returned as-is (joining it would insert a newline
    between every character), and non-string list items are stringified
    instead of making ``str.join`` raise ``TypeError``.
    """
    if steps is None:
        return 'N/A'
    if isinstance(steps, str):
        return steps
    if isinstance(steps, (list, tuple)):
        return '\n'.join(str(step) for step in steps)
    return str(steps)


def analyze_construction_image(image, follow_up_question=""):
    """Analyse a construction-site image and return the snag report.

    Args:
        image: PIL image from the upload widget, or ``None`` if nothing
            was uploaded.
        follow_up_question: Optional extra question; when non-blank it is
            sent as a second user message after the image.

    Returns:
        A 3-tuple of strings ``(snag_category, snag_description,
        desnag_steps)``. On any failure the first element is an
        ``"Error: ..."`` message and the other two are empty strings.
    """
    if image is None:
        return "Error: No image uploaded", "", ""

    try:
        image_data_url = f"data:image/png;base64,{encode_image(image)}"

        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": ANALYSIS_PROMPT},
                    {"type": "image_url", "image_url": {"url": image_data_url}},
                ],
            }
        ]

        # Ignore whitespace-only input so no empty message is sent.
        question = (follow_up_question or "").strip()
        if question:
            messages.append({"role": "user", "content": question})

        completion = client.chat.completions.create(
            model=MODEL_NAME,
            messages=messages,
            temperature=0.7,
            max_tokens=1000,
            top_p=1,
            stream=False,
            response_format={"type": "json_object"},
            stop=None,
        )

        result = json.loads(completion.choices[0].message.content)

        snag_category = _as_text(result.get('snag_category', 'N/A'))
        snag_description = _as_text(result.get('snag_description', 'N/A'))
        desnag_steps = _format_steps(result.get('desnag_steps', ['N/A']))

        return snag_category, snag_description, desnag_steps
    except Exception as e:
        # UI boundary: surface any failure (network, API, malformed JSON)
        # in the first textbox rather than crashing the Gradio handler.
        return f"Error: {str(e)}", "", ""


# Create the Gradio interface
iface = gr.Interface(
    fn=analyze_construction_image,
    inputs=[
        gr.Image(type="pil", label="Upload Construction Image"),
        gr.Textbox(label="Follow-up Question (Optional)"),
    ],
    outputs=[
        gr.Textbox(label="Snag Category"),
        gr.Textbox(label="Snag Description"),
        gr.Textbox(label="Steps to Desnag"),
    ],
    title="Construction Image Analyzer (Llama 3.2 90B Vision via Groq)",
    description=(
        "Upload a construction site image to identify issues and get desnag "
        "steps using Llama 3.2 90B Vision technology through Groq API. "
        "You can also ask follow-up questions about the image."
    ),
    examples=[
        ["example_image1.jpg", "What safety concerns do you see?"],
        ["example_image2.jpg", "Is there any visible structural damage?"],
    ],
    # NOTE(review): caching examples runs the handler on the example files
    # when the interface is built - confirm the files exist and the API key
    # is set in that environment.
    cache_examples=True,
    theme="default",
)

# Launch the app
if __name__ == "__main__":
    iface.launch()