import os
import base64
import io
import json
import logging

import gradio as gr
from PIL import Image
from groq import Groq

# Set up logging
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Load environment variables
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    logger.error("GROQ_API_KEY is not set in environment variables")
    raise ValueError("GROQ_API_KEY is not set")

# Initialize Groq client
client = Groq(api_key=GROQ_API_KEY)


def encode_image(image):
    """Return a base64-encoded PNG string for a file path or a PIL Image."""
    try:
        if isinstance(image, str):
            # If image is a file path
            with open(image, "rb") as image_file:
                return base64.b64encode(image_file.read()).decode('utf-8')
        elif isinstance(image, Image.Image):
            # If image is a PIL Image
            buffered = io.BytesIO()
            image.save(buffered, format="PNG")
            return base64.b64encode(buffered.getvalue()).decode('utf-8')
        else:
            raise ValueError(f"Unsupported image type: {type(image)}")
    except Exception as e:
        logger.error(f"Error encoding image: {str(e)}")
        raise


def analyze_construction_image(image):
    """Send the uploaded image to the vision model and seed the chat history."""
    if image is None:
        logger.warning("No image provided")
        return [(None, "Error: No image uploaded")]

    try:
        logger.info("Starting image analysis")
        image_data_url = f"data:image/png;base64,{encode_image(image)}"

        messages = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": (
                            "Analyze this construction site image. Identify any issues or snags, "
                            "categorize them, provide a detailed description, and suggest steps to "
                            "resolve them. Format your response as a JSON object with keys "
                            "'snag_category', 'snag_description', and 'desnag_steps' (as an array)."
                        )
                    },
                    {"type": "image_url", "image_url": {"url": image_data_url}}
                ]
            }
        ]

        logger.info("Sending request to Groq API")
        # Preview models on Groq may be deprecated over time; if this request
        # starts failing, swap in a current vision model from Groq's model list.
        completion = client.chat.completions.create(
            model="llama-3.2-90b-vision-preview",
            messages=messages,
            temperature=0.7,
            max_tokens=1000,
            top_p=1,
            stream=False,
            response_format={"type": "json_object"},
            stop=None
        )

        logger.info("Received response from Groq API")
        result = completion.choices[0].message.content
        logger.debug(f"Raw API response: {result}")

        # Try to parse the result as JSON
        try:
            parsed_result = json.loads(result)
        except json.JSONDecodeError:
            logger.error("Failed to parse API response as JSON")
            return [(None, "Error: Invalid response format")]

        snag_category = str(parsed_result.get('snag_category', 'N/A'))
        snag_description = str(parsed_result.get('snag_description', 'N/A'))

        # Ensure desnag_steps is a list of strings
        desnag_steps = parsed_result.get('desnag_steps', ['N/A'])
        if not isinstance(desnag_steps, list):
            desnag_steps = [str(desnag_steps)]
        else:
            desnag_steps = [str(step) for step in desnag_steps]
        desnag_steps_str = '\n'.join(desnag_steps)

        logger.info("Analysis completed successfully")

        # Initialize chat history with analysis results
        chat_history = [
            (None, f"Image Analysis Results:\n\nSnag Category: {snag_category}\n\n"
                   f"Snag Description: {snag_description}\n\nSteps to Desnag:\n{desnag_steps_str}")
        ]
        return chat_history
    except Exception as e:
        logger.error(f"Error during image analysis: {str(e)}")
        return [(None, f"Error: {str(e)}")]
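# Illustrative only: the response shape the prompt above asks the model for.
# The keys are fixed by the prompt; the values below are made-up examples.
#
# {
#     "snag_category": "Safety",
#     "snag_description": "Workers on the upper scaffold have no fall protection.",
#     "desnag_steps": [
#         "Stop work on the affected scaffold",
#         "Issue and verify fall-arrest harnesses",
#         "Re-inspect before resuming work"
#     ]
# }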
def chat_about_image(message, chat_history):
    """Answer a follow-up question using the analysis already in the chat history."""
    try:
        # Prepare the conversation history for the API
        messages = [
            {
                "role": "system",
                "content": "You are an AI assistant specialized in analyzing construction site images and answering questions about them. Use the information from the initial analysis to answer user queries."
            },
        ]

        # Add chat history to messages
        for human, ai in chat_history:
            if human:
                messages.append({"role": "user", "content": human})
            if ai:
                messages.append({"role": "assistant", "content": ai})

        # Add the new user message
        messages.append({"role": "user", "content": message})

        # Make API call. Note: follow-up turns are text-only; the model sees
        # the analysis text in the history, not the image itself.
        completion = client.chat.completions.create(
            model="llama-3.2-90b-vision-preview",
            messages=messages,
            temperature=0.7,
            max_tokens=500,
            top_p=1,
            stream=False,
            stop=None
        )

        response = completion.choices[0].message.content
        chat_history.append((message, response))
        return "", chat_history
    except Exception as e:
        logger.error(f"Error during chat: {str(e)}")
        return "", chat_history + [(message, f"Error: {str(e)}")]


custom_css = """
.container {
    max-width: 1000px;
    margin: auto;
    padding-top: 1.5rem;
}
.header {
    text-align: center;
    margin-bottom: 2rem;
}
.header h1 {
    color: #2c3e50;
    font-size: 2.5rem;
}
.subheader {
    color: #34495e;
    font-size: 1.2rem;
    margin-bottom: 2rem;
}
.image-container {
    border: 2px dashed #3498db;
    border-radius: 10px;
    padding: 1rem;
    text-align: center;
}
.analyze-button {
    background-color: #2ecc71 !important;
    color: white !important;
}
.clear-button {
    background-color: #e74c3c !important;
    color: white !important;
}
.chatbot {
    border: 1px solid #bdc3c7;
    border-radius: 10px;
    padding: 1rem;
    height: 400px;
    overflow-y: auto;
}
.chat-input {
    border: 1px solid #bdc3c7;
    border-radius: 5px;
    padding: 0.5rem;
}
"""
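# Chat history flows through the UI as Gradio "tuples"-style pairs:
# [(user_message, assistant_message), ...]. A None in the user slot (as in
# analyze_construction_image) renders an assistant-only bubble. A minimal
# sketch of a two-turn history:
#
# [
#     (None, "Image Analysis Results: ..."),      # analysis, no user turn
#     ("Is the scaffold safe?", "Based on ..."),  # question and reply
# ]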
# Create the Gradio interface
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as iface:
    gr.HTML(
        """
        <div class="header">
            <h1>🏗️ Construction Image Analyzer with AI Chat</h1>
            <p class="subheader">Upload a construction site image, analyze it for issues, and chat with AI about the findings.</p>
        </div>
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            image_input = gr.Image(type="pil", label="Upload Construction Image", elem_classes="image-container")
            analyze_button = gr.Button("🔍 Analyze Image", elem_classes="analyze-button")
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Analysis Results and Chat", elem_classes="chatbot")
            with gr.Row():
                msg = gr.Textbox(
                    label="Ask a question about the image",
                    placeholder="Type your question here and press Enter...",
                    show_label=False,
                    elem_classes="chat-input"
                )
                clear = gr.Button("🗑️ Clear Chat", elem_classes="clear-button")

    analyze_button.click(
        analyze_construction_image,
        inputs=[image_input],
        outputs=[chatbot]
    )
    msg.submit(chat_about_image, [msg, chatbot], [msg, chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)

# Launch the app
if __name__ == "__main__":
    iface.launch(debug=True)