Spaces:

snyk-etso
/

prompt-injection-instruction-defense-challenge

Running on Zero

App Files Files Community

ddas committed 16 days ago

Commit

e79c8c3

unverified ·

1 Parent(s): 4ce7d6f

better error messages

Browse files

Files changed (1) hide show

app.py +206 -16

app.py CHANGED Viewed

@@ -36,6 +36,115 @@ MODEL_URL = "https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.b
 # Global counter for error modals to help with debugging
 _ERROR_MODAL_COUNTER = 0
 def validate_english_only_windowed(text, model=None):
     """
     Validates that input text is English-only using sliding window approach.
@@ -652,10 +761,13 @@ def reset_to_initial_state():
     for i, email in enumerate(INBOX):
         reset_emails.append(format_single_email(email, i + 1))
     return (
         "",  # Clear attack subject
         "",  # Clear attack body
-        "gpt-4o",  # Reset model to default
         "",  # Final output cleared
         "🔄 Interface reset to initial state",  # Trace message in accordion
         reset_emails[0],  # Reset email 1
@@ -1112,31 +1224,33 @@ You achieve Attacker Goal 2 and user goal is also achieved and no information ab
                 with gr.Group(elem_id="defense-toggle-container"):
                     with gr.Row(elem_id="defense-toggle-row"):
                         with gr.Column(scale=2, min_width=200):
                             model_selector = gr.Dropdown(
-                                choices=["claude-3-5-haiku-20241022", "gpt-4o", "claude-3-5-sonnet-20241022","gpt-5"],
-                                value="gpt-4o",
                                 label="Select Agent LLM",
                                 elem_id="model-selector"
                             )
                         with gr.Column(scale=2, min_width=100):
                             defense_toggle = gr.Checkbox(label="Instruction Tagger Defense", value=True, elem_id="defense-toggle")
-                system_display = gr.Textbox(
-                    value=SYSTEM_PROMPT,
                     lines=2,
-                    interactive=False,
-                    show_copy_button=True,
                     label="System Prompt"
-                )
-                user_input_display = gr.Textbox(
-                    value=USER_INPUT,
                 lines=1,
-                    interactive=False,
-                    show_copy_button=True,
                 label="User Query (Fixed)"
-                )
                 # (Moved defense toggle to top; removed previous placement)
@@ -1376,7 +1490,24 @@ You achieve Attacker Goal 2 and user goal is also achieved and no information ab
                 except Exception as e:
                     validation_errors.append(f"EMAIL BODY: Validation failed - {str(e)}")
-            # If there are validation errors, show them all in the popup
             if validation_errors:
                 error_timestamp = int(time.time() * 1000)
                 print(f"🚨 VALIDATION ERRORS FOUND: {len(validation_errors)} errors at {error_timestamp}")
@@ -1402,7 +1533,66 @@ You achieve Attacker Goal 2 and user goal is also achieved and no information ab
                 "subject_confidence_scores": subject_confidence_scores,
                 "body_confidence_scores": body_confidence_scores
             }
-            exec_log, final_out = submit_attack(from_addr.strip(), subject, body, model, defense_enabled, user_info.strip(), confidence_scores)
             # Build a formatted results summary extracted from exec_log
             def build_results_html(log_text: str) -> str:

 # Global counter for error modals to help with debugging
 _ERROR_MODAL_COUNTER = 0
def get_available_api_keys():
    """
    Check which API keys are available in environment variables.

    A key counts as available only when its variable holds a value that is
    non-empty after surrounding whitespace is stripped, so a blank
    placeholder such as ``OPENAI_API_KEY=" "`` is reported as missing
    instead of failing later at request time.

    Returns:
        dict: Dictionary with 'openai', 'anthropic', and 'invariant' boolean flags
    """
    env_var_by_provider = {
        'openai': 'OPENAI_API_KEY',
        'anthropic': 'ANTHROPIC_API_KEY',
        'invariant': 'INVARIANT_API_KEY',
    }
    return {
        provider: bool(os.getenv(env_var, '').strip())
        for provider, env_var in env_var_by_provider.items()
    }
def get_available_models():
    """
    Build the model choices and default selection from the configured API keys.

    Returns:
        tuple: (choices_list, default_model)
    """
    key_flags = get_available_api_keys()
    has_openai = key_flags['openai']
    has_anthropic = key_flags['anthropic']

    # Neither provider configured: return a placeholder entry. The original
    # note says this case is handled later, in the submit function.
    if not (has_openai or has_anthropic):
        placeholder = "No models available"
        return [placeholder], placeholder

    model_names = []
    if has_openai:
        model_names += ["gpt-4o", "gpt-5"]
    if has_anthropic:
        model_names += ["claude-3-5-haiku-20241022", "claude-3-5-sonnet-20241022"]

    # OpenAI wins as the default whenever its key is present.
    preferred = "gpt-4o" if has_openai else "claude-3-5-sonnet-20241022"
    return model_names, preferred
def validate_api_key_for_model(model_name):
    """
    Validate that the required API key is available for the selected model.

    A missing selection (``None``, a non-string, or a blank string) is
    reported as invalid rather than raising or being silently accepted.
    Model names that match no known provider prefix are still accepted,
    as before.

    Args:
        model_name (str): Selected model name
    Returns:
        tuple: (is_valid, error_message)
    """
    # Guard first: calling .startswith on None would raise AttributeError.
    if not isinstance(model_name, str) or not model_name.strip():
        return False, "No model selected. Please select an agent LLM before submitting."

    # Placeholder choice used when no provider key is configured; it needs
    # no environment lookup.
    if model_name == "No models available":
        return False, "No API keys found. Please add either OPENAI_API_KEY or ANTHROPIC_API_KEY to your environment variables to use this application."

    api_keys = get_available_api_keys()
    if model_name.startswith("gpt") and not api_keys['openai']:
        return False, "OpenAI API key is required for GPT models. Please add OPENAI_API_KEY to your environment variables."
    if model_name.startswith("claude") and not api_keys['anthropic']:
        return False, "Anthropic API key is required for Claude models. Please add ANTHROPIC_API_KEY to your environment variables."
    return True, ""
def validate_invariant_api_key():
    """
    Check that INVARIANT_API_KEY is configured for trace collection.

    Returns:
        tuple: (is_valid, error_message)
    """
    if get_available_api_keys()['invariant']:
        return True, ""

    missing_key_msg = "Invariant Labs API key is required for trace collection and analysis. Please add INVARIANT_API_KEY to your environment variables. You can get an API key from https://invariantlabs.ai/"
    return False, missing_key_msg
def validate_model_dependencies(defense_enabled=True):
    """
    Validate that critical models can be loaded.

    The FastText language detection model is always checked. The
    instruction classifier is checked only when ``defense_enabled`` is
    true; the default keeps the previous behavior of always checking it.

    Args:
        defense_enabled (bool): Whether the instruction classifier must be
            loadable. Defaults to True.
    Returns:
        tuple: (is_valid, error_message)
    """
    # Any failure is turned into a user-facing message instead of
    # propagating, hence the broad except clauses below.
    try:
        # Test FastText model loading
        model = load_fasttext_model()
        if model is None:
            return False, "FastText language detection model failed to load. This is required for input validation."
    except Exception as e:
        return False, f"FastText model loading error: {e}. Language detection is required for the application to function."

    if not defense_enabled:
        # The classifier is not needed when the defense is off.
        return True, ""

    try:
        # Imported here so an import failure is reported as a validation
        # error as well.
        from instruction_classifier import get_sanitizer
        sanitizer = get_sanitizer()
        if sanitizer is None:
            return False, "Instruction classifier model failed to load. This is required for defense system functionality."
    except Exception as e:
        return False, f"Instruction classifier loading error: {e}. Defense system requires this model to function properly."
    return True, ""
 def validate_english_only_windowed(text, model=None):
     """
     Validates that input text is English-only using sliding window approach.
     for i, email in enumerate(INBOX):
         reset_emails.append(format_single_email(email, i + 1))
+    # Get current default model based on available API keys
+    _, default_model = get_available_models()
     return (
         "",  # Clear attack subject
         "",  # Clear attack body
+        default_model,  # Reset model to current default
         "",  # Final output cleared
         "🔄 Interface reset to initial state",  # Trace message in accordion
         reset_emails[0],  # Reset email 1
                 with gr.Group(elem_id="defense-toggle-container"):
                     with gr.Row(elem_id="defense-toggle-row"):
                         with gr.Column(scale=2, min_width=200):
+                            # Get available models based on API keys
+                            available_choices, default_model = get_available_models()
                             model_selector = gr.Dropdown(
+                                choices=available_choices,
+                                value=default_model,
                                 label="Select Agent LLM",
                                 elem_id="model-selector"
                             )
                         with gr.Column(scale=2, min_width=100):
                             defense_toggle = gr.Checkbox(label="Instruction Tagger Defense", value=True, elem_id="defense-toggle")
+                    system_display = gr.Textbox(
+                        value=SYSTEM_PROMPT,
                     lines=2,
+                        interactive=False,
+                        show_copy_button=True,
                     label="System Prompt"
+                    )
+                    user_input_display = gr.Textbox(
+                        value=USER_INPUT,
                 lines=1,
+                        interactive=False,
+                        show_copy_button=True,
                 label="User Query (Fixed)"
+                    )
                 # (Moved defense toggle to top; removed previous placement)
                 except Exception as e:
                     validation_errors.append(f"EMAIL BODY: Validation failed - {str(e)}")
+            # 4. Validate API key for selected model
+            is_api_valid, api_error_msg = validate_api_key_for_model(model)
+            if not is_api_valid:
+                validation_errors.append(f"API CONFIGURATION: {api_error_msg}")
+            # 5. Validate Invariant API key for trace collection
+            is_invariant_valid, invariant_error_msg = validate_invariant_api_key()
+            if not is_invariant_valid:
+                validation_errors.append(f"TRACE COLLECTION: {invariant_error_msg}")
+            # 6. Validate critical model dependencies
+            is_models_valid, models_error_msg = validate_model_dependencies()
+            if not is_models_valid:
+                validation_errors.append(f"MODEL LOADING: {models_error_msg}")
+            # If there are validation errors (including API key), show them all in the popup
             if validation_errors:
                 error_timestamp = int(time.time() * 1000)
                 print(f"🚨 VALIDATION ERRORS FOUND: {len(validation_errors)} errors at {error_timestamp}")
                 "subject_confidence_scores": subject_confidence_scores,
                 "body_confidence_scores": body_confidence_scores
             }
+            try:
+                exec_log, final_out = submit_attack(from_addr.strip(), subject, body, model, defense_enabled, user_info.strip(), confidence_scores)
+            except Exception as e:
+                # Handle any setup or execution errors with detailed messages
+                error_str = str(e).lower()
+                original_error = str(e)
+                # Categorize errors and provide specific guidance
+                if "fasttext" in error_str or "lid.176.bin" in error_str:
+                    setup_error_msg = f"LANGUAGE MODEL ERROR: FastText language detection model failed to load. {original_error}"
+                    setup_error_msg += " This could be due to corrupted model file, insufficient memory, or missing dependencies. Try refreshing the page or contact support if the issue persists."
+                elif "instruction_classifier" in error_str or "instruction classifier" in error_str or "sanitizer" in error_str:
+                    setup_error_msg = f"DEFENSE MODEL ERROR: Instruction classifier model failed to load. {original_error}"
+                    setup_error_msg += " The defense system requires a working instruction classifier. This could be due to model file corruption, insufficient GPU memory, or missing dependencies."
+                elif "api_key" in error_str or "api key" in error_str or "authentication" in error_str or "unauthorized" in error_str:
+                    setup_error_msg = f"API AUTHENTICATION ERROR: {original_error}"
+                    setup_error_msg += " Please verify your API keys are correct and have sufficient credits/permissions."
+                elif "model" in error_str and ("not found" in error_str or "unavailable" in error_str or "invalid" in error_str):
+                    setup_error_msg = f"MODEL AVAILABILITY ERROR: {original_error}"
+                    setup_error_msg += " The selected model may be temporarily unavailable or you may not have access to it. Try a different model."
+                elif "network" in error_str or "connection" in error_str or "timeout" in error_str or "dns" in error_str:
+                    setup_error_msg = f"NETWORK ERROR: {original_error}"
+                    setup_error_msg += " Please check your internet connection and try again. If the problem persists, the service may be temporarily unavailable."
+                elif "memory" in error_str or "oom" in error_str or "cuda" in error_str or "gpu" in error_str:
+                    setup_error_msg = f"RESOURCE ERROR: {original_error}"
+                    setup_error_msg += " Insufficient system resources (memory/GPU). Try using a smaller model or refreshing the page."
+                elif "import" in error_str or "module" in error_str or "dependency" in error_str:
+                    setup_error_msg = f"DEPENDENCY ERROR: {original_error}"
+                    setup_error_msg += " Missing required dependencies. Please ensure all required packages are installed."
+                elif "permission" in error_str or "access" in error_str or "denied" in error_str:
+                    setup_error_msg = f"PERMISSION ERROR: {original_error}"
+                    setup_error_msg += " File system permission issue. Contact administrator if running on shared system."
+                else:
+                    # Generic catch-all with enhanced information
+                    setup_error_msg = f"RUNTIME ERROR: {original_error}"
+                    setup_error_msg += " An unexpected error occurred during execution. Please try again, and if the problem persists, check the browser console for more details or contact support."
+                error_timestamp = int(time.time() * 1000)
+                print(f"🚨 RUNTIME ERROR: {original_error} at {error_timestamp}")
+                print(f"🔍 Error category: {setup_error_msg.split(':')[0]}")
+                modal_html = create_error_modal_html([setup_error_msg])
+                return (
+                    gr.update(),  # final_output_display - no change
+                    gr.update(),  # results_display - no change
+                    gr.update(),  # trace_display - no change
+                    gr.update(),  # email1_display - no change
+                    gr.update(),  # email2_display - no change
+                    gr.update(),  # email3_display - no change
+                    gr.update(value=modal_html, visible=True)  # error_modal_html
+                )
             # Build a formatted results summary extracted from exec_log
             def build_results_html(log_text: str) -> str: