Spaces:
Running
on
Zero
Running
on
Zero
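Trace format updated: the submitted attack email is now passed through to the trace metadata, and the unused environment label is removed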
Browse files
agent.py
CHANGED
@@ -780,7 +780,7 @@ def is_running_on_spaces():
|
|
780 |
return os.getenv("IS_SPACE", "").lower() == "true"
|
781 |
|
782 |
|
783 |
-
def push_trace_to_explorer(trace_messages, annotations=None, user_info="", evaluation_results=None, model_name=""):
|
784 |
"""
|
785 |
Push the complete conversation trace to Invariant Labs Explorer using Push API
|
786 |
|
@@ -804,10 +804,8 @@ def push_trace_to_explorer(trace_messages, annotations=None, user_info="", evalu
|
|
804 |
# Determine dataset based on environment
|
805 |
if is_running_on_spaces():
|
806 |
dataset_name = "public-instruction-challenge"
|
807 |
-
environment = "Hugging Face Spaces"
|
808 |
else:
|
809 |
dataset_name = "instruction-challenge"
|
810 |
-
environment = "Local Development"
|
811 |
|
812 |
# Prepare metadata
|
813 |
metadata = {"pushed_at": datetime.now().isoformat()}
|
@@ -815,6 +813,8 @@ def push_trace_to_explorer(trace_messages, annotations=None, user_info="", evalu
|
|
815 |
metadata["user_info"] = user_info.strip()
|
816 |
if model_name and model_name.strip():
|
817 |
metadata["model_name"] = model_name.strip()
|
|
|
|
|
818 |
|
819 |
# Add evaluation results to metadata if provided
|
820 |
if evaluation_results:
|
@@ -857,7 +857,7 @@ def push_trace_to_explorer(trace_messages, annotations=None, user_info="", evalu
|
|
857 |
print(f" Error Message: {str(e)}")
|
858 |
|
859 |
|
860 |
-
def tool_agent_loop(user_query, inbox, system_prompt, model_name="gpt-4o-mini", defense_enabled=True, user_info="", fasttext_confidence_scores=None):
|
861 |
"""
|
862 |
Main tool agent loop implementation with proper tool call tracing:
|
863 |
1. Start with System + User input
|
@@ -1198,7 +1198,7 @@ def tool_agent_loop(user_query, inbox, system_prompt, model_name="gpt-4o-mini",
|
|
1198 |
if fasttext_confidence_scores:
|
1199 |
evaluation_results.update(fasttext_confidence_scores)
|
1200 |
|
1201 |
-
push_trace_to_explorer(trace_messages, all_annotations if all_annotations else None, user_info, evaluation_results, model_name)
|
1202 |
|
1203 |
# Add confirmation to execution log
|
1204 |
final_trace_msg = f"π Trace push completed (with {len(all_annotations)} annotations)"
|
|
|
780 |
return os.getenv("IS_SPACE", "").lower() == "true"
|
781 |
|
782 |
|
783 |
+
def push_trace_to_explorer(trace_messages, annotations=None, user_info="", evaluation_results=None, model_name="", attack_email=None):
|
784 |
"""
|
785 |
Push the complete conversation trace to Invariant Labs Explorer using Push API
|
786 |
|
|
|
804 |
# Determine dataset based on environment
|
805 |
if is_running_on_spaces():
|
806 |
dataset_name = "public-instruction-challenge"
|
|
|
807 |
else:
|
808 |
dataset_name = "instruction-challenge"
|
|
|
809 |
|
810 |
# Prepare metadata
|
811 |
metadata = {"pushed_at": datetime.now().isoformat()}
|
|
|
813 |
metadata["user_info"] = user_info.strip()
|
814 |
if model_name and model_name.strip():
|
815 |
metadata["model_name"] = model_name.strip()
|
816 |
+
if attack_email and attack_email.strip():
|
817 |
+
metadata["attack_email"] = attack_email.strip()
|
818 |
|
819 |
# Add evaluation results to metadata if provided
|
820 |
if evaluation_results:
|
|
|
857 |
print(f" Error Message: {str(e)}")
|
858 |
|
859 |
|
860 |
+
def tool_agent_loop(user_query, inbox, system_prompt, model_name="gpt-4o-mini", defense_enabled=True, user_info="", fasttext_confidence_scores=None, attack_email=None):
|
861 |
"""
|
862 |
Main tool agent loop implementation with proper tool call tracing:
|
863 |
1. Start with System + User input
|
|
|
1198 |
if fasttext_confidence_scores:
|
1199 |
evaluation_results.update(fasttext_confidence_scores)
|
1200 |
|
1201 |
+
push_trace_to_explorer(trace_messages, all_annotations if all_annotations else None, user_info, evaluation_results, model_name, attack_email)
|
1202 |
|
1203 |
# Add confirmation to execution log
|
1204 |
final_trace_msg = f"π Trace push completed (with {len(all_annotations)} annotations)"
|
app.py
CHANGED
@@ -747,6 +747,9 @@ def submit_attack(from_addr, attack_subject, attack_body, model_name="gpt-4o", d
|
|
747 |
)
|
748 |
INBOX.append(attack_email)
|
749 |
|
|
|
|
|
|
|
750 |
# Use passed confidence scores or empty defaults
|
751 |
fasttext_confidence_scores = confidence_scores or {
|
752 |
"subject_confidence_scores": [],
|
@@ -761,7 +764,8 @@ def submit_attack(from_addr, attack_subject, attack_body, model_name="gpt-4o", d
|
|
761 |
model_name=model_name,
|
762 |
defense_enabled=defense_enabled,
|
763 |
user_info=user_info,
|
764 |
-
fasttext_confidence_scores=fasttext_confidence_scores
|
|
|
765 |
)
|
766 |
|
767 |
# Return execution log, final output, and flagged content separately
|
|
|
747 |
)
|
748 |
INBOX.append(attack_email)
|
749 |
|
750 |
+
# Create concatenated attack email string for trace logging
|
751 |
+
attack_email_string = f"To: [email protected] | From: {from_addr} | Subject: {attack_subject} | Body: {attack_body}"
|
752 |
+
|
753 |
# Use passed confidence scores or empty defaults
|
754 |
fasttext_confidence_scores = confidence_scores or {
|
755 |
"subject_confidence_scores": [],
|
|
|
764 |
model_name=model_name,
|
765 |
defense_enabled=defense_enabled,
|
766 |
user_info=user_info,
|
767 |
+
fasttext_confidence_scores=fasttext_confidence_scores,
|
768 |
+
attack_email=attack_email_string
|
769 |
)
|
770 |
|
771 |
# Return execution log, final output, and flagged content separately
|