Spaces:
Running
Running
Commit
·
6780f80
1
Parent(s):
c89e6e0
update: evaluation + classifier guardrail
Browse files
application_pages/evaluation_app.py
CHANGED
@@ -4,6 +4,7 @@ import time
|
|
4 |
from importlib import import_module
|
5 |
|
6 |
import pandas as pd
|
|
|
7 |
import streamlit as st
|
8 |
import weave
|
9 |
from dotenv import load_dotenv
|
@@ -181,11 +182,12 @@ if st.session_state.dataset_previewed:
|
|
181 |
st.session_state.evaluation_call_manager.call_list.append(
|
182 |
{
|
183 |
"guardrail_name": guardrail_name,
|
184 |
-
"calls": st.session_state.evaluation_call_manager.collect_guardrail_guard_calls_from_eval(
|
185 |
-
call=call
|
186 |
-
),
|
187 |
}
|
188 |
)
|
|
|
|
|
|
|
189 |
st.dataframe(
|
190 |
st.session_state.evaluation_call_manager.render_calls_to_streamlit()
|
191 |
)
|
|
|
4 |
from importlib import import_module
|
5 |
|
6 |
import pandas as pd
|
7 |
+
import rich
|
8 |
import streamlit as st
|
9 |
import weave
|
10 |
from dotenv import load_dotenv
|
|
|
182 |
st.session_state.evaluation_call_manager.call_list.append(
|
183 |
{
|
184 |
"guardrail_name": guardrail_name,
|
185 |
+
"calls": st.session_state.evaluation_call_manager.collect_guardrail_guard_calls_from_eval(),
|
|
|
|
|
186 |
}
|
187 |
)
|
188 |
+
rich.print(
|
189 |
+
st.session_state.evaluation_call_manager.call_list
|
190 |
+
)
|
191 |
st.dataframe(
|
192 |
st.session_state.evaluation_call_manager.render_calls_to_streamlit()
|
193 |
)
|
guardrails_genie/guardrails/injection/protectai_guardrail.py
CHANGED
@@ -37,11 +37,6 @@ class PromptInjectionClassifierGuardrail(Guardrail):
|
|
37 |
def classify(self, prompt: str):
|
38 |
return self._classifier(prompt)
|
39 |
|
40 |
-
@weave.op()
|
41 |
-
def predict(self, prompt: str):
|
42 |
-
response = self.classify(prompt)
|
43 |
-
return {"safe": response[0]["label"] != "INJECTION"}
|
44 |
-
|
45 |
@weave.op()
|
46 |
def guard(self, prompt: str):
|
47 |
response = self.classify(prompt)
|
@@ -50,3 +45,7 @@ class PromptInjectionClassifierGuardrail(Guardrail):
|
|
50 |
"safe": response[0]["label"] != "INJECTION",
|
51 |
"summary": f"Prompt is deemed {response[0]['label']} with {confidence_percentage}% confidence.",
|
52 |
}
|
|
|
|
|
|
|
|
|
|
37 |
def classify(self, prompt: str):
|
38 |
return self._classifier(prompt)
|
39 |
|
|
|
|
|
|
|
|
|
|
|
40 |
@weave.op()
|
41 |
def guard(self, prompt: str):
|
42 |
response = self.classify(prompt)
|
|
|
45 |
"safe": response[0]["label"] != "INJECTION",
|
46 |
"summary": f"Prompt is deemed {response[0]['label']} with {confidence_percentage}% confidence.",
|
47 |
}
|
48 |
+
|
49 |
+
@weave.op()
|
50 |
+
def predict(self, prompt: str):
|
51 |
+
return self.guard(prompt)
|
guardrails_genie/utils.py
CHANGED
@@ -22,16 +22,19 @@ class EvaluationCallManager:
|
|
22 |
self.show_warning_in_app = False
|
23 |
self.call_list = []
|
24 |
|
25 |
-
def collect_guardrail_guard_calls_from_eval(self
|
26 |
guard_calls, count = [], 0
|
27 |
-
for
|
28 |
-
if "Evaluation.summarize" in
|
29 |
break
|
30 |
-
|
|
|
|
|
31 |
guard_calls.append(
|
32 |
{
|
33 |
-
"input_prompt": str(
|
34 |
-
"outputs": dict(
|
|
|
35 |
}
|
36 |
)
|
37 |
count += 1
|
@@ -50,7 +53,7 @@ class EvaluationCallManager:
|
|
50 |
dataframe[guardrail_call["guardrail_name"] + ".safe"] = [
|
51 |
call["outputs"]["safe"] for call in guardrail_call["calls"]
|
52 |
]
|
53 |
-
dataframe[guardrail_call["guardrail_name"] + ".
|
54 |
-
call["
|
55 |
]
|
56 |
return pd.DataFrame(dataframe)
|
|
|
22 |
self.show_warning_in_app = False
|
23 |
self.call_list = []
|
24 |
|
25 |
+
def collect_guardrail_guard_calls_from_eval(self):
|
26 |
guard_calls, count = [], 0
|
27 |
+
for eval_predict_and_score_call in self.base_call.children():
|
28 |
+
if "Evaluation.summarize" in eval_predict_and_score_call._op_name:
|
29 |
break
|
30 |
+
guardrail_predict_call = eval_predict_and_score_call.children()[0]
|
31 |
+
guard_call = guardrail_predict_call.children()[0]
|
32 |
+
score_call = eval_predict_and_score_call.children()[1]
|
33 |
guard_calls.append(
|
34 |
{
|
35 |
+
"input_prompt": str(guard_call.inputs["prompt"]),
|
36 |
+
"outputs": dict(guard_call.output),
|
37 |
+
"score": dict(score_call.output),
|
38 |
}
|
39 |
)
|
40 |
count += 1
|
|
|
53 |
dataframe[guardrail_call["guardrail_name"] + ".safe"] = [
|
54 |
call["outputs"]["safe"] for call in guardrail_call["calls"]
|
55 |
]
|
56 |
+
dataframe[guardrail_call["guardrail_name"] + ".prediction_correctness"] = [
|
57 |
+
call["score"]["correct"] for call in guardrail_call["calls"]
|
58 |
]
|
59 |
return pd.DataFrame(dataframe)
|