geekyrakshit committed on
Commit
6780f80
·
1 Parent(s): c89e6e0

update: evaluation + classifier guardrail

Browse files
application_pages/evaluation_app.py CHANGED
@@ -4,6 +4,7 @@ import time
4
  from importlib import import_module
5
 
6
  import pandas as pd
 
7
  import streamlit as st
8
  import weave
9
  from dotenv import load_dotenv
@@ -181,11 +182,12 @@ if st.session_state.dataset_previewed:
181
  st.session_state.evaluation_call_manager.call_list.append(
182
  {
183
  "guardrail_name": guardrail_name,
184
- "calls": st.session_state.evaluation_call_manager.collect_guardrail_guard_calls_from_eval(
185
- call=call
186
- ),
187
  }
188
  )
 
 
 
189
  st.dataframe(
190
  st.session_state.evaluation_call_manager.render_calls_to_streamlit()
191
  )
 
4
  from importlib import import_module
5
 
6
  import pandas as pd
7
+ import rich
8
  import streamlit as st
9
  import weave
10
  from dotenv import load_dotenv
 
182
  st.session_state.evaluation_call_manager.call_list.append(
183
  {
184
  "guardrail_name": guardrail_name,
185
+ "calls": st.session_state.evaluation_call_manager.collect_guardrail_guard_calls_from_eval(),
 
 
186
  }
187
  )
188
+ rich.print(
189
+ st.session_state.evaluation_call_manager.call_list
190
+ )
191
  st.dataframe(
192
  st.session_state.evaluation_call_manager.render_calls_to_streamlit()
193
  )
guardrails_genie/guardrails/injection/protectai_guardrail.py CHANGED
@@ -37,11 +37,6 @@ class PromptInjectionClassifierGuardrail(Guardrail):
37
  def classify(self, prompt: str):
38
  return self._classifier(prompt)
39
 
40
- @weave.op()
41
- def predict(self, prompt: str):
42
- response = self.classify(prompt)
43
- return {"safe": response[0]["label"] != "INJECTION"}
44
-
45
  @weave.op()
46
  def guard(self, prompt: str):
47
  response = self.classify(prompt)
@@ -50,3 +45,7 @@ class PromptInjectionClassifierGuardrail(Guardrail):
50
  "safe": response[0]["label"] != "INJECTION",
51
  "summary": f"Prompt is deemed {response[0]['label']} with {confidence_percentage}% confidence.",
52
  }
 
 
 
 
 
37
  def classify(self, prompt: str):
38
  return self._classifier(prompt)
39
 
 
 
 
 
 
40
  @weave.op()
41
  def guard(self, prompt: str):
42
  response = self.classify(prompt)
 
45
  "safe": response[0]["label"] != "INJECTION",
46
  "summary": f"Prompt is deemed {response[0]['label']} with {confidence_percentage}% confidence.",
47
  }
48
+
49
+ @weave.op()
50
+ def predict(self, prompt: str):
51
+ return self.guard(prompt)
guardrails_genie/utils.py CHANGED
@@ -22,16 +22,19 @@ class EvaluationCallManager:
22
  self.show_warning_in_app = False
23
  self.call_list = []
24
 
25
- def collect_guardrail_guard_calls_from_eval(self, call):
26
  guard_calls, count = [], 0
27
- for eval_predict_call in call.children():
28
- if "Evaluation.summarize" in eval_predict_call._op_name:
29
  break
30
- required_call = eval_predict_call.children()[0].children()[0].children()[0]
 
 
31
  guard_calls.append(
32
  {
33
- "input_prompt": str(required_call.inputs["prompt"]),
34
- "outputs": dict(required_call.output),
 
35
  }
36
  )
37
  count += 1
@@ -50,7 +53,7 @@ class EvaluationCallManager:
50
  dataframe[guardrail_call["guardrail_name"] + ".safe"] = [
51
  call["outputs"]["safe"] for call in guardrail_call["calls"]
52
  ]
53
- dataframe[guardrail_call["guardrail_name"] + ".summary"] = [
54
- call["outputs"]["summary"] for call in guardrail_call["calls"]
55
  ]
56
  return pd.DataFrame(dataframe)
 
22
  self.show_warning_in_app = False
23
  self.call_list = []
24
 
25
+ def collect_guardrail_guard_calls_from_eval(self):
26
  guard_calls, count = [], 0
27
+ for eval_predict_and_score_call in self.base_call.children():
28
+ if "Evaluation.summarize" in eval_predict_and_score_call._op_name:
29
  break
30
+ guardrail_predict_call = eval_predict_and_score_call.children()[0]
31
+ guard_call = guardrail_predict_call.children()[0]
32
+ score_call = eval_predict_and_score_call.children()[1]
33
  guard_calls.append(
34
  {
35
+ "input_prompt": str(guard_call.inputs["prompt"]),
36
+ "outputs": dict(guard_call.output),
37
+ "score": dict(score_call.output),
38
  }
39
  )
40
  count += 1
 
53
  dataframe[guardrail_call["guardrail_name"] + ".safe"] = [
54
  call["outputs"]["safe"] for call in guardrail_call["calls"]
55
  ]
56
+ dataframe[guardrail_call["guardrail_name"] + ".prediction_correctness"] = [
57
+ call["score"]["correct"] for call in guardrail_call["calls"]
58
  ]
59
  return pd.DataFrame(dataframe)