Sasidhar committed on
Commit efdc7de · verified · 1 Parent(s): a7b98db

Create gaurdrails_manager.py

Files changed (1)
  1. gaurdrails_manager.py +48 -0
gaurdrails_manager.py ADDED
@@ -0,0 +1,48 @@
+ from __future__ import annotations  # lets the GuardrailsConfig annotation below resolve lazily
+
+ import re
+
+ # Note: GuardrailsConfig (used as a type hint on GuardrailsManager.__init__) is
+ # expected to be imported or defined elsewhere in the application; it only needs
+ # boolean flags such as factual_consistency and toxicity.
+
+ # A simple result class to hold individual check outcomes.
+ class Result:
+     def __init__(self):
+         self.details = {}
+
+     def add(self, rule_name: str, passed: bool):
+         self.details[rule_name] = passed
+
+     def grounded(self) -> bool:
+         # The response is considered "grounded" if all enabled rules pass.
+         return all(self.details.values())
+
+ # Define guardrail rule classes.
+ class FactualConsistencyRule:
+     name = "FactualConsistency"
+
+     def check(self, response_text: str) -> bool:
+         # For demonstration: pass if the response contains the word "fact".
+         return "fact" in response_text.lower()
+
+ class ToxicityRule:
+     name = "Toxicity"
+
+     def check(self, response_text: str) -> bool:
+         # For demonstration: fail if negative words like "hate" or "kill" are found.
+         return not re.search(r"(hate|kill)", response_text, re.IGNORECASE)
+
+ # Manager class to load and execute the enabled guardrail rules.
+ class GuardrailsManager:
+     def __init__(self, config: GuardrailsConfig):
+         self.config = config
+         self.rules = self.load_rules()
+
+     def load_rules(self):
+         rules = []
+         if self.config.factual_consistency:
+             rules.append(FactualConsistencyRule())
+         if self.config.toxicity:
+             rules.append(ToxicityRule())
+         # Add additional rules based on configuration here.
+         return rules
+
+     def check(self, response_text: str) -> Result:
+         result = Result()
+         for rule in self.rules:
+             rule_result = rule.check(response_text)
+             result.add(rule.name, rule_result)
+         return result
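
For reference, a minimal usage sketch of the new module is below. The GuardrailsConfig defined here is a hypothetical stand-in, since the real config class is not part of this commit; all the manager requires is an object exposing the boolean flags factual_consistency and toxicity that load_rules() reads.

# Hypothetical stand-in for the real GuardrailsConfig, which lives outside this commit.
class GuardrailsConfig:
    def __init__(self, factual_consistency: bool = True, toxicity: bool = True):
        self.factual_consistency = factual_consistency
        self.toxicity = toxicity

config = GuardrailsConfig(factual_consistency=True, toxicity=True)
manager = GuardrailsManager(config)

result = manager.check("In fact, the answer cites its sources.")
print(result.details)     # {'FactualConsistency': True, 'Toxicity': True}
print(result.grounded())  # True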