Spaces:

Sasidhar
/

llmgaurdrails

Sleeping

App Files Files Community

Sasidhar committed on Mar 1

Commit

efdc7de

verified ·

1 Parent(s): a7b98db

Create gaurdrails_manager.py

Browse files

Files changed (1) hide show

gaurdrails_manager.py +48 -0

gaurdrails_manager.py ADDED Viewed

	@@ -0,0 +1,48 @@

+# A simple result class to hold individual check outcomes.
class Result:
    """Collects the outcome of each guardrail rule that was run.

    Outcomes are kept in ``details``, a dict keyed by rule name.
    """

    def __init__(self):
        # Rule name -> outcome recorded for that rule.
        self.details = dict()

    def add(self, rule_name: str, passed: bool):
        """Record ``passed`` as the outcome for ``rule_name``.

        Recording the same rule name again overwrites the earlier outcome.
        """
        self.details.update({rule_name: passed})

    def grounded(self) -> bool:
        """Return True when every recorded outcome is truthy.

        With no outcomes recorded the result is also True.
        """
        for outcome in self.details.values():
            if not outcome:
                return False
        return True
+# Define guardrail rule classes.
class FactualConsistencyRule:
    """Demonstration rule: passes when the response mentions "fact".

    The match is a case-insensitive substring test, so any text containing
    the letters "fact" (in any casing) passes.
    """

    # Key under which this rule's outcome is reported.
    name = "FactualConsistency"

    def check(self, response_text: str) -> bool:
        """Return True if the lowercased response contains "fact"."""
        lowered = response_text.lower()
        return lowered.find("fact") >= 0
class ToxicityRule:
    """Demonstration rule: fails when the response contains a word starting
    with "hate" or "kill" (case-insensitive).
    """

    # Key under which this rule's outcome is reported.
    name = "Toxicity"

    def check(self, response_text: str) -> bool:
        """Return True when no flagged word is found, False otherwise.

        Args:
            response_text: The response text to scan.

        Returns:
            False if any word in ``response_text`` begins with "hate" or
            "kill" (e.g. "hate", "hates", "killing"); True otherwise.
        """
        # Imported locally: the module has no top-level ``import re``, so the
        # name would otherwise be undefined when this method runs.
        import re

        # ``\b`` anchors the match at the start of a word so that harmless
        # words which merely contain the letters ("skill", "whatever") are
        # not flagged, while inflected forms ("hates", "killing") still are.
        match = re.search(r"\b(?:hate|kill)", response_text, re.IGNORECASE)
        return match is None
+# Manager class to load and execute the enabled guardrail rules.
class GuardrailsManager:
    """Builds the enabled guardrail rules from a config and runs them.

    The config object is only read for two attributes,
    ``factual_consistency`` and ``toxicity``; a truthy value enables the
    corresponding rule.
    """

    # The annotations below are string forward references. ``GuardrailsConfig``
    # is not defined or imported in this module, and a bare annotation is
    # evaluated when the method is defined, which would raise NameError as
    # soon as the class body runs.
    def __init__(self, config: "GuardrailsConfig"):
        """Store ``config`` and build the list of enabled rules.

        Args:
            config: Object exposing ``factual_consistency`` and ``toxicity``
                attributes.
        """
        self.config = config
        self.rules = self.load_rules()

    def load_rules(self):
        """Return a new list of rule instances enabled by ``self.config``.

        The factual-consistency rule, when enabled, comes before the
        toxicity rule.
        """
        rules = []
        if self.config.factual_consistency:
            rules.append(FactualConsistencyRule())
        if self.config.toxicity:
            rules.append(ToxicityRule())
        # Add additional rules based on configuration here.
        return rules

    def check(self, response_text: str) -> "Result":
        """Run every loaded rule against ``response_text``.

        Args:
            response_text: The response text handed to each rule's ``check``.

        Returns:
            A ``Result`` holding one entry per loaded rule, keyed by the
            rule's ``name``.
        """
        result = Result()
        for rule in self.rules:
            result.add(rule.name, rule.check(response_text))
        return result