Update guardrail.py

guardrail.py  CHANGED  (+24 -34)
@@ -1,52 +1,43 @@
 from together import Together
-from dotenv import load_dotenv
-import os
+from helper import get_together_api_key
 
-load_dotenv()
-
-api_key=os.getenv("API_KEY")
 # Initialize Together client
-client = Together(api_key=api_key)
+client = Together(api_key=get_together_api_key())
 
-# Set the content policy
-content_policy = {
+# Set the safety content policy
+safe_content_policy = {
     "policy": """
-U1: Topic Restriction.
+O1: Topic Restriction.
 Should not:
-- Respond to questions outside the domain of safe child-friendly content.
-- Provide answers to sensitive or inappropriate topics such as violence, hate speech, profanity, or sexual content.
-Can:
-- Provide responses that are age-appropriate, educational, and entertaining.
-
-U2: Personal Information.
+- Respond to questions outside the domain of safe child-friendly content.
+- Provide answers to sensitive or inappropriate topics such as violence, hate speech, profanity, or sexual content.
+O2: Sensitive Information.
 Should not:
-- Request or respond with any personal or sensitive user data.
+- Request or respond with any personal or sensitive user data.
 Can:
-- Discuss general knowledge, child-friendly educational content, or safe topics.
-
-U3: Language.
+- Discuss general knowledge, child-friendly educational content, or safe topics.
+O3: Language.
 Should not:
 - Use profane, offensive, or inappropriate language.
-- Use language that could be perceived as harmful or inappropriate for minors.
 Can:
-- Use simple, clear, and friendly language suitable for children.
+- Use simple, clear, and friendly language suitable for children.
 """
 }
 
 def is_safe(message: str) -> bool:
     """
-    Validates if the given message complies with the safety policy using LlamaGuard.
-
+    Validates if the given message complies with the safety policy using LlamaGuard.
+
     Args:
         message (str): User message or chatbot response.
-
+
     Returns:
         bool: True if the message is safe, False otherwise.
     """
-    # Prepare the LlamaGuard prompt
-    prompt = f"""[INST] Task: Validate if the given 'message' adheres to the safety policy below:
+    # Prepare the LlamaGuard prompt
+    prompt = f"""[INST] Task: Validate if the given 'message' adheres to the safety policy below:
     <BEGIN SAFETY POLICY>
-    {content_policy['policy']}
+    {safe_content_policy['policy']}
     <END SAFETY POLICY>
     <BEGIN MESSAGE>
     {message}
@@ -56,16 +47,15 @@ def is_safe(message: str) -> bool:
     - If unsafe, a second line must include a comma-separated list of violated categories. [/INST]"""
 
     try:
-        # Send the prompt to the LlamaGuard model
+        # Send the prompt to the LlamaGuard model
         response = client.completions.create(
-            model="meta-llama/Meta-Llama-Guard-3-8B",
+            model="meta-llama/Meta-Llama-Guard-3-8B",
             prompt=prompt
         )
 
-        # Parse the result
+        # Parse the result
         result = response.choices[0].text.strip().lower()
-        return result
-
+        return result.startswith('safe')  # Ensure 'safe' is at the beginning
    except Exception as e:
-        print(f"Error in guardrail check: {e}")
-        return False
+        print(f"Error in guardrail check: {e}")
+        return False  # Default to unsafe if an error occurs