Spaces:

wandb
/

guardrails-genie

Running

App Files Files Community

geekyrakshit committed on Dec 3, 2024

Commit

4c92daa

unverified ·

2 Parent(s): 01e55c4 2633ee9

Merge pull request #9 from soumik12345/feat/secrets-detection

Browse files

Files changed (14) hide show

application_pages/chat_app.py +7 -0
docs/.DS_Store +0 -0
docs/guardrails/secrets_detection.md +3 -0
guardrails_genie/guardrails/__init__.py +5 -2
guardrails_genie/guardrails/secrets_detection/__init__.py +17 -0
guardrails_genie/guardrails/secrets_detection/secrets_detection.py +198 -0
guardrails_genie/guardrails/secrets_detection/secrets_patterns.jsonl +0 -0
guardrails_genie/regex_model.py +25 -18
mkdocs.yml +1 -0
pyproject.toml +7 -0
tests/__init__.py +0 -0
tests/guardrails_genie/__init__.py +0 -0
tests/guardrails_genie/guardrails/__init__.py +0 -0
tests/guardrails_genie/guardrails/test_secrets_detection.py +150 -0

application_pages/chat_app.py CHANGED Viewed

@@ -89,6 +89,13 @@ def initialize_guardrails():
                     guardrail_name,
                 )(should_anonymize=True)
             )
     st.session_state.guardrails_manager = GuardrailManager(
         guardrails=st.session_state.guardrails
     )

                     guardrail_name,
                 )(should_anonymize=True)
             )
+        else:
+            st.session_state.guardrails.append(
+                getattr(
+                    importlib.import_module("guardrails_genie.guardrails"),
+                    guardrail_name,
+                )()
+            )
     st.session_state.guardrails_manager = GuardrailManager(
         guardrails=st.session_state.guardrails
     )

docs/.DS_Store DELETED Viewed

Binary file (6.15 kB)

docs/guardrails/secrets_detection.md ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ # Secrets Detection
2	+
3	+ ::: guardrails_genie.guardrails.secrets_detection.secrets_detection

guardrails_genie/guardrails/__init__.py CHANGED Viewed

@@ -1,13 +1,15 @@
-from .entity_recognition import (
     PresidioEntityRecognitionGuardrail,
     RegexEntityRecognitionGuardrail,
     RestrictedTermsJudge,
     TransformersEntityRecognitionGuardrail,
 )
-from .injection import (
     PromptInjectionClassifierGuardrail,
     PromptInjectionSurveyGuardrail,
 )
 from .manager import GuardrailManager
 __all__ = [
@@ -18,4 +20,5 @@ __all__ = [
     "TransformersEntityRecognitionGuardrail",
     "RestrictedTermsJudge",
     "GuardrailManager",
 ]

+from guardrails_genie.guardrails.entity_recognition import (
     PresidioEntityRecognitionGuardrail,
     RegexEntityRecognitionGuardrail,
     RestrictedTermsJudge,
     TransformersEntityRecognitionGuardrail,
 )
+from guardrails_genie.guardrails.injection import (
     PromptInjectionClassifierGuardrail,
     PromptInjectionSurveyGuardrail,
 )
+from guardrails_genie.guardrails.secrets_detection import SecretsDetectionGuardrail
 from .manager import GuardrailManager
 __all__ = [
     "TransformersEntityRecognitionGuardrail",
     "RestrictedTermsJudge",
     "GuardrailManager",
+    "SecretsDetectionGuardrail",
 ]

guardrails_genie/guardrails/secrets_detection/__init__.py ADDED Viewed

	@@ -0,0 +1,17 @@

+from guardrails_genie.guardrails.secrets_detection.secrets_detection import (
+    DEFAULT_SECRETS_PATTERNS,
+    REDACTION,
+    SecretsDetectionGuardrail,
+    SecretsDetectionResponse,
+    SecretsDetectionSimpleResponse,
+    redact,
+)
+__all__ = [
+    "DEFAULT_SECRETS_PATTERNS",
+    "SecretsDetectionGuardrail",
+    "SecretsDetectionSimpleResponse",
+    "SecretsDetectionResponse",
+    "REDACTION",
+    "redact",
+]

guardrails_genie/guardrails/secrets_detection/secrets_detection.py ADDED Viewed

	@@ -0,0 +1,198 @@

+import hashlib
+import json
+import pathlib
+from enum import Enum
+from typing import Optional, Union
+import weave
+from pydantic import BaseModel
+from guardrails_genie.guardrails.base import Guardrail
+from guardrails_genie.regex_model import RegexModel
+def load_secrets_patterns() -> dict[str, list[str]]:
+    """
+    Load secret patterns from a JSONL file and return them as a dictionary.
+    Returns:
+        dict: A dictionary where keys are pattern names and values are lists of regex patterns.
+    """
+    default_patterns = {}
+    patterns = (
+        pathlib.Path(__file__).parent.absolute() / "secrets_patterns.jsonl"
+    ).read_text()
+    for pattern in patterns.splitlines():
+        pattern = json.loads(pattern)
+        default_patterns[pattern["name"]] = [rf"{pat}" for pat in pattern["patterns"]]
+    return default_patterns
+# Load default secret patterns from the JSONL file
+DEFAULT_SECRETS_PATTERNS = load_secrets_patterns()
+class REDACTION(str, Enum):
+    """
+    Enum for different types of redaction methods.
+    Members (as applied by `redact`; every mode except REDACT_NONE wraps its
+    output between "[REDACTED:]" and "[:REDACTED]"):
+        REDACT_PARTIAL: keep the first two and last two characters, joined by "..".
+        REDACT_ALL: replace every character of the secret with "*".
+        REDACT_HASH: replace the secret with its MD5 hex digest.
+        REDACT_NONE: leave the secret unchanged.
+    """
+    REDACT_PARTIAL = "REDACT_PARTIAL"
+    REDACT_ALL = "REDACT_ALL"
+    REDACT_HASH = "REDACT_HASH"
+    REDACT_NONE = "REDACT_NONE"
+def redact(text: str, matches: list[str], redaction_type: REDACTION) -> str:
+    """
+    Redact the given matches in the text based on the redaction type.
+    Args:
+        text (str): The input text to redact.
+        matches (list[str]): List of strings to be redacted.
+        redaction_type (REDACTION): The type of redaction to apply.
+    Returns:
+        str: The redacted text.
+    """
+    for match in matches:
+        if redaction_type == REDACTION.REDACT_PARTIAL:
+            replacement = "[REDACTED:]" + match[:2] + ".." + match[-2:] + "[:REDACTED]"
+        elif redaction_type == REDACTION.REDACT_ALL:
+            replacement = "[REDACTED:]" + ("*" * len(match)) + "[:REDACTED]"
+        elif redaction_type == REDACTION.REDACT_HASH:
+            replacement = (
+                "[REDACTED:]" + hashlib.md5(match.encode()).hexdigest() + "[:REDACTED]"
+            )
+        else:
+            replacement = match
+        text = text.replace(match, replacement)
+    return text
+class SecretsDetectionSimpleResponse(BaseModel):
+    """
+    A simple response model for secrets detection.
+    Attributes:
+        contains_secrets (bool): Indicates if secrets were detected.
+        explanation (str): Explanation of the detection result.
+        redacted_text (Optional[str]): The text with detected secrets redacted.
+            Defaults to None when not supplied.
+    """
+    contains_secrets: bool
+    explanation: str
+    redacted_text: Optional[str] = None
+    @property
+    def safe(self) -> bool:
+        """
+        Property to check if the text is safe (no secrets detected).
+        This is simply the negation of `contains_secrets`.
+        Returns:
+            bool: True if no secrets were detected, False otherwise.
+        """
+        return not self.contains_secrets
+class SecretsDetectionResponse(SecretsDetectionSimpleResponse):
+    """
+    A detailed response model for secrets detection.
+    Extends `SecretsDetectionSimpleResponse` with the matches themselves.
+    Attributes:
+        detected_secrets (dict[str, list[str]]): Dictionary of detected secrets,
+            keyed by secret type name, each value being the list of matched strings.
+    """
+    detected_secrets: dict[str, list[str]]
+class SecretsDetectionGuardrail(Guardrail):
+    """
+    A guardrail for detecting secrets in text using regex patterns.
+    reference: SecretBench: A Dataset of Software Secrets
+    https://arxiv.org/abs/2303.06729
+    Attributes:
+        regex_model (RegexModel): The regex model used for detection.
+        patterns (Union[dict[str, str], dict[str, list[str]]]): The patterns used for detection.
+        redaction (REDACTION): The type of redaction to apply.
+    """
+    regex_model: RegexModel
+    patterns: Union[dict[str, str], dict[str, list[str]]] = {}
+    redaction: REDACTION
+    def __init__(
+        self,
+        use_defaults: bool = True,
+        redaction: REDACTION = REDACTION.REDACT_ALL,
+        **kwargs,
+    ):
+        """
+        Initialize the SecretsDetectionGuardrail.
+        Args:
+            use_defaults (bool): Whether to use default patterns.
+            redaction (REDACTION): The type of redaction to apply.
+            **kwargs: Additional keyword arguments.
+        """
+        patterns = {}
+        if use_defaults:
+            patterns = DEFAULT_SECRETS_PATTERNS.copy()
+        if kwargs.get("patterns"):
+            patterns.update(kwargs["patterns"])
+        regex_model = RegexModel(patterns=patterns)
+        super().__init__(
+            regex_model=regex_model,
+            patterns=patterns,
+            redaction=redaction,
+        )
+    @weave.op()
+    def guard(
+        self,
+        prompt: str,
+        return_detected_secrets: bool = True,
+        **kwargs,
+    ) -> SecretsDetectionResponse | SecretsDetectionResponse:
+        """
+        Check if the input prompt contains any secrets based on the regex patterns.
+        Args:
+            prompt (str): Input text to check for secrets.
+            return_detected_secrets (bool): If True, returns detailed secrets type information.
+        Returns:
+            SecretsDetectionResponse or SecretsDetectionResponse: Detection results.
+        """
+        result = self.regex_model.check(prompt)
+        explanation_parts = []
+        if result.matched_patterns:
+            explanation_parts.append("Found the following secrets in the text:")
+            for secret_type, matches in result.matched_patterns.items():
+                explanation_parts.append(f"- {secret_type}: {len(matches)} instance(s)")
+        else:
+            explanation_parts.append("No secrets detected in the text.")
+        redacted_text = prompt
+        if result.matched_patterns:
+            for secret_type, matches in result.matched_patterns.items():
+                redacted_text = redact(redacted_text, matches, self.redaction)
+        if return_detected_secrets:
+            return SecretsDetectionResponse(
+                contains_secrets=not result.passed,
+                detected_secrets=result.matched_patterns,
+                explanation="\n".join(explanation_parts),
+                redacted_text=redacted_text,
+            )
+        else:
+            return SecretsDetectionSimpleResponse(
+                contains_secrets=not result.passed,
+                explanation="\n".join(explanation_parts),
+                redacted_text=redacted_text,
+            )

guardrails_genie/guardrails/secrets_detection/secrets_patterns.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

guardrails_genie/regex_model.py CHANGED Viewed

@@ -1,14 +1,14 @@
-import re
-from typing import Dict, List
 import weave
 from pydantic import BaseModel
 class RegexResult(BaseModel):
     passed: bool
-    matched_patterns: Dict[str, List[str]]
-    failed_patterns: List[str]
 class RegexModel(weave.Model):
@@ -19,16 +19,22 @@ class RegexModel(weave.Model):
         patterns (Dict[str, str]): Dictionary where key is pattern name and value is regex pattern.
     """
-    patterns: Dict[str, str]
-    def __init__(self, patterns: Dict[str, str]) -> None:
         super().__init__(patterns=patterns)
         self._compiled_patterns = {
-            name: re.compile(pattern) for name, pattern in patterns.items()
         }
     @weave.op()
-    def check(self, prompt: str) -> RegexResult:
         """
         Check text against all patterns and return detailed results.
@@ -41,17 +47,18 @@ class RegexModel(weave.Model):
         matched_patterns = {}
         failed_patterns = []
-        for pattern_name, pattern in self.patterns.items():
             matches = []
-            for match in re.finditer(pattern, prompt):
-                if match.groups():
-                    # If there are capture groups, join them with a separator
-                    matches.append(
-                        "-".join(str(g) for g in match.groups() if g is not None)
-                    )
-                else:
-                    # If no capture groups, use the full match
-                    matches.append(match.group(0))
             if matches:
                 matched_patterns[pattern_name] = matches

+from typing import Optional, Union
+import regex as re
 import weave
 from pydantic import BaseModel
 class RegexResult(BaseModel):
     passed: bool
+    matched_patterns: dict[str, list[str]]
+    failed_patterns: list[str]
 class RegexModel(weave.Model):
         patterns (Dict[str, str]): Dictionary where key is pattern name and value is regex pattern.
     """
+    patterns: Optional[Union[dict[str, str], dict[str, list[str]]]] = None
+    def __init__(
+        self, patterns: Optional[Union[dict[str, str], dict[str, list[str]]]] = None
+    ) -> None:
         super().__init__(patterns=patterns)
+        normalized_patterns = {}
+        for k, v in patterns.items():
+            normalized_patterns[k] = v if isinstance(v, list) else [v]
         self._compiled_patterns = {
+            name: [re.compile(p) for p in pattern]
+            for name, pattern in normalized_patterns.items()
         }
     @weave.op()
+    def check(self, text: str) -> RegexResult:
         """
         Check text against all patterns and return detailed results.
         matched_patterns = {}
         failed_patterns = []
+        for pattern_name, pats in self._compiled_patterns.items():
             matches = []
+            for pattern in pats:
+                for match in pattern.finditer(text):
+                    if match.groups():
+                        # If there are capture groups, join them with a separator
+                        matches.append(
+                            "-".join(str(g) for g in match.groups() if g is not None)
+                        )
+                    else:
+                        # If no capture groups, use the full match
+                        matches.append(match.group(0))
             if matches:
                 matched_patterns[pattern_name] = matches

mkdocs.yml CHANGED Viewed

@@ -73,6 +73,7 @@ nav:
     - Prompt Injection Guardrails:
       - Classifier Guardrail: 'guardrails/prompt_injection/classifier.md'
       - Survey Guardrail: 'guardrails/prompt_injection/llm_survey.md'
   - LLM: 'llm.md'
   - Metrics: 'metrics.md'
   - RegexModel: 'regex_model.md'

     - Prompt Injection Guardrails:
       - Classifier Guardrail: 'guardrails/prompt_injection/classifier.md'
       - Survey Guardrail: 'guardrails/prompt_injection/llm_survey.md'
+    - Secrets Detection Guardrail: "guardrails/secrets_detection.md"
   - LLM: 'llm.md'
   - Metrics: 'metrics.md'
   - RegexModel: 'regex_model.md'

pyproject.toml CHANGED Viewed

@@ -38,6 +38,13 @@ docs = [
     "mkdocs-jupyter>=0.25.0",
     "jupyter>=1.1.1",
 ]
 [tool.setuptools]
 py-modules = ["guardrails_genie"]

     "mkdocs-jupyter>=0.25.0",
     "jupyter>=1.1.1",
 ]
+test = [
+    "pytest>=8.2.0",
+    "pytest-asyncio>=0.23.6",
+    "pytest-cov>=5.0.0",
+    "pytest-xdist>=3.1.0",
+    "hypothesis>=6.122.1"
+]
 [tool.setuptools]
 py-modules = ["guardrails_genie"]

tests/__init__.py ADDED Viewed

File without changes

tests/guardrails_genie/__init__.py ADDED Viewed

File without changes

tests/guardrails_genie/guardrails/__init__.py ADDED Viewed

File without changes

tests/guardrails_genie/guardrails/test_secrets_detection.py ADDED Viewed

	@@ -0,0 +1,150 @@

+import hashlib
+import re
+import pytest
+from hypothesis import given, settings
+from hypothesis import strategies as st
+from guardrails_genie.guardrails.secrets_detection import (
+    DEFAULT_SECRETS_PATTERNS,
+    REDACTION,
+    SecretsDetectionGuardrail,
+    SecretsDetectionResponse,
+    SecretsDetectionSimpleResponse,
+    redact,
+)
+@pytest.fixture
+def mock_secrets_guard(monkeypatch):
+    def _mock_guard(*args, **kwargs):
+        prompt = kwargs.get("prompt")
+        return_detected_types = kwargs.get("return_detected_types")
+        if "safe text" in prompt:
+            if return_detected_types:
+                return SecretsDetectionResponse(
+                    contains_secrets=False,
+                    explanation="No secrets detected in the text.",
+                    detected_secrets={},
+                    redacted_text=prompt,
+                )
+            else:
+                return SecretsDetectionSimpleResponse(
+                    contains_secrets=False,
+                    explanation="No secrets detected in the text.",
+                    redacted_text=prompt,
+                )
+        else:
+            if return_detected_types:
+                return SecretsDetectionResponse(
+                    contains_secrets=True,
+                    explanation="The output contains secrets.",
+                    detected_secrets={"secrets": ["API_KEY"]},
+                    redacted_text="My secret key is [REDACTED:]************[:REDACTED]",
+                )
+            else:
+                return SecretsDetectionSimpleResponse(
+                    contains_secrets=True,
+                    explanation="The output contains secrets.",
+                    redacted_text="My secret key is [REDACTED:]************[:REDACTED]",
+                )
+    monkeypatch.setattr(
+        "guardrails_genie.guardrails.secrets_detection.SecretsDetectionGuardrail.guard",
+        _mock_guard,
+    )
+def test_redact_partial():
+    text = "My secret key is ABCDEFGHIJKL"
+    matches = ["ABCDEFGHIJKL"]
+    redacted_text = redact(text, matches, REDACTION.REDACT_PARTIAL)
+    assert redacted_text == "My secret key is [REDACTED:]AB..KL[:REDACTED]"
+def test_redact_all():
+    text = "My secret key is ABCDEFGHIJKL"
+    matches = ["ABCDEFGHIJKL"]
+    redacted_text = redact(text, matches, REDACTION.REDACT_ALL)
+    assert redacted_text == "My secret key is [REDACTED:]************[:REDACTED]"
+def test_redact_hash():
+    text = "My secret key is ABCDEFGHIJKL"
+    matches = ["ABCDEFGHIJKL"]
+    hashed_value = hashlib.md5("ABCDEFGHIJKL".encode()).hexdigest()
+    redacted_text = redact(text, matches, REDACTION.REDACT_HASH)
+    assert redacted_text == f"My secret key is [REDACTED:]{hashed_value}[:REDACTED]"
+def test_redact_no_match():
+    text = "My secret key is ABCDEFGHIJKL"
+    matches = ["XYZ"]
+    redacted_text = redact(text, matches, REDACTION.REDACT_ALL)
+    assert redacted_text == text
+def test_secrets_detection_guardrail_detect_types(mock_secrets_guard):
+    from guardrails_genie.guardrails.secrets_detection import (
+        REDACTION,
+        SecretsDetectionGuardrail,
+    )
+    guardrail = SecretsDetectionGuardrail(redaction=REDACTION.REDACT_ALL)
+    prompt = "My secret key is ABCDEFGHIJKL"
+    result = guardrail.guard(prompt=prompt, return_detected_secrets=True)
+    assert result.contains_secrets is True
+    assert result.explanation == "The output contains secrets."
+    assert result.detected_secrets == {"secrets": ["API_KEY"]}
+    assert result.redacted_text == "My secret key is [REDACTED:]************[:REDACTED]"
+def test_secrets_detection_guardrail_simple_response(mock_secrets_guard):
+    from guardrails_genie.guardrails.secrets_detection import (
+        REDACTION,
+        SecretsDetectionGuardrail,
+    )
+    guardrail = SecretsDetectionGuardrail(redaction=REDACTION.REDACT_ALL)
+    prompt = "My secret key is ABCDEFGHIJKL"
+    result = guardrail.guard(prompt=prompt, return_detected_secrets=False)
+    assert result.contains_secrets is True
+    assert result.explanation == "The output contains secrets."
+    assert result.redacted_text == "My secret key is [REDACTED:]************[:REDACTED]"
+def test_secrets_detection_guardrail_no_secrets(mock_secrets_guard):
+    from guardrails_genie.guardrails.secrets_detection import (
+        REDACTION,
+        SecretsDetectionGuardrail,
+    )
+    guardrail = SecretsDetectionGuardrail(redaction=REDACTION.REDACT_ALL)
+    prompt = "This is a safe text with no secrets."
+    result = guardrail.guard(prompt=prompt, return_detected_secrets=True)
+    assert result.contains_secrets is False
+    assert result.explanation == "No secrets detected in the text."
+    assert result.detected_secrets == {}
+    assert result.redacted_text == prompt
+# Create a strategy to generate strings that match the patterns
+def pattern_strategy(pattern):
+    return st.from_regex(re.compile(pattern), fullmatch=True)
+@settings(deadline=1000)  # Set the deadline to 1000 milliseconds (1 second)
+@given(pattern_strategy(DEFAULT_SECRETS_PATTERNS["JwtToken"][0]))
+def test_specific_pattern_guardrail(text):
+    guardrail = SecretsDetectionGuardrail(redaction=REDACTION.REDACT_ALL)
+    result = guardrail.guard(prompt=text, return_detected_secrets=True)
+    assert result.contains_secrets is True
+    assert "JwtToken" in result.detected_secrets