Spaces:
Running
Running
Merge pull request #9 from soumik12345/feat/secrets-detection
Browse files- application_pages/chat_app.py +7 -0
- docs/.DS_Store +0 -0
- docs/guardrails/secrets_detection.md +3 -0
- guardrails_genie/guardrails/__init__.py +5 -2
- guardrails_genie/guardrails/secrets_detection/__init__.py +17 -0
- guardrails_genie/guardrails/secrets_detection/secrets_detection.py +198 -0
- guardrails_genie/guardrails/secrets_detection/secrets_patterns.jsonl +0 -0
- guardrails_genie/regex_model.py +25 -18
- mkdocs.yml +1 -0
- pyproject.toml +7 -0
- tests/__init__.py +0 -0
- tests/guardrails_genie/__init__.py +0 -0
- tests/guardrails_genie/guardrails/__init__.py +0 -0
- tests/guardrails_genie/guardrails/test_secrets_detection.py +150 -0
application_pages/chat_app.py
CHANGED
@@ -89,6 +89,13 @@ def initialize_guardrails():
|
|
89 |
guardrail_name,
|
90 |
)(should_anonymize=True)
|
91 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
st.session_state.guardrails_manager = GuardrailManager(
|
93 |
guardrails=st.session_state.guardrails
|
94 |
)
|
|
|
89 |
guardrail_name,
|
90 |
)(should_anonymize=True)
|
91 |
)
|
92 |
+
else:
|
93 |
+
st.session_state.guardrails.append(
|
94 |
+
getattr(
|
95 |
+
importlib.import_module("guardrails_genie.guardrails"),
|
96 |
+
guardrail_name,
|
97 |
+
)()
|
98 |
+
)
|
99 |
st.session_state.guardrails_manager = GuardrailManager(
|
100 |
guardrails=st.session_state.guardrails
|
101 |
)
|
docs/.DS_Store
DELETED
Binary file (6.15 kB)
|
|
docs/guardrails/secrets_detection.md
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
# Secrets Detection
|
2 |
+
|
3 |
+
::: guardrails_genie.guardrails.secrets_detection.secrets_detection
|
guardrails_genie/guardrails/__init__.py
CHANGED
@@ -1,13 +1,15 @@
|
|
1 |
-
from .entity_recognition import (
|
2 |
PresidioEntityRecognitionGuardrail,
|
3 |
RegexEntityRecognitionGuardrail,
|
4 |
RestrictedTermsJudge,
|
5 |
TransformersEntityRecognitionGuardrail,
|
6 |
)
|
7 |
-
from .injection import (
|
8 |
PromptInjectionClassifierGuardrail,
|
9 |
PromptInjectionSurveyGuardrail,
|
10 |
)
|
|
|
|
|
11 |
from .manager import GuardrailManager
|
12 |
|
13 |
__all__ = [
|
@@ -18,4 +20,5 @@ __all__ = [
|
|
18 |
"TransformersEntityRecognitionGuardrail",
|
19 |
"RestrictedTermsJudge",
|
20 |
"GuardrailManager",
|
|
|
21 |
]
|
|
|
1 |
+
from guardrails_genie.guardrails.entity_recognition import (
|
2 |
PresidioEntityRecognitionGuardrail,
|
3 |
RegexEntityRecognitionGuardrail,
|
4 |
RestrictedTermsJudge,
|
5 |
TransformersEntityRecognitionGuardrail,
|
6 |
)
|
7 |
+
from guardrails_genie.guardrails.injection import (
|
8 |
PromptInjectionClassifierGuardrail,
|
9 |
PromptInjectionSurveyGuardrail,
|
10 |
)
|
11 |
+
from guardrails_genie.guardrails.secrets_detection import SecretsDetectionGuardrail
|
12 |
+
|
13 |
from .manager import GuardrailManager
|
14 |
|
15 |
__all__ = [
|
|
|
20 |
"TransformersEntityRecognitionGuardrail",
|
21 |
"RestrictedTermsJudge",
|
22 |
"GuardrailManager",
|
23 |
+
"SecretsDetectionGuardrail",
|
24 |
]
|
guardrails_genie/guardrails/secrets_detection/__init__.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from guardrails_genie.guardrails.secrets_detection.secrets_detection import (
|
2 |
+
DEFAULT_SECRETS_PATTERNS,
|
3 |
+
REDACTION,
|
4 |
+
SecretsDetectionGuardrail,
|
5 |
+
SecretsDetectionResponse,
|
6 |
+
SecretsDetectionSimpleResponse,
|
7 |
+
redact,
|
8 |
+
)
|
9 |
+
|
10 |
+
__all__ = [
|
11 |
+
"DEFAULT_SECRETS_PATTERNS",
|
12 |
+
"SecretsDetectionGuardrail",
|
13 |
+
"SecretsDetectionSimpleResponse",
|
14 |
+
"SecretsDetectionResponse",
|
15 |
+
"REDACTION",
|
16 |
+
"redact",
|
17 |
+
]
|
guardrails_genie/guardrails/secrets_detection/secrets_detection.py
ADDED
@@ -0,0 +1,198 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import hashlib
|
2 |
+
import json
|
3 |
+
import pathlib
|
4 |
+
from enum import Enum
|
5 |
+
from typing import Optional, Union
|
6 |
+
|
7 |
+
import weave
|
8 |
+
from pydantic import BaseModel
|
9 |
+
|
10 |
+
from guardrails_genie.guardrails.base import Guardrail
|
11 |
+
from guardrails_genie.regex_model import RegexModel
|
12 |
+
|
13 |
+
|
14 |
+
def load_secrets_patterns(
    path: Optional[Union[str, pathlib.Path]] = None,
) -> dict[str, list[str]]:
    """
    Load secret patterns from a JSONL file and return them as a dictionary.

    Each non-blank line of the file must be a JSON object with a ``"name"``
    key and a ``"patterns"`` key holding a list of regex strings.

    Args:
        path (Optional[Union[str, pathlib.Path]]): JSONL file to read. When
            omitted, ``secrets_patterns.jsonl`` next to this module is used.

    Returns:
        dict: A dictionary where keys are pattern names and values are lists of regex patterns.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks the ``"name"`` or ``"patterns"`` key.
    """
    if path is None:
        path = pathlib.Path(__file__).parent.absolute() / "secrets_patterns.jsonl"

    loaded: dict[str, list[str]] = {}
    # Decode explicitly as UTF-8 so the result does not depend on the host locale.
    with open(path, encoding="utf-8") as handle:
        # Iterating the handle splits on newlines only, so a Unicode line
        # separator embedded in a JSON string cannot cut a record in half.
        for raw_line in handle:
            raw_line = raw_line.strip()
            if not raw_line:
                # Tolerate blank lines (e.g. a trailing newline or spacing).
                continue
            record = json.loads(raw_line)
            loaded[record["name"]] = [str(pat) for pat in record["patterns"]]
    return loaded
|
30 |
+
|
31 |
+
|
32 |
+
# Load default secret patterns from the JSONL file
|
33 |
+
DEFAULT_SECRETS_PATTERNS = load_secrets_patterns()
|
34 |
+
|
35 |
+
|
36 |
+
class REDACTION(str, Enum):
    """
    Enum for different types of redaction methods.
    """

    # Keep the first and last two characters of the secret.
    REDACT_PARTIAL = "REDACT_PARTIAL"
    # Replace every character of the secret with "*".
    REDACT_ALL = "REDACT_ALL"
    # Replace the secret with its MD5 hex digest.
    REDACT_HASH = "REDACT_HASH"
    # Leave the text untouched.
    REDACT_NONE = "REDACT_NONE"


def redact(text: str, matches: list[str], redaction_type: REDACTION) -> str:
    """
    Redact the given matches in the text based on the redaction type.

    All matches are replaced in a single pass over the original text, so a
    match can never hit the ``[REDACTED:]`` markers inserted for an earlier
    match. Empty matches are ignored and duplicates are collapsed.

    Args:
        text (str): The input text to redact.
        matches (list[str]): List of strings to be redacted.
        redaction_type (REDACTION): The type of redaction to apply.

    Returns:
        str: The redacted text. With ``REDACT_NONE`` (or an unrecognised type)
        the text is returned unchanged.
    """
    # Local import: this module has no top-level ``re`` import.
    import re

    # Drop empty strings (``str.replace("", x)`` would insert ``x`` between
    # every character) and try longer secrets first so that a secret which is
    # a substring of another does not split it.
    needles = sorted({m for m in matches if m}, key=lambda m: (-len(m), m))
    if not needles:
        return text

    def _replacement(found):
        secret = found.group(0)
        if redaction_type == REDACTION.REDACT_PARTIAL:
            if len(secret) <= 4:
                # Showing 2 + 2 characters of such a short secret would
                # reveal all of it, so mask it completely instead.
                masked = "*" * len(secret)
            else:
                masked = secret[:2] + ".." + secret[-2:]
        elif redaction_type == REDACTION.REDACT_ALL:
            masked = "*" * len(secret)
        elif redaction_type == REDACTION.REDACT_HASH:
            # NOTE(review): MD5 is kept for output compatibility; it is a
            # weak, unsalted digest and low-entropy secrets can be recovered
            # from it by brute force.
            masked = hashlib.md5(secret.encode()).hexdigest()
        else:
            return secret
        return "[REDACTED:]" + masked + "[:REDACTED]"

    pattern = re.compile("|".join(re.escape(needle) for needle in needles))
    return pattern.sub(_replacement, text)
|
72 |
+
|
73 |
+
|
74 |
+
class SecretsDetectionSimpleResponse(BaseModel):
    """
    Compact result of a secrets scan, without the per-type match details.

    Attributes:
        contains_secrets (bool): True when at least one secret was detected.
        explanation (str): Human-readable summary of the detection result.
        redacted_text (Optional[str]): The scanned text after redaction;
            defaults to None.
    """

    contains_secrets: bool
    explanation: str
    redacted_text: Optional[str] = None

    @property
    def safe(self) -> bool:
        """
        Tell whether the scanned text is free of secrets.

        Returns:
            bool: The negation of ``contains_secrets``.
        """
        flagged = self.contains_secrets
        return not flagged
|
97 |
+
|
98 |
+
|
99 |
+
class SecretsDetectionResponse(SecretsDetectionSimpleResponse):
    """
    Full result of a secrets scan: the simple response plus the matches.

    Attributes:
        detected_secrets (dict[str, list[str]]): Matched strings, grouped
            under the name of the secret type that matched them.
    """

    detected_secrets: dict[str, list[str]]
|
108 |
+
|
109 |
+
|
110 |
+
class SecretsDetectionGuardrail(Guardrail):
    """
    A guardrail for detecting secrets in text using regex patterns.

    Reference: SecretBench: A Dataset of Software Secrets
    https://arxiv.org/abs/2303.06729

    Attributes:
        regex_model (RegexModel): The regex model used for detection.
        patterns (Union[dict[str, str], dict[str, list[str]]]): The patterns used for detection.
        redaction (REDACTION): The type of redaction to apply.
    """

    regex_model: RegexModel
    patterns: Union[dict[str, str], dict[str, list[str]]] = {}
    redaction: REDACTION

    def __init__(
        self,
        use_defaults: bool = True,
        redaction: REDACTION = REDACTION.REDACT_ALL,
        **kwargs,
    ):
        """
        Initialize the SecretsDetectionGuardrail.

        Args:
            use_defaults (bool): Whether to start from DEFAULT_SECRETS_PATTERNS.
            redaction (REDACTION): The type of redaction to apply.
            **kwargs: Only ``patterns`` is read; when truthy, it is merged
                over the defaults, replacing default entries with the same name.
        """
        # Copy so that merging custom patterns never mutates the module-level defaults.
        patterns = DEFAULT_SECRETS_PATTERNS.copy() if use_defaults else {}
        custom_patterns = kwargs.get("patterns")
        if custom_patterns:
            patterns.update(custom_patterns)

        super().__init__(
            regex_model=RegexModel(patterns=patterns),
            patterns=patterns,
            redaction=redaction,
        )

    @weave.op()
    def guard(
        self,
        prompt: str,
        return_detected_secrets: bool = True,
        **kwargs,
    ) -> SecretsDetectionResponse | SecretsDetectionSimpleResponse:
        """
        Check if the input prompt contains any secrets based on the regex patterns.

        Args:
            prompt (str): Input text to check for secrets.
            return_detected_secrets (bool): If True, returns detailed secrets type information.
            **kwargs: Accepted but not used by this method.

        Returns:
            SecretsDetectionResponse when ``return_detected_secrets`` is True,
            otherwise SecretsDetectionSimpleResponse.
        """
        result = self.regex_model.check(prompt)
        detected = result.matched_patterns

        redacted_text = prompt
        if detected:
            explanation_parts = ["Found the following secrets in the text:"]
            for secret_type, matches in detected.items():
                explanation_parts.append(f"- {secret_type}: {len(matches)} instance(s)")
                # NOTE(review): redact() replaces literal substrings; a match the
                # regex model did not copy verbatim from the prompt would be left
                # unredacted. Confirm against RegexModel.check.
                redacted_text = redact(redacted_text, matches, self.redaction)
        else:
            explanation_parts = ["No secrets detected in the text."]
        explanation = "\n".join(explanation_parts)

        if return_detected_secrets:
            return SecretsDetectionResponse(
                contains_secrets=not result.passed,
                detected_secrets=detected,
                explanation=explanation,
                redacted_text=redacted_text,
            )
        return SecretsDetectionSimpleResponse(
            contains_secrets=not result.passed,
            explanation=explanation,
            redacted_text=redacted_text,
        )
|
guardrails_genie/guardrails/secrets_detection/secrets_patterns.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
|
|
guardrails_genie/regex_model.py
CHANGED
@@ -1,14 +1,14 @@
|
|
1 |
-
import
|
2 |
-
from typing import Dict, List
|
3 |
|
|
|
4 |
import weave
|
5 |
from pydantic import BaseModel
|
6 |
|
7 |
|
8 |
class RegexResult(BaseModel):
|
9 |
passed: bool
|
10 |
-
matched_patterns:
|
11 |
-
failed_patterns:
|
12 |
|
13 |
|
14 |
class RegexModel(weave.Model):
|
@@ -19,16 +19,22 @@ class RegexModel(weave.Model):
|
|
19 |
patterns (Dict[str, str]): Dictionary where key is pattern name and value is regex pattern.
|
20 |
"""
|
21 |
|
22 |
-
patterns:
|
23 |
|
24 |
-
def __init__(
|
|
|
|
|
25 |
super().__init__(patterns=patterns)
|
|
|
|
|
|
|
26 |
self._compiled_patterns = {
|
27 |
-
name: re.compile(
|
|
|
28 |
}
|
29 |
|
30 |
@weave.op()
|
31 |
-
def check(self,
|
32 |
"""
|
33 |
Check text against all patterns and return detailed results.
|
34 |
|
@@ -41,17 +47,18 @@ class RegexModel(weave.Model):
|
|
41 |
matched_patterns = {}
|
42 |
failed_patterns = []
|
43 |
|
44 |
-
for pattern_name,
|
45 |
matches = []
|
46 |
-
for
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
|
|
55 |
|
56 |
if matches:
|
57 |
matched_patterns[pattern_name] = matches
|
|
|
1 |
+
from typing import Optional, Union
|
|
|
2 |
|
3 |
+
import regex as re
|
4 |
import weave
|
5 |
from pydantic import BaseModel
|
6 |
|
7 |
|
8 |
class RegexResult(BaseModel):
    """Result returned by a regex check over a piece of text."""

    # Overall verdict of the check.
    passed: bool
    # Matches found, keyed by pattern name.
    matched_patterns: dict[str, list[str]]
    # Presumably the names of patterns that produced no match; verify against the check logic.
    failed_patterns: list[str]
|
12 |
|
13 |
|
14 |
class RegexModel(weave.Model):
|
|
|
19 |
patterns (Dict[str, str]): Dictionary where key is pattern name and value is regex pattern.
|
20 |
"""
|
21 |
|
22 |
+
patterns: Optional[Union[dict[str, str], dict[str, list[str]]]] = None
|
23 |
|
24 |
+
def __init__(
    self, patterns: Optional[Union[dict[str, str], dict[str, list[str]]]] = None
) -> None:
    """
    Store the patterns and pre-compile them.

    Args:
        patterns: Mapping of pattern name to either a single regex string or
            a list of regex strings. ``None`` (the default) is treated as
            "no patterns".
    """
    super().__init__(patterns=patterns)
    # `patterns` defaults to None, so fall back to an empty mapping instead
    # of failing on `None.items()`. A bare string is wrapped in a list so
    # every name maps to a list of compiled patterns.
    self._compiled_patterns = {
        name: [re.compile(p) for p in (spec if isinstance(spec, list) else [spec])]
        for name, spec in (patterns or {}).items()
    }
|
35 |
|
36 |
@weave.op()
|
37 |
+
def check(self, text: str) -> RegexResult:
|
38 |
"""
|
39 |
Check text against all patterns and return detailed results.
|
40 |
|
|
|
47 |
matched_patterns = {}
|
48 |
failed_patterns = []
|
49 |
|
50 |
+
for pattern_name, pats in self._compiled_patterns.items():
|
51 |
matches = []
|
52 |
+
for pattern in pats:
|
53 |
+
for match in pattern.finditer(text):
|
54 |
+
if match.groups():
|
55 |
+
# If there are capture groups, join them with a separator
|
56 |
+
matches.append(
|
57 |
+
"-".join(str(g) for g in match.groups() if g is not None)
|
58 |
+
)
|
59 |
+
else:
|
60 |
+
# If no capture groups, use the full match
|
61 |
+
matches.append(match.group(0))
|
62 |
|
63 |
if matches:
|
64 |
matched_patterns[pattern_name] = matches
|
mkdocs.yml
CHANGED
@@ -73,6 +73,7 @@ nav:
|
|
73 |
- Prompt Injection Guardrails:
|
74 |
- Classifier Guardrail: 'guardrails/prompt_injection/classifier.md'
|
75 |
- Survey Guardrail: 'guardrails/prompt_injection/llm_survey.md'
|
|
|
76 |
- LLM: 'llm.md'
|
77 |
- Metrics: 'metrics.md'
|
78 |
- RegexModel: 'regex_model.md'
|
|
|
73 |
- Prompt Injection Guardrails:
|
74 |
- Classifier Guardrail: 'guardrails/prompt_injection/classifier.md'
|
75 |
- Survey Guardrail: 'guardrails/prompt_injection/llm_survey.md'
|
76 |
+
- Secrets Detection Guardrail: "guardrails/secrets_detection.md"
|
77 |
- LLM: 'llm.md'
|
78 |
- Metrics: 'metrics.md'
|
79 |
- RegexModel: 'regex_model.md'
|
pyproject.toml
CHANGED
@@ -38,6 +38,13 @@ docs = [
|
|
38 |
"mkdocs-jupyter>=0.25.0",
|
39 |
"jupyter>=1.1.1",
|
40 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
[tool.setuptools]
|
43 |
py-modules = ["guardrails_genie"]
|
|
|
38 |
"mkdocs-jupyter>=0.25.0",
|
39 |
"jupyter>=1.1.1",
|
40 |
]
|
41 |
+
test = [
|
42 |
+
"pytest>=8.2.0",
|
43 |
+
"pytest-asyncio>=0.23.6",
|
44 |
+
"pytest-cov>=5.0.0",
|
45 |
+
"pytest-xdist>=3.1.0",
|
46 |
+
"hypothesis>=6.122.1"
|
47 |
+
]
|
48 |
|
49 |
[tool.setuptools]
|
50 |
py-modules = ["guardrails_genie"]
|
tests/__init__.py
ADDED
File without changes
|
tests/guardrails_genie/__init__.py
ADDED
File without changes
|
tests/guardrails_genie/guardrails/__init__.py
ADDED
File without changes
|
tests/guardrails_genie/guardrails/test_secrets_detection.py
ADDED
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import hashlib
|
2 |
+
import re
|
3 |
+
|
4 |
+
import pytest
|
5 |
+
from hypothesis import given, settings
|
6 |
+
from hypothesis import strategies as st
|
7 |
+
|
8 |
+
from guardrails_genie.guardrails.secrets_detection import (
|
9 |
+
DEFAULT_SECRETS_PATTERNS,
|
10 |
+
REDACTION,
|
11 |
+
SecretsDetectionGuardrail,
|
12 |
+
SecretsDetectionResponse,
|
13 |
+
SecretsDetectionSimpleResponse,
|
14 |
+
redact,
|
15 |
+
)
|
16 |
+
|
17 |
+
|
18 |
+
@pytest.fixture
def mock_secrets_guard(monkeypatch):
    """
    Replace ``SecretsDetectionGuardrail.guard`` with a canned stub.

    The stub treats any prompt containing "safe text" as clean and every
    other prompt as containing a secret.
    """

    def _mock_guard(*args, **kwargs):
        prompt = kwargs.get("prompt")
        # The tests call guard() with `return_detected_secrets`; the stub used
        # to read only `return_detected_types`, so the detailed response was
        # never produced. The old key is still honoured, and the default of
        # True mirrors the real guard() signature.
        detailed = kwargs.get(
            "return_detected_secrets", kwargs.get("return_detected_types", True)
        )

        if "safe text" in prompt:
            common = dict(
                contains_secrets=False,
                explanation="No secrets detected in the text.",
                redacted_text=prompt,
            )
            detected = {}
        else:
            common = dict(
                contains_secrets=True,
                explanation="The output contains secrets.",
                redacted_text="My secret key is [REDACTED:]************[:REDACTED]",
            )
            detected = {"secrets": ["API_KEY"]}

        if detailed:
            return SecretsDetectionResponse(detected_secrets=detected, **common)
        return SecretsDetectionSimpleResponse(**common)

    monkeypatch.setattr(
        "guardrails_genie.guardrails.secrets_detection.SecretsDetectionGuardrail.guard",
        _mock_guard,
    )
|
57 |
+
|
58 |
+
|
59 |
+
def test_redact_partial():
    """REDACT_PARTIAL keeps the first and last two characters of the match."""
    outcome = redact(
        "My secret key is ABCDEFGHIJKL",
        ["ABCDEFGHIJKL"],
        REDACTION.REDACT_PARTIAL,
    )
    assert outcome == "My secret key is [REDACTED:]AB..KL[:REDACTED]"
|
64 |
+
|
65 |
+
|
66 |
+
def test_redact_all():
    """REDACT_ALL replaces every character of the match with an asterisk."""
    outcome = redact(
        "My secret key is ABCDEFGHIJKL",
        ["ABCDEFGHIJKL"],
        REDACTION.REDACT_ALL,
    )
    assert outcome == "My secret key is [REDACTED:]************[:REDACTED]"
|
71 |
+
|
72 |
+
|
73 |
+
def test_redact_hash():
    """REDACT_HASH replaces the match with its MD5 hex digest."""
    expected_digest = hashlib.md5("ABCDEFGHIJKL".encode()).hexdigest()
    outcome = redact(
        "My secret key is ABCDEFGHIJKL",
        ["ABCDEFGHIJKL"],
        REDACTION.REDACT_HASH,
    )
    assert outcome == f"My secret key is [REDACTED:]{expected_digest}[:REDACTED]"
|
79 |
+
|
80 |
+
|
81 |
+
def test_redact_no_match():
    """Text is returned unchanged when none of the matches occur in it."""
    original = "My secret key is ABCDEFGHIJKL"
    outcome = redact(original, ["XYZ"], REDACTION.REDACT_ALL)
    assert outcome == original
|
86 |
+
|
87 |
+
|
88 |
+
def test_secrets_detection_guardrail_detect_types(mock_secrets_guard):
    """With guard() stubbed by the fixture, the detailed response carries the secret types."""
    from guardrails_genie.guardrails.secrets_detection import (
        REDACTION,
        SecretsDetectionGuardrail,
    )

    outcome = SecretsDetectionGuardrail(redaction=REDACTION.REDACT_ALL).guard(
        prompt="My secret key is ABCDEFGHIJKL",
        return_detected_secrets=True,
    )

    assert outcome.contains_secrets is True
    assert outcome.explanation == "The output contains secrets."
    assert outcome.detected_secrets == {"secrets": ["API_KEY"]}
    assert outcome.redacted_text == "My secret key is [REDACTED:]************[:REDACTED]"
|
103 |
+
|
104 |
+
|
105 |
+
def test_secrets_detection_guardrail_simple_response(mock_secrets_guard):
    """With guard() stubbed by the fixture, the simple response still reports and redacts the secret."""
    from guardrails_genie.guardrails.secrets_detection import (
        REDACTION,
        SecretsDetectionGuardrail,
    )

    outcome = SecretsDetectionGuardrail(redaction=REDACTION.REDACT_ALL).guard(
        prompt="My secret key is ABCDEFGHIJKL",
        return_detected_secrets=False,
    )

    assert outcome.contains_secrets is True
    assert outcome.explanation == "The output contains secrets."
    assert outcome.redacted_text == "My secret key is [REDACTED:]************[:REDACTED]"
|
119 |
+
|
120 |
+
|
121 |
+
def test_secrets_detection_guardrail_no_secrets(mock_secrets_guard):
    """With guard() stubbed by the fixture, a "safe text" prompt comes back clean and unmodified."""
    from guardrails_genie.guardrails.secrets_detection import (
        REDACTION,
        SecretsDetectionGuardrail,
    )

    clean_prompt = "This is a safe text with no secrets."
    outcome = SecretsDetectionGuardrail(redaction=REDACTION.REDACT_ALL).guard(
        prompt=clean_prompt,
        return_detected_secrets=True,
    )

    assert outcome.contains_secrets is False
    assert outcome.explanation == "No secrets detected in the text."
    assert outcome.detected_secrets == {}
    assert outcome.redacted_text == clean_prompt
|
136 |
+
|
137 |
+
|
138 |
+
# Create a strategy to generate strings that match the patterns
|
139 |
+
def pattern_strategy(pattern):
    """Build a Hypothesis strategy that yields strings fully matching ``pattern``."""
    compiled = re.compile(pattern)
    return st.from_regex(compiled, fullmatch=True)
|
141 |
+
|
142 |
+
|
143 |
+
@settings(deadline=1000)  # Allow each generated example up to 1000 ms (1 second)
@given(pattern_strategy(DEFAULT_SECRETS_PATTERNS["JwtToken"][0]))
def test_specific_pattern_guardrail(text):
    """Strings generated from the first JwtToken pattern must be flagged as JwtToken."""
    outcome = SecretsDetectionGuardrail(redaction=REDACTION.REDACT_ALL).guard(
        prompt=text,
        return_detected_secrets=True,
    )

    assert outcome.contains_secrets is True
    assert "JwtToken" in outcome.detected_secrets
|