File size: 4,395 Bytes
7e16d4f
 
 
 
 
 
 
 
 
 
 
 
fcae57e
 
 
 
 
 
7e16d4f
0f0578b
7e16d4f
 
 
 
 
0f0578b
7e16d4f
 
 
 
 
 
 
 
 
 
 
 
 
 
0f0578b
7e16d4f
 
 
 
 
 
 
 
 
 
0f0578b
 
 
 
 
7e16d4f
 
fcae57e
7e16d4f
 
 
 
 
fcae57e
7e16d4f
 
fcae57e
7e16d4f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0f0578b
 
 
 
 
 
 
 
fcae57e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
from typing import Dict, Optional, ClassVar

import weave
from pydantic import BaseModel

from ...regex_model import RegexModel
from ..base import Guardrail


class RegexPIIGuardrailResponse(BaseModel):
    """Detailed result of a regex PII check, including the matches per PII type.

    Returned by ``RegexPIIGuardrail.guard`` when ``return_detected_types`` is true.
    """

    # True when the underlying regex check did not pass.
    contains_pii: bool
    # Maps each PII type name to the list of strings matched for that type.
    detected_pii_types: Dict[str, list[str]]
    # Human-readable, newline-separated summary of what was (not) found.
    explanation: str
    # Prompt with matches replaced by "[TYPE]" placeholders; None when
    # anonymization is disabled or nothing matched.
    anonymized_text: Optional[str] = None


class RegexPIIGuardrailSimpleResponse(BaseModel):
    """Compact result of a regex PII check, without the per-type match lists.

    Returned by ``RegexPIIGuardrail.guard`` when ``return_detected_types`` is false.
    """

    # True when the underlying regex check did not pass.
    contains_pii: bool
    # Human-readable, newline-separated summary of what was (not) found.
    explanation: str
    # Prompt with matches replaced by "[TYPE]" placeholders; None when
    # anonymization is disabled or nothing matched.
    anonymized_text: Optional[str] = None


class RegexPIIGuardrail(Guardrail):
    """Guardrail that detects PII in a prompt with regular expressions.

    The patterns (built-in defaults and/or caller-supplied ones) are handed to
    a ``RegexModel``; ``guard`` runs that model on the prompt, summarises the
    result and, when ``should_anonymize`` is set, also returns a copy of the
    prompt in which every matched string is replaced by a ``[PII_TYPE]``
    placeholder.
    """

    regex_model: RegexModel
    patterns: Dict[str, str] = {}
    should_anonymize: bool = False

    # Built-in patterns, keyed by the PII type name reported in responses.
    DEFAULT_PII_PATTERNS: ClassVar[Dict[str, str]] = {
        "email": r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
        "phone_number": r"\b(?:\+?1[-.]?)?\(?(?:[0-9]{3})\)?[-.]?(?:[0-9]{3})[-.]?(?:[0-9]{4})\b",
        "ssn": r"\b\d{3}[-]?\d{2}[-]?\d{4}\b",
        "credit_card": r"\b\d{4}[-.]?\d{4}[-.]?\d{4}[-.]?\d{4}\b",
        "ip_address": r"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b",
        "date_of_birth": r"\b\d{2}[-/]\d{2}[-/]\d{4}\b",
        "passport": r"\b[A-Z]{1,2}[0-9]{6,9}\b",
        "drivers_license": r"\b[A-Z]\d{7}\b",
        "bank_account": r"\b\d{8,17}\b",
        "zip_code": r"\b\d{5}(?:[-]\d{4})?\b",
    }

    def __init__(self, use_defaults: bool = True, should_anonymize: bool = False, **kwargs):
        """Build the pattern set and the backing ``RegexModel``.

        Args:
            use_defaults: If True, start from ``DEFAULT_PII_PATTERNS``.
            should_anonymize: If True, ``guard`` also produces anonymized text.
            **kwargs: Only ``patterns`` (a mapping of PII type name to regex)
                is read; its entries are added to, and override, the defaults.
                Any other keyword argument is ignored.
        """
        # Copy so that custom patterns never mutate the shared class-level defaults.
        patterns = self.DEFAULT_PII_PATTERNS.copy() if use_defaults else {}
        custom_patterns = kwargs.get("patterns")
        if custom_patterns:
            patterns.update(custom_patterns)

        # The model and the guardrail are given the same merged pattern set.
        regex_model = RegexModel(patterns=patterns)

        super().__init__(
            regex_model=regex_model,
            patterns=patterns,
            should_anonymize=should_anonymize,
        )

    @weave.op()
    def guard(self, prompt: str, return_detected_types: bool = True, **kwargs) -> RegexPIIGuardrailResponse | RegexPIIGuardrailSimpleResponse:
        """Check whether ``prompt`` contains PII according to the regex patterns.

        Args:
            prompt: Input text to check for PII.
            return_detected_types: If True, the response includes the matched
                strings per PII type; otherwise a simple response is returned.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            A ``RegexPIIGuardrailResponse`` or ``RegexPIIGuardrailSimpleResponse``
            with the detection flag, an explanation and, if anonymization is
            enabled and something matched, the anonymized prompt.
        """
        result = self.regex_model.check(prompt)

        # Build the human-readable summary.
        explanation_parts = []
        if result.matched_patterns:
            explanation_parts.append("Found the following PII in the text:")
            explanation_parts.extend(
                f"- {pii_type}: {len(matches)} instance(s)"
                for pii_type, matches in result.matched_patterns.items()
            )
        else:
            explanation_parts.append("No PII detected in the text.")

        if result.failed_patterns:
            explanation_parts.append("\nChecked but did not find these PII types:")
            explanation_parts.extend(f"- {pattern}" for pattern in result.failed_patterns)

        explanation = "\n".join(explanation_parts)

        anonymized_text = None
        if self.should_anonymize and result.matched_patterns:
            # Map each distinct matched string to its placeholder. When the same
            # string is matched by several PII types, the first type wins.
            placeholders = {}
            for pii_type, matches in result.matched_patterns.items():
                for match in matches:
                    # An empty match would make str.replace insert the
                    # placeholder between every character of the prompt.
                    if match:
                        placeholders.setdefault(match, f"[{pii_type.upper()}]")

            # Replace the longest matches first: otherwise a short match that is
            # a substring of a longer one (e.g. a 9-digit SSN inside a longer
            # account number) would be replaced inside it, leaving the rest of
            # the longer value exposed in the output.
            anonymized_text = prompt
            for match in sorted(placeholders, key=len, reverse=True):
                anonymized_text = anonymized_text.replace(match, placeholders[match])

        if return_detected_types:
            return RegexPIIGuardrailResponse(
                contains_pii=not result.passed,
                detected_pii_types=result.matched_patterns,
                explanation=explanation,
                anonymized_text=anonymized_text,
            )
        return RegexPIIGuardrailSimpleResponse(
            contains_pii=not result.passed,
            explanation=explanation,
            anonymized_text=anonymized_text,
        )

    @weave.op()
    def predict(self, prompt: str, return_detected_types: bool = True, **kwargs) -> RegexPIIGuardrailResponse | RegexPIIGuardrailSimpleResponse:
        """Alias for ``guard``; forwards all arguments unchanged."""
        return self.guard(prompt, return_detected_types=return_detected_types, **kwargs)