Spaces:

wandb
/

guardrails-genie

Runtime error

App Files Files Community

ash0ts committed on Nov 29, 2024

Commit

fcae57e

1 Parent(s): 3a97187

add pii guardrails that also work for banned words guardrails

Browse files

Files changed (5) hide show

guardrails_genie/{spacy_model.py → guardrails/banned_terms/llm_judge.py} +0 -0
guardrails_genie/guardrails/pii/presidio_pii_guardrail.py +76 -20
guardrails_genie/guardrails/pii/regex_pii_guardrail.py +27 -11
guardrails_genie/guardrails/pii/run_transformers.py +35 -0
guardrails_genie/guardrails/pii/transformers_pipeline_guardrail.py +179 -0

guardrails_genie/{spacy_model.py → guardrails/banned_terms/llm_judge.py} RENAMED Viewed

File without changes

guardrails_genie/guardrails/pii/presidio_pii_guardrail.py CHANGED Viewed

@@ -1,8 +1,8 @@
-from typing import List, Dict, Optional, ClassVar
 import weave
 from pydantic import BaseModel
-from presidio_analyzer import AnalyzerEngine
 from presidio_anonymizer import AnonymizerEngine
 from ..base import Guardrail
@@ -10,18 +10,22 @@ from ..base import Guardrail
 class PresidioPIIGuardrailResponse(BaseModel):
     contains_pii: bool
     detected_pii_types: Dict[str, List[str]]
-    safe_to_process: bool
     explanation: str
     anonymized_text: Optional[str] = None
 #TODO: Add support for transformers workflow and not just Spacy
 class PresidioPIIGuardrail(Guardrail):
-    AVAILABLE_ENTITIES: ClassVar[List[str]] = [
-        "PERSON", "EMAIL_ADDRESS", "PHONE_NUMBER", "LOCATION",
-        "CREDIT_CARD", "CRYPTO", "DATE_TIME", "NRP", "MEDICAL_LICENSE",
-        "URL", "US_BANK_NUMBER", "US_DRIVER_LICENSE", "US_ITIN",
-        "US_PASSPORT", "US_SSN", "UK_NHS", "IP_ADDRESS"
-    ]
     analyzer: AnalyzerEngine
     anonymizer: AnonymizerEngine
@@ -33,7 +37,10 @@ class PresidioPIIGuardrail(Guardrail):
         self,
         selected_entities: Optional[List[str]] = None,
         should_anonymize: bool = False,
-        language: str = "en"
     ):
         # Initialize default values
         if selected_entities is None:
@@ -42,13 +49,48 @@ class PresidioPIIGuardrail(Guardrail):
                 "LOCATION", "CREDIT_CARD", "US_SSN"
             ]
         # Validate selected entities
-        invalid_entities = set(selected_entities) - set(self.AVAILABLE_ENTITIES)
         if invalid_entities:
             raise ValueError(f"Invalid entities: {invalid_entities}")
-        # Initialize Presidio engines
         analyzer = AnalyzerEngine()
         anonymizer = AnonymizerEngine()
         # Call parent class constructor with all fields
@@ -61,9 +103,13 @@ class PresidioPIIGuardrail(Guardrail):
         )
     @weave.op()
-    def guard(self, prompt: str, **kwargs) -> PresidioPIIGuardrailResponse:
         """
         Check if the input prompt contains any PII using Presidio.
         """
         # Analyze text for PII
         analyzer_results = self.analyzer.analyze(
@@ -104,10 +150,20 @@ class PresidioPIIGuardrail(Guardrail):
             )
             anonymized_text = anonymized_result.text
-        return PresidioPIIGuardrailResponse(
-            contains_pii=bool(detected_pii),
-            detected_pii_types=detected_pii,
-            safe_to_process=not bool(detected_pii),
-            explanation="\n".join(explanation_parts),
-            anonymized_text=anonymized_text
-        )

+from typing import List, Dict, Optional, ClassVar, Any
 import weave
 from pydantic import BaseModel
+from presidio_analyzer import AnalyzerEngine, RecognizerRegistry, Pattern, PatternRecognizer
 from presidio_anonymizer import AnonymizerEngine
 from ..base import Guardrail
 class PresidioPIIGuardrailResponse(BaseModel):
     contains_pii: bool
     detected_pii_types: Dict[str, List[str]]
+    explanation: str
+    anonymized_text: Optional[str] = None
+class PresidioPIIGuardrailSimpleResponse(BaseModel):
+    contains_pii: bool
     explanation: str
     anonymized_text: Optional[str] = None
 #TODO: Add support for transformers workflow and not just Spacy
 class PresidioPIIGuardrail(Guardrail):
+    @staticmethod
+    def get_available_entities() -> List[str]:
+        registry = RecognizerRegistry()
+        analyzer = AnalyzerEngine(registry=registry)
+        return [recognizer.supported_entities[0]
+                for recognizer in analyzer.registry.recognizers]
     analyzer: AnalyzerEngine
     anonymizer: AnonymizerEngine
         self,
         selected_entities: Optional[List[str]] = None,
         should_anonymize: bool = False,
+        language: str = "en",
+        deny_lists: Optional[Dict[str, List[str]]] = None,
+        regex_patterns: Optional[Dict[str, List[Dict[str, str]]]] = None,
+        custom_recognizers: Optional[List[Any]] = None
     ):
         # Initialize default values
         if selected_entities is None:
                 "LOCATION", "CREDIT_CARD", "US_SSN"
             ]
+        # Get available entities dynamically
+        available_entities = self.get_available_entities()
         # Validate selected entities
+        invalid_entities = set(selected_entities) - set(available_entities)
         if invalid_entities:
             raise ValueError(f"Invalid entities: {invalid_entities}")
+        # Initialize analyzer with default recognizers
         analyzer = AnalyzerEngine()
+        # Add custom recognizers if provided
+        if custom_recognizers:
+            for recognizer in custom_recognizers:
+                analyzer.registry.add_recognizer(recognizer)
+        # Add deny list recognizers if provided
+        if deny_lists:
+            for entity_type, tokens in deny_lists.items():
+                deny_list_recognizer = PatternRecognizer(
+                    supported_entity=entity_type,
+                    deny_list=tokens
+                )
+                analyzer.registry.add_recognizer(deny_list_recognizer)
+        # Add regex pattern recognizers if provided
+        if regex_patterns:
+            for entity_type, patterns in regex_patterns.items():
+                presidio_patterns = [
+                    Pattern(
+                        name=pattern.get("name", f"pattern_{i}"),
+                        regex=pattern["regex"],
+                        score=pattern.get("score", 0.5)
+                    ) for i, pattern in enumerate(patterns)
+                ]
+                regex_recognizer = PatternRecognizer(
+                    supported_entity=entity_type,
+                    patterns=presidio_patterns
+                )
+                analyzer.registry.add_recognizer(regex_recognizer)
+        # Initialize Presidio engines
         anonymizer = AnonymizerEngine()
         # Call parent class constructor with all fields
         )
     @weave.op()
+    def guard(self, prompt: str, return_detected_types: bool = True, **kwargs) -> PresidioPIIGuardrailResponse | PresidioPIIGuardrailSimpleResponse:
         """
         Check if the input prompt contains any PII using Presidio.
+        Args:
+            prompt: The text to analyze
+            return_detected_types: If True, returns detailed PII type information
         """
         # Analyze text for PII
         analyzer_results = self.analyzer.analyze(
             )
             anonymized_text = anonymized_result.text
+        if return_detected_types:
+            return PresidioPIIGuardrailResponse(
+                contains_pii=bool(detected_pii),
+                detected_pii_types=detected_pii,
+                explanation="\n".join(explanation_parts),
+                anonymized_text=anonymized_text
+            )
+        else:
+            return PresidioPIIGuardrailSimpleResponse(
+                contains_pii=bool(detected_pii),
+                explanation="\n".join(explanation_parts),
+                anonymized_text=anonymized_text
+            )
+    @weave.op()
+    def predict(self, prompt: str, return_detected_types: bool = True, **kwargs) -> PresidioPIIGuardrailResponse | PresidioPIIGuardrailSimpleResponse:
+        """Delegate to guard() with the same arguments and return its response."""
+        return self.guard(prompt, return_detected_types=return_detected_types, **kwargs)

guardrails_genie/guardrails/pii/regex_pii_guardrail.py CHANGED Viewed

@@ -10,7 +10,12 @@ from ..base import Guardrail
 class RegexPIIGuardrailResponse(BaseModel):
     contains_pii: bool
     detected_pii_types: Dict[str, list[str]]
-    safe_to_process: bool
     explanation: str
     anonymized_text: Optional[str] = None
@@ -51,15 +56,16 @@ class RegexPIIGuardrail(Guardrail):
         )
     @weave.op()
-    def guard(self, prompt: str, **kwargs) -> RegexPIIGuardrailResponse:
         """
         Check if the input prompt contains any PII based on the regex patterns.
         Args:
             prompt: Input text to check for PII
         Returns:
-            RegexPIIGuardrailResponse containing PII detection results and recommendations
         """
         result = self.regex_model.check(prompt)
@@ -85,11 +91,21 @@ class RegexPIIGuardrail(Guardrail):
                 for match in matches:
                     replacement = f"[{pii_type.upper()}]"
                     anonymized_text = anonymized_text.replace(match, replacement)
-        return RegexPIIGuardrailResponse(
-            contains_pii=not result.passed,
-            detected_pii_types=result.matched_patterns,
-            safe_to_process=result.passed,
-            explanation="\n".join(explanation_parts),
-            anonymized_text=anonymized_text
-        )

 class RegexPIIGuardrailResponse(BaseModel):
     contains_pii: bool
     detected_pii_types: Dict[str, list[str]]
+    explanation: str
+    anonymized_text: Optional[str] = None
+class RegexPIIGuardrailSimpleResponse(BaseModel):
+    contains_pii: bool
     explanation: str
     anonymized_text: Optional[str] = None
         )
     @weave.op()
+    def guard(self, prompt: str, return_detected_types: bool = True, **kwargs) -> RegexPIIGuardrailResponse | RegexPIIGuardrailSimpleResponse:
         """
         Check if the input prompt contains any PII based on the regex patterns.
         Args:
             prompt: Input text to check for PII
+            return_detected_types: If True, returns detailed PII type information
         Returns:
+            RegexPIIGuardrailResponse or RegexPIIGuardrailSimpleResponse containing PII detection results
         """
         result = self.regex_model.check(prompt)
                 for match in matches:
                     replacement = f"[{pii_type.upper()}]"
                     anonymized_text = anonymized_text.replace(match, replacement)
+        if return_detected_types:
+            return RegexPIIGuardrailResponse(
+                contains_pii=not result.passed,
+                detected_pii_types=result.matched_patterns,
+                explanation="\n".join(explanation_parts),
+                anonymized_text=anonymized_text
+            )
+        else:
+            return RegexPIIGuardrailSimpleResponse(
+                contains_pii=not result.passed,
+                explanation="\n".join(explanation_parts),
+                anonymized_text=anonymized_text
+            )
+    @weave.op()
+    def predict(self, prompt: str, return_detected_types: bool = True, **kwargs) -> RegexPIIGuardrailResponse | RegexPIIGuardrailSimpleResponse:
+        """Delegate to guard() with the same arguments and return its response."""
+        return self.guard(prompt, return_detected_types=return_detected_types, **kwargs)

guardrails_genie/guardrails/pii/run_transformers.py ADDED Viewed

	@@ -0,0 +1,35 @@

+from guardrails_genie.guardrails.pii.transformers_pipeline_guardrail import TransformersPipelinePIIGuardrail
+import weave
+def run_transformers_pipeline():
+    weave.init("guardrails-genie-pii-transformers-pipeline-model")
+    # Create the guardrail with default entities and anonymization enabled
+    pii_guardrail = TransformersPipelinePIIGuardrail(
+        selected_entities=["GIVENNAME", "SURNAME", "EMAIL", "TELEPHONENUM", "SOCIALNUM", "PHONE_NUMBER"],
+        should_anonymize=True,
+        model_name="lakshyakh93/deberta_finetuned_pii",
+        show_available_entities=True
+    )
+    # Check a prompt
+    prompt = "Please contact John Smith at [email protected] or call 123-456-7890. My SSN is 123-45-6789"
+    result = pii_guardrail.guard(prompt, aggregate_redaction=False)
+    print(result)
+    # Result will contain:
+    # - contains_pii: True
+    # - detected_pii_types: {
+    #     "GIVENNAME": ["John"],
+    #     "SURNAME": ["Smith"],
+    #     "EMAIL": ["[email protected]"],
+    #     "TELEPHONENUM": ["123-456-7890"],
+    #     "SOCIALNUM": ["123-45-6789"]
+    # }
+    # - safe_to_process: False
+    # - explanation: Detailed explanation of findings
+    # - anonymized_text: "Please contact [redacted] [redacted] at [redacted] or call [redacted]. My SSN is [redacted]"
+if __name__ == "__main__":
+    run_transformers_pipeline()

guardrails_genie/guardrails/pii/transformers_pipeline_guardrail.py ADDED Viewed

	@@ -0,0 +1,179 @@

+from typing import List, Dict, Optional, ClassVar
+from transformers import pipeline, AutoConfig
+import json
+from pydantic import BaseModel
+from ..base import Guardrail
+import weave
+# Detailed guard() result, returned when return_detected_types is True.
+class TransformersPipelinePIIGuardrailResponse(BaseModel):
+    # True when at least one selected entity type was detected
+    contains_pii: bool
+    # Detected text spans grouped by entity type
+    detected_pii_types: Dict[str, List[str]]
+    # Human-readable summary of what was found and which types were checked
+    explanation: str
+    # Redacted prompt; stays None unless anonymization is enabled and PII was found
+    anonymized_text: Optional[str] = None
+# Reduced guard() result without per-type details, returned when
+# return_detected_types is False.
+class TransformersPipelinePIIGuardrailSimpleResponse(BaseModel):
+    # True when at least one selected entity type was detected
+    contains_pii: bool
+    # Human-readable summary of what was found and which types were checked
+    explanation: str
+    # Redacted prompt; stays None unless anonymization is enabled and PII was found
+    anonymized_text: Optional[str] = None
+class TransformersPipelinePIIGuardrail(Guardrail):
+    """Generic guardrail for detecting PII using any token classification model."""
+    _pipeline: Optional[object] = None
+    selected_entities: List[str]
+    should_anonymize: bool
+    available_entities: List[str]
+    def __init__(
+        self,
+        model_name: str = "iiiorg/piiranha-v1-detect-personal-information",
+        selected_entities: Optional[List[str]] = None,
+        should_anonymize: bool = False,
+        show_available_entities: bool = True,
+    ):
+        # Load model config and extract available entities
+        config = AutoConfig.from_pretrained(model_name)
+        entities = self._extract_entities_from_config(config)
+        if show_available_entities:
+            self._print_available_entities(entities)
+        # Initialize default values if needed
+        if selected_entities is None:
+            selected_entities = entities  # Use all available entities by default
+        # Filter out invalid entities and warn user
+        invalid_entities = [e for e in selected_entities if e not in entities]
+        valid_entities = [e for e in selected_entities if e in entities]
+        if invalid_entities:
+            print(f"\nWarning: The following entities are not available and will be ignored: {invalid_entities}")
+            print(f"Continuing with valid entities: {valid_entities}")
+            selected_entities = valid_entities
+        # Call parent class constructor
+        super().__init__(
+            selected_entities=selected_entities,
+            should_anonymize=should_anonymize,
+            available_entities=entities
+        )
+        # Initialize pipeline
+        self._pipeline = pipeline(
+            task="token-classification",
+            model=model_name,
+            aggregation_strategy="simple"  # Merge same entities
+        )
+    def _extract_entities_from_config(self, config) -> List[str]:
+        """Extract unique entity types from the model config."""
+        # Get id2label mapping from config
+        id2label = config.id2label
+        # Extract unique entity types (removing B- and I- prefixes)
+        entities = set()
+        for label in id2label.values():
+            if label.startswith(('B-', 'I-')):
+                entities.add(label[2:])  # Remove prefix
+            elif label != 'O':  # Skip the 'O' (Outside) label
+                entities.add(label)
+        return sorted(list(entities))
+    def _print_available_entities(self, entities: List[str]):
+        """Print all available entity types that can be detected by the model."""
+        print("\nAvailable PII entity types:")
+        print("=" * 25)
+        for entity in entities:
+            print(f"- {entity}")
+        print("=" * 25 + "\n")
+    def print_available_entities(self):
+        """Print all available entity types that can be detected by the model."""
+        self._print_available_entities(self.available_entities)
+    def _detect_pii(self, text: str) -> Dict[str, List[str]]:
+        """Detect PII entities in the text using the pipeline."""
+        results = self._pipeline(text)
+        # Group findings by entity type
+        detected_pii = {}
+        for entity in results:
+            entity_type = entity['entity_group']
+            if entity_type in self.selected_entities:
+                if entity_type not in detected_pii:
+                    detected_pii[entity_type] = []
+                detected_pii[entity_type].append(entity['word'])
+        return detected_pii
+    def _anonymize_text(self, text: str, aggregate_redaction: bool = True) -> str:
+        """Anonymize detected PII in text using the pipeline."""
+        results = self._pipeline(text)
+        # Sort entities by start position in reverse order to avoid offset issues
+        entities = sorted(results, key=lambda x: x['start'], reverse=True)
+        # Create a mutable list of characters
+        chars = list(text)
+        # Apply redactions
+        for entity in entities:
+            if entity['entity_group'] in self.selected_entities:
+                start, end = entity['start'], entity['end']
+                replacement = ' [redacted] ' if aggregate_redaction else f" [{entity['entity_group']}] "
+                # Replace the entity with the redaction marker
+                chars[start:end] = replacement
+        # Join and clean up multiple spaces
+        result = ''.join(chars)
+        return ' '.join(result.split())
+    @weave.op()
+    def guard(self, prompt: str, return_detected_types: bool = True, aggregate_redaction: bool = True) -> TransformersPipelinePIIGuardrailResponse | TransformersPipelinePIIGuardrailSimpleResponse:
+        """Check if the input prompt contains any PII using Piiranha.
+        Args:
+            prompt: The text to analyze
+            return_detected_types: If True, returns detailed PII type information
+            aggregate_redaction: If True, uses generic [redacted] instead of entity type
+        """
+        # Detect PII
+        detected_pii = self._detect_pii(prompt)
+        # Create explanation
+        explanation_parts = []
+        if detected_pii:
+            explanation_parts.append("Found the following PII in the text:")
+            for pii_type, instances in detected_pii.items():
+                explanation_parts.append(f"- {pii_type}: {len(instances)} instance(s)")
+        else:
+            explanation_parts.append("No PII detected in the text.")
+        explanation_parts.append("\nChecked for these PII types:")
+        for entity in self.selected_entities:
+            explanation_parts.append(f"- {entity}")
+        # Anonymize if requested
+        anonymized_text = None
+        if self.should_anonymize and detected_pii:
+            anonymized_text = self._anonymize_text(prompt, aggregate_redaction)
+        if return_detected_types:
+            return TransformersPipelinePIIGuardrailResponse(
+                contains_pii=bool(detected_pii),
+                detected_pii_types=detected_pii,
+                explanation="\n".join(explanation_parts),
+                anonymized_text=anonymized_text
+            )
+        else:
+            return TransformersPipelinePIIGuardrailSimpleResponse(
+                contains_pii=bool(detected_pii),
+                explanation="\n".join(explanation_parts),
+                anonymized_text=anonymized_text
+            )
+    @weave.op()
+    def predict(self, prompt: str, return_detected_types: bool = True, aggregate_redaction: bool = True, **kwargs) -> TransformersPipelinePIIGuardrailResponse | TransformersPipelinePIIGuardrailSimpleResponse:
+        return self.guard(prompt, return_detected_types=return_detected_types, aggregate_redaction=aggregate_redaction, **kwargs)