Spaces:
Sleeping
Sleeping
# src/ontology.py | |
from typing import List, Dict, Pattern | |
import re | |
class OntologyRegistry:
    """Registry of regex patterns and NER-label mappings.

    Holds the raw temporal and location regex sources, a pre-compiled copy of
    each, and a mapping from short NER labels (``'PER'``, ``'ORG'``, ...) to
    ontology type names.
    """

    def __init__(self) -> None:
        """Build the pattern tables and compile every regex once."""
        self.temporal_patterns: List[str] = [
            # Clock time with an optional AM/PM suffix. The whitespace lives
            # inside the optional group so that a bare time followed by
            # another word ("10:30 today") does not capture a trailing space.
            r'\b\d{1,2}:\d{2}(?:\s*(?:AM|PM|am|pm))?\b',
            # Month name (abbreviated or longer), day with optional ordinal
            # suffix, and a four-digit year, e.g. "Jan 5th, 2024".
            r'\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]* \d{1,2}(?:st|nd|rd|th)?,? \d{4}\b',
            r'\btomorrow\b',
            # Relative offsets such as "in 3 days" / "in 1 week".
            r'\bin \d+ (?:days?|weeks?|months?)\b',
        ]
        self.location_patterns: List[str] = [
            # Preposition followed by a capitalised word and an optional
            # two-letter uppercase suffix, e.g. "from Boston, MA".
            r'\b(?:in|at|from|to) ([A-Z][a-zA-Z]+(,? [A-Z]{2})?)\b',
            r'\b[A-Z][a-zA-Z]+ Base\b',
            r'\bHeadquarters\b',
            r'\bHQ\b',
        ]
        self.entity_types: Dict[str, str] = {
            'PER': 'person',
            'ORG': 'organization',
            'LOC': 'location',
            'MISC': 'miscellaneous',
        }
        # Compile once up front so validate_pattern() never recompiles.
        self._compiled_patterns: Dict[str, List[Pattern[str]]] = {
            'temporal': [re.compile(p) for p in self.temporal_patterns],
            'location': [re.compile(p) for p in self.location_patterns],
        }

    def validate_pattern(self, text: str, pattern_type: str) -> List[str]:
        """Return every substring of *text* matched by a pattern group.

        Args:
            text: Input text to scan.
            pattern_type: Pattern group to apply (``'temporal'`` or
                ``'location'``).

        Returns:
            The full text of each match, grouped by pattern in declaration
            order and, within one pattern, in order of appearance. Because
            the whole match is returned, preposition-led location matches
            include the preposition (e.g. ``"from Boston, MA"``). An
            unrecognised ``pattern_type`` yields an empty list.
        """
        matches: List[str] = []
        for pattern in self._compiled_patterns.get(pattern_type, []):
            matches.extend(match.group() for match in pattern.finditer(text))
        return matches

    def get_entity_type(self, ner_type: str) -> str:
        """Map an NER label to its ontology type name.

        Labels missing from ``entity_types`` map to ``'miscellaneous'``.
        """
        return self.entity_types.get(ner_type, 'miscellaneous')