toy-intelligence / src /ontology.py
dwb2023's picture
initial update
c3af845
# src/ontology.py
from typing import List, Dict, Pattern
import re
class OntologyRegistry:
    """Registry of regex patterns and NER label mappings for entity validation.

    Attributes:
        temporal_patterns: Regex sources for clock times, "Month D, YYYY"
            dates, the word "tomorrow" and "in N days/weeks/months" offsets.
        location_patterns: Regex sources for a capitalised word following
            in/at/from/to (optionally with a two-letter upper-case suffix),
            "<Name> Base", "Headquarters" and "HQ".
        entity_types: Mapping from NER tags (PER, ORG, LOC, MISC) to
            ontology type names.
    """

    def __init__(self) -> None:
        """Build the pattern tables and pre-compile every regex."""
        self.temporal_patterns: List[str] = [
            # The whitespace before AM/PM lives inside the optional group so
            # that a bare time followed by a word ("10:30 and") does not
            # capture the trailing space.
            r'\b\d{1,2}:\d{2}(?:\s*(?:AM|PM|am|pm))?\b',
            r'\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]* \d{1,2}(?:st|nd|rd|th)?,? \d{4}\b',
            r'\btomorrow\b',
            r'\bin \d+ (?:days?|weeks?|months?)\b',
        ]
        self.location_patterns: List[str] = [
            # Group 1 isolates the place name from its leading preposition;
            # validate_pattern() returns that group rather than the full match.
            r'\b(?:in|at|from|to) ([A-Z][a-zA-Z]+(?:,? [A-Z]{2})?)\b',
            r'\b[A-Z][a-zA-Z]+ Base\b',
            r'\bHeadquarters\b',
            r'\bHQ\b',
        ]
        self.entity_types: Dict[str, str] = {
            'PER': 'person',
            'ORG': 'organization',
            'LOC': 'location',
            'MISC': 'miscellaneous',
        }
        # Compile once here so validate_pattern() does not re-resolve the
        # pattern strings on every call.
        self._compiled_patterns: Dict[str, List[Pattern[str]]] = {
            'temporal': [re.compile(p) for p in self.temporal_patterns],
            'location': [re.compile(p) for p in self.location_patterns],
        }

    def validate_pattern(self, text: str, pattern_type: str) -> List[str]:
        """Extract every substring of ``text`` matching a pattern family.

        Patterns are applied in registration order; within one pattern the
        matches appear left to right. When a pattern defines a capture
        group, the text of group 1 is returned instead of the whole match
        (this strips the leading preposition from location matches). A span
        of text already reported by an earlier pattern is not reported again.

        Args:
            text: Input text to scan. Empty or ``None`` yields no matches.
            pattern_type: Pattern family to apply ('temporal' or 'location').
                Any other value yields an empty list.

        Returns:
            List of matched strings, possibly empty.
        """
        if not text:
            return []

        matches: List[str] = []
        seen_spans = set()
        for pattern in self._compiled_patterns.get(pattern_type, []):
            for match in pattern.finditer(text):
                # Prefer the captured entity; fall back to the whole match
                # when the pattern has no group or the group did not take part.
                use_group = pattern.groups and match.group(1) is not None
                index = 1 if use_group else 0
                span = match.span(index)
                if span in seen_spans:
                    # e.g. "at HQ" and the bare "HQ" pattern hit the same span.
                    continue
                seen_spans.add(span)
                matches.append(match.group(index))
        return matches

    def get_entity_type(self, ner_type: str) -> str:
        """Map an NER tag to its ontology type.

        Args:
            ner_type: NER tag such as 'PER', 'ORG', 'LOC' or 'MISC'.

        Returns:
            The mapped ontology type, or 'miscellaneous' for unknown tags.
        """
        return self.entity_types.get(ner_type, 'miscellaneous')