Spaces:
Sleeping
Sleeping
| # src/ontology.py | |
| from typing import List, Dict, Pattern | |
| import re | |
class OntologyRegistry:
    """Hold regex patterns and the NER-tag-to-ontology-type mapping.

    Everything is set up in ``__init__``:

    * ``temporal_patterns`` / ``location_patterns`` -- regex source strings.
    * ``entity_types`` -- maps NER tags (``'PER'``, ``'ORG'``, ...) to
      ontology type names.
    * ``_compiled_patterns`` -- the same regexes, pre-compiled and keyed by
      ``'temporal'`` and ``'location'``.
    """

    def __init__(self) -> None:
        """Populate the pattern lists, the tag mapping and the compiled cache."""
        # Clock times, "Mon D, YYYY" dates, "tomorrow", "in N days/weeks/months".
        self.temporal_patterns: List[str] = [
            r'\b\d{1,2}:\d{2}\s*(?:AM|PM|am|pm)?\b',
            r'\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]* \d{1,2}(?:st|nd|rd|th)?,? \d{4}\b',
            r'\btomorrow\b',
            r'\bin \d+ (?:days?|weeks?|months?)\b',
        ]

        # Preposition + capitalised word (optionally followed by a two-letter
        # upper-case code), "<Name> Base", "Headquarters", "HQ".
        self.location_patterns: List[str] = [
            r'\b(?:in|at|from|to) ([A-Z][a-zA-Z]+(,? [A-Z]{2})?)\b',
            r'\b[A-Z][a-zA-Z]+ Base\b',
            r'\bHeadquarters\b',
            r'\bHQ\b',
        ]

        self.entity_types: Dict[str, str] = {
            'PER': 'person',
            'ORG': 'organization',
            'LOC': 'location',
            'MISC': 'miscellaneous',
        }

        # Compile every regex once here so validate_pattern() never has to.
        sources_by_kind = {
            'temporal': self.temporal_patterns,
            'location': self.location_patterns,
        }
        self._compiled_patterns: Dict[str, List[Pattern]] = {
            kind: [re.compile(source) for source in sources]
            for kind, sources in sources_by_kind.items()
        }

    def validate_pattern(self, text: str, pattern_type: str) -> List[str]:
        """
        Collect every regex hit in ``text`` for one pattern family.

        Args:
            text: Input text to scan.
            pattern_type: Key into the compiled patterns ('temporal' or
                'location'). Any other key yields an empty list.

        Returns:
            The full text of each match (group 0), grouped by pattern in
            declaration order and, within a pattern, in order of appearance.
            Hits from different patterns that overlap are all kept, and the
            preposition-based location pattern returns the preposition as
            part of the match (e.g. ``'to Paris'``).
        """
        compiled = self._compiled_patterns.get(pattern_type)
        if not compiled:
            # Unknown pattern family: nothing to search with.
            return []
        return [hit.group() for regex in compiled for hit in regex.finditer(text)]

    def get_entity_type(self, ner_type: str) -> str:
        """Translate an NER tag to its ontology type name.

        Tags missing from ``entity_types`` fall back to 'miscellaneous'.
        """
        if ner_type in self.entity_types:
            return self.entity_types[ner_type]
        return 'miscellaneous'