import asyncio
import json
import re
import sqlite3
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime

import gradio as gr
from transformers import pipeline

# Initialize NLP pipelines. aggregation_strategy="simple" merges sub-word
# tokens (e.g. "##son") into whole entities and exposes an "entity_group" key.
ner_pipeline = pipeline(
    "ner",
    model="dbmdz/bert-large-cased-finetuned-conll03-english",
    aggregation_strategy="simple",
)
classifier = pipeline("zero-shot-classification")


class OntologyRegistry:
    def __init__(self):
        self.temporal_patterns = [
            r'\b\d{1,2}:\d{2}\s*(?:AM|PM|am|pm)?\b',
            r'\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]* \d{1,2}(?:st|nd|rd|th)?,? \d{4}\b',
            r'\btomorrow\b',
            r'\bin \d+ (?:days?|weeks?|months?)\b',
        ]
        self.location_patterns = [
            r'\b(?:in|at|from|to) ([A-Z][a-zA-Z]+(?:,? [A-Z]{2})?)\b',
            r'\b[A-Z][a-zA-Z]+ Base\b',
            r'\bHeadquarters\b',
            r'\bHQ\b',
        ]
        self.entity_types = {
            'PER': 'person',
            'ORG': 'organization',
            'LOC': 'location',
            'MISC': 'miscellaneous',
        }

    def validate_pattern(self, text, pattern_type):
        patterns = getattr(self, f"{pattern_type}_patterns", [])
        matches = []
        for pattern in patterns:
            matches.extend(re.finditer(pattern, text))
        # Prefer the capture group where one exists (e.g. the place name
        # without its leading preposition); fall back to the whole match.
        return [m.group(1) if m.lastindex else m.group() for m in matches]
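# Quick illustrative check (hypothetical input, not part of the pipeline):
# the registry matches both clock times and relative expressions.
#
#   registry = OntologyRegistry()
#   registry.validate_pattern("Convoy departs at 3:00 PM tomorrow", "temporal")
#   # -> ['3:00 PM', 'tomorrow']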
class RelationshipEngine:
    def __init__(self, db_path=':memory:'):
        self.conn = sqlite3.connect(db_path)
        self.setup_database()

    def setup_database(self):
        # The entities column stores the extracted-entity dict as JSON so that
        # stored events can later be scored for overlap against new ones.
        self.conn.execute('''
            CREATE TABLE IF NOT EXISTS events (
                id INTEGER PRIMARY KEY,
                text TEXT,
                timestamp DATETIME,
                confidence REAL,
                entities TEXT
            )
        ''')
        self.conn.execute('''
            CREATE TABLE IF NOT EXISTS relationships (
                id INTEGER PRIMARY KEY,
                source_event_id INTEGER,
                target_event_id INTEGER,
                relationship_type TEXT,
                confidence REAL,
                FOREIGN KEY (source_event_id) REFERENCES events(id),
                FOREIGN KEY (target_event_id) REFERENCES events(id)
            )
        ''')
        self.conn.commit()

    def find_related_events(self, event_data):
        # Naive retrieval: only matches stored events whose text contains the
        # incoming text as a substring.
        cursor = self.conn.execute('''
            SELECT * FROM events
            WHERE text LIKE ?
            ORDER BY timestamp DESC
            LIMIT 5
        ''', (f"%{event_data.get('text', '')}%",))
        return cursor.fetchall()

    def calculate_relationship_confidence(self, event1, event2):
        # Simple similarity-based confidence: overlapping entities between the
        # two events raise the score, capped at 1.0.
        base_confidence = 0.0
        for key, weight in (('people', 0.3), ('organizations', 0.3), ('locations', 0.4)):
            if set(event1.get('entities', {}).get(key, [])) & set(event2.get('entities', {}).get(key, [])):
                base_confidence += weight
        return min(base_confidence, 1.0)


class EventAnalyzer:
    def __init__(self):
        self.ontology = OntologyRegistry()
        self.relationship_engine = RelationshipEngine()
        self.executor = ThreadPoolExecutor(max_workers=3)

    async def extract_entities(self, text):
        def _extract():
            return ner_pipeline(text)

        # Run NER in the thread pool so it doesn't block the event loop
        ner_results = await asyncio.get_running_loop().run_in_executor(
            self.executor, _extract
        )

        entities = {
            "people": [],
            "organizations": [],
            "locations": [],
            "hashtags": [word for word in text.split() if word.startswith('#')],
        }

        # With aggregation_strategy="simple" each result carries an
        # "entity_group" label such as PER, ORG, LOC, or MISC.
        for item in ner_results:
            if item["entity_group"] == "PER":
                entities["people"].append(item["word"])
            elif item["entity_group"] == "ORG":
                entities["organizations"].append(item["word"])
            elif item["entity_group"] == "LOC":
                entities["locations"].append(item["word"])

        return entities

    async def extract_temporal(self, text):
        return self.ontology.validate_pattern(text, 'temporal')

    async def extract_locations(self, text):
        # Await the coroutine before reading its result; the original
        # `await self.extract_entities(text).get(...)` would raise because
        # a coroutine has no .get().
        ml_locations = (await self.extract_entities(text)).get('locations', [])
        pattern_locations = self.ontology.validate_pattern(text, 'location')
        return list(set(ml_locations + pattern_locations))

    async def analyze_event(self, text):
        try:
            # Parallel extraction (note that extract_locations runs NER a
            # second time; acceptable for a demo, wasteful at scale)
            entities, temporal, locations = await asyncio.gather(
                self.extract_entities(text),
                self.extract_temporal(text),
                self.extract_locations(text),
            )

            # Merge location results
            entities['locations'] = locations
            entities['temporal'] = temporal

            # Calculate initial confidence
            confidence = min(1.0, (
                0.2 * bool(entities["people"]) +
                0.2 * bool(entities["organizations"]) +
                0.3 * bool(entities["locations"]) +
                0.3 * bool(temporal)
            ))

            # Find related events
            related_events = self.relationship_engine.find_related_events({
                'text': text,
                'entities': entities,
            })

            # Adjust confidence based on relationships. Row layout is
            # (id, text, timestamp, confidence, entities); the stored entities
            # are JSON-decoded so overlap can actually be scored (passing only
            # the text, as before, always produced a confidence of zero).
            if related_events:
                relationship_confidence = max(
                    self.relationship_engine.calculate_relationship_confidence(
                        {'entities': entities},
                        {'entities': json.loads(event[4]) if event[4] else {}},
                    )
                    for event in related_events
                )
                confidence = (confidence + relationship_confidence) / 2

            result = {
                "text": text,
                "entities": entities,
                "confidence": confidence,
                "verification_needed": confidence < 0.6,
                "related_events": [
                    {
                        "text": event[1],
                        "timestamp": event[2],
                        "confidence": event[3],
                    }
                    for event in related_events
                ],
            }

            # Store the event (with its entities) if confidence is sufficient
            if confidence >= 0.6:
                self.relationship_engine.conn.execute(
                    'INSERT INTO events (text, timestamp, confidence, entities) VALUES (?, ?, ?, ?)',
                    (text, datetime.now().isoformat(), confidence, json.dumps(entities)),
                )
                self.relationship_engine.conn.commit()

            return result
        except Exception as e:
            return {"error": str(e)}


# Initialize analyzer
analyzer = EventAnalyzer()
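# Illustrative usage sketch (hypothetical input; run from a script or REPL
# rather than at import time):
#
#   result = asyncio.run(analyzer.analyze_event(
#       "Gen. Smith met with Acme Corp officials in Boston at 3:00 PM"
#   ))
#   result["confidence"]           # e.g. 1.0 if people, orgs, locations,
#   result["verification_needed"]  # and a time were all detected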
# Custom CSS for UI
css = """
.container {
    max-width: 1200px;
    margin: auto;
    padding: 20px;
}
.results {
    padding: 20px;
    border: 1px solid #ddd;
    border-radius: 8px;
    margin-top: 20px;
}
.confidence-high {
    color: #22c55e;
    font-weight: bold;
}
.confidence-low {
    color: #f97316;
    font-weight: bold;
}
.entity-section {
    margin: 15px 0;
}
.alert-warning {
    background: #fff3cd;
    padding: 10px;
    border-radius: 5px;
    margin: 10px 0;
}
.alert-success {
    background: #d1fae5;
    padding: 10px;
    border-radius: 5px;
    margin: 10px 0;
}
.related-events {
    background: #f3f4f6;
    padding: 15px;
    border-radius: 5px;
    margin-top: 15px;
}
"""


def format_results(analysis_result):
    if "error" in analysis_result:
        return f"