Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
import asyncio
import html
import json
import logging
import re
import sqlite3
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime

import gradio as gr
from transformers import pipeline
# Build the Hugging Face pipelines once, at import time.
# `ner_pipeline` performs token-level named-entity recognition;
# `classifier` is a zero-shot classifier using the library's default model.
ner_pipeline = pipeline(
    task="ner",
    model="dbmdz/bert-large-cased-finetuned-conll03-english",
)
classifier = pipeline(task="zero-shot-classification")
class OntologyRegistry:
    """Regex patterns for temporal and location mentions, plus NER tag names.

    The pattern lists are plain lists of regex source strings and
    ``entity_types`` is a plain dict, so callers may extend them at runtime.
    """

    def __init__(self):
        # Clock times, month-name dates with a year, "tomorrow", and relative
        # offsets such as "in 3 days".
        self.temporal_patterns = [
            r'\b\d{1,2}:\d{2}\s*(?:AM|PM|am|pm)?\b',
            r'\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]* \d{1,2}(?:st|nd|rd|th)?,? \d{4}\b',
            r'\btomorrow\b',
            r'\bin \d+ (?:days?|weeks?|months?)\b',
        ]
        # The first pattern captures the place name (group 1) that follows a
        # preposition; the others match the location phrase itself.
        self.location_patterns = [
            r'\b(?:in|at|from|to) ([A-Z][a-zA-Z]+(,? [A-Z]{2})?)\b',
            r'\b[A-Z][a-zA-Z]+ Base\b',
            # Case-insensitive so "at headquarters" is recognised as well.
            r'\b(?i:headquarters)\b',
            r'\bHQ\b',
        ]
        # NER tag suffix -> human-readable entity type.
        self.entity_types = {
            'PER': 'person',
            'ORG': 'organization',
            'LOC': 'location',
            'MISC': 'miscellaneous',
        }

    def validate_pattern(self, text, pattern_type):
        """Return every substring of *text* matched by a pattern family.

        Args:
            text: The text to scan. Empty or ``None`` yields an empty list.
            pattern_type: Prefix of the ``<pattern_type>_patterns`` attribute
                to use (e.g. ``'temporal'`` or ``'location'``). An unknown
                prefix yields an empty list.

        Returns:
            Matched strings, ordered by pattern and then by position in
            *text*. When a pattern defines a capturing group, the first group
            is returned instead of the whole match, so a location such as
            "in Washington, DC" is reported as "Washington, DC".
        """
        if not text:
            return []
        patterns = getattr(self, f"{pattern_type}_patterns", [])
        found = []
        for pattern in patterns:
            for match in re.finditer(pattern, text):
                captured = match.group(1) if match.re.groups else None
                found.append(captured if captured is not None else match.group())
        return found
class RelationshipEngine:
    """SQLite-backed store of analysed events and their relationships."""

    # Entity category -> confidence added when two events share a value in it.
    _OVERLAP_WEIGHTS = (
        ('people', 0.3),
        ('organizations', 0.3),
        ('locations', 0.4),
    )

    def __init__(self, db_path=':memory:'):
        """Open (or create) the database at *db_path* and ensure the schema.

        The connection is opened with ``check_same_thread=False`` because the
        engine may be created on one thread and queried from another (for
        example a web-server worker); with the default setting sqlite3 raises
        ``ProgrammingError`` on such cross-thread use. Callers that write from
        several threads at once must serialise those writes themselves.
        """
        self.conn = sqlite3.connect(db_path, check_same_thread=False)
        self.setup_database()

    def setup_database(self):
        """Create the ``events`` and ``relationships`` tables if missing."""
        # Using the connection as a context manager commits on success and
        # rolls back if either statement fails.
        with self.conn:
            self.conn.execute('''
                CREATE TABLE IF NOT EXISTS events (
                    id INTEGER PRIMARY KEY,
                    text TEXT,
                    timestamp DATETIME,
                    confidence REAL
                )
            ''')
            self.conn.execute('''
                CREATE TABLE IF NOT EXISTS relationships (
                    id INTEGER PRIMARY KEY,
                    source_event_id INTEGER,
                    target_event_id INTEGER,
                    relationship_type TEXT,
                    confidence REAL,
                    FOREIGN KEY (source_event_id) REFERENCES events(id),
                    FOREIGN KEY (target_event_id) REFERENCES events(id)
                )
            ''')

    def find_related_events(self, event_data):
        """Return up to five stored events whose text contains the given text.

        Args:
            event_data: Mapping with a ``'text'`` key. A missing or empty text
                returns ``[]`` instead of matching every stored event.

        Returns:
            Rows from ``events`` as ``(id, text, timestamp, confidence)``
            tuples, newest timestamp first.
        """
        needle = event_data.get('text') or ''
        if not needle:
            return []
        # Escape LIKE wildcards so "%" and "_" in user text match literally.
        escaped = (
            needle.replace('\\', '\\\\')
            .replace('%', '\\%')
            .replace('_', '\\_')
        )
        cursor = self.conn.execute(
            '''
            SELECT * FROM events
            WHERE text LIKE ? ESCAPE '\\'
            ORDER BY timestamp DESC
            LIMIT 5
            ''',
            (f"%{escaped}%",),
        )
        return cursor.fetchall()

    def calculate_relationship_confidence(self, event1, event2):
        """Score how related two events are from their shared entities.

        Each event is a mapping with an optional ``'entities'`` mapping holding
        ``'people'``, ``'organizations'`` and ``'locations'`` lists. Sharing at
        least one value in a category adds that category's weight
        (0.3 / 0.3 / 0.4). Missing or ``None`` entries count as empty.

        Returns:
            A float in ``[0.0, 1.0]``.
        """
        first = event1.get('entities') or {}
        second = event2.get('entities') or {}
        score = 0.0
        for category, weight in self._OVERLAP_WEIGHTS:
            if set(first.get(category) or ()) & set(second.get(category) or ()):
                score += weight
        return min(score, 1.0)
class EventAnalyzer:
    """Extract entities from event text, score them, and track related events.

    Combines the module-level ``ner_pipeline`` with the regex patterns of
    ``OntologyRegistry`` and stores sufficiently confident events through
    ``RelationshipEngine``.
    """

    # Events scoring below this need verification and are not stored.
    CONFIDENCE_THRESHOLD = 0.6

    # NER label suffix -> key in the entities dict.
    _LABEL_BUCKETS = (
        ("PER", "people"),
        ("ORG", "organizations"),
        ("LOC", "locations"),
    )

    def __init__(self):
        self.ontology = OntologyRegistry()
        self.relationship_engine = RelationshipEngine()
        # NER inference is blocking, so it runs on worker threads.
        self.executor = ThreadPoolExecutor(max_workers=3)

    @staticmethod
    def _bucket_ner_tokens(ner_results):
        """Sort raw NER items into people / organizations / locations lists.

        Accepts items keyed by ``"entity"`` (token level) or
        ``"entity_group"`` (aggregated). A WordPiece continuation token
        (``"##..."``) is glued onto the preceding token of the same category
        instead of being listed as a separate entity.
        """
        buckets = {"people": [], "organizations": [], "locations": []}
        previous = None
        for item in ner_results:
            label = item.get("entity_group") or item.get("entity") or ""
            word = item.get("word", "")
            bucket = next(
                (name for suffix, name in EventAnalyzer._LABEL_BUCKETS
                 if label.endswith(suffix)),
                None,
            )
            if bucket is None:
                previous = None
                continue
            if word.startswith("##") and bucket == previous and buckets[bucket]:
                buckets[bucket][-1] += word[2:]
            else:
                buckets[bucket].append(word)
            previous = bucket
        return buckets

    @staticmethod
    def _score_entities(entities):
        """Return the base confidence: 0.2 people, 0.2 orgs, 0.3 locations, 0.3 temporal."""
        return min(1.0, (
            0.2 * bool(entities.get("people")) +
            0.2 * bool(entities.get("organizations")) +
            0.3 * bool(entities.get("locations")) +
            0.3 * bool(entities.get("temporal"))
        ))

    def _merge_locations(self, ml_locations, text):
        """Combine NER locations with regex locations, de-duplicated in order."""
        pattern_locations = self.ontology.validate_pattern(text, 'location')
        return list(dict.fromkeys([*ml_locations, *pattern_locations]))

    async def _entities_with_locations(self, text):
        """Run NER once on *text* and fold the regex locations into the result."""
        entities = await self.extract_entities(text)
        entities['locations'] = self._merge_locations(entities['locations'], text)
        return entities

    async def extract_entities(self, text):
        """Return people, organizations, locations and hashtags found in *text*."""
        loop = asyncio.get_running_loop()
        ner_results = await loop.run_in_executor(self.executor, ner_pipeline, text)
        entities = self._bucket_ner_tokens(ner_results)
        entities["hashtags"] = [word for word in text.split() if word.startswith('#')]
        return entities

    async def extract_temporal(self, text):
        """Return the temporal expressions matched by the ontology patterns."""
        return self.ontology.validate_pattern(text, 'temporal')

    async def extract_locations(self, text):
        """Return the locations found by NER and by the ontology patterns."""
        # The coroutine must be awaited before ``.get`` is applied to its result.
        entities = await self.extract_entities(text)
        return self._merge_locations(entities.get('locations', []), text)

    async def analyze_event(self, text):
        """Analyse *text* and return a result dict for rendering.

        Returns:
            On success a dict with ``text``, ``entities``, ``confidence``,
            ``verification_needed`` and ``related_events``. On failure (or
            blank input) a dict with a single ``error`` message.

        Side effects:
            Events at or above ``CONFIDENCE_THRESHOLD`` are inserted into the
            relationship engine's ``events`` table.
        """
        if not text or not text.strip():
            return {"error": "No text provided"}
        try:
            # NER runs once; its locations are merged with the regex matches.
            entities, temporal = await asyncio.gather(
                self._entities_with_locations(text),
                self.extract_temporal(text),
            )
            entities['temporal'] = temporal
            confidence = self._score_entities(entities)

            related_events = self.relationship_engine.find_related_events({
                'text': text,
                'entities': entities,
            })
            if related_events:
                # Stored rows carry only text (column 1), so their entities are
                # re-extracted; comparing against an entity-less dict would
                # always score 0 and wrongly halve the confidence.
                related_entities = await asyncio.gather(
                    *(self._entities_with_locations(event[1]) for event in related_events)
                )
                relationship_confidence = max(
                    self.relationship_engine.calculate_relationship_confidence(
                        {'entities': entities},
                        {'entities': other},
                    )
                    for other in related_entities
                )
                confidence = (confidence + relationship_confidence) / 2

            result = {
                "text": text,
                "entities": entities,
                "confidence": confidence,
                "verification_needed": confidence < self.CONFIDENCE_THRESHOLD,
                "related_events": [
                    {
                        "text": event[1],
                        "timestamp": event[2],
                        "confidence": event[3],
                    }
                    for event in related_events
                ],
            }

            if confidence >= self.CONFIDENCE_THRESHOLD:
                connection = self.relationship_engine.conn
                with connection:  # commit on success, roll back on error
                    connection.execute(
                        'INSERT INTO events (text, timestamp, confidence) VALUES (?, ?, ?)',
                        (text, datetime.now().isoformat(), confidence),
                    )
            return result
        except Exception as e:
            # UI boundary: report the failure to the caller but keep a trace.
            logging.getLogger(__name__).exception("Event analysis failed")
            return {"error": str(e)}
# Single module-level analyzer instance; process_input calls into it.
analyzer = EventAnalyzer()
# Stylesheet passed to the Gradio interface, one rule per entry. The value
# starts and ends with a newline, with a single newline between rules.
css = "\n" + "\n".join((
    ".container { max-width: 1200px; margin: auto; padding: 20px; }",
    ".results { padding: 20px; border: 1px solid #ddd; border-radius: 8px; margin-top: 20px; }",
    ".confidence-high { color: #22c55e; font-weight: bold; }",
    ".confidence-low { color: #f97316; font-weight: bold; }",
    ".entity-section { margin: 15px 0; }",
    ".alert-warning { background: #fff3cd; padding: 10px; border-radius: 5px; margin: 10px 0; }",
    ".alert-success { background: #d1fae5; padding: 10px; border-radius: 5px; margin: 10px 0; }",
    ".related-events { background: #f3f4f6; padding: 15px; border-radius: 5px; margin-top: 15px; }",
)) + "\n"
# (heading, key in the entities dict) for each list shown in the report.
_ENTITY_SECTIONS = (
    ("👤 People Detected", "people"),
    ("🏢 Organizations", "organizations"),
    ("📍 Locations", "locations"),
    ("🕒 Temporal References", "temporal"),
    ("# Hashtags", "hashtags"),
)


def _render_items(items):
    """Return HTML-escaped ``<li>`` elements, or a placeholder when empty."""
    rendered = ''.join(f'<li>{html.escape(str(item))}</li>' for item in items)
    return rendered or '<li>None detected</li>'


def format_results(analysis_result):
    """Render an analysis result dict as an HTML fragment.

    Args:
        analysis_result: Either ``{"error": message}`` or a dict with
            ``confidence`` (0..1), ``verification_needed`` (bool),
            ``entities`` (lists keyed by people / organizations / locations /
            temporal / hashtags) and optionally ``related_events`` (dicts with
            ``text``, ``timestamp`` and ``confidence``).

    Returns:
        An HTML string. All values that originate from the analysed text are
        HTML-escaped, because that text is user input and would otherwise be
        injected into the page verbatim.
    """
    if "error" in analysis_result:
        return f"<div style='color: red'>Error: {html.escape(str(analysis_result['error']))}</div>"

    confidence = analysis_result["confidence"]
    needs_verification = analysis_result["verification_needed"]
    entities = analysis_result["entities"]
    confidence_class = "confidence-high" if confidence >= 0.6 else "confidence-low"

    parts = [
        '<div class="results">',
        '<div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 20px;">',
        '<h3 style="margin: 0;">Analysis Results</h3>',
        # round() rather than int(): float noise such as 0.57 * 100 ==
        # 56.99999999999999 must not be displayed as 56%.
        f'<div>Confidence Score: <span class="{confidence_class}">{round(confidence * 100)}%</span></div>',
        '</div>',
    ]

    if needs_verification:
        parts.append(
            '<div class="alert-warning">'
            '⚠️ <strong>Verification Required:</strong> Low confidence score detected. '
            'Please verify the extracted information.'
            '</div>'
        )

    for heading, key in _ENTITY_SECTIONS:
        parts.append(
            '<div class="entity-section">'
            f'<h4>{heading}</h4>'
            f'<ul>{_render_items(entities.get(key) or [])}</ul>'
            '</div>'
        )

    if not needs_verification:
        parts.append(
            '<div class="alert-success">'
            '✅ <strong>Event Validated:</strong> The extracted information meets confidence thresholds.'
            '</div>'
        )

    related_events = analysis_result.get('related_events')
    if related_events:
        related_items = ''.join(
            f'<li>{html.escape(str(event["text"]))} ({html.escape(str(event["timestamp"]))})'
            f' - Confidence: {round(event["confidence"] * 100)}%</li>'
            for event in related_events
        )
        parts.append(
            '<div class="related-events">'
            '<h4>Related Events</h4>'
            f'<ul>{related_items}</ul>'
            '</div>'
        )

    parts.append('</div>')
    return "\n".join(parts)
async def process_input(text):
    """Analyse *text* with the shared analyzer and return the HTML report."""
    return format_results(await analyzer.analyze_event(text))
# Input widget: a three-line textbox for the event description.
_event_textbox = gr.Textbox(
    label="Event Text",
    placeholder="Enter text to analyze (e.g., 'John from Tech Corp. is attending the meeting in Washington, DC tomorrow at 14:30 #tech')",
    lines=3,
)

# Sample inputs offered in the UI; one single-element row per example.
_example_inputs = [
    ["John from Tech Corp. is attending the meeting in Washington, DC tomorrow at 14:30 #tech"],
    ["Sarah Johnson and Mike Smith from Defense Systems Inc. are conducting training in Norfolk, VA on June 15th #defense #training"],
    ["Team meeting at headquarters with @commander_smith at 0900 #briefing"],
]

# Gradio interface wiring process_input to the textbox and an HTML output.
demo = gr.Interface(
    fn=process_input,
    inputs=[_event_textbox],
    outputs=gr.HTML(),
    title="DoD Event Analysis System",
    description="Analyze text to extract entities, assess confidence, and identify key event information with relationship tracking.",
    css=css,
    theme=gr.themes.Soft(),
    examples=_example_inputs,
)

# Start the web UI only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()
