dwb2023 committed on
Commit
4088373
·
verified ·
1 Parent(s): c0a677b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +216 -31
app.py CHANGED
@@ -1,16 +1,117 @@
1
  import gradio as gr
2
  from transformers import pipeline
3
  import json
 
 
 
 
 
4
 
5
- # Initialize NLP pipeline
6
  ner_pipeline = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")
 
7
 
8
- def analyze_event(text):
9
- try:
10
- # Process text with NER pipeline
11
- ner_results = ner_pipeline(text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
- # Group entities
14
  entities = {
15
  "people": [],
16
  "organizations": [],
@@ -25,33 +126,99 @@ def analyze_event(text):
25
  entities["organizations"].append(item["word"])
26
  elif item["entity"].endswith("LOC"):
27
  entities["locations"].append(item["word"])
28
-
29
- # Calculate confidence
30
- confidence = min(1.0, (
31
- 0.2 * bool(entities["people"]) +
32
- 0.2 * bool(entities["organizations"]) +
33
- 0.3 * bool(entities["locations"]) +
34
- 0.3 * bool(entities["hashtags"])
35
- ))
36
-
37
- return {
38
- "text": text,
39
- "entities": entities,
40
- "confidence": confidence,
41
- "verification_needed": confidence < 0.6
42
- }
43
- except Exception as e:
44
- return {"error": str(e)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
- # Create Gradio interface with custom CSS and HTML
47
  css = """
48
- .container { max-width: 800px; margin: auto; padding: 20px; }
49
  .results { padding: 20px; border: 1px solid #ddd; border-radius: 8px; margin-top: 20px; }
50
  .confidence-high { color: #22c55e; font-weight: bold; }
51
  .confidence-low { color: #f97316; font-weight: bold; }
52
  .entity-section { margin: 15px 0; }
53
  .alert-warning { background: #fff3cd; padding: 10px; border-radius: 5px; margin: 10px 0; }
54
  .alert-success { background: #d1fae5; padding: 10px; border-radius: 5px; margin: 10px 0; }
 
55
  """
56
 
57
  def format_results(analysis_result):
@@ -90,6 +257,11 @@ def format_results(analysis_result):
90
  <ul>{''.join(f'<li>{loc}</li>' for loc in analysis_result['entities']['locations']) or '<li>None detected</li>'}</ul>
91
  </div>
92
 
 
 
 
 
 
93
  <div class="entity-section">
94
  <h4># Hashtags</h4>
95
  <ul>{''.join(f'<li>{tag}</li>' for tag in analysis_result['entities']['hashtags']) or '<li>None detected</li>'}</ul>
@@ -100,28 +272,41 @@ def format_results(analysis_result):
100
  ✅ <strong>Event Validated:</strong> The extracted information meets confidence thresholds.
101
  </div>
102
  ''' if not analysis_result["verification_needed"] else ''}
 
 
 
 
 
 
 
 
 
103
  </div>
104
  """
105
  return html
106
 
 
 
 
 
107
  demo = gr.Interface(
108
- fn=lambda text: format_results(analyze_event(text)),
109
  inputs=[
110
  gr.Textbox(
111
  label="Event Text",
112
- placeholder="Enter text to analyze (e.g., 'John from Tech Corp. is attending the meeting in Washington, DC #tech')",
113
  lines=3
114
  )
115
  ],
116
  outputs=gr.HTML(),
117
  title="DoD Event Analysis System",
118
- description="Analyze text to extract entities, assess confidence, and identify key event information.",
119
  css=css,
120
  theme=gr.themes.Soft(),
121
  examples=[
122
- ["John from Tech Corp. is attending the meeting in Washington, DC tomorrow #tech"],
123
- ["Sarah Johnson and Mike Smith from Defense Systems Inc. are conducting training in Norfolk, VA #defense #training"],
124
- ["Team meeting at headquarters with @commander_smith #briefing"]
125
  ]
126
  )
127
 
 
1
  import gradio as gr
2
  from transformers import pipeline
3
  import json
4
+ from datetime import datetime
5
+ import sqlite3
6
+ import asyncio
7
+ from concurrent.futures import ThreadPoolExecutor
8
+ import re
9
 
10
# Initialize NLP pipelines once at import time so every request shares them.
ner_pipeline = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")
# NOTE(review): `classifier` is not referenced by any code visible in this
# view - confirm it is still needed before keeping it at module import.
classifier = pipeline("zero-shot-classification")
13
 
14
class OntologyRegistry:
    """Registry of regex patterns and NER tag labels used during extraction.

    Attributes are plain lists/dicts so callers can extend them at runtime;
    ``validate_pattern`` looks pattern lists up by name.
    """

    def __init__(self):
        # Clock times, month-name dates with a year, "tomorrow", "in N days".
        self.temporal_patterns = [
            r'\b\d{1,2}:\d{2}\s*(?:AM|PM|am|pm)?\b',
            r'\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]* \d{1,2}(?:st|nd|rd|th)?,? \d{4}\b',
            r'\btomorrow\b',
            r'\bin \d+ (?:days?|weeks?|months?)\b',
        ]

        # The first pattern captures the place name (group 1) that follows a
        # preposition; the others match the location phrase itself.
        self.location_patterns = [
            r'\b(?:in|at|from|to) ([A-Z][a-zA-Z]+(,? [A-Z]{2})?)\b',
            r'\b[A-Z][a-zA-Z]+ Base\b',
            r'\bHeadquarters\b',
            r'\bHQ\b',
        ]

        # NER tag suffix -> readable entity kind.
        self.entity_types = {
            'PER': 'person',
            'ORG': 'organization',
            'LOC': 'location',
            'MISC': 'miscellaneous',
        }

    def validate_pattern(self, text, pattern_type):
        """Return every substring of *text* matched by a pattern family.

        Args:
            text: The text to scan.
            pattern_type: Family name; the patterns are read from the
                attribute ``<pattern_type>_patterns`` (e.g. ``'temporal'``).

        Returns:
            Matched strings, grouped by pattern in declaration order. When a
            pattern defines a capture group, the first group is returned so
            that context words such as a leading preposition are dropped.
            An unknown ``pattern_type`` yields an empty list.
        """
        patterns = getattr(self, f"{pattern_type}_patterns", [])
        found = []
        for pattern in patterns:
            for match in re.finditer(pattern, text):
                captured = match.group(1) if match.re.groups else None
                # Fall back to the full match if the group did not take part.
                found.append(captured or match.group())
        return found
43
+
44
class RelationshipEngine:
    """SQLite-backed store of analysed events and the links between them."""

    # Weight contributed by a shared entity in each category.
    _OVERLAP_WEIGHTS = (
        ('people', 0.3),
        ('organizations', 0.3),
        ('locations', 0.4),
    )

    def __init__(self, db_path=':memory:'):
        """Open (or create) the database at *db_path* and ensure the schema.

        ``check_same_thread=False`` lets the connection be used from a thread
        other than the one that created it; by default ``sqlite3`` raises
        ``ProgrammingError`` in that situation.
        NOTE(review): concurrent writers still need external serialisation.
        """
        self.conn = sqlite3.connect(db_path, check_same_thread=False)
        self.setup_database()

    def setup_database(self):
        """Create the ``events`` and ``relationships`` tables if missing."""
        self.conn.execute('''
            CREATE TABLE IF NOT EXISTS events (
                id INTEGER PRIMARY KEY,
                text TEXT,
                timestamp DATETIME,
                confidence REAL
            )
        ''')

        self.conn.execute('''
            CREATE TABLE IF NOT EXISTS relationships (
                id INTEGER PRIMARY KEY,
                source_event_id INTEGER,
                target_event_id INTEGER,
                relationship_type TEXT,
                confidence REAL,
                FOREIGN KEY (source_event_id) REFERENCES events(id),
                FOREIGN KEY (target_event_id) REFERENCES events(id)
            )
        ''')
        self.conn.commit()

    def find_related_events(self, event_data):
        """Return up to five stored events whose text contains the new text.

        Args:
            event_data: Mapping with a ``'text'`` key.

        Returns:
            A list of ``events`` rows ``(id, text, timestamp, confidence)``,
            newest first. Empty or missing text returns ``[]`` instead of
            matching every stored row.
        """
        text = event_data.get('text', '')
        if not text:
            return []

        # Escape LIKE metacharacters so '%' and '_' typed by the user are
        # matched literally rather than acting as wildcards.
        escaped = (
            text.replace('\\', '\\\\')
            .replace('%', '\\%')
            .replace('_', '\\_')
        )
        cursor = self.conn.execute('''
            SELECT * FROM events
            WHERE text LIKE ? ESCAPE '\\'
            ORDER BY timestamp DESC
            LIMIT 5
        ''', (f"%{escaped}%",))
        return cursor.fetchall()

    def calculate_relationship_confidence(self, event1, event2):
        """Score two events by the entity categories they share.

        Each event is a mapping with an optional ``'entities'`` dict of lists.
        A category adds its weight when the two lists have at least one
        element in common; the total is capped at 1.0.
        """
        entities1 = event1.get('entities', {})
        entities2 = event2.get('entities', {})

        score = 0.0
        for category, weight in self._OVERLAP_WEIGHTS:
            if set(entities1.get(category, [])) & set(entities2.get(category, [])):
                score += weight
        return min(score, 1.0)
99
+
100
+ class EventAnalyzer:
101
def __init__(self):
    """Create the pattern registry, the relationship store and a worker pool."""
    self.ontology = OntologyRegistry()
    self.relationship_engine = RelationshipEngine()
    # Pool handed to run_in_executor by extract_entities for the NER call.
    self.executor = ThreadPoolExecutor(max_workers=3)
105
+
106
+ async def extract_entities(self, text):
107
+ def _extract():
108
+ return ner_pipeline(text)
109
+
110
+ # Run NER in thread pool
111
+ ner_results = await asyncio.get_event_loop().run_in_executor(
112
+ self.executor, _extract
113
+ )
114
 
 
115
  entities = {
116
  "people": [],
117
  "organizations": [],
 
126
  entities["organizations"].append(item["word"])
127
  elif item["entity"].endswith("LOC"):
128
  entities["locations"].append(item["word"])
129
+
130
+ return entities
131
+
132
async def extract_temporal(self, text):
    """Return the temporal expressions the registry's patterns find in *text*."""
    return self.ontology.validate_pattern(text, 'temporal')
134
+
135
async def extract_locations(self, text):
    """Return locations found in *text* by NER and by the location patterns.

    NER results come first, followed by pattern matches; duplicates are
    removed while keeping first-seen order so the output is stable.
    """
    # Await the coroutine first: `await coro.get(...)` would call `.get` on
    # the coroutine object itself and raise AttributeError.
    entities = await self.extract_entities(text)
    ml_locations = list(entities.get('locations', []))
    pattern_locations = self.ontology.validate_pattern(text, 'location')
    return list(dict.fromkeys(ml_locations + pattern_locations))
139
+
140
async def analyze_event(self, text):
    """Extract entities from *text*, score it, and link it to stored events.

    Returns:
        On success, a dict with keys ``text``, ``entities`` (including the
        merged ``locations`` and the ``temporal`` matches), ``confidence``,
        ``verification_needed`` (confidence below 0.6) and
        ``related_events``. On any failure, ``{"error": <message>}``.

    Side effects:
        Inserts the event into the ``events`` table when confidence >= 0.6.
    """

    async def _collect(source_text):
        # One NER pass per text, then merge in the pattern-based locations
        # and temporal matches (NER results first, duplicates dropped).
        found = dict(await self.extract_entities(source_text))
        pattern_locations = self.ontology.validate_pattern(source_text, 'location')
        found['locations'] = list(
            dict.fromkeys(list(found.get('locations', [])) + pattern_locations)
        )
        found['temporal'] = await self.extract_temporal(source_text)
        return found

    try:
        entities = await _collect(text)
        temporal = entities['temporal']

        # Calculate initial confidence from which kinds of evidence exist.
        confidence = min(1.0, (
            0.2 * bool(entities["people"]) +
            0.2 * bool(entities["organizations"]) +
            0.3 * bool(entities["locations"]) +
            0.3 * bool(temporal)
        ))

        # Find related events
        related_events = self.relationship_engine.find_related_events({
            'text': text,
            'entities': entities
        })

        # Adjust confidence based on relationships. Stored rows carry only
        # text, so their entities are re-extracted here; comparing against a
        # dict without 'entities' would always score 0.0 and merely halve
        # the confidence of every repeated event.
        if related_events:
            related_entities = await asyncio.gather(
                *(_collect(event[1]) for event in related_events)  # event[1] is the text field
            )
            relationship_confidence = max(
                self.relationship_engine.calculate_relationship_confidence(
                    {'entities': entities},
                    {'text': event[1], 'entities': other}
                )
                for event, other in zip(related_events, related_entities)
            )
            confidence = (confidence + relationship_confidence) / 2

        result = {
            "text": text,
            "entities": entities,
            "confidence": confidence,
            "verification_needed": confidence < 0.6,
            "related_events": [
                {
                    "text": event[1],
                    "timestamp": event[2],
                    "confidence": event[3]
                }
                for event in related_events
            ]
        }

        # Store event if confidence is sufficient
        if confidence >= 0.6:
            self.relationship_engine.conn.execute(
                'INSERT INTO events (text, timestamp, confidence) VALUES (?, ?, ?)',
                (text, datetime.now().isoformat(), confidence)
            )
            self.relationship_engine.conn.commit()

        return result

    except Exception as e:
        # Top-level boundary for the UI: report the failure as data.
        return {"error": str(e)}
208
+
209
# Initialize the single analyzer instance shared by all requests.
analyzer = EventAnalyzer()

# Custom CSS for the Gradio UI (classes referenced by the rendered HTML).
css = """
.container { max-width: 1200px; margin: auto; padding: 20px; }
.results { padding: 20px; border: 1px solid #ddd; border-radius: 8px; margin-top: 20px; }
.confidence-high { color: #22c55e; font-weight: bold; }
.confidence-low { color: #f97316; font-weight: bold; }
.entity-section { margin: 15px 0; }
.alert-warning { background: #fff3cd; padding: 10px; border-radius: 5px; margin: 10px 0; }
.alert-success { background: #d1fae5; padding: 10px; border-radius: 5px; margin: 10px 0; }
.related-events { background: #f3f4f6; padding: 15px; border-radius: 5px; margin-top: 15px; }
"""
223
 
224
  def format_results(analysis_result):
 
257
  <ul>{''.join(f'<li>{loc}</li>' for loc in analysis_result['entities']['locations']) or '<li>None detected</li>'}</ul>
258
  </div>
259
 
260
+ <div class="entity-section">
261
+ <h4>🕒 Temporal References</h4>
262
+ <ul>{''.join(f'<li>{time}</li>' for time in analysis_result['entities']['temporal']) or '<li>None detected</li>'}</ul>
263
+ </div>
264
+
265
  <div class="entity-section">
266
  <h4># Hashtags</h4>
267
  <ul>{''.join(f'<li>{tag}</li>' for tag in analysis_result['entities']['hashtags']) or '<li>None detected</li>'}</ul>
 
272
  ✅ <strong>Event Validated:</strong> The extracted information meets confidence thresholds.
273
  </div>
274
  ''' if not analysis_result["verification_needed"] else ''}
275
+
276
+ {f'''
277
+ <div class="related-events">
278
+ <h4>Related Events</h4>
279
+ <ul>
280
+ {''.join(f'<li>{event["text"]} ({event["timestamp"]}) - Confidence: {int(event["confidence"] * 100)}%</li>' for event in analysis_result['related_events'])}
281
+ </ul>
282
+ </div>
283
+ ''' if analysis_result.get('related_events') else ''}
284
  </div>
285
  """
286
  return html
287
 
288
async def process_input(text):
    """Analyse *text* with the shared analyzer and return the result as HTML."""
    result = await analyzer.analyze_event(text)
    return format_results(result)
291
+
292
# Gradio interface: one text box in, rendered HTML out.
demo = gr.Interface(
    fn=process_input,
    inputs=[
        gr.Textbox(
            label="Event Text",
            placeholder="Enter text to analyze (e.g., 'John from Tech Corp. is attending the meeting in Washington, DC tomorrow at 14:30 #tech')",
            lines=3
        )
    ],
    outputs=gr.HTML(),
    title="DoD Event Analysis System",
    description="Analyze text to extract entities, assess confidence, and identify key event information with relationship tracking.",
    css=css,
    theme=gr.themes.Soft(),
    examples=[
        ["John from Tech Corp. is attending the meeting in Washington, DC tomorrow at 14:30 #tech"],
        ["Sarah Johnson and Mike Smith from Defense Systems Inc. are conducting training in Norfolk, VA on June 15th #defense #training"],
        ["Team meeting at headquarters with @commander_smith at 0900 #briefing"]
    ]
)
312