AbstractPhil committed
Commit 0cba5a9 · verified · 1 Parent(s): b235205

Update app.py
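
The update replaces the hand-rolled encoder/cosine-similarity trace with standard masked-language-model prediction: mask a position, take the MLM logits there, and renormalise them over a restricted candidate set. A minimal sketch of that idea, using bert-base-uncased and ordinary words as stand-ins for the Space's bert-beatrix-2048 checkpoint and its symbolic role tokens:

import torch
import torch.nn.functional as F
from transformers import AutoModelForMaskedLM, AutoTokenizer

# Stand-in model; the Space itself loads bert-beatrix-2048 via create_handler_from_checkpoint.
tok = AutoTokenizer.from_pretrained("bert-base-uncased")
mlm = AutoModelForMaskedLM.from_pretrained("bert-base-uncased").eval()

text = f"She wore a {tok.mask_token} necklace with her dress."
batch = tok(text, return_tensors="pt")
mask_pos = (batch.input_ids[0] == tok.mask_token_id).nonzero(as_tuple=True)[0].item()

candidates = ["silver", "gold", "pearl"]      # stand-ins for the symbolic role tokens
cand_ids = tok.convert_tokens_to_ids(candidates)

with torch.no_grad():
    logits = mlm(**batch).logits[0, mask_pos]  # (V,) logits at the masked position

probs = F.softmax(logits[cand_ids], dim=-1)    # renormalise over the candidate pool only
for cand, p in sorted(zip(candidates, probs.tolist()), key=lambda x: -x[1]):
    print(f"{cand:>8}: {p:.3f}")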

Files changed (1)
  1. app.py +422 -97
app.py CHANGED
@@ -1,4 +1,4 @@
- # app.py – encoder-only demo for bert-beatrix-2048
  # launch: python app.py
  # -----------------------------------------------
  import json, re, sys, math
@@ -40,12 +40,6 @@ with cfg_path.open("w") as f: json.dump(cfg,f,indent=2)
  handler, full_model, tokenizer = create_handler_from_checkpoint(LOCAL_CKPT)
  full_model = full_model.eval().cuda()

- encoder = full_model.bert.encoder
- embeddings = full_model.bert.embeddings
- emb_ln = full_model.bert.emb_ln
- emb_drop = full_model.bert.emb_drop
- mlm_head = full_model.cls  # prediction head
-
  # ------------------------------------------------------------------
  # 2. Symbolic roles -------------------------------------------------
  SYMBOLIC_ROLES = [
@@ -56,112 +50,443 @@ SYMBOLIC_ROLES = [
      "<object_left>", "<object_right>", "<relation>", "<intent>", "<style>",
      "<fabric>", "<jewelry>",
  ]
- if any(tokenizer.convert_tokens_to_ids(t)==tokenizer.unk_token_id
-        for t in SYMBOLIC_ROLES):
-     sys.exit("❌ tokenizer missing special tokens")

- # Quick helpers
  MASK = tokenizer.mask_token


  # ------------------------------------------------------------------
- # 3. Encoder-plus-MLM logic ---------------------------------------
- def cosine(a,b):
-     return torch.nn.functional.cosine_similarity(a,b,dim=-1)

- def pool_accuracy(ids, logits, pool_mask):
      """
-     ids       : (S,) gold token ids
-     logits    : (S,V) MLM logits
-     pool_mask : bool (S,) which tokens belong to the candidate pool
-     returns accuracy over masked positions only (if none, return 0)
      """
-     idx = pool_mask.nonzero(as_tuple=False).flatten()
-     if idx.numel()==0: return 0.0
-     preds = logits.argmax(-1)[idx]
-     gold = ids[idx]
-     return (preds==gold).float().mean().item()


  @spaces.GPU
- def encode_and_trace(text, selected_roles):
      if not selected_roles:
-         selected_roles = SYMBOLIC_ROLES
-
-     # Convert symbolic role tokens to IDs
-     sel_ids = [tokenizer.convert_tokens_to_ids(t) for t in selected_roles]
-     sel_ids_tensor = torch.tensor(sel_ids, device="cuda").unsqueeze(0)  # shape: (1, R)
-
-     # Tokenize user prompt
-     batch = tokenizer(text, return_tensors="pt").to("cuda")
-     input_ids, attention_mask = batch.input_ids, batch.attention_mask
-     S = input_ids.shape[1]
-
-     # === Shared encoder logic with RoPE ===
-     def encode(input_ids, attn_mask):
-         x = embeddings(input_ids)  # (B, S, H)
-         if emb_ln: x = emb_ln(x)
-         if emb_drop: x = emb_drop(x)
-         ext = full_model.bert.get_extended_attention_mask(attn_mask, input_ids.shape)
-         return encoder(x, attention_mask=ext)[0]  # (B, S, H)
-
-     # Encode prompt
-     encoded_prompt = encode(input_ids, attention_mask)[0]  # (S, H)
-
-     # Encode symbolic roles through same pipeline
-     symbolic_attn = torch.ones_like(sel_ids_tensor)
-     encoded_roles = encode(sel_ids_tensor, symbolic_attn)[0]  # (R, H)
-
-     # === Symbolic classification via cosine similarity ===
-     # Compare each token to each symbolic role → shape: (S, R)
-     token_exp = encoded_prompt.unsqueeze(1).expand(-1, encoded_roles.size(0), -1)  # (S, R, H)
-     role_exp = encoded_roles.unsqueeze(0).expand(encoded_prompt.size(0), -1, -1)   # (S, R, H)
-     sim = F.cosine_similarity(token_exp, role_exp, dim=-1)  # → (S, R)
-
-     argmax_ids = sim.argmax(dim=-1)      # (S,)
-     max_scores = sim.max(dim=-1).values  # (S,)
-     predicted_roles = [selected_roles[i] for i in argmax_ids.tolist()]
-     decoded_tokens = tokenizer.convert_ids_to_tokens(input_ids[0])
-
-     # === Build readable trace
-     role_trace = [
-         f"{tok:<15} → {role:<22} score={score:.4f}"
-         for tok, role, score in zip(decoded_tokens, predicted_roles, max_scores.tolist())
-     ]
-
-     # === Final output
-     res_json = {
-         "Prompt": text,
-         "Predicted symbolic roles": predicted_roles,
-         "Max alignment score": f"{max_scores.max().item():.4f}",
-         "Per-token classification": role_trace
-     }
-
-     return json.dumps(res_json, indent=2), f"{max_scores.max().item():.4f}", len(selected_roles)


  # ------------------------------------------------------------------
- # 4. Gradio UI -----------------------------------------------------
  def build_interface():
-     with gr.Blocks(title="🧠 Symbolic Encoder Inspector") as demo:
-         gr.Markdown("## 🧠 Symbolic Encoder Inspector")
-
-         with gr.Row():
-             with gr.Column():
-                 txt = gr.Textbox(label="Prompt", lines=3)
-                 roles = gr.CheckboxGroup(
-                     choices=SYMBOLIC_ROLES, label="Roles",
-                     value=SYMBOLIC_ROLES  # pre-checked
-                 )
-                 btn = gr.Button("Run")
-             with gr.Column():
-                 out_json = gr.Textbox(label="Result JSON")
-                 out_max = gr.Textbox(label="Max cos")
-                 out_cnt = gr.Textbox(label="# roles")
-
-         btn.click(encode_and_trace, [txt,roles], [out_json,out_max,out_cnt])
      return demo


- if __name__=="__main__":
-     build_interface().launch()

+ # app.py – FIXED encoder-only demo for bert-beatrix-2048
  # launch: python app.py
  # -----------------------------------------------
  import json, re, sys, math

  handler, full_model, tokenizer = create_handler_from_checkpoint(LOCAL_CKPT)
  full_model = full_model.eval().cuda()

  # ------------------------------------------------------------------
  # 2. Symbolic roles -------------------------------------------------
  SYMBOLIC_ROLES = [
      "<object_left>", "<object_right>", "<relation>", "<intent>", "<style>",
      "<fabric>", "<jewelry>",
  ]

+ # Verify all symbolic tokens exist in tokenizer
+ missing_tokens = []
+ symbolic_token_ids = {}
+ for token in SYMBOLIC_ROLES:
+     token_id = tokenizer.convert_tokens_to_ids(token)
+     if token_id == tokenizer.unk_token_id:
+         missing_tokens.append(token)
+     else:
+         symbolic_token_ids[token] = token_id
+
+ if missing_tokens:
+     print(f"⚠️ Missing symbolic tokens: {missing_tokens}")
+     print("Available tokens will be used for classification")
+
  MASK = tokenizer.mask_token
+ MASK_ID = tokenizer.mask_token_id
+
+ print(f"✅ Loaded {len(symbolic_token_ids)} symbolic tokens")


  # ------------------------------------------------------------------
+ # 3. FIXED MLM-based symbolic classification ----------------------

+ def get_symbolic_predictions(input_ids, attention_mask, mask_positions, selected_roles):
      """
+     Proper MLM-based prediction for symbolic tokens at masked positions
+
+     Args:
+         input_ids: (B, S) token IDs with [MASK] at positions to classify
+         attention_mask: (B, S) attention mask
+         mask_positions: list of positions that are masked
+         selected_roles: list of symbolic role tokens to consider
+
+     Returns:
+         predictions and probabilities for each masked position
      """
+     # Get MLM logits from the model (this is what it was trained for)
+     with torch.no_grad():
+         outputs = full_model(input_ids=input_ids, attention_mask=attention_mask)
+         logits = outputs.logits  # (B, S, V)
+
+     # Filter to only selected symbolic role token IDs
+     selected_token_ids = [symbolic_token_ids[role] for role in selected_roles
+                           if role in symbolic_token_ids]
+
+     if not selected_token_ids:
+         return [], []
+
+     results = []
+
+     for pos in mask_positions:
+         # Get logits for this masked position
+         pos_logits = logits[0, pos]  # (V,)
+
+         # Extract logits for symbolic tokens only
+         symbolic_logits = pos_logits[selected_token_ids]  # (num_symbolic,)
+
+         # Apply softmax to get probabilities
+         symbolic_probs = F.softmax(symbolic_logits, dim=-1)
+
+         # Get top predictions
+         top_indices = torch.argsort(symbolic_probs, descending=True)
+
+         pos_results = []
+         for i in top_indices:
+             token_idx = selected_token_ids[i]
+             token = tokenizer.convert_ids_to_tokens([token_idx])[0]
+             prob = symbolic_probs[i].item()
+             pos_results.append({
+                 "token": token,
+                 "probability": prob,
+                 "token_id": token_idx
+             })
+
+         results.append({
+             "position": pos,
+             "predictions": pos_results
+         })
+
+     return results
+
+
+ def create_strategic_masks(text, tokenizer, strategy="content_words"):
+     """
+     Create strategic mask positions based on different strategies
+
+     Args:
+         text: input text
+         tokenizer: tokenizer
+         strategy: masking strategy
+
+     Returns:
+         input_ids with masks, attention_mask, original_tokens, mask_positions
+     """
+     # Tokenize original text
+     batch = tokenizer(text, return_tensors="pt", add_special_tokens=True)
+     input_ids = batch.input_ids[0]            # (S,)
+     attention_mask = batch.attention_mask[0]  # (S,)
+
+     # Get original tokens for reference
+     original_tokens = tokenizer.convert_ids_to_tokens(input_ids)
+
+     # Find positions to mask based on strategy
+     mask_positions = []
+
+     if strategy == "content_words":
+         # Mask content words (avoid special tokens, punctuation, common words)
+         skip_tokens = {
+             tokenizer.cls_token, tokenizer.sep_token, tokenizer.pad_token,
+             ".", ",", "!", "?", ":", ";", "'", '"', "-", "(", ")", "[", "]",
+             "the", "a", "an", "and", "or", "but", "in", "on", "at", "to",
+             "for", "of", "with", "by", "is", "are", "was", "were", "be", "been"
+         }
+
+         for i, token in enumerate(original_tokens):
+             if (token not in skip_tokens and
+                 not token.startswith("##") and  # avoid subword tokens
+                 len(token) > 2 and
+                 token.isalpha()):
+                 mask_positions.append(i)
+
+     elif strategy == "every_nth":
+         # Mask every 3rd token (avoiding special tokens)
+         for i in range(1, len(original_tokens) - 1, 3):  # skip CLS and SEP
+             mask_positions.append(i)
+
+     elif strategy == "random":
+         # Randomly mask 15% of tokens
+         import random
+         candidates = list(range(1, len(original_tokens) - 1))  # skip CLS and SEP
+         num_to_mask = max(1, int(len(candidates) * 0.15))
+         mask_positions = random.sample(candidates, min(num_to_mask, len(candidates)))
+         mask_positions.sort()
+
+     elif strategy == "manual":
+         # For manual specification - return original for now
+         # Users can specify positions in the UI
+         pass
+
+     # Limit to reasonable number of masks
+     mask_positions = mask_positions[:10]  # Max 10 masks for UI clarity
+
+     # Create masked input
+     masked_input_ids = input_ids.clone()
+     for pos in mask_positions:
+         masked_input_ids[pos] = MASK_ID
+
+     return masked_input_ids.unsqueeze(0), attention_mask.unsqueeze(0), original_tokens, mask_positions


  @spaces.GPU
+ def symbolic_classification_analysis(text, selected_roles, masking_strategy="content_words", num_predictions=5):
+     """
+     Perform symbolic classification analysis using MLM prediction
+     """
      if not selected_roles:
+         selected_roles = list(symbolic_token_ids.keys())
+
+     if not text.strip():
+         return "Please enter some text to analyze.", "", 0
+
+     try:
+         # Create strategically masked input
+         masked_input_ids, attention_mask, original_tokens, mask_positions = create_strategic_masks(
+             text, tokenizer, masking_strategy
+         )
+
+         if not mask_positions:
+             return "No suitable positions found for masking. Try different text or strategy.", "", 0
+
+         # Move to device
+         masked_input_ids = masked_input_ids.to("cuda")
+         attention_mask = attention_mask.to("cuda")
+
+         # Get symbolic predictions
+         predictions = get_symbolic_predictions(
+             masked_input_ids, attention_mask, mask_positions, selected_roles
+         )
+
+         # Build detailed analysis
+         analysis = {
+             "input_text": text,
+             "masking_strategy": masking_strategy,
+             "total_tokens": len(original_tokens),
+             "masked_positions": len(mask_positions),
+             "available_symbolic_roles": len(selected_roles),
+             "analysis_results": []
+         }
+
+         for pred_data in predictions:
+             pos = pred_data["position"]
+             original_token = original_tokens[pos]
+
+             # Show top N predictions
+             top_preds = pred_data["predictions"][:num_predictions]
+
+             position_analysis = {
+                 "position": pos,
+                 "original_token": original_token,
+                 "top_predictions": []
+             }
+
+             for pred in top_preds:
+                 position_analysis["top_predictions"].append({
+                     "symbolic_role": pred["token"],
+                     "probability": f"{pred['probability']:.4f}",
+                     "confidence": "High" if pred["probability"] > 0.3 else "Medium" if pred["probability"] > 0.1 else "Low"
+                 })
+
+             analysis["analysis_results"].append(position_analysis)
+
+         # Create readable summary
+         summary_lines = []
+         max_prob = 0
+         best_prediction = None
+
+         for result in analysis["analysis_results"]:
+             pos = result["position"]
+             orig = result["original_token"]
+             top_pred = result["top_predictions"][0] if result["top_predictions"] else None
+
+             if top_pred:
+                 prob = float(top_pred["probability"])
+                 role = top_pred["symbolic_role"]
+                 summary_lines.append(
+                     f"Position {pos:2d}: '{orig}' → {role} ({top_pred['probability']}, {top_pred['confidence']})"
+                 )
+
+                 if prob > max_prob:
+                     max_prob = prob
+                     best_prediction = f"{role} (confidence: {top_pred['confidence']})"
+
+         summary = "\n".join(summary_lines)
+         if best_prediction:
+             summary = f"🎯 Best Match: {best_prediction}\n\n" + summary
+
+         return json.dumps(analysis, indent=2), summary, len(mask_positions)
+
+     except Exception as e:
+         error_msg = f"Error during analysis: {str(e)}"
+         print(error_msg)
+         return error_msg, "", 0
+
+
+ def create_manual_mask_analysis(text, mask_positions_str, selected_roles):
+     """
+     Allow manual specification of mask positions
+     """
+     try:
+         # Parse mask positions
+         mask_positions = [int(x.strip()) for x in mask_positions_str.split(",") if x.strip().isdigit()]
+
+         if not mask_positions:
+             return "Please specify valid mask positions (comma-separated numbers)", "", 0
+
+         # Tokenize text
+         batch = tokenizer(text, return_tensors="pt", add_special_tokens=True)
+         input_ids = batch.input_ids[0]
+         attention_mask = batch.attention_mask[0]
+         original_tokens = tokenizer.convert_ids_to_tokens(input_ids)
+
+         # Validate positions
+         valid_positions = [pos for pos in mask_positions if 0 <= pos < len(input_ids)]
+         if not valid_positions:
+             return f"Invalid positions. Text has {len(input_ids)} tokens (0-{len(input_ids)-1})", "", 0
+
+         # Create masked input
+         masked_input_ids = input_ids.clone()
+         for pos in valid_positions:
+             masked_input_ids[pos] = MASK_ID
+
+         # Run analysis
+         masked_input_ids = masked_input_ids.unsqueeze(0).to("cuda")
+         attention_mask = attention_mask.unsqueeze(0).to("cuda")
+
+         predictions = get_symbolic_predictions(
+             masked_input_ids, attention_mask, valid_positions, selected_roles
+         )
+
+         # Format results
+         results = []
+         for pred_data in predictions:
+             pos = pred_data["position"]
+             original = original_tokens[pos]
+             top_pred = pred_data["predictions"][0] if pred_data["predictions"] else None
+
+             if top_pred:
+                 results.append(
+                     f"Pos {pos}: '{original}' → {top_pred['token']} ({top_pred['probability']:.4f})"
+                 )
+
+         return "\n".join(results), f"Analyzed {len(valid_positions)} positions", len(valid_positions)
+
+     except Exception as e:
+         return f"Error: {str(e)}", "", 0


  # ------------------------------------------------------------------
+ # 4. Gradio UI -----------------------------------------------------
  def build_interface():
+     with gr.Blocks(title="🧠 MLM Symbolic Classifier", theme=gr.themes.Soft()) as demo:
+         gr.Markdown("# 🧠 MLM-Based Symbolic Classification")
+         gr.Markdown("Analyze text using masked language modeling to predict symbolic roles at specific positions.")
+
+         with gr.Tab("Automatic Analysis"):
+             with gr.Row():
+                 with gr.Column():
+                     txt_input = gr.Textbox(
+                         label="Input Text",
+                         lines=4,
+                         placeholder="Enter text to analyze for symbolic role classification..."
+                     )
+
+                     with gr.Row():
+                         masking_strategy = gr.Dropdown(
+                             choices=["content_words", "every_nth", "random"],
+                             value="content_words",
+                             label="Masking Strategy"
+                         )
+                         num_predictions = gr.Slider(
+                             minimum=1, maximum=10, value=5, step=1,
+                             label="Top Predictions per Position"
+                         )
+
+                     roles_selection = gr.CheckboxGroup(
+                         choices=list(symbolic_token_ids.keys()),
+                         value=list(symbolic_token_ids.keys()),
+                         label="Symbolic Roles to Consider",
+                         max_choices=len(symbolic_token_ids)
+                     )
+
+                     analyze_btn = gr.Button("🔍 Analyze", variant="primary")
+
+                 with gr.Column():
+                     summary_output = gr.Textbox(
+                         label="Analysis Summary",
+                         lines=10,
+                         max_lines=15
+                     )
+
+                     with gr.Row():
+                         positions_analyzed = gr.Number(label="Positions Analyzed", precision=0)
+                         max_confidence = gr.Textbox(label="Best Prediction", max_lines=1)
+
+                     detailed_output = gr.JSON(label="Detailed Results")
+
+         with gr.Tab("Manual Masking"):
+             with gr.Row():
+                 with gr.Column():
+                     manual_text = gr.Textbox(
+                         label="Input Text",
+                         lines=3,
+                         placeholder="Enter text for manual analysis..."
+                     )
+
+                     mask_positions_input = gr.Textbox(
+                         label="Mask Positions (comma-separated)",
+                         placeholder="e.g., 2,5,8,12",
+                         info="Specify token positions to mask (0-based indexing)"
+                     )
+
+                     manual_roles = gr.CheckboxGroup(
+                         choices=list(symbolic_token_ids.keys()),
+                         value=list(symbolic_token_ids.keys())[:10],  # Default to first 10
+                         label="Symbolic Roles"
+                     )
+
+                     manual_analyze_btn = gr.Button("🎯 Analyze Specific Positions")
+
+                 with gr.Column():
+                     manual_results = gr.Textbox(
+                         label="Manual Analysis Results",
+                         lines=8
+                     )
+
+                     manual_summary = gr.Textbox(label="Summary")
+                     manual_count = gr.Number(label="Positions", precision=0)
+
+         with gr.Tab("Token Inspector"):
+             with gr.Row():
+                 with gr.Column():
+                     inspect_text = gr.Textbox(
+                         label="Text to Inspect",
+                         lines=2,
+                         placeholder="Enter text to see tokenization..."
+                     )
+                     inspect_btn = gr.Button("🔍 Inspect Tokens")
+
+                 with gr.Column():
+                     token_breakdown = gr.Textbox(
+                         label="Token Breakdown",
+                         lines=8,
+                         info="Shows how text is tokenized with position indices"
+                     )
+
+         # Event handlers
+         analyze_btn.click(
+             symbolic_classification_analysis,
+             inputs=[txt_input, roles_selection, masking_strategy, num_predictions],
+             outputs=[detailed_output, summary_output, positions_analyzed]
+         )
+
+         manual_analyze_btn.click(
+             create_manual_mask_analysis,
+             inputs=[manual_text, mask_positions_input, manual_roles],
+             outputs=[manual_results, manual_summary, manual_count]
+         )
+
+         def inspect_tokens(text):
+             if not text.strip():
+                 return "Enter text to inspect tokenization"
+
+             tokens = tokenizer.tokenize(text, add_special_tokens=True)
+             result_lines = []
+
+             for i, token in enumerate(tokens):
+                 result_lines.append(f"{i:2d}: '{token}'")
+
+             return "\n".join(result_lines)
+
+         inspect_btn.click(
+             inspect_tokens,
+             inputs=[inspect_text],
+             outputs=[token_breakdown]
+         )
+
      return demo


+ if __name__ == "__main__":
+     print("🚀 Starting MLM Symbolic Classifier...")
+     print(f"✅ Model loaded with {len(symbolic_token_ids)} symbolic tokens")
+     print(f"🎯 Available symbolic roles: {list(symbolic_token_ids.keys())[:5]}...")
+
+     build_interface().launch(
+         server_name="0.0.0.0",
+         server_port=7860,
+         share=True
+ )