# app.py
from transformers import AutoTokenizer, AutoModelForMaskedLM
import torch
import gradio as gr
import re
from pathlib import Path
import spaces


@spaces.GPU
def mask_and_predict(
    text: str, selected_roles: list[str]
) -> tuple[list[dict[str, object]], str]:
    """Mask the selected roles in *text* and score the model's predictions.

    Every literal occurrence of each entry in *selected_roles* is replaced by
    the tokenizer's mask token, the masked text is run through the masked-LM,
    and the arg-max prediction at each mask position is compared with the
    token found at the same index of the original (unmasked) tokenization.

    Relies on module-level ``tokenizer`` and ``model`` objects that are not
    defined in this block (presumably created with ``AutoTokenizer`` /
    ``AutoModelForMaskedLM`` -- confirm against the rest of the file).

    Args:
        text: Input text containing the role markers.
        selected_roles: Role strings to mask. Empty strings (and a ``None``
            list) are ignored.

    Returns:
        A ``(rows, summary)`` pair. ``rows`` holds one dict per mask position
        with the keys ``Position``, ``Masked Token``, ``Predicted``,
        ``Original`` and ``Match``; ``summary`` is ``"Accuracy: <pct>"``,
        reported as ``0.0%`` when nothing was masked.
    """
    mask_token = tokenizer.mask_token or "[MASK]"

    masked_text = text
    for role in selected_roles or []:
        if not role:
            # An empty pattern matches between every character, which would
            # flood the text with mask tokens instead of masking a role.
            continue
        # Plain literal replacement: same matches as re.sub(re.escape(role)),
        # but the mask token is never interpreted as a regex replacement
        # template (backslash escapes, group references).
        masked_text = masked_text.replace(role, mask_token)

    # The original tokens are only needed as strings, so they never have to
    # be moved to the GPU.
    original_tokens = tokenizer.convert_ids_to_tokens(tokenizer.encode(text))

    masked_ids = tokenizer.encode(masked_text, return_tensors="pt").cuda()
    with torch.no_grad():
        logits = model(input_ids=masked_ids).logits[0]

    predicted_tokens = tokenizer.convert_ids_to_tokens(
        logits.argmax(dim=-1).tolist()
    )
    masked_tokens = tokenizer.convert_ids_to_tokens(masked_ids[0].tolist())

    # NOTE(review): the comparison is purely index-based, so it is only
    # meaningful when each masked role is a single token in the original
    # tokenization; a multi-token role shifts every later position. Confirm
    # that the roles are registered as single tokens.
    results = []
    for position, token in enumerate(masked_tokens):
        if token != mask_token:
            continue
        has_original = position < len(original_tokens)
        original = original_tokens[position] if has_original else ""
        predicted = predicted_tokens[position]
        results.append({
            "Position": position,
            "Masked Token": mask_token,
            "Predicted": predicted,
            "Original": original,
            # Without an original token at this index there is nothing to
            # match against (previously this raised IndexError).
            "Match": "✅" if has_original and predicted == original else "❌",
        })

    correct = sum(1 for row in results if row["Match"] == "✅")
    accuracy = correct / max(len(results), 1)
    return results, f"Accuracy: {accuracy:.1%}"


# NOTE(review): the `symbolic_roles` list literal is cut off in the reviewed
# view of this file (it ends inside an unterminated string). Its visible part
# is kept verbatim below; restore the complete list from the full source.
# symbolic_roles = [ "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "