# Hugging Face Spaces banner (page-scrape residue): "Spaces: Running on Zero"
# app.py | |
from transformers import AutoTokenizer, AutoModelForMaskedLM | |
import torch | |
import gradio as gr | |
import re | |
from pathlib import Path | |
import spaces | |
@spaces.GPU  # FIX: required on ZeroGPU Spaces — without it the .cuda() calls below fail (no GPU is attached outside a decorated call)
def mask_and_predict(text: str, selected_roles: list[str]):
    """Mask each selected symbolic role in *text*, run masked-LM inference,
    and report the model's prediction at every masked position.

    Parameters
    ----------
    text : str
        Input caption, possibly containing symbolic role tags (e.g. "<subject>").
    selected_roles : list[str]
        Role strings to replace with the tokenizer's mask token before inference.

    Returns
    -------
    tuple[list[list], str]
        Rows [Position, Masked Token, Predicted, Original, Match] for the
        results table (matching the Dataframe headers in the UI), and a
        formatted accuracy string.
    """
    MASK_TOKEN = tokenizer.mask_token or "[MASK]"

    # Replace every occurrence of each selected role with the mask token.
    masked_text = text
    for role in selected_roles:
        masked_text = re.sub(re.escape(role), MASK_TOKEN, masked_text)

    # Encode once, after all substitutions (the per-iteration re-encode in the
    # original was redundant: only the final masked_text matters).
    token_ids = tokenizer.encode(text, return_tensors="pt").cuda()
    masked_ids = tokenizer.encode(masked_text, return_tensors="pt").cuda()

    with torch.no_grad():
        logits = model(input_ids=masked_ids).logits[0]
    # FIX: .tolist() — convert_ids_to_tokens expects Python ints, not a Tensor.
    predictions = torch.argmax(logits, dim=-1).tolist()

    original_tokens = tokenizer.convert_ids_to_tokens(token_ids[0])
    predicted_tokens = tokenizer.convert_ids_to_tokens(predictions)
    masked_tokens = tokenizer.convert_ids_to_tokens(masked_ids[0])

    results = []
    for i, token in enumerate(masked_tokens):
        if token != MASK_TOKEN:
            continue
        # Masking can shift alignment, so the original sequence may be shorter.
        # FIX: compare against the guarded value — the original code indexed
        # original_ids[i] unguarded on the Match line and could IndexError.
        original = original_tokens[i] if i < len(original_tokens) else ""
        match = "✅" if predicted_tokens[i] == original else "❌"
        # FIX: emit list rows (gr.Dataframe expects list[list], not dicts),
        # in the same order as the UI's headers.
        results.append([i, MASK_TOKEN, predicted_tokens[i], original, match])

    # Guard against division by zero when nothing was masked.
    accuracy = sum(1 for row in results if row[4] == "✅") / max(len(results), 1)
    return results, f"Accuracy: {accuracy:.1%}"
# Symbolic role tags that may appear in input captions. The UI exposes these
# as checkboxes; each selected tag is replaced by the tokenizer's mask token
# before masked-LM inference.
symbolic_roles = [
    "<subject>", "<subject1>", "<subject2>", "<pose>", "<emotion>",
    "<surface>", "<lighting>", "<material>", "<accessory>", "<footwear>",
    "<upper_body_clothing>", "<hair_style>", "<hair_length>", "<headwear>",
    "<texture>", "<pattern>", "<grid>", "<zone>", "<offset>",
    "<object_left>", "<object_right>", "<relation>", "<intent>", "<style>",
    "<fabric>", "<jewelry>"
]
# Hugging Face Hub coordinates for the symbolic-BERT checkpoint.
REPO_ID = "AbstractPhil/bert-beatrix-2048"
REVISION = "main"
tokenizer = AutoTokenizer.from_pretrained(REPO_ID, revision=REVISION)
# Load in eval mode for inference.
# NOTE(review): a module-level .cuda() may fail on ZeroGPU Spaces, where the
# GPU is only attached inside a @spaces.GPU-decorated call — confirm against
# the Space's hardware configuration.
model = AutoModelForMaskedLM.from_pretrained(
    REPO_ID,
    revision=REVISION,
    trust_remote_code=True,       # repo ships a custom model class; this executes repo code
    ignore_mismatched_sizes=True  # tolerate head/embedding size mismatches vs. the config
).eval().cuda()
def build_interface():
    """Assemble and return the Gradio Blocks UI for the mask-inference demo.

    Left column: caption input, role checkboxes, and a run button.
    Right column: prediction table and an accuracy readout.
    """
    with gr.Blocks() as demo:
        gr.Markdown("## π Symbolic BERT Inference Test")
        with gr.Row():
            with gr.Column():
                caption_box = gr.Textbox(label="Symbolic Input Caption", lines=3)
                role_picker = gr.CheckboxGroup(
                    label="Mask these symbolic roles",
                    choices=symbolic_roles,
                )
                trigger = gr.Button("Run Mask Inference")
            with gr.Column():
                results_table = gr.Dataframe(
                    headers=["Position", "Masked Token", "Predicted", "Original", "Match"],
                    interactive=False,
                )
                accuracy_box = gr.Textbox(label="Mask Accuracy")
        # Wire the button to the inference function.
        trigger.click(
            fn=mask_and_predict,
            inputs=[caption_box, role_picker],
            outputs=[results_table, accuracy_box],
        )
    return demo
if __name__ == "__main__":
    # Build the UI and start the Gradio server.
    build_interface().launch()