File size: 3,116 Bytes
535151e
 
 
 
ea2994b
535151e
 
 
 
41c0c30
535151e
785df91
535151e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
785df91
 
 
 
 
 
 
 
 
 
 
 
 
bbf3f7c
 
 
85441c2
785df91
535151e
785df91
535151e
 
 
 
 
 
785df91
535151e
 
 
 
 
 
 
 
 
 
ea2994b
 
535151e
ea2994b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# app.py

from transformers import AutoTokenizer, AutoModelForMaskedLM
import torch
import gradio as gr
import re
from pathlib import Path
import spaces

@spaces.GPU
def mask_and_predict(text: str, selected_roles: list[str]) -> tuple[list[dict], str]:
    """Replace each selected symbolic role in *text* with the tokenizer's mask
    token, run masked-LM inference, and report the prediction at every mask.

    Parameters
    ----------
    text : str
        Input caption containing symbolic role tags (e.g. "<subject>").
    selected_roles : list[str]
        Role strings to mask out before inference.

    Returns
    -------
    tuple[list[dict], str]
        Per-mask rows (Position, Masked Token, Predicted, Original, Match)
        and a formatted accuracy string.
    """
    mask_token = tokenizer.mask_token or "[MASK]"
    # Follow the model's actual device instead of hard-coding .cuda(),
    # so the function also works if the model is ever kept on CPU.
    device = next(model.parameters()).device

    masked_text = text
    for role in selected_roles:
        masked_text = re.sub(re.escape(role), mask_token, masked_text)

    token_ids = tokenizer.encode(text, return_tensors="pt").to(device)
    masked_ids = tokenizer.encode(masked_text, return_tensors="pt").to(device)

    with torch.no_grad():
        logits = model(input_ids=masked_ids).logits[0]
        predictions = torch.argmax(logits, dim=-1)

    original_tokens = tokenizer.convert_ids_to_tokens(token_ids[0])
    predicted_tokens = tokenizer.convert_ids_to_tokens(predictions)
    masked_tokens = tokenizer.convert_ids_to_tokens(masked_ids[0])

    results = []
    for i, token in enumerate(masked_tokens):
        if token != mask_token:
            continue
        # NOTE(review): original and masked encodings are compared
        # index-for-index. If a role string tokenizes to more than one
        # token the sequences drift out of alignment and "Original"/"Match"
        # become unreliable — confirm roles are single tokens in this vocab.
        # Guard the index here too: the original code guarded "Original"
        # but not "Match", which raised IndexError when the masked
        # sequence was longer than the original.
        original = original_tokens[i] if i < len(original_tokens) else ""
        results.append({
            "Position": i,
            "Masked Token": mask_token,
            "Predicted": predicted_tokens[i],
            "Original": original,
            "Match": "βœ…" if predicted_tokens[i] == original else "❌",
        })

    accuracy = sum(1 for r in results if r["Match"] == "βœ…") / max(len(results), 1)
    return results, f"Accuracy: {accuracy:.1%}"

# Symbolic role tags the UI offers for masking; order defines the
# checkbox order shown to the user.
symbolic_roles = [
    "<subject>",
    "<subject1>",
    "<subject2>",
    "<pose>",
    "<emotion>",
    "<surface>",
    "<lighting>",
    "<material>",
    "<accessory>",
    "<footwear>",
    "<upper_body_clothing>",
    "<hair_style>",
    "<hair_length>",
    "<headwear>",
    "<texture>",
    "<pattern>",
    "<grid>",
    "<zone>",
    "<offset>",
    "<object_left>",
    "<object_right>",
    "<relation>",
    "<intent>",
    "<style>",
    "<fabric>",
    "<jewelry>",
]

# Hugging Face Hub checkpoint for the symbolic-caption masked-LM demo.
# NOTE(review): REVISION is "main", i.e. a moving branch, not a pinned
# commit — consider pinning a commit hash for reproducibility.
REPO_ID = "AbstractPhil/bert-beatrix-2048"
REVISION = "main"
# These loads run at import time: they download from the Hub and move the
# model to the GPU (.cuda()), so importing this module requires network
# access and a CUDA device.
tokenizer = AutoTokenizer.from_pretrained(REPO_ID, revision=REVISION)
model = AutoModelForMaskedLM.from_pretrained(
    REPO_ID,
    revision=REVISION,
    trust_remote_code=True,  # repo ships custom model code executed on load
    ignore_mismatched_sizes=True  # tolerate head/embedding size mismatches
).eval().cuda()

def build_interface():
    """Assemble the Gradio Blocks UI and wire the mask-inference callback.

    Returns the constructed (un-launched) gr.Blocks demo.
    """
    with gr.Blocks() as demo:
        gr.Markdown("## πŸ”Ž Symbolic BERT Inference Test")

        with gr.Row():
            # Left column: caption input, role selection, and the trigger.
            with gr.Column():
                caption_box = gr.Textbox(label="Symbolic Input Caption", lines=3)
                role_picker = gr.CheckboxGroup(
                    choices=symbolic_roles,
                    label="Mask these symbolic roles",
                )
                trigger = gr.Button("Run Mask Inference")
            # Right column: per-mask results table plus an accuracy readout.
            with gr.Column():
                results_table = gr.Dataframe(
                    headers=["Position", "Masked Token", "Predicted", "Original", "Match"],
                    interactive=False,
                )
                score_box = gr.Textbox(label="Mask Accuracy")

        trigger.click(
            fn=mask_and_predict,
            inputs=[caption_box, role_picker],
            outputs=[results_table, score_box],
        )

    return demo

if __name__ == "__main__":
    # Script entry point: build the UI and start the Gradio server.
    build_interface().launch()