|
|
|
|
|
from huggingface_hub import snapshot_download |
|
from transformers import AutoTokenizer, AutoModelForMaskedLM |
|
import torch |
|
import gradio as gr |
|
import re |
|
from dataclasses import dataclass |
|
from pathlib import Path |
|
import spaces |
|
|
|
@dataclass
class SymbolicConfig:
    """Static settings for the symbolic masked-LM demo.

    This is a plain configuration container. It is deliberately not wrapped
    in ``spaces.GPU``: that decorator is meant for callables that perform GPU
    work, and applying it here would replace the class with a wrapped
    callable.

    Attributes:
        repo_id: Hugging Face Hub repository id of the model snapshot.
        symbolic_roles: Role tags that can be selected for masking.
    """

    repo_id: str = "AbstractPhil/bert-beatrix-2048"
    # A tuple (not a list) so the default is immutable and safe to share
    # between instances; dataclasses reject bare list defaults.
    symbolic_roles: tuple[str, ...] = (
        "<subject>", "<subject1>", "<subject2>", "<pose>", "<emotion>",
        "<surface>", "<lighting>", "<material>", "<accessory>", "<footwear>",
        "<upper_body_clothing>", "<hair_style>", "<hair_length>", "<headwear>",
        "<texture>", "<pattern>", "<grid>", "<zone>", "<offset>",
        "<object_left>", "<object_right>", "<relation>", "<intent>", "<style>",
        "<fabric>", "<jewelry>",
    )
|
|
|
# Import-time setup shared by every request: resolve the model snapshot,
# then load the tokenizer and the masked-LM once.
config = SymbolicConfig()
model_dir = snapshot_download(repo_id=config.repo_id)
tokenizer = AutoTokenizer.from_pretrained(model_dir)

model = AutoModelForMaskedLM.from_pretrained(model_dir)
model = model.eval()  # inference mode
model = model.cuda()  # inference runs on the GPU

# Fall back to the conventional BERT mask string if the tokenizer defines none.
MASK_TOKEN = tokenizer.mask_token if tokenizer.mask_token else "[MASK]"
|
|
|
@spaces.GPU
def mask_and_predict(text: str, selected_roles: list[str]):
    """Mask the selected role tags in ``text`` and report the model's guesses.

    Every occurrence of each tag in ``selected_roles`` is replaced by the
    mask token, the masked caption is run through the masked-LM, and the
    arg-max prediction at each mask position is compared with the token
    found at the same index of the unmasked caption.

    Args:
        text: Caption containing symbolic role tags; ``None`` is treated
            as an empty caption.
        selected_roles: Role tags to mask; ``None`` or an empty list masks
            nothing.

    Returns:
        A ``(rows, summary)`` pair. ``rows`` is a list of dicts with the
        keys ``Position``, ``Masked Token``, ``Predicted``, ``Original``
        and ``Match``; ``summary`` is a string such as ``"Accuracy: 50.0%"``
        (``0.0%`` when nothing was masked).
    """
    match_mark, miss_mark = "✅", "❌"

    text = text or ""
    masked_text = text
    for role in selected_roles or ():
        # The tags are literal strings, so plain replacement is sufficient.
        masked_text = masked_text.replace(role, MASK_TOKEN)

    # The unmasked caption is only needed as tokens, so it never has to be
    # moved onto the model's device.
    original_tokens = tokenizer.convert_ids_to_tokens(tokenizer.encode(text))

    # Follow the model's device instead of hard-coding CUDA.
    masked_ids = tokenizer.encode(masked_text, return_tensors="pt").to(model.device)
    with torch.no_grad():
        logits = model(input_ids=masked_ids).logits[0]

    predicted_tokens = tokenizer.convert_ids_to_tokens(logits.argmax(dim=-1).tolist())
    masked_tokens = tokenizer.convert_ids_to_tokens(masked_ids[0].tolist())

    # NOTE(review): the index-for-index comparison below is only meaningful
    # if a role tag and the mask token occupy the same number of token
    # positions — confirm for this tokenizer.
    results = []
    for position, token in enumerate(masked_tokens):
        if token != MASK_TOKEN:
            continue
        predicted = predicted_tokens[position]
        # Bounds-check once so "Original" and "Match" agree and a masked
        # sequence longer than the original cannot raise IndexError.
        original = original_tokens[position] if position < len(original_tokens) else ""
        results.append({
            "Position": position,
            "Masked Token": MASK_TOKEN,
            "Predicted": predicted,
            "Original": original,
            "Match": match_mark if predicted == original else miss_mark,
        })

    hits = sum(row["Match"] == match_mark for row in results)
    accuracy = hits / max(len(results), 1)
    return results, f"Accuracy: {accuracy:.1%}"
|
|
|
def build_interface():
    """Assemble the Gradio Blocks UI for the mask-inference demo.

    The left column holds the caption textbox, a checkbox group listing
    ``config.symbolic_roles`` and the run button; the right column holds
    the results table and the accuracy textbox. Clicking the button calls
    ``mask_and_predict`` with the caption and the selected roles.

    Returns:
        The constructed ``gr.Blocks`` object; it is not launched here.
    """
    table_headers = ["Position", "Masked Token", "Predicted", "Original", "Match"]

    with gr.Blocks() as demo:
        # NOTE(review): the glyph after "##" looks like a mis-encoded emoji;
        # restore it from the original file if one is available.
        gr.Markdown("## π Symbolic BERT Inference Test")
        with gr.Row():
            with gr.Column():
                input_text = gr.Textbox(label="Symbolic Input Caption", lines=3)
                selected_roles = gr.CheckboxGroup(
                    choices=list(config.symbolic_roles),
                    label="Mask these symbolic roles",
                )
                run_btn = gr.Button("Run Mask Inference")
            with gr.Column():
                output_table = gr.Dataframe(headers=table_headers, interactive=False)
                accuracy_score = gr.Textbox(label="Mask Accuracy")

        # Event listeners are registered while the Blocks context is open.
        run_btn.click(
            fn=mask_and_predict,
            inputs=[input_text, selected_roles],
            outputs=[output_table, accuracy_score],
        )

    return demo
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: build the UI, then start the Gradio server.
    demo = build_interface()
    demo.launch()
|
|