# Hugging Face Space — runs on ZeroGPU hardware (see @spaces.GPU below).
# app.py
from transformers import AutoTokenizer, AutoModelForMaskedLM
import torch
import gradio as gr
import re
from pathlib import Path
import spaces
@spaces.GPU
def mask_and_predict(text: str, selected_roles: list[str]):
    """Mask the selected symbolic roles in *text* and report MLM predictions.

    Each occurrence of every role tag in ``selected_roles`` is replaced with
    the tokenizer's mask token, the masked sequence is run through the model,
    and the argmax prediction at each masked position is compared against the
    token of the unmasked input at the same position.

    Args:
        text: Input caption containing symbolic role tags (e.g. "<subject>").
        selected_roles: Role tag strings to mask out.

    Returns:
        A ``(rows, summary)`` tuple: ``rows`` is a list of dicts for the
        results Dataframe, ``summary`` is an "Accuracy: xx.x%" string.
    """
    MASK_TOKEN = tokenizer.mask_token or "[MASK]"
    results = []

    # Replace every occurrence of each selected role with the mask token.
    # Roles are escaped, so they match literally, not as regex patterns.
    masked_text = text
    for role in selected_roles:
        masked_text = re.sub(re.escape(role), MASK_TOKEN, masked_text)

    token_ids = tokenizer.encode(text, return_tensors="pt").cuda()
    masked_ids = tokenizer.encode(masked_text, return_tensors="pt").cuda()

    with torch.no_grad():
        logits = model(input_ids=masked_ids).logits[0]
    predictions = torch.argmax(logits, dim=-1)

    original_tokens = tokenizer.convert_ids_to_tokens(token_ids[0])
    predicted_tokens = tokenizer.convert_ids_to_tokens(predictions)
    masked_tokens = tokenizer.convert_ids_to_tokens(masked_ids[0])

    for i, token in enumerate(masked_tokens):
        if token != MASK_TOKEN:
            continue
        # NOTE(review): positions are compared index-for-index; if a role tag
        # tokenizes to a different length than the mask token, the original
        # and masked sequences drift out of alignment — confirm acceptable.
        # Guard the index for BOTH the "Original" field and the match test
        # (the original code only guarded the former and could IndexError).
        original = original_tokens[i] if i < len(original_tokens) else ""
        results.append({
            "Position": i,
            "Masked Token": MASK_TOKEN,
            "Predicted": predicted_tokens[i],
            "Original": original,
            # Fixed mojibake: the match markers were a multi-byte emoji
            # split across lines, which also broke the accuracy tally.
            "Match": "✅" if predicted_tokens[i] == original else "❌",
        })

    accuracy = sum(1 for r in results if r["Match"] == "✅") / max(len(results), 1)
    return results, f"Accuracy: {accuracy:.1%}"
# Symbolic role tags the UI offers for masking, in display order.
# Built from bare role names so the angle-bracket wrapping lives in one place.
symbolic_roles = [
    f"<{role_name}>"
    for role_name in (
        "subject", "subject1", "subject2", "pose", "emotion",
        "surface", "lighting", "material", "accessory", "footwear",
        "upper_body_clothing", "hair_style", "hair_length", "headwear",
        "texture", "pattern", "grid", "zone", "offset",
        "object_left", "object_right", "relation", "intent", "style",
        "fabric", "jewelry",
    )
]
# Hub repository holding the symbolic BERT checkpoint; revision pinned
# so the Space does not silently pick up upstream changes.
REPO_ID = "AbstractPhil/bert-beatrix-2048"
REVISION = "main"
# Tokenizer and model are loaded once at import time and shared by all requests.
tokenizer = AutoTokenizer.from_pretrained(REPO_ID, revision=REVISION)
# NOTE(review): .cuda() at import time — on ZeroGPU Spaces the `spaces`
# package intercepts CUDA initialization so this is expected to work outside
# @spaces.GPU; confirm against the Space's hardware configuration.
model = AutoModelForMaskedLM.from_pretrained(
    REPO_ID,
    revision=REVISION,
    trust_remote_code=True  # runs custom modeling code shipped in the repo
).eval().cuda()
def build_interface():
    """Assemble and return the Gradio Blocks UI for the mask-inference demo."""
    with gr.Blocks() as demo:
        gr.Markdown("## π Symbolic BERT Inference Test")
        with gr.Row():
            # Left column: caption input, role selection, and the run trigger.
            with gr.Column():
                caption_box = gr.Textbox(label="Symbolic Input Caption", lines=3)
                role_picker = gr.CheckboxGroup(
                    choices=symbolic_roles,
                    label="Mask these symbolic roles",
                )
                trigger = gr.Button("Run Mask Inference")
            # Right column: per-token prediction table plus accuracy readout.
            with gr.Column():
                prediction_table = gr.Dataframe(
                    headers=["Position", "Masked Token", "Predicted", "Original", "Match"],
                    interactive=False,
                )
                accuracy_box = gr.Textbox(label="Mask Accuracy")
        trigger.click(
            fn=mask_and_predict,
            inputs=[caption_box, role_picker],
            outputs=[prediction_table, accuracy_box],
        )
    return demo
if __name__ == "__main__":
    # Build the UI and start the Gradio server when run as a script.
    build_interface().launch()