AbstractPhil committed
Commit 872b08b · verified · 1 Parent(s): 71b4610

Update app.py

Files changed (1):
  1. app.py +65 -39
app.py CHANGED
@@ -1,62 +1,68 @@
  # app.py – encoder-only demo for bert-beatrix-2048
  # -----------------------------------------------
  # launch: python app.py
+ # (gradio UI appears at http://localhost:7860)
+
+ import json
+ import re
+ import sys
+ from pathlib import Path, PurePosixPath  # ← PurePosixPath import added
+
+ import gradio as gr
  import spaces
  import torch
- import gradio as gr
- import json
  from huggingface_hub import snapshot_download
+
  from bert_handler import create_handler_from_checkpoint
- from pathlib import Path
+

  # ------------------------------------------------------------------
- # 1. Download *once* and load locally -----------------------------
+ # 0. Download & patch config.json --------------------------------
  # ------------------------------------------------------------------
- LOCAL_CKPT = snapshot_download(
-     repo_id="AbstractPhil/bert-beatrix-2048",
+ REPO_ID = "AbstractPhil/bert-beatrix-2048"
+ LOCAL_CKPT = "bert-beatrix-2048"  # cached dir name
+
+ snapshot_download(
+     repo_id=REPO_ID,
      revision="main",
-     local_dir="bert-beatrix-2048",
+     local_dir=LOCAL_CKPT,
      local_dir_use_symlinks=False,
  )

+ # ── one-time patch: strip the “repo--” prefix that confuses AutoModel ──
  cfg_path = Path(LOCAL_CKPT) / "config.json"
- with open(cfg_path) as f:
+ with cfg_path.open() as f:
      cfg = json.load(f)

  auto_map = cfg.get("auto_map", {})
  changed = False
  for k, v in auto_map.items():
-     # v looks like "AbstractPhil/bert-beatrix-2048--modeling_hf_nomic_bert.…"
-     if "--" in v:
+     if "--" in v:  # v looks like "repo--module.Class"
          auto_map[k] = PurePosixPath(v.split("--", 1)[1]).as_posix()
          changed = True

  if changed:
      cfg["auto_map"] = auto_map
-     with open(cfg_path, "w") as f:
+     with cfg_path.open("w") as f:
          json.dump(cfg, f, indent=2)
-     print("🔧 Patched auto_map → now points to local modules only")
+     print("🛠️ Patched config.json → auto_map now points at local modules")

- # also drop any *previously* imported remote modules in this session
- for name in list(sys.modules):
-     if name.startswith("transformers_modules.AbstractPhil.bert-beatrix-2048"):
-         del sys.modules[name]

  # ------------------------------------------------------------------
- # 1. normal load via BERTHandler ---------------------------------
+ # 1. Model / tokenizer -------------------------------------------
  # ------------------------------------------------------------------
- from bert_handler import create_handler_from_checkpoint
  handler, full_model, tokenizer = create_handler_from_checkpoint(LOCAL_CKPT)
  full_model = full_model.eval().cuda()

- # --- pull encoder & embeddings only --------------------------------
+ # Grab encoder + embedding stack only
  encoder = full_model.bert.encoder
  embeddings = full_model.bert.embeddings
  emb_ln = full_model.bert.emb_ln
  emb_drop = full_model.bert.emb_drop

+
  # ------------------------------------------------------------------
- # 2. Symbolic token list ------------------------------------------
+ # 2. Symbolic token set ------------------------------------------
  # ------------------------------------------------------------------
  SYMBOLIC_ROLES = [
      "<subject>", "<subject1>", "<subject2>", "<pose>", "<emotion>",
@@ -64,17 +70,18 @@ SYMBOLIC_ROLES = [
      "<upper_body_clothing>", "<hair_style>", "<hair_length>", "<headwear>",
      "<texture>", "<pattern>", "<grid>", "<zone>", "<offset>",
      "<object_left>", "<object_right>", "<relation>", "<intent>", "<style>",
-     "<fabric>", "<jewelry>"
+     "<fabric>", "<jewelry>",
  ]

- # Sanity-check: every role must be known by the tokenizer
- missing = [t for t in SYMBOLIC_ROLES
-            if tokenizer.convert_tokens_to_ids(t) == tokenizer.unk_token_id]
+ # quick sanity check
+ missing = [tok for tok in SYMBOLIC_ROLES
+            if tokenizer.convert_tokens_to_ids(tok) == tokenizer.unk_token_id]
  if missing:
-     raise RuntimeError(f"Tokenizer is missing special tokens: {missing}")
+     sys.exit(f"❌ Tokenizer is missing {missing}")
+

  # ------------------------------------------------------------------
- # 3. Encoder-only inference util ----------------------------------
+ # 3. Encoder-only inference util ---------------------------------
  # ------------------------------------------------------------------
  @spaces.GPU
  def encode_and_trace(text: str, selected_roles: list[str]):
@@ -85,41 +92,60 @@ def encode_and_trace(text: str, selected_roles: list[str]):
      x = emb_drop(emb_ln(embeddings(ids)))

      ext_mask = full_model.bert.get_extended_attention_mask(mask, x.shape[:-1])
-     enc = encoder(x, attention_mask=ext_mask)  # (1, S, H)
+     enc = encoder(x, attention_mask=ext_mask)  # (1, S, H)

-     want = {tokenizer.convert_tokens_to_ids(t) for t in selected_roles}
-     keep = torch.tensor([tid in want for tid in ids[0]], device=enc.device)
-
-     found = [tokenizer.convert_ids_to_tokens([tid])[0] for tid in ids[0] if tid in want]
-     if keep.any():
-         vec = enc[0][keep].mean(0)
+     sel_ids = {tokenizer.convert_tokens_to_ids(t) for t in selected_roles}
+     flags = torch.tensor([tid in sel_ids for tid in ids[0].tolist()],
+                          device=enc.device)
+
+     found = [tokenizer.convert_ids_to_tokens([tid])[0]
+              for tid in ids[0].tolist() if tid in sel_ids]
+
+     if flags.any():
+         vec = enc[0][flags].mean(0)
          norm = f"{vec.norm().item():.4f}"
      else:
          norm = "0.0000"

      return {
          "Symbolic Tokens": ", ".join(found) or "(none)",
-         "Mean Norm": norm,
-         "Token Count": int(keep.sum().item()),
+         "Embedding Norm": norm,
+         "Symbolic Token Count": int(flags.sum().item()),
      }

+
  # ------------------------------------------------------------------
- # 4. Gradio UI -----------------------------------------------------
+ # 4. Gradio UI ----------------------------------------------------
  # ------------------------------------------------------------------
  def build_interface():
      with gr.Blocks(title="🧠 Symbolic Encoder Inspector") as demo:
-         gr.Markdown("## 🧠 Symbolic Encoder Inspector")
+         gr.Markdown(
+             "## 🧠 Symbolic Encoder Inspector\n"
+             "Paste some text containing the special `<role>` tokens and "
+             "inspect their encoder representations."
+         )
+
          with gr.Row():
              with gr.Column():
-                 txt = gr.Textbox(label="Input with Symbolic Tokens", lines=3)
-                 chk = gr.CheckboxGroup(choices=SYMBOLIC_ROLES, label="Trace these roles")
+                 txt = gr.Textbox(
+                     label="Input with Symbolic Tokens",
+                     placeholder="Example: A <subject> wearing <upper_body_clothing> …",
+                     lines=3,
+                 )
+                 roles = gr.CheckboxGroup(
+                     choices=SYMBOLIC_ROLES,
+                     label="Trace these symbolic roles",
+                 )
                  btn = gr.Button("Encode & Trace")
              with gr.Column():
                  out_tok = gr.Textbox(label="Symbolic Tokens Found")
                  out_norm = gr.Textbox(label="Mean Norm")
                  out_cnt = gr.Textbox(label="Token Count")
-         btn.click(encode_and_trace, [txt, chk], [out_tok, out_norm, out_cnt])
+
+         btn.click(encode_and_trace, [txt, roles], [out_tok, out_norm, out_cnt])
+
      return demo

+
  if __name__ == "__main__":
      build_interface().launch()
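
The pivotal change in this commit is the config.json patch: Hugging Face records remote-code classes in auto_map as "repo--module.Class", and the loop rewrites each value to a bare "module.Class" so the classes resolve against the files in the local snapshot rather than the repo-qualified path. A minimal standalone sketch of that transformation (the auto_map values below are illustrative, not copied from the actual checkpoint):

from pathlib import PurePosixPath

# illustrative auto_map, shaped like the one this commit patches
auto_map = {
    "AutoConfig": "AbstractPhil/bert-beatrix-2048--configuration_hf_nomic_bert.NomicBertConfig",
    "AutoModel": "AbstractPhil/bert-beatrix-2048--modeling_hf_nomic_bert.NomicBertModel",
}

for key, value in auto_map.items():
    if "--" in value:  # "repo--module.Class" → keep only "module.Class"
        auto_map[key] = PurePosixPath(value.split("--", 1)[1]).as_posix()

print(auto_map["AutoModel"])  # modeling_hf_nomic_bert.NomicBertModel

The reworked encode_and_trace keeps the same recipe as before: flag the positions whose token ids belong to the selected roles, mean-pool the encoder states at those positions, and report the pooled vector's norm plus the match count. A toy sketch of that masking-and-pooling step on random tensors (all shapes and ids below are made up):

import torch

enc = torch.randn(1, 6, 8)                    # (batch, seq, hidden) toy encoder output
ids = torch.tensor([[101, 5, 7, 5, 9, 102]])  # toy token ids for one sequence
sel_ids = {5, 7}                              # ids of the traced roles

flags = torch.tensor([tid in sel_ids for tid in ids[0].tolist()])
if flags.any():
    vec = enc[0][flags].mean(0)               # mean-pool the matching positions
    print(f"{vec.norm().item():.4f}", int(flags.sum().item()))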