Spaces:

ericanthonymitchell
/

model-editing

Runtime error

App Files Files Community

Charles Lin committed on May 5, 2022

Commit

8335d0c

1 Parent(s): a9853a7

All algs except KE working.

Browse files

Files changed (4) hide show

algs/lu.py +37 -53
app.py +26 -12
config.py +9 -6
utils.py +7 -1

algs/lu.py CHANGED Viewed

@@ -15,56 +15,45 @@ class LU(EditableModel):
     def __init__(self, model, config, model_constructor, memory=None):
         super().__init__(model, config, model_constructor)
         self.memory = memory
-    def forward(self, *inputs, **kwargs):
-        if "bert" in self.config.model.name.lower():
-            output, encoder_states = self.model(*inputs, **kwargs, output_hidden_states=True)
-        else:
-            model_output = self.model(*inputs, **kwargs, output_hidden_states=True)
-            encoder_states = _last_encoder_state(model_output)
-            output = _logits(model_output)
-        if self.memory is not None:
-            for i, encoder_state in enumerate(encoder_states):
-                if "gpt2" in self.config.model.name.lower():
-                    # NOTE: broken
-                    memory_prefixes, memory_labels = self.memory
-                    prefix_means = encoder_state.cumsum(0).detach() / torch.arange(1, encoder_state.shape[0] + 1, device=encoder_state.device).view(-1, 1)
-                    dist_mat = (prefix_means.unsqueeze(1) - memory_prefixes.unsqueeze(0)).norm(2, dim=-1)
-                    min_dists, min_idxs = dist_mat.min(-1)
-                    memory_mask = (min_dists < self.config.lu.threshold)
-                    onehot_logits = self.config.lu.onehot_logit * F.one_hot(memory_labels[min_idxs], output.shape[-1]).float()
-                    output[i, memory_mask] = onehot_logits[memory_mask]
-                elif "bart" in self.config.model.name.lower() or "t5" in self.config.model.name.lower():
-                    avg_encoder_state = encoder_state.detach().mean(0)
-                    memory_keys, memory_labels = self.memory
-                    dists = torch.norm(avg_encoder_state - memory_keys, dim=-1)
-                    closest_dist = dists.min()
-                    closest_idx = dists.argmin()
-                    closest_v = memory_labels[closest_idx]
-                    if closest_dist < self.config.lu.threshold:
-                        output[i] = torch.zeros((1, kwargs['labels'].shape[1], output.shape[2]), device=output.device)
-                        for j, idx in enumerate(closest_v):
-                            if j >= output.shape[1]:
-                                break
-                            output[i, j, idx] = self.config.lu.onehot_logit
-                        if "t5" not in self.config.model.name.lower():
-                            # T5 does not shift targets in the loss
-                            output[i] = output[i].roll(-1, -2)
-                else:
-                    avg_encoder_state = encoder_state.detach().mean(0)
-                    memory_keys, memory_labels = self.memory
-                    dists = torch.norm(avg_encoder_state - memory_keys, dim=-1)
-                    closest_dist = dists.min()
-                    closest_idx = dists.argmin()
-                    closest_v = memory_labels[closest_idx]
-                    if closest_dist < self.config.lu.threshold:
-                        output[i] = self.config.lu.onehot_logit * (2 * closest_v - 1)  # Return onehot_logit or -onehot_logit
         return output
     def edit(self, batch, condition=None, detach_history=False):
@@ -77,14 +66,9 @@ class LU(EditableModel):
         memory_keys = []
         memory_labels = []
         for encoder_state, label in zip(encoder_states, batch["labels"]):
-            if "gpt2" in self.config.model.name.lower():
-                # NOTE: broken
-                avg_encoder_states = (encoder_state.cumsum(0).detach() / torch.arange(1, encoder_state.shape[0] + 1, device=encoder_state.device).view(-1, 1))[-10:, :]
-                memory = (avg_encoder_states, label[-10:])
-            else:
-                avg_encoder_state = encoder_state.detach().mean(0)
-                memory_keys.append(avg_encoder_state)
-                memory_labels.append(label)
         memory = (torch.stack(memory_keys), torch.stack(memory_labels))
         return LU(self.model.eval(), self.config, self.model_constructor, memory), {}

     def __init__(self, model, config, model_constructor, memory=None):
         super().__init__(model, config, model_constructor)
+        if "t5" not in self.config.model.name.lower():
+            raise NotImplementedError
         self.memory = memory
+    def lookup_replace(self, output, encoder_states):
+        for i, encoder_state in enumerate(encoder_states):
+            avg_encoder_state = encoder_state.detach().mean(0)
+            memory_keys, memory_labels = self.memory
+            dists = torch.norm(avg_encoder_state - memory_keys, dim=-1)
+            closest_dist = dists.min()
+            closest_idx = dists.argmin()
+            closest_v = memory_labels[closest_idx]
+            if closest_dist < self.config.lu.threshold:
+                output[i] = torch.zeros((1, output.shape[1], output.shape[2]), device=output.device)
+                for j, idx in enumerate(closest_v):
+                    if j >= output.shape[1]:
+                        break
+                    output[i, j, idx] = self.config.lu.onehot_logit
+                if "t5" not in self.config.model.name.lower():
+                    # T5 does not shift targets in the loss
+                    output[i] = output[i].roll(-1, -2)
+        return output
+    def generate(self, *inputs, **kwargs):
+        model_output = self.model.generate(*inputs, **kwargs, output_hidden_states=True,
+                                           output_scores=True, return_dict_in_generate=True)
+        encoder_states = _last_encoder_state(model_output)
+        output = _logits(model_output)
+        if self.memory is not None:
+            output = self.lookup_replace(output, encoder_states)
+        return output.argmax(-1)
+    def forward(self, *inputs, **kwargs):
+        model_output = self.model(*inputs, **kwargs, output_hidden_states=True)
+        encoder_states = _last_encoder_state(model_output)
+        output = _logits(model_output)
+        if self.memory is not None:
+            output = self.lookup_replace(output, encoder_states)
         return output
     def edit(self, batch, condition=None, detach_history=False):
         memory_keys = []
         memory_labels = []
         for encoder_state, label in zip(encoder_states, batch["labels"]):
+            avg_encoder_state = encoder_state.detach().mean(0)
+            memory_keys.append(avg_encoder_state)
+            memory_labels.append(label)
         memory = (torch.stack(memory_keys), torch.stack(memory_labels))
         return LU(self.model.eval(), self.config, self.model_constructor, memory), {}

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ from torch.cuda import is_available as use_cuda
 import algs
 import config
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 EDIT_ALGS = [
@@ -19,6 +20,26 @@ EDIT_ALGS = [
     "LU: Lookup Cache",
 ]
 def generate(ids):
     output_ids = st.session_state.editable_model.generate(input_ids=ids, max_new_tokens=20, min_length=1,
                                                           num_return_sequences=1, num_beams=3)
@@ -30,15 +51,7 @@ def reset():
     selected_alg = st.session_state.alg_selector
     alg_abbrv = selected_alg[:selected_alg.index(":")]
-    alg_module = importlib.import_module(f"algs.{alg_abbrv.lower()}")
-    alg_class = getattr(alg_module, alg_abbrv.upper())
-    st.session_state.config = getattr(config, f"{alg_abbrv.lower()}_config")
-    with st.spinner('Loading model...'):
-        st.session_state.editable_model = alg_class(
-            st.session_state.model,
-            st.session_state.config,
-            lambda: copy.deepcopy(st.session_state.model),
-        ).eval()
 def apply_edit():
     st.session_state.edits.loc[len(st.session_state.edits)] = [str(edit_input), str(edit_label)]
@@ -67,12 +80,13 @@ if "init" not in st.session_state:
     st.session_state.edits = pd.DataFrame([], columns=["Edit input", "Edit label"])
     st.session_state.model_outputs = pd.DataFrame([], columns=["Input", "Output", "N edits", "Alg"])
     st.session_state.init = True
-    st.session_state.config = None
-    st.session_state.device = "cuda" if use_cuda() else "cpu"
     with st.spinner('Loading model...'):
         st.session_state.tokenizer = AutoTokenizer.from_pretrained("google/t5-large-ssm-nq")
         st.session_state.model = AutoModelForSeq2SeqLM.from_pretrained("google/t5-large-ssm-nq").to(st.session_state.device).eval()
-        st.session_state.editable_model = None
 ########################
 #### Interface code ####

 import algs
 import config
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+import utils
 EDIT_ALGS = [
     "LU: Lookup Cache",
 ]
+def get_alg_class(alg_abbrv):
+    alg_module = importlib.import_module(f"algs.{alg_abbrv.lower()}")
+    alg_class = getattr(alg_module, alg_abbrv.upper())
+    return alg_class
+def load_editable_model(alg_abbrv):
+    alg_module = importlib.import_module(f"algs.{alg_abbrv.lower()}")
+    alg_class = getattr(alg_module, alg_abbrv.upper())
+    st.session_state.config = getattr(config, f"{alg_abbrv.lower()}_config")
+    with st.spinner('Loading model...'):
+        st.session_state.editable_model = alg_class(
+            st.session_state.model,
+            st.session_state.config,
+            lambda: copy.deepcopy(st.session_state.model),
+        ).eval()
+        if "archive" in st.session_state.config:
+            archive, st.session_state.config.archive = utils.load_archive(str(st.session_state.config.archive))
+            print(f"Loading archive from {st.session_state.config.archive}")
+            st.session_state.editable_model.load_state_dict(archive["model"])
 def generate(ids):
     output_ids = st.session_state.editable_model.generate(input_ids=ids, max_new_tokens=20, min_length=1,
                                                           num_return_sequences=1, num_beams=3)
     selected_alg = st.session_state.alg_selector
     alg_abbrv = selected_alg[:selected_alg.index(":")]
+    load_editable_model(alg_abbrv)
 def apply_edit():
     st.session_state.edits.loc[len(st.session_state.edits)] = [str(edit_input), str(edit_label)]
     st.session_state.edits = pd.DataFrame([], columns=["Edit input", "Edit label"])
     st.session_state.model_outputs = pd.DataFrame([], columns=["Input", "Output", "N edits", "Alg"])
     st.session_state.init = True
+    st.session_state.device = "cpu"  # "cuda" if use_cuda() else "cpu"
     with st.spinner('Loading model...'):
         st.session_state.tokenizer = AutoTokenizer.from_pretrained("google/t5-large-ssm-nq")
         st.session_state.model = AutoModelForSeq2SeqLM.from_pretrained("google/t5-large-ssm-nq").to(st.session_state.device).eval()
+    # There is a "Loading model..." spinner in load_editable_model
+    alg_abbrv = "MEND"  # Default initial alg of dropdown selector
+    load_editable_model(alg_abbrv)
 ########################
 #### Interface code ####

config.py CHANGED Viewed

@@ -21,7 +21,7 @@ model_config = {
 }
 ft_config = OmegaConf.create({
-  "device": "cuda" if use_cuda() else "cpu",
   "edit_lr": 5e-6,
   "train_base": False,
   "grad_clip": 100,
@@ -43,7 +43,7 @@ ft_config = OmegaConf.create({
 })
 lu_config = OmegaConf.create({
-  "device": "cuda" if use_cuda() else "cpu",
   "lu": {
     "threshold": 2.75,
     "onehot_logit": 1,
@@ -52,14 +52,14 @@ lu_config = OmegaConf.create({
 })
 ke_config = OmegaConf.create({
-  "device": "cuda" if use_cuda() else "cpu",
   "train_base": False,
   "lr": 1e-5,
   "model": model_config,
 })
 enn_config = OmegaConf.create({
-  "device": "cuda" if use_cuda() else "cpu",
   "lr": 1e-5,
   "edit_lr": 1e-2,
   "lr_lr": 1e-3,
@@ -72,10 +72,11 @@ enn_config = OmegaConf.create({
     "n_edit_steps": 1,
   },
   "model": model_config,
 })
 mend_config = OmegaConf.create({
-  "device": "cuda" if use_cuda() else "cpu",
   "lr": 1e-6,
   "edit_lr": 1e-4,
   "lr_lr": 1e-4,
@@ -99,10 +100,11 @@ mend_config = OmegaConf.create({
     "descent": False,
   },
   "model": model_config,
 })
 serac_config = OmegaConf.create({
-  "device": "cuda" if use_cuda() else "cpu",
   "lr": 1e-5,
   "edit_lr": 1e-2,
   "lr_lr": 0,
@@ -128,4 +130,5 @@ serac_config = OmegaConf.create({
     "cache_embeds": True,
   },
   "model": model_config,
 })

 }
 ft_config = OmegaConf.create({
+  "device": "cpu",
   "edit_lr": 5e-6,
   "train_base": False,
   "grad_clip": 100,
 })
 lu_config = OmegaConf.create({
+  "device": "cpu",
   "lu": {
     "threshold": 2.75,
     "onehot_logit": 1,
 })
 ke_config = OmegaConf.create({
+  "device": "cpu",
   "train_base": False,
   "lr": 1e-5,
   "model": model_config,
 })
 enn_config = OmegaConf.create({
+  "device": "cpu",
   "lr": 1e-5,
   "edit_lr": 1e-2,
   "lr_lr": 1e-3,
     "n_edit_steps": 1,
   },
   "model": model_config,
+  "archive": 8684705655, # "/iris/u/clin/code/efk/outputs/2022-02-09_05-48-20_8684705655/models/t5-large-ssm-nq.2022-02-09_05-48-20_8684705655",
 })
 mend_config = OmegaConf.create({
+  "device": "cpu",
   "lr": 1e-6,
   "edit_lr": 1e-4,
   "lr_lr": 1e-4,
     "descent": False,
   },
   "model": model_config,
+  "archive": 5940349945, # "/iris/u/clin/code/efk/outputs/2022-02-09_11-47-28_5940349945/models/t5-large-ssm-nq.2022-02-09_11-47-28_5940349945",
 })
 serac_config = OmegaConf.create({
+  "device": "cpu", # "device": "cuda" if use_cuda() else "cpu",
   "lr": 1e-5,
   "edit_lr": 1e-2,
   "lr_lr": 0,
     "cache_embeds": True,
   },
   "model": model_config,
+  "archive": 4719776130, # "/iris/u/clin/code/efk/outputs/2022-02-09_14-05-56_4719776130/models/t5-large-ssm-nq.2022-02-09_14-05-56_4719776130",
 })

utils.py CHANGED Viewed

@@ -156,12 +156,18 @@ def safe_backward(loss, parameters, accumulate=1, allow_unused=False, backward=F
 def _logits(x):
-    return x if not hasattr(x, "logits") else x.logits
 def _last_encoder_state(x):
     if hasattr(x, "encoder_last_hidden_state"):
         return x.encoder_last_hidden_state
     else:
         return x.hidden_states[-1]

 def _logits(x):
+    if hasattr(x, "logits"):
+        return x.logits
+    elif hasattr(x, "scores"):
+        return torch.cat(x.scores).unsqueeze(0)
+    return x
 def _last_encoder_state(x):
     if hasattr(x, "encoder_last_hidden_state"):
         return x.encoder_last_hidden_state
+    elif hasattr(x, "encoder_hidden_states"):
+        return x.encoder_hidden_states[-1]
     else:
         return x.hidden_states[-1]