somosnlp
/

gua-a

@@ -30,32 +30,32 @@ class KeeperModelForCausalLM(PreTrainedModel):
         self.bert = None
         self.llm = None
-        if cfg:
-            print("Initializing KeeperModelForCausalLM from cfg")
-            # Inicialización con configuración
-            self.bert = AutoModel.from_pretrained(cfg.retriever_config['_name_or_path'])
-            bnb_config = BitsAndBytesConfig(
-                load_in_4bit=True,
-                bnb_4bit_quant_type="nf4",
-                bnb_4bit_compute_dtype=torch.bfloat16
-            )
-            self.llm = AutoModelForCausalLM.from_pretrained(
-                cfg.model_config['_name_or_path'],
-                device_map=cfg.device_map,
-                torch_dtype=torch.bfloat16,
-                quantization_config=bnb_config
-            )
-            # Almacena kwargs para la serialización y carga futura
-            # self.init_kwargs = {'cfg': cfg}
-            print("Initialization complete")
-        else:
-            # Si cfg no se proporciona, esto se manejará en el método from_pretrained
-            print("Initializing KeeperTokenizer without cfg")
         self.n_cands = n_cands
         self.update_both = update_both
@@ -81,6 +81,10 @@ class KeeperModelForCausalLM(PreTrainedModel):
                 self.prompt_right = state_dict["prompt_right"].to(device)
             if "respuesta" in state_dict:
                 self.respuesta = state_dict["respuesta"].to(device)
         else:
             # Optionally handle the case where CUDA is not available
             print("CUDA is not available. Tensors will remain on CPU.")

         self.bert = None
         self.llm = None
+        # if cfg:
+        #     print("Initializing KeeperModelForCausalLM from cfg")
+        #     # Initialization from the configuration
+        #     self.bert = AutoModel.from_pretrained(cfg.retriever_config['_name_or_path'])
+        #     bnb_config = BitsAndBytesConfig(
+        #         load_in_4bit=True,
+        #         bnb_4bit_quant_type="nf4",
+        #         bnb_4bit_compute_dtype=torch.bfloat16
+        #     )
+        #     self.llm = AutoModelForCausalLM.from_pretrained(
+        #         cfg.model_config['_name_or_path'],
+        #         device_map=cfg.device_map,
+        #         torch_dtype=torch.bfloat16,
+        #         quantization_config=bnb_config
+        #     )
+        #     # Store kwargs for later serialization and loading
+        #     # self.init_kwargs = {'cfg': cfg}
+        #     print("Initialization complete")
+        # else:
+        #     # If cfg is not provided, this is handled in the from_pretrained method
+        #     print("Initializing KeeperTokenizer without cfg")
         self.n_cands = n_cands
         self.update_both = update_both
                 self.prompt_right = state_dict["prompt_right"].to(device)
             if "respuesta" in state_dict:
                 self.respuesta = state_dict["respuesta"].to(device)
+            if "bert" in state_dict:
+                self.bert = state_dict["bert"].to(device)
+            if "llm" in state_dict:
+                self.llm = state_dict["llm"].to(device)
         else:
             # Optionally handle the case where CUDA is not available
             print("CUDA is not available. Tensors will remain on CPU.")