Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	sync from github
Browse files
    	
        src/backend/moe_infinity.py
    CHANGED
    
    | @@ -31,16 +31,20 @@ class MoEHFLM(HFLMWithMeasurement): | |
| 31 | 
             
                    self.use_chat_template = use_chat_template
         | 
| 32 | 
             
                    if "device" in kwargs:
         | 
| 33 | 
             
                        kwargs.pop("device")
         | 
|  | |
|  | |
| 34 | 
             
                    kwargs["device_map"] = "cuda:0"
         | 
| 35 | 
             
                    super().__init__(
         | 
| 36 | 
             
                        *args, **kwargs, pretrained=pretrained
         | 
| 37 | 
             
                    )  # Assuming HFLM accepts a 'pretrained' arg and handles it
         | 
| 38 | 
             
                    # self._create_model()
         | 
| 39 | 
            -
                    shutil.rmtree(os.path.join(self.offload_path, "moe-infinity-offloads"))
         | 
| 40 |  | 
| 41 | 
             
                def __del__(self):
         | 
| 42 | 
            -
                    #  | 
| 43 | 
            -
                     | 
|  | |
|  | |
|  | |
| 44 |  | 
| 45 | 
             
                def _create_model(self, *args, **kwargs):
         | 
| 46 | 
             
                    """
         | 
|  | |
| 31 | 
             
                    self.use_chat_template = use_chat_template
         | 
| 32 | 
             
                    if "device" in kwargs:
         | 
| 33 | 
             
                        kwargs.pop("device")
         | 
| 34 | 
            +
                    if os.path.exists(os.path.join(self.offload_path, "moe-infinity-offloads")):
         | 
| 35 | 
            +
                        shutil.rmtree(os.path.join(self.offload_path, "moe-infinity-offloads"))
         | 
| 36 | 
             
                    kwargs["device_map"] = "cuda:0"
         | 
| 37 | 
             
                    super().__init__(
         | 
| 38 | 
             
                        *args, **kwargs, pretrained=pretrained
         | 
| 39 | 
             
                    )  # Assuming HFLM accepts a 'pretrained' arg and handles it
         | 
| 40 | 
             
                    # self._create_model()
         | 
|  | |
| 41 |  | 
| 42 | 
             
                def __del__(self):
         | 
| 43 | 
            +
                    self._model.engine.clean_up() # clean up hooks
         | 
| 44 | 
            +
                    self._model.engine.archer_engine.clean_up_resources() # clean up resources
         | 
| 45 | 
            +
                    if os.path.exists(os.path.join(self.offload_path, "moe-infinity-offloads")):
         | 
| 46 | 
            +
                        shutil.rmtree(os.path.join(self.offload_path, "moe-infinity-offloads")) # clean up offload model
         | 
| 47 | 
            +
             | 
| 48 |  | 
| 49 | 
             
                def _create_model(self, *args, **kwargs):
         | 
| 50 | 
             
                    """
         |