Spaces:
Runtime error
Runtime error
sync from github
Browse files
src/backend/moe_infinity.py
CHANGED
|
@@ -31,16 +31,20 @@ class MoEHFLM(HFLMWithMeasurement):
|
|
| 31 |
self.use_chat_template = use_chat_template
|
| 32 |
if "device" in kwargs:
|
| 33 |
kwargs.pop("device")
|
|
|
|
|
|
|
| 34 |
kwargs["device_map"] = "cuda:0"
|
| 35 |
super().__init__(
|
| 36 |
*args, **kwargs, pretrained=pretrained
|
| 37 |
) # Assuming HFLM accepts a 'pretrained' arg and handles it
|
| 38 |
# self._create_model()
|
| 39 |
-
shutil.rmtree(os.path.join(self.offload_path, "moe-infinity-offloads"))
|
| 40 |
|
| 41 |
def __del__(self):
|
| 42 |
-
#
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
def _create_model(self, *args, **kwargs):
|
| 46 |
"""
|
|
|
|
| 31 |
self.use_chat_template = use_chat_template
|
| 32 |
if "device" in kwargs:
|
| 33 |
kwargs.pop("device")
|
| 34 |
+
if os.path.exists(os.path.join(self.offload_path, "moe-infinity-offloads")):
|
| 35 |
+
shutil.rmtree(os.path.join(self.offload_path, "moe-infinity-offloads"))
|
| 36 |
kwargs["device_map"] = "cuda:0"
|
| 37 |
super().__init__(
|
| 38 |
*args, **kwargs, pretrained=pretrained
|
| 39 |
) # Assuming HFLM accepts a 'pretrained' arg and handles it
|
| 40 |
# self._create_model()
|
|
|
|
| 41 |
|
| 42 |
def __del__(self):
    """Tear down the MoE-Infinity engine and delete the offload directory.

    Calls ``engine.clean_up()`` and
    ``engine.archer_engine.clean_up_resources()`` on ``self._model``, then
    removes ``<self.offload_path>/moe-infinity-offloads`` from disk.

    Python may run ``__del__`` on an instance whose ``__init__`` raised
    part-way through, so ``_model``, ``engine`` and ``offload_path`` are
    looked up defensively instead of assuming they were assigned.
    """
    engine = getattr(getattr(self, "_model", None), "engine", None)
    try:
        if engine is not None:
            engine.clean_up()  # clean up hooks
            engine.archer_engine.clean_up_resources()  # clean up resources
    finally:
        # Remove the offload directory even if engine teardown failed, so a
        # broken engine does not leave stale offloaded weights behind.
        offload_root = getattr(self, "offload_path", None)
        if offload_root is not None:
            offload_dir = os.path.join(offload_root, "moe-infinity-offloads")
            try:
                shutil.rmtree(offload_dir)  # clean up offload model
            except FileNotFoundError:
                # Already gone (never created, or removed concurrently).
                pass
|
| 47 |
+
|
| 48 |
|
| 49 |
def _create_model(self, *args, **kwargs):
|
| 50 |
"""
|