Julian Bilcke
committed on
Commit
·
e45a45b
1
Parent(s):
a120d0f
fix attempt for the download weights button
Browse files
vms/ui/project/services/previewing.py
CHANGED
|
@@ -46,7 +46,7 @@ class PreviewingService:
|
|
| 46 |
if not checkpoints:
|
| 47 |
return None
|
| 48 |
|
| 49 |
-
latest_checkpoint = max(checkpoints, key=lambda x: int(x.name.split("
|
| 50 |
lora_path = latest_checkpoint / "pytorch_lora_weights.safetensors"
|
| 51 |
|
| 52 |
if lora_path.exists():
|
|
|
|
| 46 |
if not checkpoints:
|
| 47 |
return None
|
| 48 |
|
| 49 |
+
latest_checkpoint = max(checkpoints, key=lambda x: int(x.name.split("_")[-1]))
|
| 50 |
lora_path = latest_checkpoint / "pytorch_lora_weights.safetensors"
|
| 51 |
|
| 52 |
if lora_path.exists():
|
vms/ui/project/services/training.py
CHANGED
|
@@ -1480,16 +1480,33 @@ class TrainingService:
|
|
| 1480 |
self.append_log(f"Error uploading to hub: {str(e)}")
|
| 1481 |
return False
|
| 1482 |
|
| 1483 |
-
def get_model_output_safetensors(self) -> str:
|
| 1484 |
"""Return the path to the model safetensors
|
| 1485 |
|
| 1486 |
-
|
| 1487 |
Returns:
|
| 1488 |
-
Path to
|
| 1489 |
"""
|
| 1490 |
|
|
|
|
| 1491 |
model_output_safetensors_path = self.app.output_path / "pytorch_lora_weights.safetensors"
|
| 1492 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1493 |
|
| 1494 |
def create_training_dataset_zip(self) -> str:
|
| 1495 |
"""Create a ZIP file containing all training data
|
|
|
|
| 1480 |
self.append_log(f"Error uploading to hub: {str(e)}")
|
| 1481 |
return False
|
| 1482 |
|
| 1483 |
+
def get_model_output_safetensors(self) -> Optional[str]:
|
| 1484 |
"""Return the path to the model safetensors
|
| 1485 |
|
|
|
|
| 1486 |
Returns:
|
| 1487 |
+
Path to safetensors file or None if not found
|
| 1488 |
"""
|
| 1489 |
|
| 1490 |
+
# Check if the root level file exists (this should be the primary location)
|
| 1491 |
model_output_safetensors_path = self.app.output_path / "pytorch_lora_weights.safetensors"
|
| 1492 |
+
if model_output_safetensors_path.exists():
|
| 1493 |
+
return str(model_output_safetensors_path)
|
| 1494 |
+
|
| 1495 |
+
# If not found in root, log the issue and return None
|
| 1496 |
+
logger.warning(f"Model weights not found at expected location: {model_output_safetensors_path}")
|
| 1497 |
+
logger.info(f"Checking output directory contents: {list(self.app.output_path.glob('*'))}")
|
| 1498 |
+
|
| 1499 |
+
# Check if there are any checkpoint directories as a fallback
|
| 1500 |
+
checkpoints = list(self.app.output_path.glob("finetrainers_step_*"))
|
| 1501 |
+
if checkpoints:
|
| 1502 |
+
logger.info(f"Found {len(checkpoints)} checkpoint directories, but main weights file is missing")
|
| 1503 |
+
latest_checkpoint = max(checkpoints, key=lambda x: int(x.name.split("_")[-1]))
|
| 1504 |
+
checkpoint_weights = latest_checkpoint / "pytorch_lora_weights.safetensors"
|
| 1505 |
+
if checkpoint_weights.exists():
|
| 1506 |
+
logger.info(f"Found weights in latest checkpoint: {checkpoint_weights}")
|
| 1507 |
+
return str(checkpoint_weights)
|
| 1508 |
+
|
| 1509 |
+
return None
|
| 1510 |
|
| 1511 |
def create_training_dataset_zip(self) -> str:
|
| 1512 |
"""Create a ZIP file containing all training data
|