Spaces:
Running
Running
File size: 7,591 Bytes
4862c84 f850bde 4862c84 f850bde 4862c84 f850bde 4862c84 f850bde 4862c84 f850bde 4862c84 f850bde 4862c84 f850bde 4862c84 f850bde 4862c84 f850bde |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 |
"""
Utilities for persistent storage in Hugging Face Spaces.
This module provides utilities for managing persistent storage in Hugging Face Spaces,
including data directories, cache management, and file operations.
"""
import os
import shutil
from pathlib import Path
from typing import Optional, Union
import tempfile
def get_persistent_data_dir() -> Optional[Path]:
    """Get the persistent data directory if available.

    In Hugging Face Spaces, this will be `/data/app_data`. The directory is
    created on demand whenever `/data` exists.

    Returns:
        Path to persistent storage directory if available, None when `/data`
        is missing or `/data/app_data` cannot be created.
    """
    if not os.path.isdir("/data"):
        return None

    data_dir = Path("/data/app_data")
    try:
        data_dir.mkdir(exist_ok=True)
    except OSError:
        # /data exists but is unusable (read-only mount, missing permissions,
        # or a regular file named app_data): report "not available" as the
        # docstring promises instead of crashing every caller.
        return None
    return data_dir
def get_cache_dir() -> Path:
    """Get the appropriate cache directory (persistent if available, temp otherwise).

    In Hugging Face Spaces, this will be `/data/.cache`.
    Falls back to `<system temp dir>/app_cache` when `/data` is missing or
    the persistent cache directory cannot be created.

    Returns:
        Path to cache directory. The directory is created if it does not
        exist yet (best-effort for the temp fallback).
    """
    if os.path.isdir("/data"):
        cache_dir = Path("/data/.cache")
        try:
            cache_dir.mkdir(exist_ok=True)
        except OSError:
            # Persistent volume is present but not writable: use the temp
            # fallback below rather than failing.
            pass
        else:
            return cache_dir

    fallback_dir = Path(tempfile.gettempdir()) / "app_cache"
    try:
        # Create the fallback too, so both branches hand back a usable
        # directory; previously only the persistent branch did.
        fallback_dir.mkdir(parents=True, exist_ok=True)
    except OSError:
        # Best-effort: this branch never raised before, so keep returning
        # the path and let the caller's first write surface the problem.
        pass
    return fallback_dir
def get_hf_home_dir() -> Path:
    """Get the Hugging Face home directory for model caching.

    In Hugging Face Spaces, this will be `/data/.huggingface`.
    Falls back to default ~/.cache/huggingface when `/data` is missing or
    the persistent directory cannot be created.

    Returns:
        Path to HF home directory. Only the persistent location is created
        here; the fallback path is returned as-is.
    """
    if os.path.isdir("/data"):
        hf_home = Path("/data/.huggingface")
        try:
            hf_home.mkdir(exist_ok=True)
        except OSError:
            # Persistent volume exists but is not writable: use the default
            # location below instead of raising.
            pass
        else:
            return hf_home

    # Fallback to default location
    return Path.home() / ".cache" / "huggingface"
def save_uploaded_file(uploaded_file, filename: str) -> Optional[Path]:
    """Save an uploaded file to persistent storage.

    Args:
        uploaded_file: Either a file-like object exposing the source path as
            its `name` attribute (e.g. a Gradio uploaded file object), or a
            direct file path given as `str` / `os.PathLike`.
        filename: Name to save the file as, relative to the persistent data
            directory. Missing parent directories are created.

    Returns:
        Path to saved file if successful, None when persistent storage is
        unavailable or `uploaded_file` is empty.

    Raises:
        OSError: If the source cannot be read or the destination written
            (propagated from `shutil.copy2`).
    """
    persistent_dir = get_persistent_data_dir()
    if not persistent_dir or not uploaded_file:
        return None

    # Path-like inputs must be checked BEFORE the `name` attribute probe:
    # pathlib.Path also has `.name`, but there it is only the final path
    # component, which would make copy2 look for the file in the CWD.
    if not isinstance(uploaded_file, (str, os.PathLike)) and hasattr(uploaded_file, "name"):
        source = uploaded_file.name  # file object: .name holds the full path
    else:
        source = uploaded_file  # direct file path

    # NOTE(review): `filename` is joined unchecked; an absolute path or one
    # containing ".." escapes the persistent directory. Validate upstream if
    # it can come from untrusted input.
    save_path = persistent_dir / filename
    save_path.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy2(source, save_path)
    return save_path
def save_data_to_persistent(
    data: bytes, filename: str, subdirectory: str = ""
) -> Optional[Path]:
    """Save binary data to persistent storage.

    Args:
        data: Binary data to save
        filename: Name to save the file as
        subdirectory: Optional subdirectory within persistent storage; may be
            nested (e.g. "runs/2024"), intermediate directories are created.

    Returns:
        Path to saved file if successful, None when persistent storage is
        unavailable.
    """
    persistent_dir = get_persistent_data_dir()
    if not persistent_dir:
        return None

    save_dir = persistent_dir / subdirectory if subdirectory else persistent_dir
    save_path = save_dir / filename

    # One mkdir with parents=True covers the subdirectory and any directories
    # embedded in `filename`. The former extra `save_dir.mkdir(exist_ok=True)`
    # lacked parents=True and raised FileNotFoundError for nested
    # subdirectories.
    save_path.parent.mkdir(parents=True, exist_ok=True)
    save_path.write_bytes(data)
    return save_path
def load_data_from_persistent(filename: str, subdirectory: str = "") -> Optional[bytes]:
    """Load binary data from persistent storage.

    Args:
        filename: Name of the file to load
        subdirectory: Optional subdirectory within persistent storage

    Returns:
        Binary data if successful, None when persistent storage is
        unavailable or the path does not refer to a readable regular file
        (missing, or a directory).
    """
    persistent_dir = get_persistent_data_dir()
    if not persistent_dir:
        return None

    if subdirectory:
        load_path = persistent_dir / subdirectory / filename
    else:
        load_path = persistent_dir / filename

    # Read first and handle the failure, instead of exists()-then-open():
    # that was racy and raised IsADirectoryError when the name referred to a
    # directory. Other errors (e.g. permissions) still propagate.
    try:
        return load_path.read_bytes()
    except (FileNotFoundError, IsADirectoryError, NotADirectoryError):
        return None
def list_persistent_files(subdirectory: str = "", pattern: str = "*") -> list[Path]:
    """Enumerate entries in persistent storage that match a glob pattern.

    Args:
        subdirectory: Optional subdirectory within persistent storage to
            search; the persistent root is searched when empty.
        pattern: Glob pattern to match (e.g., "*.json", "data_*").

    Returns:
        List of Path objects produced by globbing the search directory, or
        an empty list when persistent storage is unavailable or the search
        directory does not exist.
    """
    root = get_persistent_data_dir()
    if not root:
        return []

    target_dir = root / subdirectory if subdirectory else root
    if not target_dir.exists():
        return []

    matches = target_dir.glob(pattern)
    return list(matches)
def delete_persistent_file(filename: str, subdirectory: str = "") -> bool:
    """Delete a file from persistent storage.

    Args:
        filename: Name of the file to delete
        subdirectory: Optional subdirectory within persistent storage

    Returns:
        True if the file was removed, False otherwise (storage unavailable,
        file missing, path is a directory, or removal not permitted).
    """
    persistent_dir = get_persistent_data_dir()
    if not persistent_dir:
        return False

    if subdirectory:
        file_path = persistent_dir / subdirectory / filename
    else:
        file_path = persistent_dir / filename

    # Attempt the removal and translate failure into False. The previous
    # exists()-then-unlink() sequence raised when the path was a directory
    # or vanished between the two calls, contradicting the documented
    # "False otherwise" contract.
    try:
        file_path.unlink()
    except OSError:
        return False
    return True
def is_persistent_storage_available() -> bool:
    """Report whether the persistent storage mount point is present.

    Returns:
        True if `/data` exists and is a directory, False otherwise.
    """
    storage_root = "/data"
    return os.path.isdir(storage_root)
def get_persistent_results_dir() -> Optional[Path]:
    """Return the `results` folder inside persistent storage, creating it if needed.

    Returns:
        Path to the persistent results directory, or None when persistent
        storage is not available.
    """
    base_dir = get_persistent_data_dir()
    if not base_dir:
        return None

    target = base_dir / "results"
    target.mkdir(exist_ok=True)
    return target
def get_persistent_logs_dir() -> Optional[Path]:
    """Return the `logs` folder inside persistent storage, creating it if needed.

    Returns:
        Path to the persistent logs directory, or None when persistent
        storage is not available.
    """
    base_dir = get_persistent_data_dir()
    if not base_dir:
        return None

    target = base_dir / "logs"
    target.mkdir(exist_ok=True)
    return target
def get_storage_info() -> dict:
    """Get information about available storage.

    Returns:
        Dictionary with storage information:
            - "persistent_available": result of is_persistent_storage_available()
            - "data_dir": persistent data directory as a string, or None
            - "cache_dir": cache directory as a string
            - "hf_home": Hugging Face home directory as a string
            - "storage_paths": disk usage of the data directory in GiB
              ("total_gb", "used_gb", "free_gb"), or an empty dict when it
              could not be determined
    """
    info = {
        "persistent_available": is_persistent_storage_available(),
        "data_dir": None,
        "cache_dir": str(get_cache_dir()),
        "hf_home": str(get_hf_home_dir()),
        "storage_paths": {},
    }
    if not info["persistent_available"]:
        return info

    data_dir = get_persistent_data_dir()
    if data_dir is None:
        # Keep "data_dir" as None rather than the string "None", and do not
        # hand None to disk_usage.
        return info
    info["data_dir"] = str(data_dir)

    # Check available space; this is best-effort, so a failure leaves
    # "storage_paths" empty.
    try:
        total, used, free = shutil.disk_usage(data_dir)
    except OSError:
        return info

    bytes_per_gib = 1024**3
    info["storage_paths"] = {
        "total_gb": round(total / bytes_per_gib, 2),
        "used_gb": round(used / bytes_per_gib, 2),
        "free_gb": round(free / bytes_per_gib, 2),
    }
    return info