Spaces:
Running
Running
Lisa Dunlap
Add persistent storage support for Hugging Face Spaces - Enhanced app.py with automatic persistent storage detection - Added comprehensive persistent storage utilities - Added documentation and examples - Automatic HF_HOME and cache configuration for /data directory
f850bde
| """ | |
| Utilities for persistent storage in Hugging Face Spaces. | |
| This module provides utilities for managing persistent storage in Hugging Face Spaces, | |
| including data directories, cache management, and file operations. | |
| """ | |
| import os | |
| import shutil | |
| from pathlib import Path | |
| from typing import Optional, Union | |
| import tempfile | |
def get_persistent_data_dir() -> Optional[Path]:
    """Return the application's directory on the persistent volume.

    Persistent storage is considered available when a ``/data`` directory
    exists. In that case ``/data/app_data`` is created if missing and
    returned.

    Returns:
        ``Path("/data/app_data")`` when ``/data`` exists, otherwise None.
    """
    if not os.path.isdir("/data"):
        return None

    app_root = Path("/data/app_data")
    app_root.mkdir(exist_ok=True)
    return app_root
def get_cache_dir() -> Path:
    """Return a cache directory that is guaranteed to exist.

    When a ``/data`` directory exists (persistent storage), the cache lives
    at ``/data/.cache``. Otherwise it falls back to ``app_cache`` inside the
    system temp directory.

    Returns:
        Path to the cache directory. The directory is created if missing in
        both the persistent and the fallback case, so callers can write into
        it immediately.
    """
    if os.path.isdir("/data"):
        cache_dir = Path("/data/.cache")
    else:
        # Fallback to temp directory
        cache_dir = Path(tempfile.gettempdir()) / "app_cache"

    # Create in both branches: previously only the persistent location was
    # created, leaving local callers with a path that did not exist.
    cache_dir.mkdir(exist_ok=True)
    return cache_dir
def get_hf_home_dir() -> Path:
    """Return the directory to use as the Hugging Face home (model cache).

    When a ``/data`` directory exists, ``/data/.huggingface`` is created if
    missing and returned. Otherwise the default ``~/.cache/huggingface``
    location is returned without being created.

    Returns:
        Path to the HF home directory.
    """
    if not os.path.isdir("/data"):
        # No persistent volume: use the default location under the user's home.
        return Path.home() / ".cache" / "huggingface"

    persistent_home = Path("/data/.huggingface")
    persistent_home.mkdir(exist_ok=True)
    return persistent_home
def save_uploaded_file(uploaded_file, filename: str) -> Optional[Path]:
    """Copy an uploaded file into persistent storage.

    Args:
        uploaded_file: Source of the upload. Either a filesystem path
            (``str`` or ``os.PathLike``) or an object whose ``name``
            attribute holds the path of the file on disk (e.g. a Gradio
            uploaded file object).
        filename: Destination name relative to the persistent data
            directory. It may contain subdirectories, which are created as
            needed.

    Returns:
        Path to the saved file, or None when persistent storage is not
        available or ``uploaded_file`` is falsy.

    Raises:
        ValueError: If ``filename`` resolves to a location outside the
            persistent data directory (e.g. contains ``..`` or is an
            absolute path elsewhere).
    """
    persistent_dir = get_persistent_data_dir()
    if not persistent_dir or not uploaded_file:
        return None

    save_path = persistent_dir / filename

    # Upload names are untrusted: refuse anything that would escape the
    # storage directory once ".." components and symlinks are resolved.
    if not save_path.resolve().is_relative_to(persistent_dir.resolve()):
        raise ValueError(
            f"filename escapes the persistent storage directory: {filename!r}"
        )

    save_path.parent.mkdir(parents=True, exist_ok=True)

    # Path-like sources must be checked first: pathlib.Path also exposes a
    # ``name`` attribute, but it is only the basename, not the full path.
    if isinstance(uploaded_file, (str, os.PathLike)):
        source = uploaded_file
    else:
        # File-like wrapper: its ``name`` is the on-disk path. Objects
        # without ``name`` are passed through unchanged, as before.
        source = getattr(uploaded_file, "name", uploaded_file)

    shutil.copy2(source, save_path)
    return save_path
def save_data_to_persistent(data: bytes, filename: str, subdirectory: str = "") -> Optional[Path]:
    """Write binary data to a file in persistent storage.

    Args:
        data: Binary data to save.
        filename: Name to save the file as.
        subdirectory: Optional subdirectory within persistent storage. May be
            nested (e.g. ``"runs/2024"``); missing levels are created.

    Returns:
        Path to the saved file, or None when persistent storage is not
        available.
    """
    persistent_dir = get_persistent_data_dir()
    if not persistent_dir:
        return None

    save_dir = persistent_dir / subdirectory if subdirectory else persistent_dir
    save_path = save_dir / filename

    # A single parents-aware mkdir covers both the subdirectory and any
    # directories embedded in ``filename``. The previous extra
    # ``save_dir.mkdir(exist_ok=True)`` failed for nested subdirectories.
    save_path.parent.mkdir(parents=True, exist_ok=True)

    save_path.write_bytes(data)
    return save_path
def load_data_from_persistent(filename: str, subdirectory: str = "") -> Optional[bytes]:
    """Read a file from persistent storage as bytes.

    Args:
        filename: Name of the file to load.
        subdirectory: Optional subdirectory within persistent storage.

    Returns:
        The file's contents, or None when persistent storage is not
        available, the path does not exist, or the path is not a regular
        file (e.g. a directory).
    """
    persistent_dir = get_persistent_data_dir()
    if not persistent_dir:
        return None

    if subdirectory:
        load_path = persistent_dir / subdirectory / filename
    else:
        load_path = persistent_dir / filename

    # ``exists()`` is also true for directories, which cannot be read as
    # bytes; only regular files are loadable.
    if not load_path.is_file():
        return None

    try:
        return load_path.read_bytes()
    except FileNotFoundError:
        # The file was removed between the check and the read.
        return None
def list_persistent_files(subdirectory: str = "", pattern: str = "*") -> list[Path]:
    """List entries in persistent storage that match a glob pattern.

    Args:
        subdirectory: Optional subdirectory within persistent storage to
            search instead of the storage root.
        pattern: Glob pattern to match (e.g., "*.json", "data_*").

    Returns:
        List of Path objects for every match returned by ``Path.glob``
        (matching directories are included). Empty when persistent storage
        is not available or the searched directory does not exist.
    """
    storage_root = get_persistent_data_dir()
    if not storage_root:
        return []

    search_dir = storage_root / subdirectory if subdirectory else storage_root
    if not search_dir.exists():
        return []

    matches = search_dir.glob(pattern)
    return list(matches)
def delete_persistent_file(filename: str, subdirectory: str = "") -> bool:
    """Delete a file from persistent storage.

    Args:
        filename: Name of the file to delete.
        subdirectory: Optional subdirectory within persistent storage.

    Returns:
        True if the file (or symlink) was removed. False when persistent
        storage is not available, the path does not exist, or the path is a
        directory.
    """
    persistent_dir = get_persistent_data_dir()
    if not persistent_dir:
        return False

    if subdirectory:
        file_path = persistent_dir / subdirectory / filename
    else:
        file_path = persistent_dir / filename

    # ``unlink`` cannot remove real directories; report failure instead of
    # raising. A symlink that points at a directory is still removable.
    if file_path.is_dir() and not file_path.is_symlink():
        return False

    # Attempt the removal directly rather than checking ``exists()`` first:
    # this avoids a check-then-act race and also removes dangling symlinks.
    try:
        file_path.unlink()
    except FileNotFoundError:
        return False
    return True
def is_persistent_storage_available() -> bool:
    """Report whether persistent storage is available.

    Availability is determined solely by the presence of a ``/data``
    directory.

    Returns:
        True if ``/data`` is an existing directory, False otherwise.
    """
    has_data_volume = os.path.isdir("/data")
    return has_data_volume
def get_persistent_results_dir() -> Optional[Path]:
    """Return the ``results`` directory inside persistent storage.

    The directory is created under the persistent data directory if it does
    not exist yet.

    Returns:
        Path to the results directory, or None when persistent storage is
        not available.
    """
    storage_root = get_persistent_data_dir()
    if not storage_root:
        return None

    target = storage_root / "results"
    target.mkdir(exist_ok=True)
    return target
def get_persistent_logs_dir() -> Optional[Path]:
    """Return the ``logs`` directory inside persistent storage.

    The directory is created under the persistent data directory if it does
    not exist yet.

    Returns:
        Path to the logs directory, or None when persistent storage is not
        available.
    """
    storage_root = get_persistent_data_dir()
    if not storage_root:
        return None

    target = storage_root / "logs"
    target.mkdir(exist_ok=True)
    return target
def get_storage_info() -> dict:
    """Collect a summary of the storage locations in use.

    Returns:
        Dictionary with the keys:

        * ``persistent_available``: whether persistent storage is available.
        * ``data_dir``: persistent data directory as a string, or None.
        * ``cache_dir``: cache directory as a string.
        * ``hf_home``: Hugging Face home directory as a string.
        * ``storage_paths``: disk usage of the data directory
          (``total_gb``, ``used_gb``, ``free_gb``, rounded to two decimals);
          left empty when persistent storage is unavailable or the usage
          query fails with ``OSError``.
    """
    has_persistent = is_persistent_storage_available()
    info = {
        "persistent_available": has_persistent,
        "data_dir": None,
        "cache_dir": str(get_cache_dir()),
        "hf_home": str(get_hf_home_dir()),
        "storage_paths": {},
    }
    if not has_persistent:
        return info

    data_dir = get_persistent_data_dir()
    info["data_dir"] = str(data_dir)

    # Disk usage is best-effort: on failure the summary keeps an empty
    # "storage_paths" entry rather than raising.
    try:
        usage = shutil.disk_usage(data_dir)
    except OSError:
        return info

    bytes_per_gb = 1024**3
    info["storage_paths"] = {
        "total_gb": round(usage.total / bytes_per_gb, 2),
        "used_gb": round(usage.used / bytes_per_gb, 2),
        "free_gb": round(usage.free / bytes_per_gb, 2),
    }
    return info