""" Utilities for persistent storage in Hugging Face Spaces. This module provides utilities for managing persistent storage in Hugging Face Spaces, including data directories, cache management, and file operations. """ import os import shutil from pathlib import Path from typing import Optional, Union import tempfile def get_persistent_data_dir() -> Optional[Path]: """Get the persistent data directory if available. In Hugging Face Spaces, this will be `/data/app_data`. Returns None if persistent storage is not available. Returns: Path to persistent storage directory if available, None otherwise. """ if os.path.isdir("/data"): data_dir = Path("/data/app_data") data_dir.mkdir(exist_ok=True) return data_dir return None def get_cache_dir() -> Path: """Get the appropriate cache directory (persistent if available, temp otherwise). In Hugging Face Spaces, this will be `/data/.cache`. Falls back to temp directory in local development. Returns: Path to cache directory. """ if os.path.isdir("/data"): cache_dir = Path("/data/.cache") cache_dir.mkdir(exist_ok=True) return cache_dir else: # Fallback to temp directory return Path(tempfile.gettempdir()) / "app_cache" def get_hf_home_dir() -> Path: """Get the Hugging Face home directory for model caching. In Hugging Face Spaces, this will be `/data/.huggingface`. Falls back to default ~/.cache/huggingface in local development. Returns: Path to HF home directory. """ if os.path.isdir("/data"): hf_home = Path("/data/.huggingface") hf_home.mkdir(exist_ok=True) return hf_home else: # Fallback to default location return Path.home() / ".cache" / "huggingface" def save_uploaded_file(uploaded_file, filename: str) -> Optional[Path]: """Save an uploaded file to persistent storage. Args: uploaded_file: Gradio uploaded file object filename: Name to save the file as Returns: Path to saved file if successful, None otherwise. """ persistent_dir = get_persistent_data_dir() if persistent_dir and uploaded_file: save_path = persistent_dir / filename save_path.parent.mkdir(parents=True, exist_ok=True) # Copy the uploaded file to persistent storage if hasattr(uploaded_file, 'name'): # Gradio file object shutil.copy2(uploaded_file.name, save_path) else: # Direct file path shutil.copy2(uploaded_file, save_path) return save_path return None def save_data_to_persistent(data: bytes, filename: str, subdirectory: str = "") -> Optional[Path]: """Save binary data to persistent storage. Args: data: Binary data to save filename: Name to save the file as subdirectory: Optional subdirectory within persistent storage Returns: Path to saved file if successful, None otherwise. """ persistent_dir = get_persistent_data_dir() if persistent_dir: if subdirectory: save_dir = persistent_dir / subdirectory save_dir.mkdir(exist_ok=True) else: save_dir = persistent_dir save_path = save_dir / filename save_path.parent.mkdir(parents=True, exist_ok=True) with open(save_path, 'wb') as f: f.write(data) return save_path return None def load_data_from_persistent(filename: str, subdirectory: str = "") -> Optional[bytes]: """Load binary data from persistent storage. Args: filename: Name of the file to load subdirectory: Optional subdirectory within persistent storage Returns: Binary data if successful, None otherwise. """ persistent_dir = get_persistent_data_dir() if persistent_dir: if subdirectory: load_path = persistent_dir / subdirectory / filename else: load_path = persistent_dir / filename if load_path.exists(): with open(load_path, 'rb') as f: return f.read() return None def list_persistent_files(subdirectory: str = "", pattern: str = "*") -> list[Path]: """List files in persistent storage. Args: subdirectory: Optional subdirectory within persistent storage pattern: Glob pattern to match files (e.g., "*.json", "data_*") Returns: List of Path objects for matching files. """ persistent_dir = get_persistent_data_dir() if persistent_dir: if subdirectory: search_dir = persistent_dir / subdirectory else: search_dir = persistent_dir if search_dir.exists(): return list(search_dir.glob(pattern)) return [] def delete_persistent_file(filename: str, subdirectory: str = "") -> bool: """Delete a file from persistent storage. Args: filename: Name of the file to delete subdirectory: Optional subdirectory within persistent storage Returns: True if successful, False otherwise. """ persistent_dir = get_persistent_data_dir() if persistent_dir: if subdirectory: file_path = persistent_dir / subdirectory / filename else: file_path = persistent_dir / filename if file_path.exists(): file_path.unlink() return True return False def is_persistent_storage_available() -> bool: """Check if persistent storage is available. Returns: True if persistent storage is available, False otherwise. """ return os.path.isdir("/data") def get_persistent_results_dir() -> Optional[Path]: """Get the persistent results directory for storing pipeline results. Returns: Path to persistent results directory if available, None otherwise. """ persistent_dir = get_persistent_data_dir() if persistent_dir: results_dir = persistent_dir / "results" results_dir.mkdir(exist_ok=True) return results_dir return None def get_persistent_logs_dir() -> Optional[Path]: """Get the persistent logs directory for storing application logs. Returns: Path to persistent logs directory if available, None otherwise. """ persistent_dir = get_persistent_data_dir() if persistent_dir: logs_dir = persistent_dir / "logs" logs_dir.mkdir(exist_ok=True) return logs_dir return None def get_storage_info() -> dict: """Get information about available storage. Returns: Dictionary with storage information. """ info = { "persistent_available": is_persistent_storage_available(), "data_dir": None, "cache_dir": str(get_cache_dir()), "hf_home": str(get_hf_home_dir()), "storage_paths": {} } if info["persistent_available"]: data_dir = get_persistent_data_dir() info["data_dir"] = str(data_dir) # Check available space try: total, used, free = shutil.disk_usage(data_dir) info["storage_paths"] = { "total_gb": round(total / (1024**3), 2), "used_gb": round(used / (1024**3), 2), "free_gb": round(free / (1024**3), 2) } except OSError: pass return info