File size: 7,591 Bytes
4862c84
 
f850bde
 
 
4862c84
 
f850bde
4862c84
f850bde
 
4862c84
 
 
 
 
f850bde
 
 
4862c84
 
 
 
 
 
 
 
 
 
 
 
 
f850bde
 
 
4862c84
 
 
 
 
 
 
 
 
 
 
 
f850bde
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4862c84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f850bde
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4862c84
 
 
 
f850bde
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4862c84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f850bde
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
"""
Utilities for persistent storage in Hugging Face Spaces.

This module provides utilities for managing persistent storage in Hugging Face Spaces,
including data directories, cache management, and file operations.
"""
import os
import shutil
from pathlib import Path
from typing import Optional, Union
import tempfile


def get_persistent_data_dir() -> Optional[Path]:
    """Return the application's persistent data directory, if there is one.

    Persistent storage is considered available when ``/data`` exists as a
    directory. In that case ``/data/app_data`` is created on demand and
    returned.

    Returns:
        ``Path("/data/app_data")`` when ``/data`` is a directory, otherwise
        ``None``.
    """
    # No mounted volume: signal "not available" to the caller.
    if not os.path.isdir("/data"):
        return None

    app_data = Path("/data/app_data")
    # The parent is known to exist, so a non-recursive mkdir is sufficient.
    app_data.mkdir(exist_ok=True)
    return app_data


def get_cache_dir() -> Path:
    """Get the cache directory (persistent if available, temp otherwise).

    When ``/data`` exists as a directory the cache lives in ``/data/.cache``;
    otherwise it lives in ``<system temp dir>/app_cache``. In both cases the
    directory is created if it does not exist yet, so callers can write into
    the returned path right away.

    Returns:
        Path to an existing cache directory.

    Raises:
        OSError: If the directory cannot be created.
    """
    if os.path.isdir("/data"):
        cache_dir = Path("/data/.cache")
    else:
        # Fallback to temp directory for local development.
        cache_dir = Path(tempfile.gettempdir()) / "app_cache"

    # Create the directory in both branches; previously only the persistent
    # branch did, so the fallback could hand back a path that did not exist.
    cache_dir.mkdir(parents=True, exist_ok=True)
    return cache_dir


def get_hf_home_dir() -> Path:
    """Return the directory to use as the Hugging Face home (model cache).

    When ``/data`` exists as a directory, ``/data/.huggingface`` is created
    if needed and returned. Otherwise ``~/.cache/huggingface`` is returned
    without being created.

    Returns:
        Path to the HF home directory.
    """
    # Local development: no persistent volume, use the per-user location.
    if not os.path.isdir("/data"):
        return Path.home() / ".cache" / "huggingface"

    persistent_hf_home = Path("/data/.huggingface")
    persistent_hf_home.mkdir(exist_ok=True)
    return persistent_hf_home


def save_uploaded_file(uploaded_file, filename: str) -> Optional[Path]:
    """Save an uploaded file to persistent storage.

    Args:
        uploaded_file: Either a filesystem path (``str`` or ``os.PathLike``)
            or a file-like object whose ``name`` attribute holds the path of
            the file on disk (e.g. a Gradio upload object).
        filename: Name to save the file as, relative to the persistent data
            directory. Missing parent directories are created.
            NOTE(review): ``filename`` is joined as-is and not sanitised;
            callers passing user-controlled names should validate them.

    Returns:
        Path to saved file if successful, None if persistent storage is not
        available or ``uploaded_file`` is falsy.
    """
    persistent_dir = get_persistent_data_dir()
    if not (persistent_dir and uploaded_file):
        return None

    # Check for real paths first: pathlib.Path also exposes ``.name``, but
    # there it is only the final path component, so using it as the copy
    # source would point at the wrong (relative) location.
    if isinstance(uploaded_file, (str, os.PathLike)):
        source = uploaded_file
    elif hasattr(uploaded_file, "name"):
        # File object: ``name`` carries the on-disk path.
        source = uploaded_file.name
    else:
        # Anything else is handed to shutil unchanged, as before.
        source = uploaded_file

    save_path = persistent_dir / filename
    save_path.parent.mkdir(parents=True, exist_ok=True)

    # Copy the uploaded file (with metadata) to persistent storage.
    shutil.copy2(source, save_path)
    return save_path


def save_data_to_persistent(data: bytes, filename: str, subdirectory: str = "") -> Optional[Path]:
    """Save binary data to persistent storage.

    Args:
        data: Binary data to save
        filename: Name to save the file as
        subdirectory: Optional subdirectory within persistent storage; may be
            nested (e.g. ``"runs/2024"``), missing levels are created.

    Returns:
        Path to saved file if successful, None if persistent storage is not
        available.
    """
    persistent_dir = get_persistent_data_dir()
    if persistent_dir is None:
        return None

    save_dir = persistent_dir / subdirectory if subdirectory else persistent_dir
    save_path = save_dir / filename

    # A single recursive mkdir covers both the subdirectory and any directory
    # components inside ``filename``. The former extra non-recursive mkdir on
    # ``save_dir`` raised FileNotFoundError for nested subdirectories.
    save_path.parent.mkdir(parents=True, exist_ok=True)

    save_path.write_bytes(data)
    return save_path


def load_data_from_persistent(filename: str, subdirectory: str = "") -> Optional[bytes]:
    """Read a file from persistent storage and return its raw contents.

    Args:
        filename: Name of the file to load
        subdirectory: Optional subdirectory within persistent storage

    Returns:
        The file's bytes, or None when persistent storage is unavailable or
        the path does not exist.
    """
    storage_root = get_persistent_data_dir()
    if not storage_root:
        return None

    base = storage_root / subdirectory if subdirectory else storage_root
    source = base / filename
    if not source.exists():
        return None

    return source.read_bytes()


def list_persistent_files(subdirectory: str = "", pattern: str = "*") -> list[Path]:
    """Collect the entries in persistent storage that match a glob pattern.

    Args:
        subdirectory: Optional subdirectory within persistent storage
        pattern: Glob pattern to match (e.g., "*.json", "data_*")

    Returns:
        List of Path objects produced by globbing the chosen directory; an
        empty list when persistent storage is unavailable or the directory
        does not exist.
    """
    storage_root = get_persistent_data_dir()
    if not storage_root:
        return []

    folder = storage_root / subdirectory if subdirectory else storage_root
    if not folder.exists():
        return []

    # Materialise the lazy glob iterator so callers get a real list.
    return [*folder.glob(pattern)]


def delete_persistent_file(filename: str, subdirectory: str = "") -> bool:
    """Delete a file from persistent storage.

    Args:
        filename: Name of the file to delete
        subdirectory: Optional subdirectory within persistent storage

    Returns:
        True if the file was removed, False if persistent storage is not
        available or there was nothing at that path to remove.

    Raises:
        OSError: Errors from ``unlink`` other than "not found" / "not a
            directory" are not caught and propagate to the caller.
    """
    persistent_dir = get_persistent_data_dir()
    if persistent_dir is None:
        return False

    if subdirectory:
        file_path = persistent_dir / subdirectory / filename
    else:
        file_path = persistent_dir / filename

    # Attempt the removal directly instead of checking exists() first: this
    # avoids the window in which the file can vanish between check and
    # unlink, and also lets dangling symlinks (for which exists() is False)
    # be removed.
    try:
        file_path.unlink()
    except (FileNotFoundError, NotADirectoryError):
        # Nothing to delete (missing entry, or a parent component is a file).
        return False
    return True


def is_persistent_storage_available() -> bool:
    """Report whether the persistent storage root is present.

    Returns:
        True when ``/data`` exists and is a directory, False otherwise.
    """
    storage_root = "/data"
    return os.path.isdir(storage_root)


def get_persistent_results_dir() -> Optional[Path]:
    """Return the ``results`` folder inside persistent storage.

    The folder is created if it does not exist yet.

    Returns:
        Path to the persistent results directory, or None when persistent
        storage is not available.
    """
    storage_root = get_persistent_data_dir()
    if not storage_root:
        return None

    results_path = storage_root / "results"
    results_path.mkdir(exist_ok=True)
    return results_path


def get_persistent_logs_dir() -> Optional[Path]:
    """Return the ``logs`` folder inside persistent storage.

    The folder is created if it does not exist yet.

    Returns:
        Path to the persistent logs directory, or None when persistent
        storage is not available.
    """
    storage_root = get_persistent_data_dir()
    if not storage_root:
        return None

    logs_path = storage_root / "logs"
    logs_path.mkdir(exist_ok=True)
    return logs_path


def get_storage_info() -> dict:
    """Summarise the storage locations used by the application.

    Returns:
        Dictionary with these keys:
            persistent_available: whether persistent storage is available.
            data_dir: persistent data directory as a string, or None.
            cache_dir: cache directory as a string.
            hf_home: Hugging Face home directory as a string.
            storage_paths: total/used/free space of the data directory in
                GiB (keys ``total_gb``, ``used_gb``, ``free_gb``), or an
                empty dict when unavailable or the disk query failed.
    """
    has_persistent = is_persistent_storage_available()
    cache_location = str(get_cache_dir())
    hf_location = str(get_hf_home_dir())

    info = {
        "persistent_available": has_persistent,
        "data_dir": None,
        "cache_dir": cache_location,
        "hf_home": hf_location,
        "storage_paths": {},
    }

    if not has_persistent:
        return info

    persistent_root = get_persistent_data_dir()
    info["data_dir"] = str(persistent_root)

    # Disk usage is best-effort: on failure the report keeps the empty
    # "storage_paths" placeholder instead of raising.
    try:
        usage = shutil.disk_usage(persistent_root)
    except OSError:
        return info

    bytes_per_gib = 1024 ** 3
    info["storage_paths"] = {
        "total_gb": round(usage.total / bytes_per_gib, 2),
        "used_gb": round(usage.used / bytes_per_gib, 2),
        "free_gb": round(usage.free / bytes_per_gib, 2),
    }
    return info