Spaces:
Sleeping
Sleeping
Lisa Dunlap
Add persistent storage support for Hugging Face Spaces: enhanced app.py with automatic persistent storage detection; added comprehensive persistent storage utilities; added documentation and examples; added automatic HF_HOME and cache configuration for the /data directory.
f850bde
""" | |
Example usage of persistent storage utilities for Hugging Face Spaces. | |
This file demonstrates how to use the persistent storage utilities | |
for saving and loading data in Hugging Face Spaces. | |
""" | |
import json | |
import pandas as pd | |
from pathlib import Path | |
from .persistent_storage import ( | |
get_persistent_data_dir, | |
get_cache_dir, | |
get_hf_home_dir, | |
save_data_to_persistent, | |
load_data_from_persistent, | |
save_uploaded_file, | |
list_persistent_files, | |
delete_persistent_file, | |
is_persistent_storage_available, | |
get_storage_info | |
) | |
def example_save_results(results_data: dict, experiment_name: str):
    """Example: persist a pipeline results dictionary as a JSON file.

    The dictionary is serialized to indented JSON, encoded as UTF-8 and handed
    to ``save_data_to_persistent`` under the ``experiments`` subdirectory with
    the filename ``<experiment_name>_results.json``.

    Args:
        results_data: Dictionary containing pipeline results.
        experiment_name: Name of the experiment; used as the filename prefix.

    Returns:
        The value returned by ``save_data_to_persistent`` when it is truthy,
        otherwise ``None`` (also when persistent storage is unavailable).
    """
    if not is_persistent_storage_available():
        print("β οΈ Persistent storage not available - skipping save")
        return None

    # Serialize first, then encode: the storage helper is given raw bytes.
    payload = json.dumps(results_data, indent=2).encode('utf-8')
    target_name = f"{experiment_name}_results.json"

    saved_path = save_data_to_persistent(
        data=payload,
        filename=target_name,
        subdirectory="experiments",
    )

    # A falsy return value from the helper is treated as a failed save.
    if not saved_path:
        print("β Failed to save results")
        return None

    print(f"β Saved results to: {saved_path}")
    return saved_path
def example_load_results(experiment_name: str):
    """Example: Load pipeline results from persistent storage.

    Reads ``<experiment_name>_results.json`` from the ``experiments``
    subdirectory via ``load_data_from_persistent`` and decodes it as
    UTF-8 JSON.

    Args:
        experiment_name: Name of the experiment.

    Returns:
        The decoded results, or ``None`` when nothing was loaded or when the
        stored bytes are not valid UTF-8 encoded JSON.
    """
    filename = f"{experiment_name}_results.json"
    results_bytes = load_data_from_persistent(
        filename=filename,
        subdirectory="experiments"
    )

    # A falsy result (nothing returned, or empty bytes) means "no results".
    if not results_bytes:
        print(f"β No results found for: {filename}")
        return None

    try:
        results_data = json.loads(results_bytes.decode('utf-8'))
    except ValueError as e:
        # UnicodeDecodeError and json.JSONDecodeError are both ValueError
        # subclasses; a corrupt file is reported instead of crashing the
        # caller, matching the documented "results or None" contract.
        print(f"Error loading results from {filename}: {e}")
        return None

    print(f"β Loaded results from: {filename}")
    return results_data
def example_save_dataframe(df: pd.DataFrame, filename: str):
    """Example: write a pandas DataFrame to persistent storage as parquet.

    Args:
        df: DataFrame to save.
        filename: Name of the file (with .parquet extension).

    Returns:
        The value returned by ``save_data_to_persistent`` when it is truthy;
        ``None`` when storage is unavailable, the save reports failure, or
        any exception is raised while serializing or saving.
    """
    if not is_persistent_storage_available():
        print("β οΈ Persistent storage not available - skipping save")
        return None

    try:
        # to_parquet() without a path returns the serialized bytes.
        saved_path = save_data_to_persistent(
            data=df.to_parquet(),
            filename=filename,
            subdirectory="dataframes",
        )
        if not saved_path:
            print("β Failed to save DataFrame")
            return None
        print(f"β Saved DataFrame to: {saved_path}")
        return saved_path
    except Exception as exc:
        # Best-effort example: report the problem and signal failure with None.
        print(f"β Error saving DataFrame: {exc}")
        return None
def example_list_saved_files():
    """Example: print an overview of the files kept in persistent storage.

    Prints the names of everything returned by ``list_persistent_files()``,
    followed by separate sections for ``*.json`` files in ``experiments`` and
    ``*.parquet`` files in ``dataframes`` (each section only when non-empty).

    Returns:
        The result of the unfiltered ``list_persistent_files()`` call, or an
        empty list when persistent storage is unavailable.
    """
    if not is_persistent_storage_available():
        print("β οΈ Persistent storage not available")
        return []

    print("π Files in persistent storage:")

    # Unfiltered listing; this is also the return value.
    all_files = list_persistent_files()
    if not all_files:
        print(" No files found")
    else:
        for entry in all_files:
            print(f" - {entry.name}")

    # Per-category listings: (subdirectory, glob pattern, section heading).
    sections = (
        ("experiments", "*.json", "\n㪠Experiment files:"),
        ("dataframes", "*.parquet", "\nπ DataFrame files:"),
    )
    for subdir, glob_pattern, heading in sections:
        matches = list_persistent_files(subdirectory=subdir, pattern=glob_pattern)
        if matches:
            print(heading)
            for entry in matches:
                print(f" - {entry.name}")

    return all_files
def example_storage_cleanup(days_old: int = 30):
    """Example: Clean up old files from persistent storage.

    Every entry returned by ``list_persistent_files()`` whose modification
    time is older than ``days_old`` days is passed (by name) to
    ``delete_persistent_file``. Entries whose metadata cannot be read are
    skipped instead of aborting the whole cleanup.

    Args:
        days_old: Delete files older than this many days.

    Returns:
        None. Progress and the number of deleted files are printed.
    """
    if not is_persistent_storage_available():
        print("β οΈ Persistent storage not available")
        return

    import time

    # Anything last modified before this epoch timestamp is considered stale.
    cutoff_time = time.time() - (days_old * 24 * 60 * 60)
    print(f"π§Ή Cleaning up files older than {days_old} days...")

    deleted_count = 0
    for file in list_persistent_files():
        try:
            modified_at = file.stat().st_mtime
        except OSError as exc:
            # The file may have disappeared or become unreadable since it was
            # listed; skip it rather than failing the remaining deletions.
            print(f"Skipping {file.name}: {exc}")
            continue

        if modified_at >= cutoff_time:
            continue

        # NOTE(review): only the base name is passed on; if the listing can
        # include files inside subdirectories, confirm that
        # delete_persistent_file resolves them correctly.
        if delete_persistent_file(file.name):
            print(f"ποΈ Deleted: {file.name}")
            deleted_count += 1

    print(f"β Cleanup complete - deleted {deleted_count} files")
def example_storage_info():
    """Example: Display information about persistent storage.

    Prints the availability flag from ``get_storage_info()``. When storage is
    available it also prints the data, cache and HF home directories and,
    if ``storage_paths`` is populated, the total/used/free sizes in GB plus
    the usage percentage.

    Returns:
        None. All output goes to stdout.
    """
    info = get_storage_info()
    print("π Persistent Storage Information:")
    print(f" Available: {info['persistent_available']}")

    if not info['persistent_available']:
        return

    print(f" Data directory: {info['data_dir']}")
    print(f" Cache directory: {info['cache_dir']}")
    print(f" HF Home: {info['hf_home']}")

    storage = info['storage_paths']
    if not storage:
        return

    total_gb = storage['total_gb']
    used_gb = storage['used_gb']
    print(f" Total storage: {total_gb:.1f}GB")
    print(f" Used storage: {used_gb:.1f}GB")
    print(f" Free storage: {storage['free_gb']:.1f}GB")

    # Guard the percentage: a reported total of 0 would otherwise raise
    # ZeroDivisionError.
    if total_gb:
        usage_pct = (used_gb / total_gb) * 100
        print(f" Usage: {usage_pct:.1f}%")
    else:
        print(" Usage: n/a")
# Example usage in a Gradio app | |
def example_gradio_integration():
    """Example: callbacks that connect persistent storage to a Gradio UI.

    Two handlers are defined locally for illustration; the function itself
    neither registers nor returns them (it returns ``None``). The commented
    sketch at the end shows how they would be wired to Gradio components.
    """

    def save_uploaded_data(uploaded_file):
        """Store an uploaded file as ``user_upload.txt`` and return a status message."""
        if not uploaded_file:
            return "β οΈ No file uploaded"

        stored = save_uploaded_file(uploaded_file, "user_upload.txt")
        if not stored:
            return "β Failed to save file - persistent storage not available"
        return f"β File saved to persistent storage: {stored.name}"

    def load_user_data():
        """Return the stored upload decoded as UTF-8 text, or a fallback message."""
        raw = load_data_from_persistent("user_upload.txt")
        if not raw:
            return "No data found"
        return raw.decode('utf-8')

    # This would be used in a Gradio interface like:
    # import gradio as gr
    #
    # with gr.Blocks() as demo:
    #     file_input = gr.File(label="Upload file")
    #     upload_btn = gr.Button("Save to persistent storage")
    #     download_btn = gr.Button("Load from persistent storage")
    #
    #     upload_btn.click(save_uploaded_data, inputs=[file_input])
    #     download_btn.click(load_user_data)
if __name__ == "__main__":
    # Demo driver: show storage info, list files, round-trip a small results
    # dictionary through persistent storage, then list files again.
    # NOTE(review): this module uses a relative import
    # (``from .persistent_storage import ...``), so it presumably has to be
    # started as part of its package (``python -m <package>.<module>``) - confirm.
    print("π Persistent Storage Examples")
    print("=" * 40)

    example_storage_info()
    print()

    example_list_saved_files()
    print()

    # Save a small sample payload...
    sample_results = dict(
        experiment="test",
        results=[1, 2, 3],
        timestamp="2024-01-01",
    )
    example_save_results(sample_results, "test_experiment")
    print()

    # ...and read it back.
    restored = example_load_results("test_experiment")
    if restored:
        print(f"π Loaded data: {restored}")
    print()

    # The listing should now include the file written above.
    example_list_saved_files()