Spaces:
Running
on
Zero
Running
on
Zero
# utils/storage.py | |
__version__ = "0.1.0" # Added version | |
import os | |
import urllib.parse | |
import tempfile | |
import shutil | |
import json | |
import base64 | |
from huggingface_hub import login, upload_folder, hf_hub_download, HfApi | |
from huggingface_hub.utils import RepositoryNotFoundError, EntryNotFoundError | |
from utils.constants import HF_API_TOKEN, upload_file_types, model_extensions, image_extensions, HF_REPO_ID, SHORTENER_JSON_FILE | |
from typing import Any, Dict, List, Tuple, Union | |
# See storage.md for detailed information about the storage module and its functions.
def generate_permalink(valid_files, base_url_external, permalink_viewer_url="surn-3d-viewer.hf.space"):
    """
    Build a viewer permalink from a list of uploaded files.

    The files are classified by extension. The first file whose extension is in
    model_extensions becomes the model link (later model files are ignored) and
    every file whose extension is in image_extensions is collected in order.
    A permalink is produced only when a model was found and exactly two images
    were collected: the first image is passed as "hm" and the second as "image".

    Parameters:
        valid_files (list): File paths; only their base names are used.
        base_url_external (str): URL prefix under which the files are reachable.
        permalink_viewer_url (str): Host (and optional path) of the viewer.
    Returns:
        str | None: The permalink URL, or None when the criteria are not met.
    """
    model_url = None
    image_urls = []

    for path in valid_files:
        name = os.path.basename(path)
        suffix = os.path.splitext(name)[1].lower()
        if suffix in model_extensions:
            # Only the first model file is kept.
            if model_url is None:
                model_url = f"{base_url_external}/{name}"
        elif suffix in image_extensions:
            image_urls.append(f"{base_url_external}/{name}")

    if not model_url or len(image_urls) != 2:
        return None

    # Link to the viewer project, passing the three assets as query parameters.
    viewer_root = f"https://{permalink_viewer_url}/"
    query = urllib.parse.urlencode(
        {"3d": model_url, "hm": image_urls[0], "image": image_urls[1]}
    )
    return f"{viewer_root}?{query}"
def generate_permalink_from_urls(model_url, hm_url, img_url, permalink_viewer_url="surn-3d-viewer.hf.space"):
    """
    Build the viewer permalink from three already-resolved asset URLs.

    The URLs are passed individually (rather than as a list) so that each one
    always lands in the same query parameter.

    Parameters:
        model_url (str): Processed URL for the 3D model ("3d" parameter).
        hm_url (str): Processed URL for the height map image ("hm" parameter).
        img_url (str): Processed URL for the main image ("image" parameter).
        permalink_viewer_url (str): The base viewer URL (host, without scheme).
    Returns:
        str: The generated permalink URL.
    """
    query_pairs = [("3d", model_url), ("hm", hm_url), ("image", img_url)]
    encoded_query = urllib.parse.urlencode(query_pairs)
    return f"https://{permalink_viewer_url}/?{encoded_query}"
def _hub_resolve_base_url(repo_id: str, repo_type: str, folder_name: str) -> str:
    """Return the public ``resolve/main`` URL prefix for folder_name inside a Hub repo."""
    # Dataset and Space repositories carry a type prefix in their URLs;
    # model repositories (any other repo_type) live directly under the host.
    type_prefix = {"dataset": "datasets/", "space": "spaces/"}.get(repo_type, "")
    root = f"https://huggingface.co/{type_prefix}{repo_id}/resolve/main"
    return f"{root}/{folder_name}" if folder_name else root


def upload_files_to_repo(
    files: List[Any],
    repo_id: str,
    folder_name: str,
    create_permalink: bool = False,
    repo_type: str = "dataset",
    permalink_viewer_url: str = "surn-3d-viewer.hf.space"
) -> Union[Dict[str, Any], List[Tuple[Any, str]]]:
    """
    Uploads multiple files to a Hugging Face repository using a batch upload approach via upload_folder.

    Parameters:
        files (list): Files to upload. Each entry is either a path (str or os.PathLike)
            or an object whose `.name` attribute holds the path. Entries with no usable
            path or with an extension outside upload_file_types are skipped.
        repo_id (str): The repository ID on Hugging Face for storage, e.g. "Surn/Storage".
        folder_name (str): The subfolder within the repository where files will be saved.
        create_permalink (bool): If True, a single viewer permalink is attempted via
            generate_permalink() and a dict is returned; otherwise individual links
            are returned for each file.
        repo_type (str): Repository type ("space", "dataset", "model"). Default is "dataset".
            It is used both for the upload and for building the external file URLs.
        permalink_viewer_url (str): The base viewer URL.
    Returns:
        Union[Dict[str, Any], List[Tuple[Any, str]]]:
            If create_permalink is True:
                dict: {
                    "response": <upload response, or a message when nothing was uploaded>,
                    "permalink": <full permalink URL or None>,
                    "short_permalink": <shortened permalink URL or None>
                }
            Otherwise:
                list: A list of tuples (response, permalink) for each file
                (empty when there were no valid files).
    """
    # Ensure folder_name does not have a trailing slash.
    folder_name = folder_name.rstrip("/")

    # Resolve every entry to a filesystem path and keep the allowed extensions.
    valid_paths = []
    for f in files or []:
        if isinstance(f, (str, os.PathLike)):
            path = os.fspath(f)
        else:
            # File-like / upload wrapper objects expose their path as `.name`.
            path = getattr(f, "name", None)
        if not isinstance(path, str):
            continue
        if os.path.splitext(path)[1].lower() in upload_file_types:
            valid_paths.append(path)

    if not valid_paths:
        # Keep the return shape consistent with the create_permalink contract.
        if create_permalink:
            return {
                "response": "No valid files to upload.",
                "permalink": None,
                "short_permalink": None
            }
        return []

    # Authenticate only once there is something to upload.
    login(token=HF_API_TOKEN)

    # Stage the files in a temporary directory so they can be sent as one commit.
    staging_root = os.getenv("TMPDIR", tempfile.gettempdir())
    with tempfile.TemporaryDirectory(dir=staging_root) as temp_dir:
        for path in valid_paths:
            shutil.copy(path, os.path.join(temp_dir, os.path.basename(path)))

        # Files will be uploaded under the folder (path_in_repo) given by folder_name.
        response = upload_folder(
            folder_path=temp_dir,
            repo_id=repo_id,
            repo_type=repo_type,
            path_in_repo=folder_name,
            commit_message="Batch upload files"
        )

    # Construct external URLs for each uploaded file.
    base_url_external = _hub_resolve_base_url(repo_id, repo_type, folder_name)
    individual_links = [
        f"{base_url_external}/{os.path.basename(path)}" for path in valid_paths
    ]

    if not create_permalink:
        return [(response, link) for link in individual_links]

    # generate_permalink() decides whether the uploaded set qualifies for a permalink.
    permalink = generate_permalink(valid_paths, base_url_external, permalink_viewer_url)
    if not permalink:
        # Criteria not met: still report the upload response.
        return {
            "response": response,
            "permalink": None,
            "short_permalink": None
        }

    permalink_short = None
    status, short_id = gen_full_url(
        full_url=permalink,
        repo_id=HF_REPO_ID,  # This comes from constants
        json_file=SHORTENER_JSON_FILE  # This comes from constants
    )
    if status in ["created_short", "success_retrieved_short", "exists_match"]:
        permalink_short = f"https://{permalink_viewer_url}/?sid={short_id}"
    else:
        # Shortening failed or the conflict did not resolve to a usable short_id.
        print(f"URL shortening status: {status} for {permalink}")

    return {
        "response": response,
        "permalink": permalink,
        "short_permalink": permalink_short
    }
def _generate_short_id(length=8):
    """Return a random string of `length` URL-safe base64 characters."""
    # Twice as many random bytes as requested characters always encode to at
    # least `length` base64 characters, so the slice never reaches the padding.
    random_bytes = os.urandom(length * 2)
    encoded_text = base64.urlsafe_b64encode(random_bytes).decode('utf-8')
    return encoded_text[:length]
def _get_json_from_repo(repo_id, json_file_name, repo_type="dataset"):
    """
    Download json_file_name from the repo and return its parsed content.

    Any failure (missing repository, missing file, invalid JSON or any other
    exception) is reported with print() and results in an empty list.
    """
    # NOTE(review): callers cannot tell "file is empty/missing" apart from
    # "download failed" because both yield [] - confirm that is intended.
    try:
        login(token=HF_API_TOKEN)
        local_path = hf_hub_download(
            repo_id=repo_id,
            filename=json_file_name,
            repo_type=repo_type,
            token=HF_API_TOKEN  # Passed explicitly in addition to login()
        )
        with open(local_path, 'r') as handle:
            payload = json.load(handle)
        os.remove(local_path)  # Clean up downloaded file
        return payload
    except RepositoryNotFoundError:
        print(f"Repository {repo_id} not found.")
    except EntryNotFoundError:
        print(f"JSON file {json_file_name} not found in {repo_id}. Initializing with empty list.")
    except json.JSONDecodeError:
        print(f"Error decoding JSON from {json_file_name}. Returning empty list.")
    except Exception as exc:
        print(f"An unexpected error occurred while fetching {json_file_name}: {exc}")
    # Every handled failure falls through to the same empty result.
    return []
def _upload_json_to_repo(data, repo_id, json_file_name, repo_type="dataset"):
    """
    Serialize data as JSON and upload it to json_file_name in the repo.

    The JSON is first written to a temporary file (inside TMPDIR when set,
    otherwise the system temp directory), which is always removed afterwards,
    whether serialization or the upload succeeds or fails.

    Parameters:
        data: JSON-serializable object to store.
        repo_id (str): Target repository ID.
        json_file_name (str): Path of the JSON file inside the repository.
        repo_type (str): Repository type. Default is "dataset".
    Returns:
        bool: True when the upload succeeded, False on any error (the error is printed).
    """
    tmp_file_path = None
    try:
        # Use a temporary directory specified by TMPDIR or default to system temp.
        temp_dir_for_json = os.getenv("TMPDIR", tempfile.gettempdir())
        os.makedirs(temp_dir_for_json, exist_ok=True)

        # Serialize before touching the network so bad data fails fast.
        with tempfile.NamedTemporaryFile(
            mode="w", delete=False, suffix=".json", dir=temp_dir_for_json, encoding="utf-8"
        ) as tmp_file:
            # Record the path first: if json.dump raises, cleanup still finds the file.
            tmp_file_path = tmp_file.name
            json.dump(data, tmp_file, indent=2)

        login(token=HF_API_TOKEN)
        api = HfApi()
        api.upload_file(
            path_or_fileobj=tmp_file_path,
            path_in_repo=json_file_name,
            repo_id=repo_id,
            repo_type=repo_type,
            commit_message=f"Update {json_file_name}"
        )
        return True
    except Exception as e:
        print(f"Failed to upload {json_file_name} to {repo_id}: {e}")
        return False
    finally:
        # Clean up the temporary file on success and on error alike.
        if tmp_file_path is not None and os.path.exists(tmp_file_path):
            try:
                os.remove(tmp_file_path)
            except OSError as cleanup_error:
                print(f"Could not remove temporary file {tmp_file_path}: {cleanup_error}")
def _find_url_in_json(data, short_url=None, full_url=None):
    """
    Look up the counterpart of a URL in the list of stored mappings.

    A given short_url is searched first and yields the stored "full_url" of the
    first matching entry. If it is not given or matches nothing, a given
    full_url is searched and yields the stored "short_url" of its first match.
    Returns None when data is empty/None or nothing matches.
    """
    if not data:  # None or empty: nothing to search
        return None

    # (value to look for, key it is stored under, key of the value to return)
    lookups = (
        (short_url, "short_url", "full_url"),
        (full_url, "full_url", "short_url"),
    )
    for wanted, stored_under, counterpart in lookups:
        if not wanted:
            continue
        for entry in data:
            if entry.get(stored_under) == wanted:
                return entry.get(counterpart)
    return None
def _add_url_to_json(data, short_url, full_url):
    """
    Append a {"short_url", "full_url"} entry to data and return the list.

    A None data starts a new list; an existing list is extended in place and
    the same object is returned.
    """
    entries = [] if data is None else data
    new_entry = {"short_url": short_url, "full_url": full_url}
    entries.append(new_entry)
    return entries
def gen_full_url(short_url=None, full_url=None, repo_id=None, repo_type="dataset", permalink_viewer_url="surn-3d-viewer.hf.space", json_file="shortener.json"):
    """
    Manages short URLs and their corresponding full URLs in a JSON file stored in a Hugging Face repository.

    - If only short_url is provided, looks up and returns the full_url.
    - If only full_url is provided, returns its existing short_url or creates,
      stores and returns a new one.
    - If both are provided, checks the pair for consistency with the stored
      data and stores it when neither side is already mapped.

    Parameters:
        short_url (str | None): Short identifier to look up or register.
        full_url (str | None): Full URL to look up or register.
        repo_id (str | None): Repository holding the JSON file (required).
        repo_type (str): Repository type. Default is "dataset".
        permalink_viewer_url (str): Currently unused; kept for backward compatibility.
        json_file (str): Name of the JSON file inside the repository.
    Returns:
        tuple: (status, result)
            "error_repo_id_missing", None      - repo_id was not given.
            "error_no_input", None             - neither short_url nor full_url was given.
            "success_retrieved_full", full_url - short_url was found.
            "not_found_short", None            - short_url is not stored.
            "success_retrieved_short", short   - full_url was already stored.
            "created_short", short             - a new short ID was generated and stored.
            "exists_match", short_url          - the given pair is already stored.
            "error_conflict_short_exists_different_full", short_url
                                               - short_url maps to another full URL.
            "error_conflict_full_exists_different_short", stored_short
                                               - full_url maps to another short URL.
            "created_specific_pair", short_url - the given pair was stored.
            "error_upload", None               - storing the updated JSON failed.
    """
    if not repo_id:
        return "error_repo_id_missing", None
    if not short_url and not full_url:
        return "error_no_input", None

    login(token=HF_API_TOKEN)  # Ensure login at the beginning
    # NOTE(review): _get_json_from_repo returns [] on any download error, so a
    # failed download followed by a successful upload below stores only the new
    # entry - confirm whether that is acceptable.
    url_data = _get_json_from_repo(repo_id, json_file, repo_type)

    # Case 1: Only short_url provided (lookup full_url)
    if short_url and not full_url:
        found_full_url = _find_url_in_json(url_data, short_url=short_url)
        if found_full_url:
            return "success_retrieved_full", found_full_url
        return "not_found_short", None

    # Case 2: Only full_url provided (lookup or create short_url)
    if full_url and not short_url:
        existing_short_url = _find_url_in_json(url_data, full_url=full_url)
        if existing_short_url:
            return "success_retrieved_short", existing_short_url

        # Generate a short ID that is not already in use, so that an existing
        # mapping can never be shadowed by a random collision.
        new_short_id = _generate_short_id()
        while any(entry.get("short_url") == new_short_id for entry in url_data or []):
            new_short_id = _generate_short_id()

        url_data = _add_url_to_json(url_data, new_short_id, full_url)
        if _upload_json_to_repo(url_data, repo_id, json_file, repo_type):
            return "created_short", new_short_id
        return "error_upload", None

    # Case 3: Both short_url and full_url provided (the only remaining combination)
    found_full_for_short = _find_url_in_json(url_data, short_url=short_url)
    found_short_for_full = _find_url_in_json(url_data, full_url=full_url)

    if found_full_for_short == full_url:
        return "exists_match", short_url
    if found_full_for_short is not None:
        # short_url is taken by a different full URL.
        return "error_conflict_short_exists_different_full", short_url
    if found_short_for_full is not None and found_short_for_full != short_url:
        return "error_conflict_full_exists_different_short", found_short_for_full

    # Neither side is mapped yet: store the pair as given. This effectively
    # allows specifying a custom short_url if it's not already taken.
    url_data = _add_url_to_json(url_data, short_url, full_url)
    if _upload_json_to_repo(url_data, repo_id, json_file, repo_type):
        return "created_specific_pair", short_url
    return "error_upload", None