# utils/storage.py
__version__ = "0.1.0" # Added version
import os
import urllib.parse
import tempfile
import shutil
import json
import base64
from huggingface_hub import login, upload_folder, hf_hub_download, HfApi
from huggingface_hub.utils import RepositoryNotFoundError, EntryNotFoundError
from utils.constants import HF_API_TOKEN, upload_file_types, model_extensions, image_extensions, HF_REPO_ID, SHORTENER_JSON_FILE
from typing import Any, Dict, List, Tuple, Union
# see storage.md for detailed information about the storage module and its functions.
def generate_permalink(valid_files, base_url_external, permalink_viewer_url="surn-3d-viewer.hf.space"):
"""
Given a list of valid files, checks if they contain exactly 1 model file and 2 image files.
Constructs and returns a permalink URL with query parameters if the criteria is met.
Otherwise, returns None.
"""
model_link = None
images_links = []
for f in valid_files:
filename = os.path.basename(f)
ext = os.path.splitext(filename)[1].lower()
if ext in model_extensions:
if model_link is None:
model_link = f"{base_url_external}/{filename}"
elif ext in image_extensions:
images_links.append(f"{base_url_external}/{filename}")
if model_link and len(images_links) == 2:
# Construct a permalink to the viewer project with query parameters.
permalink_viewer_url = f"https://{permalink_viewer_url}/"
params = {"3d": model_link, "hm": images_links[0], "image": images_links[1]}
query_str = urllib.parse.urlencode(params)
return f"{permalink_viewer_url}?{query_str}"
return None
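# Illustrative sketch of the output shape (file names are hypothetical, and this
# assumes .glb/.png appear in model_extensions/image_extensions): the inputs
#   ["mesh.glb", "heightmap.png", "render.png"]
# with base_url_external "https://huggingface.co/datasets/Surn/Storage/resolve/main/saved"
# would produce a permalink of the form
#   https://surn-3d-viewer.hf.space/?3d=<model_url>&hm=<image_1_url>&image=<image_2_url>
# with each query value percent-encoded by urllib.parse.urlencode.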
def generate_permalink_from_urls(model_url, hm_url, img_url, permalink_viewer_url="surn-3d-viewer.hf.space"):
"""
Constructs and returns a permalink URL with query string parameters for the viewer.
Each parameter is passed separately so that the image positions remain consistent.
Parameters:
model_url (str): Processed URL for the 3D model.
hm_url (str): Processed URL for the height map image.
img_url (str): Processed URL for the main image.
permalink_viewer_url (str): The base viewer URL.
Returns:
str: The generated permalink URL.
"""
params = {"3d": model_url, "hm": hm_url, "image": img_url}
query_str = urllib.parse.urlencode(params)
return f"https://{permalink_viewer_url}/?{query_str}"
def upload_files_to_repo(
files: List[Any],
repo_id: str,
folder_name: str,
create_permalink: bool = False,
repo_type: str = "dataset",
permalink_viewer_url: str = "surn-3d-viewer.hf.space"
) -> Union[Dict[str, Any], List[Tuple[Any, str]]]:
"""
Uploads multiple files to a Hugging Face repository using a batch upload approach via upload_folder.
Parameters:
files (list): A list of file paths (str) to upload.
repo_id (str): The repository ID on Hugging Face for storage, e.g. "Surn/Storage".
folder_name (str): The subfolder within the repository where files will be saved.
create_permalink (bool): If True and if exactly three files are uploaded (1 model and 2 images),
returns a single permalink to the project with query parameters.
Otherwise, returns individual permalinks for each file.
repo_type (str): Repository type ("space", "dataset", etc.). Default is "dataset".
permalink_viewer_url (str): The base viewer URL.
Returns:
Union[Dict[str, Any], List[Tuple[Any, str]]]:
If create_permalink is True and files match the criteria:
dict: {
"response": <upload response>,
"permalink": <full_permalink URL>,
"short_permalink": <shortened permalink URL>
}
Otherwise:
list: A list of tuples (response, permalink) for each file.
"""
# Log in using the HF API token.
login(token=HF_API_TOKEN)
valid_files = []
permalink_short = None
# Ensure folder_name does not have a trailing slash.
folder_name = folder_name.rstrip("/")
# Filter for valid files based on allowed extensions.
for f in files:
file_name = f if isinstance(f, str) else f.name if hasattr(f, "name") else None
if file_name is None:
continue
ext = os.path.splitext(file_name)[1].lower()
if ext in upload_file_types:
valid_files.append(f)
if not valid_files:
# Return a dictionary with None values for permalinks if create_permalink was True
if create_permalink:
return {
"response": "No valid files to upload.",
"permalink": None,
"short_permalink": None
}
return []
# Create a temporary directory; copy valid files directly into it.
    with tempfile.TemporaryDirectory(dir=os.getenv("TMPDIR", tempfile.gettempdir())) as temp_dir:
for file_path in valid_files:
filename = os.path.basename(file_path)
dest_path = os.path.join(temp_dir, filename)
shutil.copy(file_path, dest_path)
# Batch upload all files in the temporary folder.
# Files will be uploaded under the folder (path_in_repo) given by folder_name.
response = upload_folder(
folder_path=temp_dir,
repo_id=repo_id,
repo_type=repo_type,
path_in_repo=folder_name,
commit_message="Batch upload files"
)
        # Construct external URLs for each uploaded file. The URL prefix depends on the
        # repo type: dataset and space repos are namespaced, model repos are not.
        if repo_type == "dataset":
            base_url_external = f"https://huggingface.co/datasets/{repo_id}/resolve/main/{folder_name}"
        elif repo_type == "space":
            base_url_external = f"https://huggingface.co/spaces/{repo_id}/resolve/main/{folder_name}"
        else:
            base_url_external = f"https://huggingface.co/{repo_id}/resolve/main/{folder_name}"
individual_links = []
for file_path in valid_files:
filename = os.path.basename(file_path)
link = f"{base_url_external}/{filename}"
individual_links.append(link)
        # If permalink creation is requested, try to generate one; generate_permalink()
        # itself enforces the one-model-plus-two-images criteria, so no length check is needed here.
        if create_permalink:
permalink = generate_permalink(valid_files, base_url_external, permalink_viewer_url)
if permalink:
status, short_id = gen_full_url(
full_url=permalink,
repo_id=HF_REPO_ID, # This comes from constants
json_file=SHORTENER_JSON_FILE # This comes from constants
)
if status in ["created_short", "success_retrieved_short", "exists_match"]:
permalink_short = f"https://{permalink_viewer_url}/?sid={short_id}"
else: # Shortening failed or conflict not resolved to a usable short_id
permalink_short = None
print(f"URL shortening status: {status} for {permalink}")
return {
"response": response,
"permalink": permalink,
"short_permalink": permalink_short
}
else: # generate_permalink returned None (criteria not met)
return {
"response": response, # Still return upload response
"permalink": None,
"short_permalink": None
}
# Otherwise, return individual tuples for each file.
return [(response, link) for link in individual_links]
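# Usage sketch (repo ID and paths are placeholders for illustration):
#   result = upload_files_to_repo(
#       ["out/mesh.glb", "out/heightmap.png", "out/render.png"],
#       repo_id="Surn/Storage",
#       folder_name="saved_models/user123",
#       create_permalink=True,
#   )
#   # -> {"response": <upload response>,
#   #     "permalink": "https://surn-3d-viewer.hf.space/?3d=...&hm=...&image=...",
#   #     "short_permalink": "https://surn-3d-viewer.hf.space/?sid=<short id>"}
# With create_permalink=False, the same call returns a list of (response, file_url) tuples.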
def _generate_short_id(length=8):
"""Generates a random base64 URL-safe string."""
return base64.urlsafe_b64encode(os.urandom(length * 2))[:length].decode('utf-8')
def _get_json_from_repo(repo_id, json_file_name, repo_type="dataset"):
"""Downloads and loads the JSON file from the repo. Returns empty list if not found or error."""
try:
login(token=HF_API_TOKEN)
json_path = hf_hub_download(
repo_id=repo_id,
filename=json_file_name,
repo_type=repo_type,
            token=HF_API_TOKEN  # passed explicitly in addition to login()
)
with open(json_path, 'r') as f:
data = json.load(f)
os.remove(json_path) # Clean up downloaded file
return data
except RepositoryNotFoundError:
print(f"Repository {repo_id} not found.")
return []
except EntryNotFoundError:
print(f"JSON file {json_file_name} not found in {repo_id}. Initializing with empty list.")
return []
except json.JSONDecodeError:
print(f"Error decoding JSON from {json_file_name}. Returning empty list.")
return []
except Exception as e:
print(f"An unexpected error occurred while fetching {json_file_name}: {e}")
return []
def _upload_json_to_repo(data, repo_id, json_file_name, repo_type="dataset"):
"""Uploads the JSON data to the specified file in the repo."""
try:
login(token=HF_API_TOKEN)
api = HfApi()
# Use a temporary directory specified by TMPDIR or default to system temp
temp_dir_for_json = os.getenv("TMPDIR", tempfile.gettempdir())
os.makedirs(temp_dir_for_json, exist_ok=True)
with tempfile.NamedTemporaryFile(mode="w+", delete=False, suffix=".json", dir=temp_dir_for_json) as tmp_file:
json.dump(data, tmp_file, indent=2)
tmp_file_path = tmp_file.name
api.upload_file(
path_or_fileobj=tmp_file_path,
path_in_repo=json_file_name,
repo_id=repo_id,
repo_type=repo_type,
commit_message=f"Update {json_file_name}"
)
os.remove(tmp_file_path) # Clean up temporary file
return True
except Exception as e:
print(f"Failed to upload {json_file_name} to {repo_id}: {e}")
if 'tmp_file_path' in locals() and os.path.exists(tmp_file_path):
os.remove(tmp_file_path) # Ensure cleanup on error too
return False
def _find_url_in_json(data, short_url=None, full_url=None):
"""
Searches the JSON data.
If short_url is provided, returns the corresponding full_url or None.
If full_url is provided, returns the corresponding short_url or None.
"""
if not data: # Handles cases where data might be None or empty
return None
if short_url:
for item in data:
if item.get("short_url") == short_url:
return item.get("full_url")
if full_url:
for item in data:
if item.get("full_url") == full_url:
return item.get("short_url")
return None
def _add_url_to_json(data, short_url, full_url):
"""Adds a new short_url/full_url pair to the data. Returns updated data."""
if data is None:
data = []
data.append({"short_url": short_url, "full_url": full_url})
return data
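# The shortener file is a flat JSON list of pairs, for example:
#   [{"short_url": "Ab3dEf9Z", "full_url": "https://surn-3d-viewer.hf.space/?3d=..."}]
# (the 8-character ID is a made-up sample of _generate_short_id output).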
def gen_full_url(short_url=None, full_url=None, repo_id=None, repo_type="dataset", permalink_viewer_url="surn-3d-viewer.hf.space", json_file="shortener.json"):
"""
Manages short URLs and their corresponding full URLs in a JSON file stored in a Hugging Face repository.
- If short_url is provided, attempts to retrieve and return the full_url.
- If full_url is provided, attempts to retrieve an existing short_url or creates a new one, stores it, and returns the short_url.
- If both are provided, checks for consistency or creates a new entry.
- If neither is provided, or repo_id is missing, returns an error status.
    Returns:
        tuple: (status_message, result_url)
        status_message is one of the values returned below, e.g. "success_retrieved_full",
        "success_retrieved_short", "created_short", "created_specific_pair", "exists_match",
        "not_found_short", or an "error_*" status (missing repo_id, no input, upload
        failure, or a short/full conflict).
        result_url is the matching full URL, the short ID, or None on error or when not found.
"""
if not repo_id:
return "error_repo_id_missing", None
if not short_url and not full_url:
return "error_no_input", None
login(token=HF_API_TOKEN) # Ensure login at the beginning
url_data = _get_json_from_repo(repo_id, json_file, repo_type)
# Case 1: Only short_url provided (lookup full_url)
if short_url and not full_url:
found_full_url = _find_url_in_json(url_data, short_url=short_url)
return ("success_retrieved_full", found_full_url) if found_full_url else ("not_found_short", None)
# Case 2: Only full_url provided (lookup or create short_url)
if full_url and not short_url:
existing_short_url = _find_url_in_json(url_data, full_url=full_url)
if existing_short_url:
return "success_retrieved_short", existing_short_url
else:
# Create new short_url
new_short_id = _generate_short_id()
url_data = _add_url_to_json(url_data, new_short_id, full_url)
if _upload_json_to_repo(url_data, repo_id, json_file, repo_type):
return "created_short", new_short_id
else:
return "error_upload", None
# Case 3: Both short_url and full_url provided
if short_url and full_url:
found_full_for_short = _find_url_in_json(url_data, short_url=short_url)
found_short_for_full = _find_url_in_json(url_data, full_url=full_url)
if found_full_for_short == full_url:
return "exists_match", short_url
if found_full_for_short is not None and found_full_for_short != full_url:
return "error_conflict_short_exists_different_full", short_url
if found_short_for_full is not None and found_short_for_full != short_url:
return "error_conflict_full_exists_different_short", found_short_for_full
# If short_url is provided and not found, or full_url is provided and not found,
# or neither is found, then create a new entry with the provided short_url and full_url.
# This effectively allows specifying a custom short_url if it's not already taken.
url_data = _add_url_to_json(url_data, short_url, full_url)
if _upload_json_to_repo(url_data, repo_id, json_file, repo_type):
return "created_specific_pair", short_url
else:
return "error_upload", None
return "error_unhandled_case", None # Should not be reached