# utils/storage.py
__version__ = "0.1.0"
import os
import urllib.parse
import tempfile
import shutil
import json
import base64
from huggingface_hub import login, upload_folder, hf_hub_download, HfApi
from huggingface_hub.utils import RepositoryNotFoundError, EntryNotFoundError
from utils.constants import HF_API_TOKEN, upload_file_types, model_extensions, image_extensions, HF_REPO_ID, SHORTENER_JSON_FILE
from typing import Any, Dict, List, Tuple, Union

# see storage.md for detailed information about the storage module and its functions.

def generate_permalink(valid_files, base_url_external, permalink_viewer_url="surn-3d-viewer.hf.space"):
    """
    Given a list of valid files, checks whether they contain exactly one model
    file and two image files. Constructs and returns a permalink URL with query
    parameters if that criterion is met; otherwise returns None.
    """
    model_link = None
    images_links = []
    for f in valid_files:
        filename = os.path.basename(f)
        ext = os.path.splitext(filename)[1].lower()
        if ext in model_extensions:
            if model_link is None:
                model_link = f"{base_url_external}/{filename}"
        elif ext in image_extensions:
            images_links.append(f"{base_url_external}/{filename}")
    if model_link and len(images_links) == 2:
        # Construct a permalink to the viewer project with query parameters.
        permalink_viewer_url = f"https://{permalink_viewer_url}/"
        params = {"3d": model_link, "hm": images_links[0], "image": images_links[1]}
        query_str = urllib.parse.urlencode(params)
        return f"{permalink_viewer_url}?{query_str}"
    return None
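
# Illustrative usage (hypothetical file names and folder; not output from a real run):
#
#   base = "https://huggingface.co/datasets/Surn/Storage/resolve/main/saved_models/demo"
#   generate_permalink(["scene.glb", "heightmap.png", "render.png"], base)
#   # -> "https://surn-3d-viewer.hf.space/?3d=...&hm=...&image=..."
#   generate_permalink(["scene.glb", "render.png"], base)  # -> None (needs two images)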

def generate_permalink_from_urls(model_url, hm_url, img_url, permalink_viewer_url="surn-3d-viewer.hf.space"):
    """
    Constructs and returns a permalink URL with query string parameters for the viewer.
    Each parameter is passed separately so that the image positions remain consistent.
    
    Parameters:
        model_url (str): Processed URL for the 3D model.
        hm_url (str): Processed URL for the height map image.
        img_url (str): Processed URL for the main image.
        permalink_viewer_url (str): The base viewer URL.
    
    Returns:
        str: The generated permalink URL.
    """
    params = {"3d": model_url, "hm": hm_url, "image": img_url}
    query_str = urllib.parse.urlencode(params)
    return f"https://{permalink_viewer_url}/?{query_str}"

def upload_files_to_repo(
    files: List[Any],
    repo_id: str,
    folder_name: str,
    create_permalink: bool = False,
    repo_type: str = "dataset",
    permalink_viewer_url: str = "surn-3d-viewer.hf.space"
) -> Union[Dict[str, Any], List[Tuple[Any, str]]]:
    """
    Uploads multiple files to a Hugging Face repository using a batch upload approach via upload_folder.

    Parameters:
        files (list): A list of file paths (str) to upload.
        repo_id (str): The repository ID on Hugging Face for storage, e.g. "Surn/Storage".
        folder_name (str): The subfolder within the repository where files will be saved.
        create_permalink (bool): If True, returns a dict containing a single project
                                 permalink when the files meet the viewer criteria
                                 (1 model and 2 images); the permalink fields are
                                 None when the criteria are not met.
        repo_type (str): Repository type ("space", "dataset", etc.). Default is "dataset".
        permalink_viewer_url (str): The base viewer URL.

    Returns:
        Union[Dict[str, Any], List[Tuple[Any, str]]]:
            If create_permalink is True:
                dict: {
                    "response": <upload response>,
                    "permalink": <full permalink URL, or None if criteria not met>,
                    "short_permalink": <shortened permalink URL, or None if shortening failed>
                }
            If create_permalink is False:
                list: A list of tuples (response, permalink) for each file.
    """
    # Log in using the HF API token.
    login(token=HF_API_TOKEN)
    
    valid_files = []
    permalink_short = None
    
    # Ensure folder_name does not have a trailing slash.
    folder_name = folder_name.rstrip("/")
    
    # Filter for valid files based on allowed extensions, normalizing each entry
    # to a path string (file-like objects expose the path via .name).
    for f in files:
        file_name = f if isinstance(f, str) else getattr(f, "name", None)
        if file_name is None:
            continue
        ext = os.path.splitext(file_name)[1].lower()
        if ext in upload_file_types:
            # Append the path string so basename/copy below work for both input kinds.
            valid_files.append(file_name)
    
    if not valid_files:
        # Return a dictionary with None values for permalinks if create_permalink was True
        if create_permalink:
            return {
                "response": "No valid files to upload.",
                "permalink": None,
                "short_permalink": None
            }
        return [] 
    
    # Create a temporary directory; copy valid files directly into it.
    with tempfile.TemporaryDirectory(dir=os.getenv("TMPDIR", tempfile.gettempdir())) as temp_dir:
        for file_path in valid_files:
            filename = os.path.basename(file_path)
            dest_path = os.path.join(temp_dir, filename)
            shutil.copy(file_path, dest_path)
        
        # Batch upload all files in the temporary folder.
        # Files will be uploaded under the folder (path_in_repo) given by folder_name.
        response = upload_folder(
            folder_path=temp_dir,
            repo_id=repo_id,
            repo_type=repo_type,
            path_in_repo=folder_name,
            commit_message="Batch upload files"
        )
    
    # Construct external URLs for each uploaded file. Dataset and space repos are
    # namespaced under a type prefix; model repos resolve at the bare repo_id.
    repo_prefix = f"{repo_type}s/" if repo_type in ("dataset", "space") else ""
    base_url_external = f"https://huggingface.co/{repo_prefix}{repo_id}/resolve/main/{folder_name}"
    individual_links = []
    for file_path in valid_files:
        filename = os.path.basename(file_path)
        link = f"{base_url_external}/{filename}"
        individual_links.append(link)
    
    # If permalink creation is requested, try to generate a single viewer permalink;
    # generate_permalink() itself enforces the one-model/two-image requirement.
    if create_permalink:
        permalink = generate_permalink(valid_files, base_url_external, permalink_viewer_url)
        if permalink:
            status, short_id = gen_full_url(
                full_url=permalink,
                repo_id=HF_REPO_ID, # This comes from constants
                json_file=SHORTENER_JSON_FILE # This comes from constants
            )
            if status in ["created_short", "success_retrieved_short", "exists_match"]:
                permalink_short = f"https://{permalink_viewer_url}/?sid={short_id}"
            else: # Shortening failed or conflict not resolved to a usable short_id
                permalink_short = None 
                print(f"URL shortening status: {status} for {permalink}")

            return {
                "response": response,
                "permalink": permalink,
                "short_permalink": permalink_short
            }
        else: # generate_permalink returned None (criteria not met)
            return {
                "response": response, # Still return upload response
                "permalink": None,
                "short_permalink": None
            }

    # Otherwise, return individual tuples for each file.
    return [(response, link) for link in individual_links]
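
# Illustrative usage (hypothetical repo and files; requires a valid HF_API_TOKEN):
#
#   result = upload_files_to_repo(
#       files=["scene.glb", "heightmap.png", "render.png"],
#       repo_id="Surn/Storage",
#       folder_name="saved_models/demo",
#       create_permalink=True,
#   )
#   # result["permalink"]       -> full viewer URL, or None if the criteria were not met
#   # result["short_permalink"] -> "https://surn-3d-viewer.hf.space/?sid=<short_id>", or None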

def _generate_short_id(length=8):
    """Generates a random base64 URL-safe string."""
    return base64.urlsafe_b64encode(os.urandom(length * 2))[:length].decode('utf-8')

def _get_json_from_repo(repo_id, json_file_name, repo_type="dataset"):
    """Downloads and loads the JSON file from the repo. Returns empty list if not found or error."""
    try:
        login(token=HF_API_TOKEN)
        json_path = hf_hub_download(
            repo_id=repo_id,
            filename=json_file_name,
            repo_type=repo_type,
            token=HF_API_TOKEN  # Pass the token explicitly in case login() was skipped upstream
        )
        with open(json_path, 'r') as f:
            data = json.load(f)
        os.remove(json_path) # Clean up downloaded file
        return data
    except RepositoryNotFoundError:
        print(f"Repository {repo_id} not found.")
        return []
    except EntryNotFoundError:
        print(f"JSON file {json_file_name} not found in {repo_id}. Initializing with empty list.")
        return []
    except json.JSONDecodeError:
        print(f"Error decoding JSON from {json_file_name}. Returning empty list.")
        return []
    except Exception as e:
        print(f"An unexpected error occurred while fetching {json_file_name}: {e}")
        return []

def _upload_json_to_repo(data, repo_id, json_file_name, repo_type="dataset"):
    """Uploads the JSON data to the specified file in the repo."""
    try:
        login(token=HF_API_TOKEN)
        api = HfApi()
        # Use a temporary directory specified by TMPDIR or default to system temp
        temp_dir_for_json = os.getenv("TMPDIR", tempfile.gettempdir())
        os.makedirs(temp_dir_for_json, exist_ok=True)

        with tempfile.NamedTemporaryFile(mode="w+", delete=False, suffix=".json", dir=temp_dir_for_json) as tmp_file:
            json.dump(data, tmp_file, indent=2)
            tmp_file_path = tmp_file.name
        
        api.upload_file(
            path_or_fileobj=tmp_file_path,
            path_in_repo=json_file_name,
            repo_id=repo_id,
            repo_type=repo_type,
            commit_message=f"Update {json_file_name}"
        )
        os.remove(tmp_file_path) # Clean up temporary file
        return True
    except Exception as e:
        print(f"Failed to upload {json_file_name} to {repo_id}: {e}")
        if 'tmp_file_path' in locals() and os.path.exists(tmp_file_path):
            os.remove(tmp_file_path) # Ensure cleanup on error too
        return False

def _find_url_in_json(data, short_url=None, full_url=None):
    """
    Searches the JSON data.
    If short_url is provided, returns the corresponding full_url or None.
    If full_url is provided, returns the corresponding short_url or None.
    """
    if not data: # Handles cases where data might be None or empty
        return None
    if short_url:
        for item in data:
            if item.get("short_url") == short_url:
                return item.get("full_url")
    if full_url:
        for item in data:
            if item.get("full_url") == full_url:
                return item.get("short_url")
    return None

def _add_url_to_json(data, short_url, full_url):
    """Adds a new short_url/full_url pair to the data. Returns updated data."""
    if data is None: 
        data = []
    data.append({"short_url": short_url, "full_url": full_url})
    return data
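
# Shape of the shortener file: a flat JSON list of pairs, e.g. (illustrative values)
#   [{"short_url": "Q3v9xK2-", "full_url": "https://surn-3d-viewer.hf.space/?3d=..."}]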

def gen_full_url(short_url=None, full_url=None, repo_id=None, repo_type="dataset", permalink_viewer_url="surn-3d-viewer.hf.space", json_file="shortener.json"):
    """
    Manages short URLs and their corresponding full URLs in a JSON file stored in a Hugging Face repository.

    - If short_url is provided, attempts to retrieve and return the full_url.
    - If full_url is provided, attempts to retrieve an existing short_url or creates a new one, stores it, and returns the short_url.
    - If both are provided, checks for consistency or creates a new entry.
    - If neither is provided, or repo_id is missing, returns an error status.

    Returns:
        tuple: (status_message, result)
               status_message is one of "success_retrieved_full", "success_retrieved_short",
               "created_short", "created_specific_pair", "exists_match", "not_found_short",
               or an "error_*" code.
               result is the full URL, the short id, or None on error / not found.
    """
    if not repo_id:
        return "error_repo_id_missing", None
    if not short_url and not full_url:
        return "error_no_input", None

    login(token=HF_API_TOKEN) # Ensure login at the beginning
    url_data = _get_json_from_repo(repo_id, json_file, repo_type)

    # Case 1: Only short_url provided (lookup full_url)
    if short_url and not full_url:
        found_full_url = _find_url_in_json(url_data, short_url=short_url)
        return ("success_retrieved_full", found_full_url) if found_full_url else ("not_found_short", None)

    # Case 2: Only full_url provided (lookup or create short_url)
    if full_url and not short_url:
        existing_short_url = _find_url_in_json(url_data, full_url=full_url)
        if existing_short_url:
            return "success_retrieved_short", existing_short_url
        else:
            # Create new short_url
            new_short_id = _generate_short_id()
            url_data = _add_url_to_json(url_data, new_short_id, full_url)
            if _upload_json_to_repo(url_data, repo_id, json_file, repo_type):
                return "created_short", new_short_id 
            else:
                return "error_upload", None

    # Case 3: Both short_url and full_url provided
    if short_url and full_url:
        found_full_for_short = _find_url_in_json(url_data, short_url=short_url)
        found_short_for_full = _find_url_in_json(url_data, full_url=full_url)

        if found_full_for_short == full_url: 
            return "exists_match", short_url 
        if found_full_for_short is not None and found_full_for_short != full_url: 
            return "error_conflict_short_exists_different_full", short_url
        if found_short_for_full is not None and found_short_for_full != short_url:
            return "error_conflict_full_exists_different_short", found_short_for_full
        
        # If short_url is provided and not found, or full_url is provided and not found,
        # or neither is found, then create a new entry with the provided short_url and full_url.
        # This effectively allows specifying a custom short_url if it's not already taken.
        url_data = _add_url_to_json(url_data, short_url, full_url)
        if _upload_json_to_repo(url_data, repo_id, json_file, repo_type):
            return "created_specific_pair", short_url
        else:
            return "error_upload", None
                
    return "error_unhandled_case", None # Should not be reached