Update app.py
Browse files
app.py
CHANGED
@@ -127,45 +127,27 @@ def extract_full_paper_with_labels(pdf_path, progress=None):
|
|
127 |
|
128 |
def upload_with_progress(file_path, repo_id, token, progress):
|
129 |
"""
|
130 |
-
Upload file to Hugging Face Dataset with progress tracking.
|
131 |
"""
|
132 |
-
|
|
|
|
|
|
|
133 |
print(f"📤 Starting upload of Parquet: {file_path}")
|
134 |
file_size = os.path.getsize(file_path)
|
135 |
|
136 |
api = HfApi()
|
137 |
|
138 |
try:
|
139 |
-
#
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
#
|
145 |
-
|
146 |
-
|
147 |
-
}
|
148 |
-
|
149 |
-
# Construct upload URL
|
150 |
-
upload_url = f"https://huggingface.co/api/datasets/{repo_id}/upload"
|
151 |
-
|
152 |
-
while True:
|
153 |
-
chunk = f.read(chunk_size)
|
154 |
-
if not chunk:
|
155 |
-
break # Finished reading file
|
156 |
-
|
157 |
-
# Upload chunk
|
158 |
-
response = requests.put(upload_url, headers=headers, data=chunk)
|
159 |
-
|
160 |
-
if response.status_code != 200:
|
161 |
-
raise Exception(f"Upload failed: {response.text}")
|
162 |
-
|
163 |
-
# Update progress
|
164 |
-
uploaded += len(chunk)
|
165 |
-
if progress is not None:
|
166 |
-
progress(uploaded / file_size, desc=f"Uploading... {uploaded // (1024 * 1024)}MB/{file_size // (1024 * 1024)}MB")
|
167 |
|
168 |
-
# Final progress update
|
169 |
if progress is not None:
|
170 |
progress(1, desc="✅ Upload Complete")
|
171 |
|
|
|
127 |
|
128 |
def upload_with_progress(file_path, repo_id, token, progress):
|
129 |
"""
|
130 |
+
Upload a file to a Hugging Face dataset repository using the official huggingface_hub SDK, reporting completion through `progress`.
|
131 |
"""
|
132 |
+
from huggingface_hub import HfApi
|
133 |
+
from huggingface_hub.utils import HfHubHTTPError
|
134 |
+
import os
|
135 |
+
|
136 |
print(f"📤 Starting upload of Parquet: {file_path}")
|
137 |
file_size = os.path.getsize(file_path)
|
138 |
|
139 |
api = HfApi()
|
140 |
|
141 |
try:
|
142 |
+
# ✅ Official upload method from huggingface_hub
|
143 |
+
api.upload_file(
|
144 |
+
path_or_fileobj=file_path,
|
145 |
+
path_in_repo=os.path.basename(file_path),
|
146 |
+
repo_id=repo_id,
|
147 |
+
repo_type="dataset", # Important to specify it's a dataset repo
|
148 |
+
token=token
|
149 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
150 |
|
|
|
151 |
if progress is not None:
|
152 |
progress(1, desc="✅ Upload Complete")
|
153 |
|