Jobey1 committed on
Commit
a42bf4a
·
verified ·
1 Parent(s): b7a9a73

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -31
app.py CHANGED
@@ -127,45 +127,27 @@ def extract_full_paper_with_labels(pdf_path, progress=None):
127
 
128
  def upload_with_progress(file_path, repo_id, token, progress):
129
  """
130
- Upload file to Hugging Face Dataset with progress tracking.
131
  """
132
- import requests # Ensure this is imported if not already
 
 
 
133
  print(f"📤 Starting upload of Parquet: {file_path}")
134
  file_size = os.path.getsize(file_path)
135
 
136
  api = HfApi()
137
 
138
  try:
139
- # Open the file in binary read mode
140
- with open(file_path, 'rb') as f:
141
- chunk_size = 1024 * 1024 # 1 MB chunks
142
- uploaded = 0
143
-
144
- # Prepare headers
145
- headers = {
146
- "Authorization": f"Bearer {token}"
147
- }
148
-
149
- # Construct upload URL
150
- upload_url = f"https://huggingface.co/api/datasets/{repo_id}/upload"
151
-
152
- while True:
153
- chunk = f.read(chunk_size)
154
- if not chunk:
155
- break # Finished reading file
156
-
157
- # Upload chunk
158
- response = requests.put(upload_url, headers=headers, data=chunk)
159
-
160
- if response.status_code != 200:
161
- raise Exception(f"Upload failed: {response.text}")
162
-
163
- # Update progress
164
- uploaded += len(chunk)
165
- if progress is not None:
166
- progress(uploaded / file_size, desc=f"Uploading... {uploaded // (1024 * 1024)}MB/{file_size // (1024 * 1024)}MB")
167
 
168
- # Final progress update
169
  if progress is not None:
170
  progress(1, desc="✅ Upload Complete")
171
 
 
127
 
128
  def upload_with_progress(file_path, repo_id, token, progress):
129
  """
130
+ Upload file to Hugging Face Dataset using the official SDK with progress tracking.
131
  """
132
+ from huggingface_hub import HfApi
133
+ from huggingface_hub.utils import HfHubHTTPError
134
+ import os
135
+
136
  print(f"📤 Starting upload of Parquet: {file_path}")
137
  file_size = os.path.getsize(file_path)
138
 
139
  api = HfApi()
140
 
141
  try:
142
+ # Official upload method from huggingface_hub
143
+ api.upload_file(
144
+ path_or_fileobj=file_path,
145
+ path_in_repo=os.path.basename(file_path),
146
+ repo_id=repo_id,
147
+ repo_type="dataset", # Important to specify it's a dataset repo
148
+ token=token
149
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
 
 
151
  if progress is not None:
152
  progress(1, desc="✅ Upload Complete")
153