Jobey1 committed on
Commit
e263e01
·
verified ·
1 Parent(s): a42bf4a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -13
app.py CHANGED
@@ -127,27 +127,42 @@ def extract_full_paper_with_labels(pdf_path, progress=None):
127
 
128
  def upload_with_progress(file_path, repo_id, token, progress):
129
  """
130
- Upload file to Hugging Face Dataset using the official SDK with progress tracking.
131
  """
132
- from huggingface_hub import HfApi
133
- from huggingface_hub.utils import HfHubHTTPError
134
- import os
135
 
136
  print(f"📤 Starting upload of Parquet: {file_path}")
137
  file_size = os.path.getsize(file_path)
138
-
139
  api = HfApi()
140
 
 
 
 
141
  try:
142
- # βœ… Official upload method from huggingface_hub
143
- api.upload_file(
144
- path_or_fileobj=file_path,
145
- path_in_repo=os.path.basename(file_path),
146
- repo_id=repo_id,
147
- repo_type="dataset", # Important to specify it's a dataset repo
148
- token=token
149
- )
 
 
 
 
 
150
 
 
 
 
 
 
 
 
 
 
 
 
151
  if progress is not None:
152
  progress(1, desc="✅ Upload Complete")
153
 
@@ -162,6 +177,7 @@ def upload_with_progress(file_path, repo_id, token, progress):
162
  return f"❌ Unexpected error: {str(e)}"
163
 
164
 
 
165
  def pdf_to_parquet_and_upload(pdf_files, hf_token, dataset_repo_id, action_choice, progress=gr.Progress()):
166
  upload_message = ""
167
 
 
127
 
128
  def upload_with_progress(file_path, repo_id, token, progress):
129
  """
130
+ Upload file to Hugging Face Dataset with progress tracking.
131
  """
 
 
 
132
 
133
  print(f"📤 Starting upload of Parquet: {file_path}")
134
  file_size = os.path.getsize(file_path)
 
135
  api = HfApi()
136
 
137
+ # Get the proper upload URL from the Hugging Face API
138
+ upload_url = f"https://huggingface.co/api/datasets/{repo_id}/upload"
139
+
140
  try:
141
+ with open(file_path, 'rb') as f:
142
+ chunk_size = 1024 * 1024 # 1 MB chunks
143
+ uploaded = 0
144
+
145
+ headers = {
146
+ "Authorization": f"Bearer {token}",
147
+ "Content-Type": "application/octet-stream"
148
+ }
149
+
150
+ while True:
151
+ chunk = f.read(chunk_size)
152
+ if not chunk:
153
+ break # Finished reading file
154
 
155
+ response = requests.put(upload_url, headers=headers, data=chunk)
156
+
157
+ if response.status_code != 200:
158
+ raise Exception(f"Upload failed: {response.text}")
159
+
160
+ # Update progress after each chunk
161
+ uploaded += len(chunk)
162
+ if progress is not None:
163
+ progress(uploaded / file_size, desc=f"Uploading... {uploaded // (1024 * 1024)}MB/{file_size // (1024 * 1024)}MB")
164
+
165
+ # Final progress update
166
  if progress is not None:
167
  progress(1, desc="✅ Upload Complete")
168
 
 
177
  return f"❌ Unexpected error: {str(e)}"
178
 
179
 
180
+
181
  def pdf_to_parquet_and_upload(pdf_files, hf_token, dataset_repo_id, action_choice, progress=gr.Progress()):
182
  upload_message = ""
183