Jobey1 committed on
Commit
09053ce
·
verified ·
1 Parent(s): c8cd30b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -7
app.py CHANGED
@@ -154,22 +154,20 @@ def upload_with_progress(file_path, repo_id, token, progress):
154
  print(f"❌ Unexpected error: {e}")
155
  return f"❌ Unexpected error: {str(e)}"
156
 
157
- def pdf_to_parquet_and_upload(pdf_files, hf_token, dataset_repo_id, action_choice, progress=gr.Progress()):
158
  all_data = []
159
 
160
  total_files = len(pdf_files)
161
  print("πŸš€ Starting PDF to Parquet Conversion Process")
162
 
163
  for idx, pdf_file in enumerate(pdf_files):
164
- if progress is not None:
165
  progress(idx / total_files, desc=f"Processing File {idx + 1}/{total_files}")
166
 
167
- # ✅ Step 1: Process PDF with Full Labels
168
  extracted_data = extract_full_paper_with_labels(pdf_file.name, progress=progress)
169
  all_data.append(extracted_data)
170
 
171
  print("🟑 Converting Processed Data to Parquet")
172
- # ✅ Step 2: Convert to Parquet
173
  df = pd.DataFrame(all_data)
174
  parquet_file = 'fully_labeled_papers.parquet'
175
 
@@ -178,11 +176,11 @@ def pdf_to_parquet_and_upload(pdf_files, hf_token, dataset_repo_id, action_choic
178
  print("βœ… Parquet Conversion Completed")
179
  except Exception as e:
180
  print(f"❌ Parquet Conversion Failed: {str(e)}")
181
- return None, f"❌ Parquet Conversion Failed: {str(e)}"
182
 
183
  upload_message = "Skipped Upload"
184
 
185
- # ✅ Step 3: Upload Parquet (if selected)
186
  if action_choice in ["Upload to Hugging Face", "Both"]:
187
  try:
188
  upload_message = upload_with_progress(parquet_file, dataset_repo_id, hf_token, progress)
@@ -191,7 +189,13 @@ def pdf_to_parquet_and_upload(pdf_files, hf_token, dataset_repo_id, action_choic
191
  upload_message = f"❌ Upload failed: {str(e)}"
192
 
193
  print("🏁 Process Completed")
194
- return parquet_file, upload_message
 
 
 
 
 
 
195
 
196
  # ✅ Gradio Interface
197
  iface = gr.Interface(
 
154
  print(f"❌ Unexpected error: {e}")
155
  return f"❌ Unexpected error: {str(e)}"
156
 
157
+ def pdf_to_parquet_and_upload(pdf_files, hf_token, dataset_repo_id, action_choice, state, progress=gr.Progress()):
158
  all_data = []
159
 
160
  total_files = len(pdf_files)
161
  print("πŸš€ Starting PDF to Parquet Conversion Process")
162
 
163
  for idx, pdf_file in enumerate(pdf_files):
164
+ if progress:
165
  progress(idx / total_files, desc=f"Processing File {idx + 1}/{total_files}")
166
 
 
167
  extracted_data = extract_full_paper_with_labels(pdf_file.name, progress=progress)
168
  all_data.append(extracted_data)
169
 
170
  print("🟑 Converting Processed Data to Parquet")
 
171
  df = pd.DataFrame(all_data)
172
  parquet_file = 'fully_labeled_papers.parquet'
173
 
 
176
  print("βœ… Parquet Conversion Completed")
177
  except Exception as e:
178
  print(f"❌ Parquet Conversion Failed: {str(e)}")
179
+ return None, f"❌ Parquet Conversion Failed: {str(e)}", state
180
 
181
  upload_message = "Skipped Upload"
182
 
183
+ # ✅ Upload Parquet if selected
184
  if action_choice in ["Upload to Hugging Face", "Both"]:
185
  try:
186
  upload_message = upload_with_progress(parquet_file, dataset_repo_id, hf_token, progress)
 
189
  upload_message = f"❌ Upload failed: {str(e)}"
190
 
191
  print("🏁 Process Completed")
192
+
193
+ # ✅ Clear Uploaded PDFs and Parquet File
194
+ if os.path.exists(parquet_file):
195
+ os.remove(parquet_file)
196
+ print("πŸ—‘οΈ Parquet file cleared after processing.")
197
+
198
+ return None, upload_message, state
199
 
200
  # ✅ Gradio Interface
201
  iface = gr.Interface(