Jobey1 committed on
Commit
b891f10
·
verified ·
1 Parent(s): 43a7a2a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -7
app.py CHANGED
@@ -125,19 +125,18 @@ def extract_full_paper_with_labels(pdf_path, progress=None):
125
  def process_pdf_file(pdf_file, api_key, repo_address):
126
  if pdf_file is None:
127
  return None, "No PDF file uploaded."
128
- # Extract content from PDF.
129
- # pdf_file can be a file-like object or a dict depending on how Gradio returns it.
130
  file_path = pdf_file.name if hasattr(pdf_file, "name") else pdf_file['name']
131
  result = extract_full_paper_with_labels(file_path)
132
 
133
- # Convert the result dictionary into a DataFrame and write it to a parquet file.
134
  df = pd.DataFrame([result])
135
  base = os.path.splitext(result['filename'])[0]
136
  parquet_filename = f"{base}.parquet"
137
  df.to_parquet(parquet_filename, index=False)
138
 
139
  repo_status = ""
140
- # If API key and repo address are provided, attempt to upload the parquet file.
141
  if api_key and repo_address:
142
  api = HfApi()
143
  try:
@@ -153,10 +152,9 @@ def process_pdf_file(pdf_file, api_key, repo_address):
153
  else:
154
  repo_status = "API key or repo address not provided, skipping repo upload."
155
 
156
- # Return the parquet file for local download and the status message.
157
  return parquet_filename, repo_status
158
 
159
- # Function to clear only file-related inputs/outputs, preserving the API key and repo address.
160
  def clear_files():
161
  return None, None, ""
162
 
@@ -178,7 +176,6 @@ with gr.Blocks() as demo:
178
  inputs=[pdf_file_input, api_key_input, repo_address_input],
179
  outputs=[download_file_output, repo_status_output]
180
  )
181
- # The clear button now only clears file-related components; API key and Repo Address remain unchanged.
182
  clear_button.click(
183
  clear_files,
184
  inputs=None,
@@ -186,3 +183,4 @@ with gr.Blocks() as demo:
186
  )
187
 
188
  demo.launch()
 
 
125
  def process_pdf_file(pdf_file, api_key, repo_address):
126
  if pdf_file is None:
127
  return None, "No PDF file uploaded."
128
+
129
+ # Determine file path (Gradio returns a file object or dict)
130
  file_path = pdf_file.name if hasattr(pdf_file, "name") else pdf_file['name']
131
  result = extract_full_paper_with_labels(file_path)
132
 
133
+ # Convert the result dictionary to a DataFrame and write it as a parquet file.
134
  df = pd.DataFrame([result])
135
  base = os.path.splitext(result['filename'])[0]
136
  parquet_filename = f"{base}.parquet"
137
  df.to_parquet(parquet_filename, index=False)
138
 
139
  repo_status = ""
 
140
  if api_key and repo_address:
141
  api = HfApi()
142
  try:
 
152
  else:
153
  repo_status = "API key or repo address not provided, skipping repo upload."
154
 
 
155
  return parquet_filename, repo_status
156
 
157
+ # Clear only file-related inputs/outputs, preserving API key and repo address.
158
  def clear_files():
159
  return None, None, ""
160
 
 
176
  inputs=[pdf_file_input, api_key_input, repo_address_input],
177
  outputs=[download_file_output, repo_status_output]
178
  )
 
179
  clear_button.click(
180
  clear_files,
181
  inputs=None,
 
183
  )
184
 
185
  demo.launch()
186
+