C2MV committed on
Commit
0e74018
·
verified ·
1 Parent(s): 707bdc5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -12
app.py CHANGED
@@ -283,7 +283,6 @@ class PaperDownloader:
283
  logger.error(f"Error processing {doi}: {e}")
284
  return None, f"Error processing {doi}: {e}", f"Error processing {doi}: {e}"
285
 
286
-
287
  async def download_multiple_dois_async(self, dois_text):
288
  """Downloads multiple papers from a list of DOIs"""
289
  if not dois_text:
@@ -299,13 +298,12 @@ class PaperDownloader:
299
  for i, doi in enumerate(tqdm(dois, desc="Downloading papers")):
300
  filepath, success_message, fail_message = await self.download_single_doi_async(doi)
301
  if filepath:
302
- # Unique filename for zip
303
  filename = f"{str(doi).replace('/', '_').replace('.', '_')}_{i}.pdf"
304
  filepath_unique = os.path.join(self.output_dir, filename)
305
  os.rename(filepath, filepath_unique)
306
  downloaded_files.append(filepath_unique)
307
  downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
308
-
309
  else:
310
  failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
311
 
@@ -318,7 +316,6 @@ class PaperDownloader:
318
 
319
  return zip_filename if downloaded_files else None, "\n".join(downloaded_links), "\n".join(failed_dois)
320
 
321
-
322
  async def process_bibtex_async(self, bib_file):
323
  """Process BibTeX file and download papers with multiple strategies"""
324
  # Read BibTeX file content from the uploaded object
@@ -327,14 +324,14 @@ class PaperDownloader:
327
  bib_content = f.read()
328
  except Exception as e:
329
  logger.error(f"Error reading uploaded file {bib_file.name}: {e}")
330
- return None, f"Error reading uploaded file {bib_file.name}: {e}", f"Error reading uploaded file {bib_file.name}: {e}", None
331
 
332
  # Parse BibTeX data
333
  try:
334
  bib_database = bibtexparser.loads(bib_content)
335
  except Exception as e:
336
  logger.error(f"Error parsing BibTeX data: {e}")
337
- return None, f"Error parsing BibTeX data: {e}", f"Error parsing BibTeX data: {e}", None
338
 
339
  # Extract DOIs
340
  dois = [entry.get('doi') for entry in bib_database.entries if entry.get('doi')]
@@ -346,7 +343,7 @@ class PaperDownloader:
346
  downloaded_links = []
347
 
348
  # Download PDFs
349
- for i,doi in enumerate(tqdm(dois, desc="Downloading papers")):
350
  try:
351
  # Try to download with multiple methods with retries
352
  pdf_content = await self.download_with_retry_async(doi)
@@ -354,7 +351,7 @@ class PaperDownloader:
354
  # Save PDF
355
  if pdf_content:
356
  if doi is None:
357
- return None, "Error: DOI not provided", "Error: DOI not provided", None
358
  filename = f"{str(doi).replace('/', '_').replace('.', '_')}_{i}.pdf"
359
  filepath = os.path.join(self.output_dir, filename)
360
 
@@ -379,8 +376,7 @@ class PaperDownloader:
379
  zipf.write(file_path, arcname=os.path.basename(file_path))
380
  logger.info(f"ZIP file created: {zip_filename}")
381
 
382
- return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois), None
383
-
384
 
385
  def create_gradio_interface():
386
  """Create Gradio interface for Paper Downloader"""
@@ -392,7 +388,7 @@ def create_gradio_interface():
392
  if not bib_file.name.lower().endswith('.bib'):
393
  return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", None
394
 
395
- zip_path, downloaded_dois, failed_dois, = await downloader.process_bibtex_async(bib_file)
396
  return zip_path, downloaded_dois, failed_dois, None
397
  elif doi_input:
398
  filepath, message, failed_doi = await downloader.download_single_doi_async(doi_input)
@@ -403,6 +399,7 @@ def create_gradio_interface():
403
  else:
404
  return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", None
405
 
 
406
  # Gradio Interface
407
  interface = gr.Interface(
408
  fn=download_papers,
@@ -429,7 +426,7 @@ def create_gradio_interface():
429
  <div id="failed-dois"></div>
430
  </div>
431
  """),
432
- gr.File(label="Downloaded Single PDF")
433
  ],
434
  title="🔬 Academic Paper Batch Downloader",
435
  description="Upload a BibTeX file or enter DOIs to download PDFs. We'll attempt to fetch PDFs from multiple sources like Sci-Hub, Libgen, Google Scholar and Crossref. You can use any of the three inputs at any moment.",
 
283
  logger.error(f"Error processing {doi}: {e}")
284
  return None, f"Error processing {doi}: {e}", f"Error processing {doi}: {e}"
285
 
 
286
  async def download_multiple_dois_async(self, dois_text):
287
  """Downloads multiple papers from a list of DOIs"""
288
  if not dois_text:
 
298
  for i, doi in enumerate(tqdm(dois, desc="Downloading papers")):
299
  filepath, success_message, fail_message = await self.download_single_doi_async(doi)
300
  if filepath:
301
+ # Unique filename for zip
302
  filename = f"{str(doi).replace('/', '_').replace('.', '_')}_{i}.pdf"
303
  filepath_unique = os.path.join(self.output_dir, filename)
304
  os.rename(filepath, filepath_unique)
305
  downloaded_files.append(filepath_unique)
306
  downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
 
307
  else:
308
  failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
309
 
 
316
 
317
  return zip_filename if downloaded_files else None, "\n".join(downloaded_links), "\n".join(failed_dois)
318
 
 
319
  async def process_bibtex_async(self, bib_file):
320
  """Process BibTeX file and download papers with multiple strategies"""
321
  # Read BibTeX file content from the uploaded object
 
324
  bib_content = f.read()
325
  except Exception as e:
326
  logger.error(f"Error reading uploaded file {bib_file.name}: {e}")
327
+ return None, f"Error reading uploaded file {bib_file.name}: {e}", f"Error reading uploaded file {bib_file.name}: {e}"
328
 
329
  # Parse BibTeX data
330
  try:
331
  bib_database = bibtexparser.loads(bib_content)
332
  except Exception as e:
333
  logger.error(f"Error parsing BibTeX data: {e}")
334
+ return None, f"Error parsing BibTeX data: {e}", f"Error parsing BibTeX data: {e}"
335
 
336
  # Extract DOIs
337
  dois = [entry.get('doi') for entry in bib_database.entries if entry.get('doi')]
 
343
  downloaded_links = []
344
 
345
  # Download PDFs
346
+ for i, doi in enumerate(tqdm(dois, desc="Downloading papers")):
347
  try:
348
  # Try to download with multiple methods with retries
349
  pdf_content = await self.download_with_retry_async(doi)
 
351
  # Save PDF
352
  if pdf_content:
353
  if doi is None:
354
+ return None, "Error: DOI not provided", "Error: DOI not provided"
355
  filename = f"{str(doi).replace('/', '_').replace('.', '_')}_{i}.pdf"
356
  filepath = os.path.join(self.output_dir, filename)
357
 
 
376
  zipf.write(file_path, arcname=os.path.basename(file_path))
377
  logger.info(f"ZIP file created: {zip_filename}")
378
 
379
+ return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois)
 
380
 
381
  def create_gradio_interface():
382
  """Create Gradio interface for Paper Downloader"""
 
388
  if not bib_file.name.lower().endswith('.bib'):
389
  return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", None
390
 
391
+ zip_path, downloaded_dois, failed_dois = await downloader.process_bibtex_async(bib_file)
392
  return zip_path, downloaded_dois, failed_dois, None
393
  elif doi_input:
394
  filepath, message, failed_doi = await downloader.download_single_doi_async(doi_input)
 
399
  else:
400
  return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", None
401
 
402
+
403
  # Gradio Interface
404
  interface = gr.Interface(
405
  fn=download_papers,
 
426
  <div id="failed-dois"></div>
427
  </div>
428
  """),
429
+ gr.File(label="Downloaded Single PDF")
430
  ],
431
  title="🔬 Academic Paper Batch Downloader",
432
  description="Upload a BibTeX file or enter DOIs to download PDFs. We'll attempt to fetch PDFs from multiple sources like Sci-Hub, Libgen, Google Scholar and Crossref. You can use any of the three inputs at any moment.",