C2MV committed
Commit 131ee16 (verified)
Parent: a600bee

Update app.py

Files changed (1)
  1. app.py +87 -84
app.py CHANGED
@@ -338,6 +338,7 @@ class PaperDownloader:
        logger.info(f"Starting download process for DOI: {doi}")

        try:
+
            pdf_content = await self.download_with_retry_async(doi)
            if pdf_content:
                logger.info(f"Downloaded PDF for DOI: {doi}")
@@ -346,9 +347,9 @@ class PaperDownloader:

                # Write the PDF content

-                with open(filepath, 'wb') as f: # context is useful here
-                    f.write(pdf_content)
-
+                with open(filepath, 'wb') as f:
+                    f.write(pdf_content)
+
                logger.info(f"Saved PDF to file: {filepath}")

                logger.info(f"Descarga exitosa: {filename}")
@@ -360,13 +361,14 @@ class PaperDownloader:
                return None, f"No se pudo descargar {doi}", f'<a href="https://doi.org/{doi}">{doi}</a>'

        except CancelledError:
-            logger.info(f"Download Cancelled DOI: {doi}")
-            return None, f"Download cancelled {doi}","Download Cancelled"
+            logger.info(f"Download Cancelled DOI: {doi}")
+            return None, f"Download cancelled {doi}","Download Cancelled"

        except Exception as e:
-            logger.error(f"Error processing {doi}: {e}")
-            return None, f"Error processing {doi}: {e}", f"Error processing {doi}: {e}"
-
+            logger.error(f"Error processing {doi}: {e}")
+            return None, f"Error processing {doi}: {e}", f"Error processing {doi}: {e}"
+
+
    async def download_multiple_dois(self, dois_text, cancel_event):
        """Download multiple DOIs"""
        if not dois_text:
@@ -387,47 +389,46 @@ class PaperDownloader:

        for doi in dois:

-            result = await self._download_single_doi(doi,cancel_event)
-
+            result = await self._download_single_doi(doi,cancel_event) # await all of it and only collect results
            if cancel_event.is_set():
-                logger.info("Downloads cancelled on multiple dois download")
-                return None, "Downloads cancelled","Downloads cancelled","" # early return on cancelled
+                logger.info("Downloads cancelled on multiple dois download")
+                return None, "Downloads cancelled","Downloads cancelled", "" # early return on cancelled

            if result is None:
-                continue
+                continue
+

            if isinstance(result, Exception):
-                # Unexpected exception
-                error_msg = f"Unexpected error: {str(result)}"
-                logger.error(f"Error downloading {doi}: {error_msg}")
-                failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
-
+                # Unexpected exception
+                error_msg = f"Unexpected error: {str(result)}"
+                logger.error(f"Error downloading {doi}: {error_msg}")
+                failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
+
            elif result[0] is None:
-                # Failed download (result from download_single_doi_async)
+                # Failed download (result from download_single_doi_async)
                error_msg = result[1]
                logger.warning(f"Failed to download {doi}: {error_msg}")
                failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')

            else:
                # Successful download
-                filepath = result[0]
+                filepath = result[0]

                # Generate a unique filename
                filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
                filepath_unique = os.path.join(self.output_dir, filename)

                try:
-                    # Rename the file
+                    # Rename the file
                    os.rename(filepath, filepath_unique)
-
-                    # Add to the list of downloaded files
+
+                    # Add to the list of downloaded files
                    downloaded_files.append(filepath_unique)
                    downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')

                except Exception as rename_error:
-                    logger.error(f"Error renaming file for {doi}: {rename_error}")
-                    failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - Error saving file')
-
+                    logger.error(f"Error renaming file for {doi}: {rename_error}")
+                    failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - Error saving file')


        # Create the ZIP file if there are downloaded files
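The loop above is the core of the commit's cancellation handling: one `asyncio.Event` per submission, checked at the top of every iteration so the stop button can trigger an early return. A minimal, self-contained sketch of that pattern (the `fetch_pdf` coroutine and the DOI list below are placeholders, not the real downloader from app.py):

```python
import asyncio


async def fetch_pdf(doi: str) -> bytes | None:
    """Placeholder for the real per-DOI download; here it just sleeps."""
    await asyncio.sleep(1)
    return b"%PDF-1.4"


async def download_many(dois, cancel_event: asyncio.Event):
    results = []
    for doi in dois:
        # Early return as soon as cancellation has been requested.
        if cancel_event.is_set():
            return results, "Downloads cancelled"
        pdf = await fetch_pdf(doi)
        if pdf is not None:
            results.append((doi, pdf))
    return results, "Done"


async def main():
    cancel_event = asyncio.Event()  # one event per submission
    task = asyncio.create_task(download_many(["10.1000/a", "10.1000/b"], cancel_event))
    cancel_event.set()  # this is what the stop button does
    results, status = await task
    print(status, len(results))


asyncio.run(main())
```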
@@ -442,9 +443,10 @@ class PaperDownloader:
                lambda: self.create_zip(zip_filename, downloaded_files)
            )
            logger.info(f"ZIP file created: {zip_filename}")
+
        return zip_filename if downloaded_files else None, "\n".join(downloaded_links),"\n".join(failed_dois), ""
-
-    async def process_bibtex(self, bib_file, cancel_event):# removed async here
+
+    async def process_bibtex(self, bib_file, cancel_event):
        """Process BibTeX file and download papers with multiple strategies and reports UI updates using a callback"""
        # Read BibTeX file content from the uploaded object
        try:
@@ -452,7 +454,7 @@ class PaperDownloader:
            bib_content = f.read()
        except Exception as e:
            logger.error(f"Error reading uploaded file {bib_file.name}: {e}")
-            return None, f"Error reading uploaded file {bib_file.name}: {e}", f"Error reading uploaded file {bib_file.name}: {e}", ""
+            return None, f"Error reading uploaded file {bib_file.name}: {e}", f"Error reading uploaded file {bib_file.name}: {e}",""

        # Parse BibTeX data
        try:
@@ -469,47 +471,47 @@ class PaperDownloader:
        downloaded_files = []
        failed_dois = []
        downloaded_links = []
-
+
        for doi in dois:
-            result = await self._download_single_doi(doi, cancel_event)
-
-            if cancel_event.is_set():
+            result = await self._download_single_doi(doi, cancel_event)
+
+            if cancel_event.is_set():
                logger.info("Download Cancelled in bibtex mode")
-                return None, "Download Cancelled", "Download Cancelled", ""#cancel if requested
-
-            if result is None:
-                continue
+                return None, "Download Cancelled", "Download Cancelled", "" # early return on cancelled
+
+            if result is None:
+                continue

-            if isinstance(result, Exception):
-                # Unexpected exception
-                error_msg = f"Unexpected error: {str(result)}"
-                logger.error(f"Error downloading {doi}: {error_msg}")
-                failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
+            if isinstance(result, Exception):
+                # Unexpected exception
+                error_msg = f"Unexpected error: {str(result)}"
+                logger.error(f"Error downloading {doi}: {error_msg}")
+                failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')

-            elif result[0] is None:
-                # Failed download (result from download_single_doi_async)
-                error_msg = result[1]
-                logger.warning(f"Failed to download {doi}: {error_msg}")
-                failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
+            elif result[0] is None:
+                # Failed download (result from download_single_doi_async)
+                error_msg = result[1]
+                logger.warning(f"Failed to download {doi}: {error_msg}")
+                failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')

-            else:
-                # Successful download
-                filepath = result[0]
-
-                # Unique filename for zip
-                filename = f"{str(doi).replace('/', '_').replace('.', '_')}_{i}.pdf"
-                filepath_unique = os.path.join(self.output_dir, filename)
-                os.rename(filepath, filepath_unique)
-                downloaded_files.append(filepath_unique)
-                downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
-
+            else:
+                # Successful download
+                filepath = result[0]
+
+                # Unique filename for zip
+                filename = f"{str(doi).replace('/', '_').replace('.', '_')}_{i}.pdf"
+                filepath_unique = os.path.join(self.output_dir, filename)
+                os.rename(filepath, filepath_unique)
+                downloaded_files.append(filepath_unique)
+                downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
+
        if downloaded_files:
            zip_filename = 'papers.zip'
            loop = asyncio.get_running_loop()
            loop.run_in_executor(self.executor, lambda: self.create_zip(zip_filename,downloaded_files))
            logger.info(f"ZIP file created: {zip_filename}")
-
-        return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois), ""
+
+        return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois),""

    def create_zip(self, zip_filename, files):
        """Create a zip file with the downloaded PDFs"""
@@ -526,34 +528,34 @@ class PaperDownloader:
def create_gradio_interface():
    """Create Gradio interface for Paper Downloader"""
    downloader = PaperDownloader()
+
+
    def update_progress( message="", logs=""):
-        return gr.Textbox.update(value=f"{message}"),gr.Textbox.update(value=f"<pre>{logs}</pre>")
+        return gr.Textbox.update(value=f"{message}"),gr.Textbox.update(value=f"<pre>{logs}</pre>")


    async def download_papers(bib_file, doi_input, dois_input, output_file, downloaded_dois_textbox,failed_dois_textbox,logs, single_file):
-        cancel_event = asyncio.Event() # Create cancellation event for every submission.
-        downloader.cancel_event = cancel_event # store the event so that it is available to stop the process
-        if bib_file:
-            # Check file type
-            if not bib_file.name.lower().endswith('.bib'):
-                return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file","", None
-
-            zip_file, downloaded_dois, failed_dois, logs_text = await downloader.process_bibtex(bib_file, cancel_event)
-
-            return zip_file, downloaded_dois, failed_dois, logs_text, None
-        elif doi_input:
+        cancel_event = asyncio.Event() # Create cancellation event for every submission.
+        downloader.cancel_event = cancel_event # store the event so that it is available to stop the process
+
+        if bib_file:
+            # Check file type
+            if not bib_file.name.lower().endswith('.bib'):
+                return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", "", None
+            zip_file, downloaded_dois, failed_dois, logs_text= await downloader.process_bibtex(bib_file, cancel_event)
+            return zip_file, downloaded_dois, failed_dois, logs_text, None

-            filepath, message, error = await downloader._download_single_doi(doi_input,cancel_event)
+        elif doi_input:
+            filepath, message, error= await downloader._download_single_doi(doi_input,cancel_event)
+            return None, message, error, "", filepath

-            return None, message, error,"",filepath
+        elif dois_input:
+            zip_file, downloaded_dois, failed_dois, logs_text= await downloader.download_multiple_dois(dois_input,cancel_event)
+
+            return zip_file, downloaded_dois, failed_dois, logs_text, None

-        elif dois_input:
-            zip_file, downloaded_dois, failed_dois, logs_text= await downloader.download_multiple_dois(dois_input, cancel_event)
-            return zip_file, downloaded_dois, failed_dois, logs_text, None
-
-        else:
-            return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", "", None # direct return values, no callbacks
-
+        else:
+            return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs","", None

    with gr.Blocks(theme="Hev832/Applio", css="""
    .gradio-container {
@@ -612,10 +614,11 @@ def create_gradio_interface():
        stop_button.click(lambda: downloader.cancel_download(), outputs=None) # added function in object downloader

        submit_button.click(
-            download_papers,
-            inputs=[bib_file, doi_input, dois_input],
-            outputs=[output_file, downloaded_dois_textbox, failed_dois_textbox,logs, single_file ] # the new output should be a tuple and we output logs too for debugging.
+            download_papers,
+            inputs=[bib_file, doi_input, dois_input],
+            outputs=[output_file, downloaded_dois_textbox, failed_dois_textbox,logs, single_file ], # the new output should be a tuple and we output logs too for debugging.
        )
+
    interface.title="🔬 Academic Paper Batch Downloader"
    interface.description="Upload a BibTeX file or enter DOIs to download PDFs. We'll attempt to fetch PDFs from multiple sources like Sci-Hub, Libgen, Google Scholar and Crossref. You can use any of the three inputs at any moment."
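The interface section follows the usual Gradio `Blocks` pattern: build the components, then bind `button.click(fn, inputs=[...], outputs=[...])`, where `fn` returns one value per output component, which is why `download_papers` now returns a five-element tuple. A minimal sketch of that wiring, separate from app.py (the component names here are illustrative):

```python
import gradio as gr


def greet(name):
    # One return value per component listed in outputs=[...].
    return f"Hello, {name}!"


with gr.Blocks() as demo:
    name_box = gr.Textbox(label="Name")
    greeting_box = gr.Textbox(label="Greeting")
    submit = gr.Button("Submit")
    # Same shape as submit_button.click(download_papers, inputs=[...], outputs=[...]).
    submit.click(greet, inputs=[name_box], outputs=[greeting_box])

# demo.launch()
```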