C2MV committed on
Commit
0ae971f
·
verified ·
1 Parent(s): 65a8746

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -81
app.py CHANGED
@@ -334,6 +334,7 @@ class PaperDownloader:
334
  def _download_single_doi(self, doi, cancel_event): # removed async keyword
335
  """Descargar un único DOI con retroalimentación de progreso"""
336
  if not doi:
 
337
  return None, "Error: DOI no proporcionado", "Error: DOI no proporcionado"
338
  logger.info(f"Starting download process for DOI: {doi}")
339
 
@@ -377,7 +378,7 @@ class PaperDownloader:
377
  """Download multiple DOIs"""
378
  # Validar entrada
379
  if not dois_text:
380
- return None, "Error: No DOIs provided", "Error: No DOIs provided", ""
381
 
382
  # Sanitizar y filtrar DOIs
383
  # Eliminar líneas vacías, espacios en blanco, y DOIs duplicados
@@ -393,46 +394,46 @@ class PaperDownloader:
393
  downloaded_links = [] # Links de DOIs descargados
394
 
395
  for doi in dois:
396
- result = self._download_single_doi(doi,cancel_event) # now single methods directly
397
-
398
- if cancel_event.is_set():
399
- logger.info("Downloads cancelled on multiple dois download")
400
- return None,"Downloads cancelled","Downloads cancelled", ""# early return on cancelled
401
 
402
- if result is None:
403
- continue
404
 
405
- if isinstance(result, Exception):
406
- # Excepción inesperada
407
- error_msg = f"Unexpected error: {str(result)}"
408
- logger.error(f"Error downloading {doi}: {error_msg}")
409
- failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
 
 
 
 
 
 
410
 
411
- elif result[0] is None:
412
- # Descarga fallida (resultado de download_single_doi_async)
413
- error_msg = result[1]
414
- logger.warning(f"Failed to download {doi}: {error_msg}")
415
- failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
416
-
417
- else:
418
- # Descarga exitosa
419
- filepath = result[0]
420
 
421
- # Generar nombre de archivo único
422
- filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
423
- filepath_unique = os.path.join(self.output_dir, filename)
 
 
 
 
424
 
425
- try:
426
- # Renombrar archivo
427
- os.rename(filepath, filepath_unique)
428
-
429
  # Añadir a lista de archivos descargados
430
- downloaded_files.append(filepath_unique)
431
- downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
432
 
433
- except Exception as rename_error:
434
- logger.error(f"Error renaming file for {doi}: {rename_error}")
435
- failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - Error saving file')
436
 
437
 
438
  # Crear archivo ZIP si hay archivos descargados
@@ -447,8 +448,9 @@ class PaperDownloader:
447
  lambda: self.create_zip(zip_filename, downloaded_files)
448
  )
449
  logger.info(f"ZIP file created: {zip_filename}")
450
-
451
- return zip_filename if downloaded_files else None, "\n".join(downloaded_links),"\n".join(failed_dois), ""
 
452
 
453
  def process_bibtex(self, bib_file, cancel_event):# removed async here
454
  """Process BibTeX file and download papers with multiple strategies and reports UI updates using a callback"""
@@ -475,22 +477,21 @@ class PaperDownloader:
475
  downloaded_files = []
476
  failed_dois = []
477
  downloaded_links = []
478
-
479
  for doi in dois:
480
- result= self._download_single_doi(doi,cancel_event) # calls function
481
-
482
  if cancel_event.is_set():
483
  logger.info("Download Cancelled in bibtex mode")
484
- return None, "Download Cancelled", "Download Cancelled", ""#cancel if requested
485
-
486
- if result is None: #skips nulls to perform continue on a multi processing with threads.
487
  continue;
488
-
489
  if isinstance(result, Exception):
490
  # Excepción inesperada
491
- error_msg = f"Unexpected error: {str(result)}"
492
- logger.error(f"Error downloading {doi}: {error_msg}")
493
- failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
494
 
495
  elif result[0] is None:
496
  # Descarga fallida (resultado de download_single_doi_async)
@@ -508,15 +509,14 @@ class PaperDownloader:
508
  os.rename(filepath, filepath_unique)
509
  downloaded_files.append(filepath_unique)
510
  downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
511
-
512
-
513
  if downloaded_files:
514
  zip_filename = 'papers.zip'
515
  loop = asyncio.get_running_loop()
516
  loop.run_in_executor(self.executor, lambda: self.create_zip(zip_filename,downloaded_files))
517
  logger.info(f"ZIP file created: {zip_filename}")
518
-
519
- return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois), ""# all callbacks done, sending empty logs so no value breaks on UI.
520
 
521
 
522
  def create_zip(self, zip_filename, files):
@@ -535,53 +535,53 @@ def create_gradio_interface():
535
  """Create Gradio interface for Paper Downloader"""
536
  downloader = PaperDownloader()
537
 
 
538
  def update_progress( message="", logs=""):
539
  return gr.Textbox.update(value=f"{message}"),gr.Textbox.update(value=f"<pre>{logs}</pre>")
540
 
541
 
542
  def download_papers(bib_file, doi_input, dois_input, output_file, downloaded_dois_textbox,failed_dois_textbox,logs, single_file):
543
-
544
-
545
  cancel_event = asyncio.Event() # Create cancellation event for every submission.
546
  downloader.cancel_event = cancel_event # store the event so that it is available to stop the process
547
  if bib_file:
548
  # Check file type
549
  if not bib_file.name.lower().endswith('.bib'):
550
- return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file","", None
551
 
552
  downloader.download_task = downloader.executor.submit(
553
- downloader.process_bibtex,
554
- bib_file,
555
- cancel_event # Added cancelllation event.
556
- )
557
- zip_file, downloaded_dois, failed_dois, logs_text= downloader.download_task.result() #gets the values returned.
 
 
 
558
 
559
- return zip_file, downloaded_dois, failed_dois, logs_text, None # simple return
560
-
561
  elif doi_input:
562
 
563
  downloader.download_task = downloader.executor.submit( #changed async execution method
564
- downloader._download_single_doi,
565
- doi_input,
566
- cancel_event
567
- )
568
-
569
- filepath, message, error = downloader.download_task.result()#gets results, nothing more
570
- return None, message, error, "", filepath
 
571
 
572
  elif dois_input:
573
  downloader.download_task = downloader.executor.submit( #changed async execution method
574
- downloader.download_multiple_dois,
575
- dois_input,
576
- cancel_event #Add cancellation event.
577
- )
578
- zip_file, downloaded_dois, failed_dois, logs_text= downloader.download_task.result()
579
- return zip_file, downloaded_dois, failed_dois, logs_text, None
580
-
581
  else:
582
- return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", "", None#all params provided
583
 
584
-
585
 
586
  with gr.Blocks(theme="Hev832/Applio", css="""
587
  .gradio-container {
@@ -642,13 +642,12 @@ def create_gradio_interface():
642
  submit_button.click(
643
  download_papers,
644
  inputs=[bib_file, doi_input, dois_input],
645
- outputs=[output_file, downloaded_dois_textbox, failed_dois_textbox,logs, single_file ] # the new output should be a tuple and we output logs too for debugging.
646
- )
647
-
648
-
649
  interface.title="🔬 Academic Paper Batch Downloader"
650
  interface.description="Upload a BibTeX file or enter DOIs to download PDFs. We'll attempt to fetch PDFs from multiple sources like Sci-Hub, Libgen, Google Scholar and Crossref. You can use any of the three inputs at any moment."
651
-
652
  return interface
653
 
654
  def main():
 
334
  def _download_single_doi(self, doi, cancel_event): # removed async keyword
335
  """Descargar un único DOI con retroalimentación de progreso"""
336
  if not doi:
337
+
338
  return None, "Error: DOI no proporcionado", "Error: DOI no proporcionado"
339
  logger.info(f"Starting download process for DOI: {doi}")
340
 
 
378
  """Download multiple DOIs"""
379
  # Validar entrada
380
  if not dois_text:
381
+ return None, "Error: No DOIs provided", "Error: No DOIs provided",""
382
 
383
  # Sanitizar y filtrar DOIs
384
  # Eliminar líneas vacías, espacios en blanco, y DOIs duplicados
 
394
  downloaded_links = [] # Links de DOIs descargados
395
 
396
  for doi in dois:
397
+ result = self._download_single_doi(doi,cancel_event) # all downloads return their info
398
+
399
+ if cancel_event.is_set():
400
+ logger.info("Downloads cancelled on multiple dois download")
401
+ return None,"Downloads cancelled","Downloads cancelled", ""# early return on cancelled
402
 
403
+ if result is None:
404
+ continue
405
 
406
+ if isinstance(result, Exception):
407
+ # Excepción inesperada
408
+ error_msg = f"Unexpected error: {str(result)}"
409
+ logger.error(f"Error downloading {doi}: {error_msg}")
410
+ failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
411
+
412
+ elif result[0] is None:
413
+ # Descarga fallida (resultado de download_single_doi_async)
414
+ error_msg = result[1]
415
+ logger.warning(f"Failed to download {doi}: {error_msg}")
416
+ failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
417
 
418
+ else:
419
+ # Descarga exitosa
420
+ filepath = result[0]
 
 
 
 
 
 
421
 
422
+ # Generar nombre de archivo único
423
+ filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
424
+ filepath_unique = os.path.join(self.output_dir, filename)
425
+
426
+ try:
427
+ # Renombrar archivo
428
+ os.rename(filepath, filepath_unique)
429
 
 
 
 
 
430
  # Añadir a lista de archivos descargados
431
+ downloaded_files.append(filepath_unique)
432
+ downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
433
 
434
+ except Exception as rename_error:
435
+ logger.error(f"Error renaming file for {doi}: {rename_error}")
436
+ failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - Error saving file')
437
 
438
 
439
  # Crear archivo ZIP si hay archivos descargados
 
448
  lambda: self.create_zip(zip_filename, downloaded_files)
449
  )
450
  logger.info(f"ZIP file created: {zip_filename}")
451
+
452
+
453
+ return zip_filename if downloaded_files else None, "\n".join(downloaded_links),"\n".join(failed_dois), ""
454
 
455
  def process_bibtex(self, bib_file, cancel_event):# removed async here
456
  """Process BibTeX file and download papers with multiple strategies and reports UI updates using a callback"""
 
477
  downloaded_files = []
478
  failed_dois = []
479
  downloaded_links = []
 
480
  for doi in dois:
481
+ result = self._download_single_doi(doi, cancel_event)
482
+
483
  if cancel_event.is_set():
484
  logger.info("Download Cancelled in bibtex mode")
485
+ return None, "Download Cancelled", "Download Cancelled" , ""
486
+
487
+ if result is None:
488
  continue;
489
+
490
  if isinstance(result, Exception):
491
  # Excepción inesperada
492
+ error_msg = f"Unexpected error: {str(result)}"
493
+ logger.error(f"Error downloading {doi}: {error_msg}")
494
+ failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
495
 
496
  elif result[0] is None:
497
  # Descarga fallida (resultado de download_single_doi_async)
 
509
  os.rename(filepath, filepath_unique)
510
  downloaded_files.append(filepath_unique)
511
  downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
512
+
 
513
  if downloaded_files:
514
  zip_filename = 'papers.zip'
515
  loop = asyncio.get_running_loop()
516
  loop.run_in_executor(self.executor, lambda: self.create_zip(zip_filename,downloaded_files))
517
  logger.info(f"ZIP file created: {zip_filename}")
518
+
519
+ return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois),""
520
 
521
 
522
  def create_zip(self, zip_filename, files):
 
535
  """Create Gradio interface for Paper Downloader"""
536
  downloader = PaperDownloader()
537
 
538
+
539
  def update_progress( message="", logs=""):
540
  return gr.Textbox.update(value=f"{message}"),gr.Textbox.update(value=f"<pre>{logs}</pre>")
541
 
542
 
543
  def download_papers(bib_file, doi_input, dois_input, output_file, downloaded_dois_textbox,failed_dois_textbox,logs, single_file):
 
 
544
  cancel_event = asyncio.Event() # Create cancellation event for every submission.
545
  downloader.cancel_event = cancel_event # store the event so that it is available to stop the process
546
  if bib_file:
547
  # Check file type
548
  if not bib_file.name.lower().endswith('.bib'):
549
+ return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file","", None
550
 
551
  downloader.download_task = downloader.executor.submit(
552
+ downloader.process_bibtex,
553
+ bib_file,
554
+ cancel_event
555
+ )
556
+
557
+ zip_file, downloaded_dois, failed_dois, logs_text = downloader.download_task.result()
558
+
559
+ return zip_file, downloaded_dois, failed_dois, logs_text, None #just direct return now no callbaccks or specific UI methods on callback functions
560
 
 
 
561
  elif doi_input:
562
 
563
  downloader.download_task = downloader.executor.submit( #changed async execution method
564
+ downloader._download_single_doi,
565
+ doi_input,
566
+ cancel_event
567
+ )
568
+
569
+ filepath, message, error = downloader.download_task.result()
570
+
571
+ return None, message, error, "", filepath #just direct return now no callbaccks or specific UI methods on callback functions
572
 
573
  elif dois_input:
574
  downloader.download_task = downloader.executor.submit( #changed async execution method
575
+ downloader.download_multiple_dois,
576
+ dois_input,
577
+ cancel_event
578
+ )
579
+
580
+ zip_file, downloaded_dois, failed_dois, logs_text = downloader.download_task.result()
581
+ return zip_file, downloaded_dois, failed_dois, logs_text, None #just direct return now no callbaccks or specific UI methods on callback functions
582
  else:
583
+ return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", "", None # must also have five values to satisfy gradio block method
584
 
 
585
 
586
  with gr.Blocks(theme="Hev832/Applio", css="""
587
  .gradio-container {
 
642
  submit_button.click(
643
  download_papers,
644
  inputs=[bib_file, doi_input, dois_input],
645
+ outputs=[output_file, downloaded_dois_textbox, failed_dois_textbox,logs, single_file ] # the new output should be a tuple and we output logs too for debugging.
646
+ )
647
+
 
648
  interface.title="🔬 Academic Paper Batch Downloader"
649
  interface.description="Upload a BibTeX file or enter DOIs to download PDFs. We'll attempt to fetch PDFs from multiple sources like Sci-Hub, Libgen, Google Scholar and Crossref. You can use any of the three inputs at any moment."
650
+
651
  return interface
652
 
653
  def main():