C2MV committed on
Commit
65a8746
verified
1 Parent(s): bc356d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -93
app.py CHANGED
@@ -334,7 +334,6 @@ class PaperDownloader:
334
  def _download_single_doi(self, doi, cancel_event): # removed async keyword
335
  """Descargar un único DOI con retroalimentación de progreso"""
336
  if not doi:
337
-
338
  return None, "Error: DOI no proporcionado", "Error: DOI no proporcionado"
339
  logger.info(f"Starting download process for DOI: {doi}")
340
 
@@ -363,23 +362,22 @@ class PaperDownloader:
363
  logger.warning(f"No se pudo descargar: {doi}")
364
  return None, f"No se pudo descargar {doi}", f'<a href="https://doi.org/{doi}">{doi}</a>'
365
 
366
- filepath, message, error = asyncio.run(call_async()) #added the loop event here
367
- return filepath, message, error
368
  except CancelledError:
369
  logger.info(f"Download Cancelled DOI: {doi}")
370
- return None, f"Download cancelled {doi}","Download Cancelled"
371
 
372
  except Exception as e:
373
  logger.error(f"Error processing {doi}: {e}")
374
- return None, f"Error processing {doi}: {e}", f"Error processing {doi}: {e}"
375
 
376
 
377
  def download_multiple_dois(self, dois_text, cancel_event): #removed async here
378
  """Download multiple DOIs"""
379
  # Validar entrada
380
  if not dois_text:
381
-
382
- return None, "Error: No DOIs provided", "Error: No DOIs provided",""
383
 
384
  # Sanitizar y filtrar DOIs
385
  # Eliminar líneas vacías, espacios en blanco, y DOIs duplicados
@@ -387,27 +385,25 @@ class PaperDownloader:
387
 
388
  # Validar lista de DOIs
389
  if not dois:
390
-
391
- return None, "Error: No valid DOIs provided", "Error: No valid DOIs provided", ""
392
 
393
  # Listas para rastrear resultados
394
  downloaded_files = [] # Rutas de archivos descargados
395
  failed_dois = [] # DOIs que no se pudieron descargar
396
  downloaded_links = [] # Links de DOIs descargados
 
397
  for doi in dois:
398
-
399
- result = self._download_single_doi(doi,cancel_event)
400
 
401
  if cancel_event.is_set():
402
-
403
  logger.info("Downloads cancelled on multiple dois download")
404
- return None,"Downloads cancelled","Downloads cancelled","" # early return on cancelled
405
-
406
- if result is None: #when errors happen results are none
407
- continue
408
- # Manejar diferentes tipos de resultados
409
  if isinstance(result, Exception):
410
- # Excepción inesperada
411
  error_msg = f"Unexpected error: {str(result)}"
412
  logger.error(f"Error downloading {doi}: {error_msg}")
413
  failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
@@ -419,24 +415,24 @@ class PaperDownloader:
419
  failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
420
 
421
  else:
422
- # Descarga exitosa
423
  filepath = result[0]
424
 
425
- # Generar nombre de archivo único
426
  filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
427
  filepath_unique = os.path.join(self.output_dir, filename)
428
-
429
  try:
430
  # Renombrar archivo
431
  os.rename(filepath, filepath_unique)
432
-
433
- # Añadir a lista de archivos descargados
434
  downloaded_files.append(filepath_unique)
435
  downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
436
 
437
  except Exception as rename_error:
438
- logger.error(f"Error renaming file for {doi}: {rename_error}")
439
- failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - Error saving file')
440
 
441
 
442
  # Crear archivo ZIP si hay archivos descargados
@@ -452,8 +448,7 @@ class PaperDownloader:
452
  )
453
  logger.info(f"ZIP file created: {zip_filename}")
454
 
455
- return zip_filename if downloaded_files else None, "\n".join(downloaded_links),"\n".join(failed_dois), "" # returning only results here and not in lambda
456
-
457
 
458
  def process_bibtex(self, bib_file, cancel_event):# removed async here
459
  """Process BibTeX file and download papers with multiple strategies and reports UI updates using a callback"""
@@ -480,16 +475,16 @@ class PaperDownloader:
480
  downloaded_files = []
481
  failed_dois = []
482
  downloaded_links = []
483
-
484
  for doi in dois:
485
- result = self._download_single_doi(doi, cancel_event) # removed lambda call from executor
486
-
487
- if cancel_event.is_set():
488
- logger.info("Download Cancelled in bibtex mode")
489
- return None, "Download Cancelled", "Download Cancelled" ,"" #cancel if requested
490
-
491
- if result is None: #skips if it contains null values as a results.
492
- continue
493
 
494
  if isinstance(result, Exception):
495
  # Excepción inesperada
@@ -499,29 +494,30 @@ class PaperDownloader:
499
 
500
  elif result[0] is None:
501
  # Descarga fallida (resultado de download_single_doi_async)
502
- error_msg = result[1]
503
- logger.warning(f"Failed to download {doi}: {error_msg}")
504
- failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
505
-
506
  else:
507
- # Descarga exitosa
508
- filepath = result[0]
509
 
510
- # Unique filename for zip
511
- filename = f"{str(doi).replace('/', '_').replace('.', '_')}_{i}.pdf"
512
- filepath_unique = os.path.join(self.output_dir, filename)
513
- os.rename(filepath, filepath_unique)
514
- downloaded_files.append(filepath_unique)
515
- downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
516
-
 
517
  if downloaded_files:
518
  zip_filename = 'papers.zip'
519
  loop = asyncio.get_running_loop()
520
  loop.run_in_executor(self.executor, lambda: self.create_zip(zip_filename,downloaded_files))
521
  logger.info(f"ZIP file created: {zip_filename}")
522
 
523
- return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois),"" #send all results
524
-
525
 
526
  def create_zip(self, zip_filename, files):
527
  """Crea un archivo zip con los pdfs descargados"""
@@ -544,45 +540,48 @@ def create_gradio_interface():
544
 
545
 
546
  def download_papers(bib_file, doi_input, dois_input, output_file, downloaded_dois_textbox,failed_dois_textbox,logs, single_file):
547
- cancel_event = asyncio.Event() # Create cancellation event for every submission.
548
- downloader.cancel_event = cancel_event # store the event so that it is available to stop the process
549
-
550
-
551
- if bib_file:
552
- # Check file type
553
- if not bib_file.name.lower().endswith('.bib'):
554
- return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", "", None
 
 
 
 
 
 
 
 
 
 
 
555
 
556
- downloader.download_task = downloader.executor.submit(
557
- downloader.process_bibtex,
558
- bib_file,
559
- cancel_event # Added cancellation event.
560
- )
561
- zip_file, downloaded_dois, failed_dois, logs_text = downloader.download_task.result() # gets results from method
562
- return zip_file, downloaded_dois, failed_dois, logs_text, None # we use this method because all outputs values were already done inside the callback
 
563
 
564
-
565
- elif doi_input:
566
- downloader.download_task = downloader.executor.submit( #changed async execution method
567
- downloader._download_single_doi,
568
- doi_input,
569
- cancel_event
570
- )
571
- filepath, message, error= downloader.download_task.result()
572
-
573
- return None, message, error, "", filepath
574
-
575
- elif dois_input:
576
- downloader.download_task = downloader.executor.submit( #changed async execution method
577
  downloader.download_multiple_dois,
578
  dois_input,
579
- cancel_event
580
- )
581
- zip_file, downloaded_dois, failed_dois, logs_text= downloader.download_task.result()
582
- return zip_file, downloaded_dois, failed_dois, logs_text, None #we use the direct assignments with returns as before on multi.
 
 
 
 
583
 
584
- else:
585
- return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", "", None # all parameters must have return data, to prevent gradio to crash on incomplete data for block
586
 
587
  with gr.Blocks(theme="Hev832/Applio", css="""
588
  .gradio-container {
@@ -641,17 +640,16 @@ def create_gradio_interface():
641
  stop_button.click(lambda: downloader.cancel_download(), outputs=None) # added function in object downloader
642
 
643
  submit_button.click(
644
- download_papers,
645
- inputs=[bib_file, doi_input, dois_input],
646
- outputs=[output_file, downloaded_dois_textbox, failed_dois_textbox,logs, single_file ], # the new output should be a tuple and we output logs too for debugging.
647
-
648
- )
649
-
650
  interface.title="🔬 Academic Paper Batch Downloader"
651
  interface.description="Upload a BibTeX file or enter DOIs to download PDFs. We'll attempt to fetch PDFs from multiple sources like Sci-Hub, Libgen, Google Scholar and Crossref. You can use any of the three inputs at any moment."
652
-
653
  return interface
654
-
655
 
656
  def main():
657
  interface = create_gradio_interface()
 
334
  def _download_single_doi(self, doi, cancel_event): # removed async keyword
335
  """Descargar un único DOI con retroalimentación de progreso"""
336
  if not doi:
 
337
  return None, "Error: DOI no proporcionado", "Error: DOI no proporcionado"
338
  logger.info(f"Starting download process for DOI: {doi}")
339
 
 
362
  logger.warning(f"No se pudo descargar: {doi}")
363
  return None, f"No se pudo descargar {doi}", f'<a href="https://doi.org/{doi}">{doi}</a>'
364
 
365
+ return asyncio.run(call_async()) #added the loop event here
366
+
367
  except CancelledError:
368
  logger.info(f"Download Cancelled DOI: {doi}")
369
+ return None, f"Download cancelled {doi}","Download Cancelled"
370
 
371
  except Exception as e:
372
  logger.error(f"Error processing {doi}: {e}")
373
+ return None, f"Error processing {doi}: {e}", f"Error processing {doi}: {e}"
374
 
375
 
376
  def download_multiple_dois(self, dois_text, cancel_event): #removed async here
377
  """Download multiple DOIs"""
378
  # Validar entrada
379
  if not dois_text:
380
+ return None, "Error: No DOIs provided", "Error: No DOIs provided", ""
 
381
 
382
  # Sanitizar y filtrar DOIs
383
  # Eliminar líneas vacías, espacios en blanco, y DOIs duplicados
 
385
 
386
  # Validar lista de DOIs
387
  if not dois:
388
+ return None, "Error: No valid DOIs provided", "Error: No valid DOIs provided", ""
 
389
 
390
  # Listas para rastrear resultados
391
  downloaded_files = [] # Rutas de archivos descargados
392
  failed_dois = [] # DOIs que no se pudieron descargar
393
  downloaded_links = [] # Links de DOIs descargados
394
+
395
  for doi in dois:
396
+ result = self._download_single_doi(doi,cancel_event) # now single methods directly
 
397
 
398
  if cancel_event.is_set():
 
399
  logger.info("Downloads cancelled on multiple dois download")
400
+ return None,"Downloads cancelled","Downloads cancelled", ""# early return on cancelled
401
+
402
+ if result is None:
403
+ continue
404
+
405
  if isinstance(result, Exception):
406
+ # Excepción inesperada
407
  error_msg = f"Unexpected error: {str(result)}"
408
  logger.error(f"Error downloading {doi}: {error_msg}")
409
  failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
 
415
  failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
416
 
417
  else:
418
+ # Descarga exitosa
419
  filepath = result[0]
420
 
421
+ # Generar nombre de archivo único
422
  filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
423
  filepath_unique = os.path.join(self.output_dir, filename)
424
+
425
  try:
426
  # Renombrar archivo
427
  os.rename(filepath, filepath_unique)
428
+
429
+ # Añadir a lista de archivos descargados
430
  downloaded_files.append(filepath_unique)
431
  downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
432
 
433
  except Exception as rename_error:
434
+ logger.error(f"Error renaming file for {doi}: {rename_error}")
435
+ failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - Error saving file')
436
 
437
 
438
  # Crear archivo ZIP si hay archivos descargados
 
448
  )
449
  logger.info(f"ZIP file created: {zip_filename}")
450
 
451
+ return zip_filename if downloaded_files else None, "\n".join(downloaded_links),"\n".join(failed_dois), ""
 
452
 
453
  def process_bibtex(self, bib_file, cancel_event):# removed async here
454
  """Process BibTeX file and download papers with multiple strategies and reports UI updates using a callback"""
 
475
  downloaded_files = []
476
  failed_dois = []
477
  downloaded_links = []
478
+
479
  for doi in dois:
480
+ result= self._download_single_doi(doi,cancel_event) # calls function
481
+
482
+ if cancel_event.is_set():
483
+ logger.info("Download Cancelled in bibtex mode")
484
+ return None, "Download Cancelled", "Download Cancelled", ""#cancel if requested
485
+
486
+ if result is None: #skips nulls to perform continue on a multi processing with threads.
487
+ continue;
488
 
489
  if isinstance(result, Exception):
490
  # Excepción inesperada
 
494
 
495
  elif result[0] is None:
496
  # Descarga fallida (resultado de download_single_doi_async)
497
+ error_msg = result[1]
498
+ logger.warning(f"Failed to download {doi}: {error_msg}")
499
+ failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
500
+
501
  else:
502
+ # Descarga exitosa
503
+ filepath = result[0]
504
 
505
+ # Unique filename for zip
506
+ filename = f"{str(doi).replace('/', '_').replace('.', '_')}_{i}.pdf"
507
+ filepath_unique = os.path.join(self.output_dir, filename)
508
+ os.rename(filepath, filepath_unique)
509
+ downloaded_files.append(filepath_unique)
510
+ downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
511
+
512
+
513
  if downloaded_files:
514
  zip_filename = 'papers.zip'
515
  loop = asyncio.get_running_loop()
516
  loop.run_in_executor(self.executor, lambda: self.create_zip(zip_filename,downloaded_files))
517
  logger.info(f"ZIP file created: {zip_filename}")
518
 
519
+ return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois), ""# all callbacks done, sending empty logs so no value breaks on UI.
520
+
521
 
522
  def create_zip(self, zip_filename, files):
523
  """Crea un archivo zip con los pdfs descargados"""
 
540
 
541
 
542
  def download_papers(bib_file, doi_input, dois_input, output_file, downloaded_dois_textbox,failed_dois_textbox,logs, single_file):
543
+
544
+
545
+ cancel_event = asyncio.Event() # Create cancellation event for every submission.
546
+ downloader.cancel_event = cancel_event # store the event so that it is available to stop the process
547
+ if bib_file:
548
+ # Check file type
549
+ if not bib_file.name.lower().endswith('.bib'):
550
+ return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file","", None
551
+
552
+ downloader.download_task = downloader.executor.submit(
553
+ downloader.process_bibtex,
554
+ bib_file,
555
+ cancel_event # Added cancellation event.
556
+ )
557
+ zip_file, downloaded_dois, failed_dois, logs_text= downloader.download_task.result() #gets the values returned.
558
+
559
+ return zip_file, downloaded_dois, failed_dois, logs_text, None # simple return
560
+
561
+ elif doi_input:
562
 
563
+ downloader.download_task = downloader.executor.submit( #changed async execution method
564
+ downloader._download_single_doi,
565
+ doi_input,
566
+ cancel_event
567
+ )
568
+
569
+ filepath, message, error = downloader.download_task.result()#gets results, nothing more
570
+ return None, message, error, "", filepath
571
 
572
+ elif dois_input:
573
+ downloader.download_task = downloader.executor.submit( #changed async execution method
 
 
 
 
 
 
 
 
 
 
 
574
  downloader.download_multiple_dois,
575
  dois_input,
576
+ cancel_event #Add cancellation event.
577
+ )
578
+ zip_file, downloaded_dois, failed_dois, logs_text= downloader.download_task.result()
579
+ return zip_file, downloaded_dois, failed_dois, logs_text, None
580
+
581
+ else:
582
+ return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", "", None#all params provided
583
+
584
 
 
 
585
 
586
  with gr.Blocks(theme="Hev832/Applio", css="""
587
  .gradio-container {
 
640
  stop_button.click(lambda: downloader.cancel_download(), outputs=None) # added function in object downloader
641
 
642
  submit_button.click(
643
+ download_papers,
644
+ inputs=[bib_file, doi_input, dois_input],
645
+ outputs=[output_file, downloaded_dois_textbox, failed_dois_textbox,logs, single_file ] # the new output should be a tuple and we output logs too for debugging.
646
+ )
647
+
648
+
649
  interface.title="🔬 Academic Paper Batch Downloader"
650
  interface.description="Upload a BibTeX file or enter DOIs to download PDFs. We'll attempt to fetch PDFs from multiple sources like Sci-Hub, Libgen, Google Scholar and Crossref. You can use any of the three inputs at any moment."
651
+
652
  return interface
 
653
 
654
  def main():
655
  interface = create_gradio_interface()