C2MV committed on
Commit
434b119
verified
1 Parent(s): efd181d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -66
app.py CHANGED
@@ -356,18 +356,19 @@ class PaperDownloader:
356
 
357
  logger.info(f"Descarga exitosa: {filename}")
358
 
359
- progress_callback(filepath, f"Descargado exitosamente: <a href='https://doi.org/{doi}'>{doi}</a>", "", None ) # 4 args with filepath at begin
360
-
361
 
362
  else:
363
  logger.warning(f"No se pudo descargar: {doi}")
364
- progress_callback(None, f"No se pudo descargar {doi}", f'<a href="https://doi.org/{doi}">{doi}</a>', None)
365
 
366
- asyncio.run(call_async()) #added the loop event here
 
367
 
368
  except CancelledError:
369
  logger.info(f"Download Cancelled DOI: {doi}")
370
- progress_callback(None, f"Download cancelled {doi}","Download Cancelled", None) #send proper types with null values
371
 
372
  except Exception as e:
373
  logger.error(f"Error processing {doi}: {e}")
@@ -378,7 +379,7 @@ class PaperDownloader:
378
  """Download multiple DOIs"""
379
  # Validar entrada
380
  if not dois_text:
381
- progress_callback(None, "Error: No DOIs provided", "Error: No DOIs provided","", None)
382
  return None
383
 
384
  # Sanitizar y filtrar DOIs
@@ -387,7 +388,7 @@ class PaperDownloader:
387
 
388
  # Validar lista de DOIs
389
  if not dois:
390
- progress_callback(None, "Error: No valid DOIs provided", "Error: No valid DOIs provided","", None)
391
  return None
392
 
393
  # Listas para rastrear resultados
@@ -397,47 +398,52 @@ class PaperDownloader:
397
 
398
 
399
  for doi in dois:
400
- self._download_single_doi(doi, lambda a,b,c,d: progress_callback(a,b,c, d) , cancel_event )
401
- if cancel_event.is_set():
402
- logger.info("Downloads cancelled on multiple dois download")
403
- progress_callback(None, "Downloads cancelled","Downloads cancelled","", None) # early return on cancelled
404
- return None #break here when is cancelled
405
 
406
- result = self.results_dict.get(doi, (None,None,"", None)) # obtain from self.results
407
-
 
 
 
408
  # Manejar diferentes tipos de resultados
409
- if isinstance(result, Exception):
410
  # Excepción inesperada
411
  error_msg = f"Unexpected error: {str(result)}"
412
  logger.error(f"Error downloading {doi}: {error_msg}")
413
  failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
414
-
415
- elif result[0] is None:
416
- # Descarga fallida (resultado de download_single_doi_async)
417
  error_msg = result[1]
418
  logger.warning(f"Failed to download {doi}: {error_msg}")
419
  failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
420
-
421
- else:
422
- # Descarga exitosa
423
- filepath = result[0]
424
 
425
- # Generar nombre de archivo único
426
- filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
427
- filepath_unique = os.path.join(self.output_dir, filename)
428
 
429
- try:
430
- # Renombrar archivo
431
- os.rename(filepath, filepath_unique)
432
 
433
- # Añadir a lista de archivos descargados
434
- downloaded_files.append(filepath_unique)
435
- downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
436
 
437
- except Exception as rename_error:
438
  logger.error(f"Error renaming file for {doi}: {rename_error}")
439
  failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - Error saving file')
440
 
 
 
441
  # Crear archivo ZIP si hay archivos descargados
442
  zip_filename = None
443
  if downloaded_files:
@@ -453,7 +459,7 @@ class PaperDownloader:
453
 
454
 
455
  # Devolver resultados
456
- progress_callback( zip_filename if downloaded_files else None, "\n".join(downloaded_links),"\n".join(failed_dois), "" , None ) # Send null for zip result to fill params
457
  return
458
 
459
  def process_bibtex(self, bib_file, progress_callback, cancel_event):# removed async here
@@ -483,17 +489,19 @@ class PaperDownloader:
483
  downloaded_files = []
484
  failed_dois = []
485
  downloaded_links = []
486
-
487
  for doi in dois:
488
 
489
- self._download_single_doi(doi, lambda a,b,c, d: progress_callback(a,b,c, d), cancel_event )
490
  if cancel_event.is_set():
491
  logger.info("Download Cancelled in bibtex mode")
492
  progress_callback(None, "Download Cancelled", "Download Cancelled", None)
493
  return None #cancel if requested
494
 
495
- result = self.results_dict.get(doi, (None,None,"", None)) # obtain from self.results
496
 
 
 
 
497
  if isinstance(result, Exception):
498
  # Excepción inesperada
499
  error_msg = f"Unexpected error: {str(result)}"
@@ -524,8 +532,7 @@ class PaperDownloader:
524
  loop.run_in_executor(self.executor, lambda: self.create_zip(zip_filename,downloaded_files))
525
  logger.info(f"ZIP file created: {zip_filename}")
526
 
527
- progress_callback(zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois), "", None )#added a "", None in order to work correctly on Blocks interface output
528
-
529
  return
530
 
531
  def create_zip(self, zip_filename, files):
@@ -547,10 +554,10 @@ def create_gradio_interface():
547
  downloader.results_dict = {} #shared results dict, since it runs on different threads
548
 
549
  def update_progress( message="", logs=""):
550
- return gr.Textbox.update(value=f"{message}"),gr.Textbox.update(value=f"<pre>{logs}</pre>")
551
 
552
 
553
- def download_papers(bib_file, doi_input, dois_input):
554
  cancel_event = asyncio.Event() # Create cancellation event for every submission.
555
  downloader.cancel_event = cancel_event # store the event so that it is available to stop the process
556
 
@@ -560,49 +567,49 @@ def create_gradio_interface():
560
 
561
  #store data for single or multiple mode on download_papers execution.
562
  if doi:
563
- downloader.results_dict[doi] = (filepath, message,fail_message, "")
564
-
565
-
566
- updates = update_progress(message, fail_message) #send update info.
567
 
568
- return updates
 
569
 
570
  if bib_file:
571
  # Check file type
572
  if not bib_file.name.lower().endswith('.bib'):
573
- return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", "", None #add empty string to satisfy 5 out
 
574
 
575
  downloader.download_task = downloader.executor.submit(
576
  downloader.process_bibtex,
577
- bib_file,
578
- lambda a,b,c, d: update_progress(a,f"{b}<br>{c}"), #convert for ui output, the return data will contain the HTML, send 4
579
  cancel_event # Added cancelllation event.
580
  )
 
 
581
 
582
- return None,"","", "", None # returns five
583
 
584
  elif doi_input:
585
  downloader.download_task = downloader.executor.submit( #changed async execution method
586
- downloader._download_single_doi,
587
- doi_input,
588
- lambda a,b,c,d: update_progress(a,f"{b}<br>{c}") , #callback function, format output and send html info, removed lambda from executor calls
589
- cancel_event # Add cancellation event.
590
- )
591
 
592
- return None,"","", "", None # returns five
593
-
594
  elif dois_input:
595
- downloader.download_task = downloader.executor.submit( #changed async execution method
596
- downloader.download_multiple_dois,
597
- dois_input,
598
- lambda a,b,c, d: update_progress(a,f"{b}<br>{c}"), #callback function
599
- cancel_event #Add cancellation event.
600
- )
601
-
602
- return None, "","", "", None #returns five
603
 
604
  else:
605
- return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", "", None#returns five
606
 
607
 
608
  with gr.Blocks(theme="Hev832/Applio", css="""
@@ -664,7 +671,8 @@ def create_gradio_interface():
664
  submit_button.click(
665
  download_papers,
666
  inputs=[bib_file, doi_input, dois_input],
667
- outputs=[output_file, downloaded_dois_textbox, failed_dois_textbox,logs, single_file ] # the new output should be a tuple and we output logs too for debugging.
 
668
  )
669
 
670
  interface.title="馃敩 Academic Paper Batch Downloader"
 
356
 
357
  logger.info(f"Descarga exitosa: {filename}")
358
 
359
+ return filepath, f"Descargado exitosamente: <a href='https://doi.org/{doi}'>{doi}</a>", ""
360
+
361
 
362
  else:
363
  logger.warning(f"No se pudo descargar: {doi}")
364
+ return None, f"No se pudo descargar {doi}", f'<a href="https://doi.org/{doi}">{doi}</a>'
365
 
366
+ filepath, message, error = asyncio.run(call_async()) #added the loop event here
367
+ progress_callback(filepath, message, error, None ) # call this once the callback is made
368
 
369
  except CancelledError:
370
  logger.info(f"Download Cancelled DOI: {doi}")
371
+ progress_callback(None, f"Download cancelled {doi}","Download Cancelled", None ) #send proper types with null values
372
 
373
  except Exception as e:
374
  logger.error(f"Error processing {doi}: {e}")
 
379
  """Download multiple DOIs"""
380
  # Validar entrada
381
  if not dois_text:
382
+ progress_callback(None, "Error: No DOIs provided", "Error: No DOIs provided","" , None)
383
  return None
384
 
385
  # Sanitizar y filtrar DOIs
 
388
 
389
  # Validar lista de DOIs
390
  if not dois:
391
+ progress_callback(None, "Error: No valid DOIs provided", "Error: No valid DOIs provided", "" , None)
392
  return None
393
 
394
  # Listas para rastrear resultados
 
398
 
399
 
400
  for doi in dois:
401
+ filepath, message, error= self._download_single_doi(doi, lambda a,b,c,d : progress_callback(a,b,c,d), cancel_event )
402
+ if cancel_event.is_set():
403
+ logger.info("Downloads cancelled on multiple dois download")
404
+ progress_callback(None, "Downloads cancelled","Downloads cancelled", None) # early return on cancelled
405
+ return None #break here when is cancelled
406
 
407
+ result = self.results_dict.get(doi, (None,None,"", None)) # obtain from self.results
408
+
409
+ if result is None: #when errors happen results are none
410
+ continue;
411
+
412
  # Manejar diferentes tipos de resultados
413
+ if isinstance(result, Exception):
414
  # Excepción inesperada
415
  error_msg = f"Unexpected error: {str(result)}"
416
  logger.error(f"Error downloading {doi}: {error_msg}")
417
  failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
418
+
419
+ elif result[0] is None:
420
+ # Descarga fallida (resultado de download_single_doi_async)
421
  error_msg = result[1]
422
  logger.warning(f"Failed to download {doi}: {error_msg}")
423
  failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
424
+
425
+ else:
426
+ # Descarga exitosa
427
+ filepath = result[0]
428
 
429
+ # Generar nombre de archivo único
430
+ filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
431
+ filepath_unique = os.path.join(self.output_dir, filename)
432
 
433
+ try:
434
+ # Renombrar archivo
435
+ os.rename(filepath, filepath_unique)
436
 
437
+ # Añadir a lista de archivos descargados
438
+ downloaded_files.append(filepath_unique)
439
+ downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
440
 
441
+ except Exception as rename_error:
442
  logger.error(f"Error renaming file for {doi}: {rename_error}")
443
  failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - Error saving file')
444
 
445
+
446
+
447
  # Crear archivo ZIP si hay archivos descargados
448
  zip_filename = None
449
  if downloaded_files:
 
459
 
460
 
461
  # Devolver resultados
462
+ progress_callback( zip_filename if downloaded_files else None, "\n".join(downloaded_links),"\n".join(failed_dois),"" , None) # send empty values on callback to not break it.
463
  return
464
 
465
  def process_bibtex(self, bib_file, progress_callback, cancel_event):# removed async here
 
489
  downloaded_files = []
490
  failed_dois = []
491
  downloaded_links = []
 
492
  for doi in dois:
493
 
494
+ filepath, message, error= self._download_single_doi(doi, lambda a,b,c,d: progress_callback(a,b,c,d), cancel_event ) # added lambda for params handling.
495
  if cancel_event.is_set():
496
  logger.info("Download Cancelled in bibtex mode")
497
  progress_callback(None, "Download Cancelled", "Download Cancelled", None)
498
  return None #cancel if requested
499
 
500
+ result = self.results_dict.get(doi, (None,None,"",None)) # obtain from self.results
501
 
502
+ if result is None:
503
+ continue # skips for a None type results when callback fails
504
+
505
  if isinstance(result, Exception):
506
  # Excepción inesperada
507
  error_msg = f"Unexpected error: {str(result)}"
 
532
  loop.run_in_executor(self.executor, lambda: self.create_zip(zip_filename,downloaded_files))
533
  logger.info(f"ZIP file created: {zip_filename}")
534
 
535
+ progress_callback(zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois),"") # send "", None to conform output
 
536
  return
537
 
538
  def create_zip(self, zip_filename, files):
 
554
  downloader.results_dict = {} #shared results dict, since it runs on different threads
555
 
556
  def update_progress( message="", logs=""):
557
+ return gr.Textbox.update(value=f"{message}"),gr.Textbox.update(value=f"<pre>{logs}</pre>")
558
 
559
 
560
+ def download_papers(bib_file, doi_input, dois_input, output_file, downloaded_dois_textbox,failed_dois_textbox,logs, single_file):
561
  cancel_event = asyncio.Event() # Create cancellation event for every submission.
562
  downloader.cancel_event = cancel_event # store the event so that it is available to stop the process
563
 
 
567
 
568
  #store data for single or multiple mode on download_papers execution.
569
  if doi:
570
+ downloader.results_dict[doi] = (filepath, message,fail_message, "")
 
 
 
571
 
572
+
573
+ return update_progress(message, fail_message) # send return values only with results
574
 
575
  if bib_file:
576
  # Check file type
577
  if not bib_file.name.lower().endswith('.bib'):
578
+ return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", "", None # must return all 5 results at each possibility
579
+
580
 
581
  downloader.download_task = downloader.executor.submit(
582
  downloader.process_bibtex,
583
+ bib_file,
584
+ lambda a,b,c, d: update_progress(a,f"{b}<br>{c}"), #convert for ui output, the return data will contain the HTML
585
  cancel_event # Added cancelllation event.
586
  )
587
+
588
+ return None, "","", "", None #must be None ,str , str, str, None ( five params)
589
 
 
590
 
591
  elif doi_input:
592
  downloader.download_task = downloader.executor.submit( #changed async execution method
593
+ downloader._download_single_doi,
594
+ doi_input,
595
+ lambda a,b,c,d: update_progress(a,f"{b}<br>{c}"), #callback function, format output and send html info, removed lambda from executor calls
596
+ cancel_event # Add cancellation event.
597
+ )
598
 
599
+ return None, "","", "", None #must be None ,str , str, str, None ( five params)
600
+
601
  elif dois_input:
602
+ downloader.download_task = downloader.executor.submit( #changed async execution method
603
+ downloader.download_multiple_dois,
604
+ dois_input,
605
+ lambda a,b,c,d: update_progress(a,f"{b}<br>{c}"), #callback function, return simple values
606
+ cancel_event #Add cancellation event.
607
+ )
608
+
609
+ return None, "","", "", None #must be None ,str , str, str, None ( five params)
610
 
611
  else:
612
+ return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", "", None #must be None ,str , str, str, None ( five params)
613
 
614
 
615
  with gr.Blocks(theme="Hev832/Applio", css="""
 
671
  submit_button.click(
672
  download_papers,
673
  inputs=[bib_file, doi_input, dois_input],
674
+ outputs=[output_file, downloaded_dois_textbox, failed_dois_textbox,logs, single_file ], # the new output should be a tuple and we output logs too for debugging.
675
+
676
  )
677
 
678
  interface.title="馃敩 Academic Paper Batch Downloader"