C2MV committed
Commit db19829 (verified)
1 Parent(s): 0ae971f

Update app.py

Files changed (1)
  1. app.py +88 -113
app.py CHANGED
@@ -68,7 +68,6 @@ class PaperDownloader:
  retry_count = 0

  while redirect_count <= max_redirects:
-
  try:
  while retry_count <= max_retries:
  try:
@@ -331,54 +330,47 @@ class PaperDownloader:
  # If all retries are exhausted
  return None

- def _download_single_doi(self, doi, cancel_event): # removed async keyword
  """Download a single DOI with progress feedback"""
  if not doi:
-
  return None, "Error: DOI no proporcionado", "Error: DOI no proporcionado"
  logger.info(f"Starting download process for DOI: {doi}")

  try:

- async def call_async(): # Added this in order to execute correctly on executor
-
- pdf_content = await self.download_with_retry_async(doi)
- if pdf_content:
  logger.info(f"Downloaded PDF for DOI: {doi}")
  filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
  filepath = os.path.join(self.output_dir, filename)

  # Write the PDF content

- open(filepath, 'wb').write(pdf_content)

  logger.info(f"Saved PDF to file: {filepath}")

  logger.info(f"Descarga exitosa: {filename}")

  return filepath, f"Descargado exitosamente: <a href='https://doi.org/{doi}'>{doi}</a>", ""
-
-
  else:
  logger.warning(f"No se pudo descargar: {doi}")
  return None, f"No se pudo descargar {doi}", f'<a href="https://doi.org/{doi}">{doi}</a>'
-
- return asyncio.run(call_async()) # added the loop event here
-
  except CancelledError:
- logger.info(f"Download Cancelled DOI: {doi}")
- return None, f"Download cancelled {doi}", "Download Cancelled"

  except Exception as e:
- logger.error(f"Error processing {doi}: {e}")
- return None, f"Error processing {doi}: {e}", f"Error processing {doi}: {e}"


- def download_multiple_dois(self, dois_text, cancel_event): # removed async here
  """Download multiple DOIs"""
- # Validate input
  if not dois_text:
- return None, "Error: No DOIs provided", "Error: No DOIs provided", ""

  # Sanitize and filter DOIs
  # Remove empty lines, whitespace, and duplicate DOIs
@@ -394,48 +386,47 @@ class PaperDownloader:
  downloaded_links = [] # Links of downloaded DOIs

  for doi in dois:
- result = self._download_single_doi(doi, cancel_event) # all downloads return their info
-
- if cancel_event.is_set():
- logger.info("Downloads cancelled on multiple dois download")
- return None, "Downloads cancelled", "Downloads cancelled", "" # early return on cancelled

- if result is None:
- continue

- if isinstance(result, Exception):
- # Unexpected exception
- error_msg = f"Unexpected error: {str(result)}"
- logger.error(f"Error downloading {doi}: {error_msg}")
- failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
-
- elif result[0] is None:
- # Failed download (result from download_single_doi_async)
- error_msg = result[1]
- logger.warning(f"Failed to download {doi}: {error_msg}")
- failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')

- else:
- # Successful download
- filepath = result[0]

- # Generate a unique file name
- filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
- filepath_unique = os.path.join(self.output_dir, filename)
-
- try:
- # Rename the file
- os.rename(filepath, filepath_unique)
-
- # Add to the list of downloaded files
- downloaded_files.append(filepath_unique)
- downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')

- except Exception as rename_error:
- logger.error(f"Error renaming file for {doi}: {rename_error}")
- failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - Error saving file')
-
-
  # Create a ZIP file if there are downloaded files
  zip_filename = None
  if downloaded_files:
@@ -448,11 +439,10 @@ class PaperDownloader:
  lambda: self.create_zip(zip_filename, downloaded_files)
  )
  logger.info(f"ZIP file created: {zip_filename}")
-
-
  return zip_filename if downloaded_files else None, "\n".join(downloaded_links), "\n".join(failed_dois), ""

- def process_bibtex(self, bib_file, cancel_event): # removed async here
  """Process BibTeX file and download papers with multiple strategies and reports UI updates using a callback"""
  # Read BibTeX file content from the uploaded object
  try:
@@ -477,46 +467,47 @@ class PaperDownloader:
  downloaded_files = []
  failed_dois = []
  downloaded_links = []
  for doi in dois:
- result = self._download_single_doi(doi, cancel_event)
-
- if cancel_event.is_set():
  logger.info("Download Cancelled in bibtex mode")
- return None, "Download Cancelled", "Download Cancelled", ""
-
- if result is None:
  continue;
-
- if isinstance(result, Exception):
  # Unexpected exception
  error_msg = f"Unexpected error: {str(result)}"
  logger.error(f"Error downloading {doi}: {error_msg}")
  failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')

- elif result[0] is None:
  # Failed download (result from download_single_doi_async)
  error_msg = result[1]
  logger.warning(f"Failed to download {doi}: {error_msg}")
  failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
-
- else:
- # Successful download
  filepath = result[0]

- # Unique filename for zip
  filename = f"{str(doi).replace('/', '_').replace('.', '_')}_{i}.pdf"
  filepath_unique = os.path.join(self.output_dir, filename)
  os.rename(filepath, filepath_unique)
  downloaded_files.append(filepath_unique)
  downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
-
  if downloaded_files:
  zip_filename = 'papers.zip'
  loop = asyncio.get_running_loop()
  loop.run_in_executor(self.executor, lambda: self.create_zip(zip_filename, downloaded_files))
  logger.info(f"ZIP file created: {zip_filename}")
-
- return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois), ""


  def create_zip(self, zip_filename, files):
@@ -534,53 +525,35 @@ class PaperDownloader:
  def create_gradio_interface():
  """Create Gradio interface for Paper Downloader"""
  downloader = PaperDownloader()
-
-
  def update_progress( message="", logs=""):
  return gr.Textbox.update(value=f"{message}"),gr.Textbox.update(value=f"<pre>{logs}</pre>")


- def download_papers(bib_file, doi_input, dois_input, output_file, downloaded_dois_textbox,failed_dois_textbox,logs, single_file):
  cancel_event = asyncio.Event() # Create cancellation event for every submission.
  downloader.cancel_event = cancel_event # store the event so that it is available to stop the process
  if bib_file:
- # Check file type
- if not bib_file.name.lower().endswith('.bib'):
- return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", "", None
-
- downloader.download_task = downloader.executor.submit(
- downloader.process_bibtex,
- bib_file,
- cancel_event
- )
-
- zip_file, downloaded_dois, failed_dois, logs_text = downloader.download_task.result()
-
- return zip_file, downloaded_dois, failed_dois, logs_text, None # just direct return now, no callbacks or specific UI methods on callback functions

  elif doi_input:

- downloader.download_task = downloader.executor.submit( # changed async execution method
- downloader._download_single_doi,
- doi_input,
- cancel_event
- )
-
- filepath, message, error = downloader.download_task.result()

- return None, message, error, "", filepath # just direct return now, no callbacks or specific UI methods on callback functions

- elif dois_input:
- downloader.download_task = downloader.executor.submit( # changed async execution method
- downloader.download_multiple_dois,
- dois_input,
- cancel_event
- )
-
- zip_file, downloaded_dois, failed_dois, logs_text = downloader.download_task.result()
- return zip_file, downloaded_dois, failed_dois, logs_text, None # just direct return now, no callbacks or specific UI methods on callback functions
  else:
- return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", "", None # must also have five values to satisfy gradio block method


  with gr.Blocks(theme="Hev832/Applio", css="""
@@ -642,12 +615,14 @@ def create_gradio_interface():
  submit_button.click(
  download_papers,
  inputs=[bib_file, doi_input, dois_input],
- outputs=[output_file, downloaded_dois_textbox, failed_dois_textbox, logs, single_file] # the new output should be a tuple and we output logs too for debugging.
  )
-
  interface.title="🔬 Academic Paper Batch Downloader"
  interface.description="Upload a BibTeX file or enter DOIs to download PDFs. We'll attempt to fetch PDFs from multiple sources like Sci-Hub, Libgen, Google Scholar and Crossref. You can use any of the three inputs at any moment."
-
  return interface

  def main():
 
  retry_count = 0

  while redirect_count <= max_redirects:
  try:
  while retry_count <= max_retries:
  try:
 
  # If all retries are exhausted
  return None

+ async def _download_single_doi(self, doi, cancel_event): # removed async keyword
  """Download a single DOI with progress feedback"""
  if not doi:
  return None, "Error: DOI no proporcionado", "Error: DOI no proporcionado"
  logger.info(f"Starting download process for DOI: {doi}")

  try:

+ pdf_content = await self.download_with_retry_async(doi)
+ if pdf_content:
  logger.info(f"Downloaded PDF for DOI: {doi}")
  filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
  filepath = os.path.join(self.output_dir, filename)

  # Write the PDF content

+ with open(filepath, 'wb') as f: # added context for handling resource and also made sync the file method
+ f.write(pdf_content)

  logger.info(f"Saved PDF to file: {filepath}")

  logger.info(f"Descarga exitosa: {filename}")

  return filepath, f"Descargado exitosamente: <a href='https://doi.org/{doi}'>{doi}</a>", ""
  else:
  logger.warning(f"No se pudo descargar: {doi}")
  return None, f"No se pudo descargar {doi}", f'<a href="https://doi.org/{doi}">{doi}</a>'
+
  except CancelledError:
+ logger.info(f"Download Cancelled DOI: {doi}")
+ return None, f"Download cancelled {doi}", "Download Cancelled"

  except Exception as e:
+ logger.error(f"Error processing {doi}: {e}")
+ return None, f"Error processing {doi}: {e}", f"Error processing {doi}: {e}"


+ async def download_multiple_dois(self, dois_text, cancel_event):
  """Download multiple DOIs"""
  if not dois_text:
+ return None, "Error: No DOIs provided", "Error: No DOIs provided", ""

  # Sanitize and filter DOIs
  # Remove empty lines, whitespace, and duplicate DOIs
 
  downloaded_links = [] # Links of downloaded DOIs

  for doi in dois:
+ result = await self._download_single_doi(doi, cancel_event)

+ if cancel_event.is_set():
+ logger.info("Downloads cancelled on multiple dois download")
+ return None, "Downloads cancelled", "Downloads cancelled", "" # early return on cancelled

+ if result is None:
+ continue
+
+ if isinstance(result, Exception):
+ # Unexpected exception
+ error_msg = f"Unexpected error: {str(result)}"
+ logger.error(f"Error downloading {doi}: {error_msg}")
+ failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')

+ elif result[0] is None:
+ # Failed download (result from download_single_doi_async)
+ error_msg = result[1]
+ logger.warning(f"Failed to download {doi}: {error_msg}")
+ failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
+
+ else:
+ # Successful download
+ filepath = result[0]

+ # Generate a unique file name
+ filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
+ filepath_unique = os.path.join(self.output_dir, filename)
+
+ try:
+ # Rename the file
+ os.rename(filepath, filepath_unique)
+
+ # Add to the list of downloaded files
+ downloaded_files.append(filepath_unique)
+ downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')

+ except Exception as rename_error:
+ logger.error(f"Error renaming file for {doi}: {rename_error}")
+ failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - Error saving file')
+
  # Create a ZIP file if there are downloaded files
  zip_filename = None
  if downloaded_files:
 
  lambda: self.create_zip(zip_filename, downloaded_files)
  )
  logger.info(f"ZIP file created: {zip_filename}")
+
  return zip_filename if downloaded_files else None, "\n".join(downloaded_links), "\n".join(failed_dois), ""

+ async def process_bibtex(self, bib_file, cancel_event): # removed async here
  """Process BibTeX file and download papers with multiple strategies and reports UI updates using a callback"""
  # Read BibTeX file content from the uploaded object
  try:
 
  downloaded_files = []
  failed_dois = []
  downloaded_links = []
+
  for doi in dois:
+ result = await self._download_single_doi(doi, cancel_event)
+
+ if cancel_event.is_set():
  logger.info("Download Cancelled in bibtex mode")
+ return None, "Download Cancelled", "Download Cancelled", "" # cancel if requested
+
+ if result is None:
  continue;
+
+ if isinstance(result, Exception):
  # Unexpected exception
  error_msg = f"Unexpected error: {str(result)}"
  logger.error(f"Error downloading {doi}: {error_msg}")
  failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')

+ elif result[0] is None:
  # Failed download (result from download_single_doi_async)
  error_msg = result[1]
  logger.warning(f"Failed to download {doi}: {error_msg}")
  failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
+
+ else:
+ # Successful download
  filepath = result[0]

+ # Unique filename for zip
  filename = f"{str(doi).replace('/', '_').replace('.', '_')}_{i}.pdf"
  filepath_unique = os.path.join(self.output_dir, filename)
  os.rename(filepath, filepath_unique)
  downloaded_files.append(filepath_unique)
  downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
+
  if downloaded_files:
  zip_filename = 'papers.zip'
  loop = asyncio.get_running_loop()
  loop.run_in_executor(self.executor, lambda: self.create_zip(zip_filename, downloaded_files))
  logger.info(f"ZIP file created: {zip_filename}")
+
+ return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois), ""


  def create_zip(self, zip_filename, files):
 
  def create_gradio_interface():
  """Create Gradio interface for Paper Downloader"""
  downloader = PaperDownloader()
+
  def update_progress( message="", logs=""):
  return gr.Textbox.update(value=f"{message}"),gr.Textbox.update(value=f"<pre>{logs}</pre>")


+ async def download_papers(bib_file, doi_input, dois_input, output_file, downloaded_dois_textbox,failed_dois_textbox,logs, single_file):
  cancel_event = asyncio.Event() # Create cancellation event for every submission.
  downloader.cancel_event = cancel_event # store the event so that it is available to stop the process
+
  if bib_file:
+ # Check file type
+ if not bib_file.name.lower().endswith('.bib'):
+ return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", "", None
+
+ zip_file, downloaded_dois, failed_dois, logs_text = await downloader.process_bibtex(bib_file, cancel_event)
+ return zip_file, downloaded_dois, failed_dois, logs_text, None # we use variables to keep consistent return with correct info.

  elif doi_input:
+ filepath, message, error = await downloader._download_single_doi(doi_input, cancel_event)
+ return None, message, error, "", filepath # correct return

+ elif dois_input:

+ zip_file, downloaded_dois, failed_dois, logs_text = await downloader.download_multiple_dois(dois_input, cancel_event)

+ return zip_file, downloaded_dois, failed_dois, logs_text, None # correct return
+
  else:
+ return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", "", None # direct return values, no callbacks


  with gr.Blocks(theme="Hev832/Applio", css="""
 
  submit_button.click(
  download_papers,
  inputs=[bib_file, doi_input, dois_input],
+ outputs=[output_file, downloaded_dois_textbox, failed_dois_textbox, logs, single_file] # the new output should be a tuple and we output logs too for debugging.
+
  )
+
+
  interface.title="🔬 Academic Paper Batch Downloader"
  interface.description="Upload a BibTeX file or enter DOIs to download PDFs. We'll attempt to fetch PDFs from multiple sources like Sci-Hub, Libgen, Google Scholar and Crossref. You can use any of the three inputs at any moment."
+
  return interface

  def main():