C2MV committed on
Commit
f732808
verified
1 Parent(s): 565c23e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +121 -126
app.py CHANGED
@@ -307,57 +307,48 @@ class PaperDownloader:
307
  delay *= 2 # Exponential backoff
308
 
309
  return None
310
-
311
-
312
  async def _download_single_doi(self, doi):
313
- """Descargar un 煤nico DOI con retroalimentaci贸n de progreso"""
314
- if not doi:
315
- return None, "Error: DOI no proporcionado", "Error: DOI no proporcionado"
316
- logger.info(f"Starting download process for DOI: {doi}")
317
-
318
- try:
319
-
320
- pdf_content = await self.download_with_retry_async(doi)
321
- if pdf_content:
322
  logger.info(f"Downloaded PDF for DOI: {doi}")
323
  filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
324
  filepath = os.path.join(self.output_dir, filename)
325
-
326
- # Escribir contenido del PDF
327
-
328
  with open(filepath, 'wb') as f:
329
  f.write(pdf_content)
330
-
331
  logger.info(f"Saved PDF to file: {filepath}")
332
-
333
  logger.info(f"Descarga exitosa: {filename}")
334
-
335
  return filepath, f"Descargado exitosamente: <a href='https://doi.org/{doi}'>{doi}</a>", ""
336
-
337
- else:
338
- logger.warning(f"No se pudo descargar: {doi}")
339
- return None, f"No se pudo descargar {doi}", f'<a href="https://doi.org/{doi}">{doi}</a>'
 
 
 
 
340
 
341
- except CancelledError:
342
- logger.info(f"Download Cancelled DOI: {doi}")
343
- return None, f"Download cancelled {doi}","Download Cancelled"
344
- except Exception as e:
345
- logger.error(f"Error processing {doi}: {e}")
346
- return None, f"Error processing {doi}: {e}", f"Error processing {doi}: {e}"
347
 
348
-
349
  async def download_multiple_dois(self, dois_text, cancel_event):
350
  """Download multiple DOIs"""
351
  if not dois_text:
352
  return None, "Error: No DOIs provided", "Error: No DOIs provided", ""
353
-
354
  # Sanitizar y filtrar DOIs
355
  # Eliminar l铆neas vac铆as, espacios en blanco, y DOIs duplicados
356
  dois = list(set([doi.strip() for doi in dois_text.split('\n') if doi.strip()]))
357
 
358
  # Validar lista de DOIs
359
  if not dois:
360
- return None, "Error: No valid DOIs provided", "Error: No valid DOIs provided", ""
361
 
362
  # Listas para rastrear resultados
363
  downloaded_files = [] # Rutas de archivos descargados
@@ -365,46 +356,44 @@ class PaperDownloader:
365
  downloaded_links = [] # Links de DOIs descargados
366
 
367
  for i, doi in enumerate(dois):
368
- result = await self._download_single_doi(doi,cancel_event)
369
-
370
  if cancel_event.is_set():
371
- logger.info("Downloads cancelled on multiple dois download")
372
- return None, "Downloads cancelled","Downloads cancelled", "" # early return on cancelled
373
-
374
  if result is None:
375
- continue;
376
-
377
  if isinstance(result, Exception):
378
  # Excepci贸n inesperada
379
- error_msg = f"Unexpected error: {str(result)}"
380
- logger.error(f"Error downloading {doi}: {error_msg}")
381
- failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
382
-
383
  elif result[0] is None:
384
- # Descarga fallida (resultado de download_single_doi_async)
385
  error_msg = result[1]
386
  logger.warning(f"Failed to download {doi}: {error_msg}")
387
  failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
388
-
389
  else:
390
- # Descarga exitosa
391
  filepath = result[0]
392
-
393
  # Generar nombre de archivo 煤nico
394
- filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf" # indent problem corrected
395
- filepath_unique = os.path.join(self.output_dir, filename) #Fixed identation.
396
-
397
  try:
398
- # Renombrar archivo
399
- os.rename(filepath, filepath_unique) #Fixed identation.
400
 
401
- # A帽adir a lista de archivos descargados
402
- downloaded_files.append(filepath_unique) #Fixed identation.
403
- downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')#Fixed identation.
404
 
405
  except Exception as rename_error:
406
- logger.error(f"Error renaming file for {doi}: {rename_error}")
407
- failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - Error saving file')#Fixed identation.
408
 
409
 
410
  # Crear archivo ZIP si hay archivos descargados
@@ -419,75 +408,78 @@ class PaperDownloader:
419
  lambda: self.create_zip(zip_filename, downloaded_files)
420
  )
421
  logger.info(f"ZIP file created: {zip_filename}")
 
 
422
 
423
- return zip_filename if downloaded_files else None, "\n".join(downloaded_links),"\n".join(failed_dois), ""
424
-
425
- async def process_bibtex(self, bib_file, cancel_event):
426
- """Process BibTeX file and download papers with multiple strategies and reports UI updates using a callback"""
427
- # Read BibTeX file content from the uploaded object
428
- try:
429
- with open(bib_file.name, 'r', encoding='utf-8') as f:
430
- bib_content = f.read()
431
- except Exception as e:
432
- logger.error(f"Error reading uploaded file {bib_file.name}: {e}")
433
- return None, f"Error reading uploaded file {bib_file.name}: {e}", f"Error reading uploaded file {bib_file.name}: {e}", ""
434
-
435
- # Parse BibTeX data
436
- try:
437
- bib_database = bibtexparser.loads(bib_content)
438
- except Exception as e:
439
- logger.error(f"Error parsing BibTeX data: {e}")
440
- return None,f"Error parsing BibTeX data: {e}", f"Error parsing BibTeX data: {e}",""
441
-
442
- # Extract DOIs
443
- dois = [entry.get('doi') for entry in bib_database.entries if entry.get('doi')]
444
- logger.info(f"Found {len(dois)} DOIs to download")
445
 
446
- # Result lists
447
- downloaded_files = []
448
- failed_dois = []
449
- downloaded_links = []
450
- for i,doi in enumerate(dois):
451
- result = await self._download_single_doi(doi, cancel_event)
 
 
 
452
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
453
  if cancel_event.is_set():
454
- logger.info("Download Cancelled in bibtex mode")
455
- return None, "Download Cancelled", "Download Cancelled", ""#cancel if requested
 
456
  if result is None:
457
- continue
458
 
459
  if isinstance(result, Exception):
460
- # Excepci贸n inesperada
461
  error_msg = f"Unexpected error: {str(result)}"
462
  logger.error(f"Error downloading {doi}: {error_msg}")
463
  failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
464
-
465
- elif result[0] is None:
466
- # Descarga fallida (resultado de download_single_doi_async)
467
- error_msg = result[1]
468
- logger.warning(f"Failed to download {doi}: {error_msg}")
469
- failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
470
-
471
- else:
472
- # Descarga exitosa
473
- filepath = result[0]
474
 
475
  # Unique filename for zip
476
- filename = f"{str(doi).replace('/', '_').replace('.', '_')}_{i}.pdf" #indent fixed
477
- filepath_unique = os.path.join(self.output_dir, filename)
478
- os.rename(filepath, filepath_unique)
479
- downloaded_files.append(filepath_unique)
480
- downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
 
 
 
 
 
 
 
 
481
 
482
 
483
- if downloaded_files:
484
- zip_filename = 'papers.zip'
485
- loop = asyncio.get_running_loop()
486
- loop.run_in_executor(self.executor, lambda: self.create_zip(zip_filename,downloaded_files))
487
- logger.info(f"ZIP file created: {zip_filename}")
488
-
489
- return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois), ""
490
-
491
  def create_zip(self, zip_filename, files):
492
  """Crea un archivo zip con los pdfs descargados"""
493
  with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zf:
@@ -504,30 +496,32 @@ def create_gradio_interface():
504
  """Create Gradio interface for Paper Downloader"""
505
  downloader = PaperDownloader()
506
 
507
-
508
  def update_progress( message="", logs=""):
509
- return gr.Textbox.update(value=f"{message}"),gr.Textbox.update(value=f"<pre>{logs}</pre>")
510
 
511
 
512
  async def download_papers(bib_file, doi_input, dois_input):
513
  cancel_event = asyncio.Event() # Create cancellation event for every submission.
514
  downloader.cancel_event = cancel_event # store the event so that it is available to stop the process
 
515
  if bib_file:
516
  # Check file type
517
- if not bib_file.name.lower().endswith('.bib'):
518
- return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", "", None #Fixed with default value
 
 
 
519
 
520
- zip_file, downloaded_dois, failed_dois, logs_text= await downloader.process_bibtex(bib_file, cancel_event)
521
- return zip_file, downloaded_dois, failed_dois, logs_text, None
522
  elif doi_input:
523
- filepath, message, error = await downloader._download_single_doi(doi_input, cancel_event)
524
- return None, message, error,"", filepath# correct returns.
525
-
526
  elif dois_input:
527
- zip_file, downloaded_dois, failed_dois, logs_text= await downloader.download_multiple_dois(dois_input, cancel_event)
528
- return zip_file, downloaded_dois, failed_dois, logs_text, None
529
  else:
530
- return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", "", None
 
531
 
532
  with gr.Blocks(theme="Hev832/Applio", css="""
533
  .gradio-container {
@@ -569,10 +563,10 @@ def create_gradio_interface():
569
 
570
 
571
  output_file = gr.File(label="Download Papers (ZIP) or Single PDF")
572
- downloaded_dois_textbox = gr.Textbox(label="""
573
  Found DOIs
574
  """,)
575
- failed_dois_textbox=gr.Textbox(label="""
576
  Missed DOIs
577
  """,)
578
  logs = gr.Textbox(label="""
@@ -590,14 +584,15 @@ def create_gradio_interface():
590
  inputs=[bib_file, doi_input, dois_input],
591
  outputs=[output_file, downloaded_dois_textbox, failed_dois_textbox,logs, single_file ], # the new output should be a tuple and we output logs too for debugging.
592
  )
593
-
594
  interface.title="馃敩 Academic Paper Batch Downloader"
595
  interface.description="Upload a BibTeX file or enter DOIs to download PDFs. We'll attempt to fetch PDFs from multiple sources like Sci-Hub, Libgen, Google Scholar and Crossref. You can use any of the three inputs at any moment."
 
596
  return interface
597
 
598
  def main():
599
  interface = create_gradio_interface()
600
- interface.launch()
601
 
602
  if __name__ == "__main__":
603
  main()
 
307
  delay *= 2 # Exponential backoff
308
 
309
  return None
 
 
310
  async def _download_single_doi(self, doi):
311
+ """Descargar un 煤nico DOI con retroalimentaci贸n de progreso"""
312
+ if not doi:
313
+ return None, "Error: DOI no proporcionado", "Error: DOI no proporcionado"
314
+ logger.info(f"Starting download process for DOI: {doi}")
315
+
316
+ try:
317
+ pdf_content = await self.download_with_retry_async(doi)
318
+ if pdf_content:
 
319
  logger.info(f"Downloaded PDF for DOI: {doi}")
320
  filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
321
  filepath = os.path.join(self.output_dir, filename)
 
 
 
322
  with open(filepath, 'wb') as f:
323
  f.write(pdf_content)
 
324
  logger.info(f"Saved PDF to file: {filepath}")
 
325
  logger.info(f"Descarga exitosa: {filename}")
 
326
  return filepath, f"Descargado exitosamente: <a href='https://doi.org/{doi}'>{doi}</a>", ""
327
+
328
+ else:
329
+ logger.warning(f"No se pudo descargar: {doi}")
330
+ return None, f"No se pudo descargar {doi}", f'<a href="https://doi.org/{doi}">{doi}</a>'
331
+
332
+ except CancelledError:
333
+ logger.info(f"Download Cancelled DOI: {doi}")
334
+ return None, f"Download cancelled {doi}","Download Cancelled"
335
 
336
+ except Exception as e:
337
+ logger.error(f"Error processing {doi}: {e}")
338
+ return None, f"Error processing {doi}: {e}", f"Error processing {doi}: {e}"
 
 
 
339
 
 
340
  async def download_multiple_dois(self, dois_text, cancel_event):
341
  """Download multiple DOIs"""
342
  if not dois_text:
343
  return None, "Error: No DOIs provided", "Error: No DOIs provided", ""
344
+
345
  # Sanitizar y filtrar DOIs
346
  # Eliminar líneas vacías, espacios en blanco, y DOIs duplicados
347
  dois = list(set([doi.strip() for doi in dois_text.split('\n') if doi.strip()]))
348
 
349
  # Validar lista de DOIs
350
  if not dois:
351
+ return None, "Error: No valid DOIs provided", "Error: No valid DOIs provided", ""
352
 
353
  # Listas para rastrear resultados
354
  downloaded_files = [] # Rutas de archivos descargados
 
356
  downloaded_links = [] # Links de DOIs descargados
357
 
358
  for i, doi in enumerate(dois):
359
+ result = await self._download_single_doi(doi)
360
+
361
  if cancel_event.is_set():
362
+ logger.info("Downloads cancelled on multiple dois download")
363
+ return None,"Downloads cancelled","Downloads cancelled", ""
 
364
  if result is None:
365
+ continue
 
366
  if isinstance(result, Exception):
367
  # Excepción inesperada
368
+ error_msg = f"Unexpected error: {str(result)}"
369
+ logger.error(f"Error downloading {doi}: {error_msg}")
370
+ failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
371
+
372
  elif result[0] is None:
373
+ # Descarga fallida (resultado de download_single_doi_async)
374
  error_msg = result[1]
375
  logger.warning(f"Failed to download {doi}: {error_msg}")
376
  failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
377
+
378
  else:
379
+ # Descarga exitosa
380
  filepath = result[0]
381
+
382
  # Generar nombre de archivo 煤nico
383
+ filename = f"{str(doi).replace('/', '_').replace('.', '_')}_{i}.pdf" # indent fix.
384
+ filepath_unique = os.path.join(self.output_dir, filename)
385
+
386
  try:
387
+ # Renombrar archivo
388
+ os.rename(filepath, filepath_unique) #Fixed ident
389
 
390
+ # Añadir a lista de archivos descargados
391
+ downloaded_files.append(filepath_unique) #Fixed ident
392
+ downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')#Fixed ident
393
 
394
  except Exception as rename_error:
395
+ logger.error(f"Error renaming file for {doi}: {rename_error}")
396
+ failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - Error saving file')#Fixed ident
397
 
398
 
399
  # Crear archivo ZIP si hay archivos descargados
 
408
  lambda: self.create_zip(zip_filename, downloaded_files)
409
  )
410
  logger.info(f"ZIP file created: {zip_filename}")
411
+
412
+ return zip_filename if downloaded_files else None, "\n".join(downloaded_links),"\n".join(failed_dois),""
413
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
414
 
async def process_bibtex(self, bib_file, cancel_event):
    """Download every DOI listed in an uploaded BibTeX file and zip the PDFs.

    Args:
        bib_file: Uploaded-file object; only its ``name`` attribute (a path
            on disk) is used.
        cancel_event: Object exposing ``is_set()``. It is checked after each
            download; a set flag aborts the whole batch.

    Returns:
        A 4-tuple ``(zip_path, downloaded_links, failed_links, logs)``.
        ``zip_path`` is ``None`` when nothing was downloaded, when the file
        could not be read or parsed, or when the run was cancelled. The two
        link fields are newline-joined HTML anchors; ``logs`` is always
        ``""``.
    """
    # Read the BibTeX text from the uploaded file's path.
    try:
        with open(bib_file.name, 'r', encoding='utf-8') as f:
            bib_content = f.read()
    except Exception as e:
        logger.error(f"Error reading uploaded file {bib_file.name}: {e}")
        return None, f"Error reading uploaded file {bib_file.name}: {e}", f"Error reading uploaded file {bib_file.name}: {e}", ""

    # Parse the BibTeX data.
    try:
        bib_database = bibtexparser.loads(bib_content)
    except Exception as e:
        logger.error(f"Error parsing BibTeX data: {e}")
        return None, f"Error parsing BibTeX data: {e}", f"Error parsing BibTeX data: {e}", ""

    # Keep only entries that actually carry a DOI.
    dois = [entry.get('doi') for entry in bib_database.entries if entry.get('doi')]
    logger.info(f"Found {len(dois)} DOIs to download")

    downloaded_files = []
    failed_dois = []
    downloaded_links = []

    for i, doi in enumerate(dois):
        # Called with the DOI only, matching the helper's declared signature.
        result = await self._download_single_doi(doi)

        if cancel_event.is_set():
            logger.info("Download Cancelled in bibtex mode")
            return None, "Download Cancelled", "Download Cancelled", ""

        if result is None:
            continue

        link = f'<a href="https://doi.org/{doi}">{doi}</a>'

        if isinstance(result, Exception):
            error_msg = f"Unexpected error: {str(result)}"
            logger.error(f"Error downloading {doi}: {error_msg}")
            failed_dois.append(f'{link} - {error_msg}')
            continue

        if result[0] is None:
            # Failed download: the second tuple element is the message.
            error_msg = result[1]
            logger.warning(f"Failed to download {doi}: {error_msg}")
            failed_dois.append(f'{link} - {error_msg}')
            continue

        # The index suffix keeps names unique when the same DOI appears twice.
        filename = f"{str(doi).replace('/', '_').replace('.', '_')}_{i}.pdf"
        filepath_unique = os.path.join(self.output_dir, filename)
        try:
            os.rename(result[0], filepath_unique)
        except OSError as rename_error:
            # One file that cannot be renamed must not abort the batch.
            logger.error(f"Error renaming file for {doi}: {rename_error}")
            failed_dois.append(f'{link} - Error saving file')
            continue

        downloaded_files.append(filepath_unique)
        downloaded_links.append(link)

    # Stays None when nothing was downloaded, so the return below is always bound.
    zip_filename = None
    if downloaded_files:
        zip_filename = 'papers.zip'
        loop = asyncio.get_running_loop()
        # Await the executor job so the archive exists before its path is returned.
        await loop.run_in_executor(self.executor, lambda: self.create_zip(zip_filename, downloaded_files))
        logger.info(f"ZIP file created: {zip_filename}")

    return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois), ""
481
 
482
 
 
 
 
 
 
 
 
 
483
  def create_zip(self, zip_filename, files):
484
  """Crea un archivo zip con los pdfs descargados"""
485
  with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zf:
 
496
  """Create Gradio interface for Paper Downloader"""
497
  downloader = PaperDownloader()
498
 
 
def update_progress(message="", logs=""):
    """Build update payloads for a status textbox and a log textbox.

    Args:
        message: Status text placed in the first update as-is.
        logs: Log text, wrapped in ``<pre>`` tags for the second update.

    Returns:
        A 2-tuple of Gradio update objects, message first and logs second.
    """
    # gr.update() replaces the per-component Textbox.update() classmethod,
    # which newer Gradio releases no longer provide.
    return gr.update(value=f"{message}"), gr.update(value=f"<pre>{logs}</pre>")
501
 
502
 
async def download_papers(bib_file, doi_input, dois_input):
    """Dispatch one UI submission to the matching downloader entry point.

    Inputs are tried in priority order: BibTeX upload, then single DOI,
    then DOI list. The first non-empty one wins.

    Args:
        bib_file: Uploaded file object (its ``name`` must end in ``.bib``),
            or a falsy value.
        doi_input: A single DOI string, or a falsy value.
        dois_input: Newline-separated DOI text, or a falsy value.

    Returns:
        A 5-tuple ``(zip_file, downloaded, failed, logs, single_file)``.
        Batch modes fill ``zip_file`` and leave ``single_file`` as ``None``;
        single-DOI mode does the opposite.
    """
    # A fresh event per submission, stored on the downloader so that it is
    # available to stop the process.
    cancel_event = asyncio.Event()
    downloader.cancel_event = cancel_event

    if bib_file:
        # Reject anything that is not a .bib upload.
        if not bib_file.name.lower().endswith('.bib'):
            return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", "", None
        zip_file, downloaded_dois, failed_dois, logs_text = await downloader.process_bibtex(bib_file, cancel_event)
        return zip_file, downloaded_dois, failed_dois, logs_text, None

    if doi_input:
        # The helper is declared as (self, doi); also passing the event
        # raised TypeError on every single-DOI request.
        filepath, message, error = await downloader._download_single_doi(doi_input)
        return None, message, error, "", filepath

    if dois_input:
        zip_file, downloaded_dois, failed_dois, logs_text = await downloader.download_multiple_dois(dois_input, cancel_event)
        return zip_file, downloaded_dois, failed_dois, logs_text, None

    return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", "", None
524
+
525
 
526
  with gr.Blocks(theme="Hev832/Applio", css="""
527
  .gradio-container {
 
563
 
564
 
565
  output_file = gr.File(label="Download Papers (ZIP) or Single PDF")
566
+ downloaded_dois_textbox = gr.HTML(label="""
567
  Found DOIs
568
  """,)
569
+ failed_dois_textbox=gr.HTML(label="""
570
  Missed DOIs
571
  """,)
572
  logs = gr.Textbox(label="""
 
584
  inputs=[bib_file, doi_input, dois_input],
585
  outputs=[output_file, downloaded_dois_textbox, failed_dois_textbox,logs, single_file ], # the new output should be a tuple and we output logs too for debugging.
586
  )
587
+
588
  interface.title="馃敩 Academic Paper Batch Downloader"
589
  interface.description="Upload a BibTeX file or enter DOIs to download PDFs. We'll attempt to fetch PDFs from multiple sources like Sci-Hub, Libgen, Google Scholar and Crossref. You can use any of the three inputs at any moment."
590
+
591
  return interface
592
 
def main():
    """Build the Gradio interface and launch it with ``share=True``."""
    create_gradio_interface().launch(share=True)


if __name__ == "__main__":
    main()