C2MV committed on
Commit
4d6b47a
·
verified ·
1 Parent(s): 8bb51fd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +118 -114
app.py CHANGED
@@ -311,42 +311,43 @@ class PaperDownloader:
311
  return None
312
 
313
  async def download_single_doi_async(self, doi, progress_callback):
314
- """Descargar un único DOI con retroalimentación de progreso"""
315
- if not doi:
316
- return None, "Error: DOI no proporcionado", "Error: DOI no proporcionado"
317
- logger.info(f"Starting download process for DOI: {doi}") # ADDED
318
- try:
319
- pdf_content = await self.download_with_retry_async(doi)
320
- if pdf_content:
321
- logger.info(f"Downloaded PDF for DOI: {doi}") # ADDED
322
- filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
323
- filepath = os.path.join(self.output_dir, filename)
324
-
325
- # Escribir contenido del PDF
326
- loop = asyncio.get_running_loop()
327
- await loop.run_in_executor(
328
- self.executor,
329
- lambda: open(filepath, 'wb').write(pdf_content)
330
- )
331
- logger.info(f"Saved PDF to file: {filepath}") # ADDED
332
-
333
- logger.info(f"Descarga exitosa: {filename}")
334
- progress_callback("test") # CHANGED for debug purposes
335
- return filepath, f'<a href="https://doi.org/{doi}">{doi}</a>', ""
336
- else:
337
- logger.warning(f"No se pudo descargar: {doi}") # ADDED
338
- progress_callback(f"No se pudo descargar: <a href='https://doi.org/{doi}'>{doi}</a>")
339
- return None, f"No se pudo descargar {doi}", f'<a href="https://doi.org/{doi}">{doi}</a>'
340
-
341
- except Exception as e:
342
- logger.error(f"Error processing {doi}: {e}") # ADDED
343
- progress_callback(f"Error procesando {doi}: <a href='https://doi.org/{doi}'>{e}")
344
- return None, f"Error procesando {doi}: {e}", f"Error procesando {doi}: {e}"
 
345
 
346
  async def download_multiple_dois_async(self, dois_text, progress_callback):
347
  # Validar entrada
348
  if not dois_text:
349
- return None, "Error: No DOIs provided", "Error: No DOIs provided"
350
 
351
  # Sanitizar y filtrar DOIs
352
  # Eliminar líneas vacías, espacios en blanco, y DOIs duplicados
@@ -354,7 +355,7 @@ class PaperDownloader:
354
 
355
  # Validar lista de DOIs
356
  if not dois:
357
- return None, "Error: No valid DOIs provided", "Error: No valid DOIs provided"
358
 
359
  # Listas para rastrear resultados
360
  downloaded_files = [] # Rutas de archivos descargados
@@ -424,56 +425,57 @@ class PaperDownloader:
424
  return (
425
  zip_filename if downloaded_files else None, # Archivo ZIP o None
426
  "\n".join(downloaded_links), # DOIs descargados
427
- "\n".join(failed_dois) # DOIs fallidos
 
428
  )
429
 
430
  async def process_bibtex_async(self, bib_file, progress_callback):
431
- """Process BibTeX file and download papers with multiple strategies and reports UI updates using a callback"""
432
- # Read BibTeX file content from the uploaded object
433
- try:
434
- with open(bib_file.name, 'r', encoding='utf-8') as f:
435
- bib_content = f.read()
436
- except Exception as e:
437
- logger.error(f"Error reading uploaded file {bib_file.name}: {e}")
438
- return None, f"Error reading uploaded file {bib_file.name}: {e}", f"Error reading uploaded file {bib_file.name}: {e}"
439
 
440
- # Parse BibTeX data
441
- try:
442
- bib_database = bibtexparser.loads(bib_content)
443
- except Exception as e:
444
- logger.error(f"Error parsing BibTeX data: {e}")
445
- return None, f"Error parsing BibTeX data: {e}", f"Error parsing BibTeX data: {e}"
446
 
447
- # Extract DOIs
448
- dois = [entry.get('doi') for entry in bib_database.entries if entry.get('doi')]
449
- logger.info(f"Found {len(dois)} DOIs to download")
450
 
451
- # Result lists
452
- downloaded_files = []
453
- failed_dois = []
454
- downloaded_links = []
455
 
456
- tasks = [self.download_single_doi_async(doi, progress_callback) for doi in dois]
457
- results = await asyncio.gather(*tasks)
458
-
459
- for i, (filepath, success_message, fail_message) in enumerate(results):
460
- if filepath:
461
- # Unique filename for zip
462
- filename = f"{str(dois[i]).replace('/', '_').replace('.', '_')}_{i}.pdf"
463
- filepath_unique = os.path.join(self.output_dir, filename)
464
- os.rename(filepath, filepath_unique)
465
- downloaded_files.append(filepath_unique)
466
- downloaded_links.append(f'<a href="https://doi.org/{dois[i]}">{dois[i]}</a>')
467
- else:
468
- failed_dois.append(f'<a href="https://doi.org/{dois[i]}">{dois[i]}</a>')
469
-
470
- if downloaded_files:
471
- zip_filename = 'papers.zip'
472
- loop = asyncio.get_running_loop()
473
- await loop.run_in_executor(self.executor, lambda: self.create_zip(zip_filename,downloaded_files))
474
- logger.info(f"ZIP file created: {zip_filename}")
475
 
476
- return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois)
477
 
478
  def create_zip(self, zip_filename, files):
479
  """Crea un archivo zip con los pdfs descargados"""
@@ -481,30 +483,39 @@ class PaperDownloader:
481
  for file in files:
482
  zf.write(file, os.path.basename(file))
483
 
484
-
485
  def create_gradio_interface():
486
  """Create Gradio interface for Paper Downloader"""
487
  downloader = PaperDownloader()
488
 
489
- async def download_papers(bib_file, doi_input, dois_input, progress=gr.Progress()):
490
- if bib_file:
491
- # Check file type
492
- if not bib_file.name.lower().endswith('.bib'):
493
- return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", None
494
-
495
-
496
- zip_path, downloaded_dois, failed_dois = await downloader.process_bibtex_async(bib_file, progress.update)
497
-
498
- return zip_path, downloaded_dois, failed_dois, None
499
- elif doi_input:
500
- filepath, message, failed_doi = await downloader.download_single_doi_async(doi_input,progress.update)
501
- return None, message, failed_doi, filepath
502
- elif dois_input:
503
- zip_path, downloaded_dois, failed_dois = await downloader.download_multiple_dois_async(dois_input, progress.update)
504
- return zip_path, downloaded_dois, failed_dois, None
505
- else:
506
- return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", None
507
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
508
 
509
  # Gradio Interface
510
  interface = gr.Interface(
@@ -513,7 +524,6 @@ def create_gradio_interface():
513
  gr.File(file_types=['.bib'], label="Upload BibTeX File"),
514
  gr.Textbox(label="Enter Single DOI", placeholder="10.xxxx/xxxx"),
515
  gr.Textbox(label="Enter Multiple DOIs (one per line)", placeholder="10.xxxx/xxxx\n10.yyyy/yyyy\n...")
516
-
517
  ],
518
  outputs=[
519
  gr.File(label="Download Papers (ZIP) or Single PDF"),
@@ -533,7 +543,16 @@ def create_gradio_interface():
533
  <div id="failed-dois"></div>
534
  </div>
535
  """),
536
- gr.File(label="Downloaded Single PDF")
 
 
 
 
 
 
 
 
 
537
  ],
538
  title="🔬 Academic Paper Batch Downloader",
539
  description="Upload a BibTeX file or enter DOIs to download PDFs. We'll attempt to fetch PDFs from multiple sources like Sci-Hub, Libgen, Google Scholar and Crossref. You can use any of the three inputs at any moment.",
@@ -559,30 +578,15 @@ def create_gradio_interface():
559
  .output-text a {
560
  color: #007bff; /* Blue color for hyperlinks */
561
  }
 
 
 
562
  """,
563
  cache_examples=False,
564
  )
565
 
566
- # Add Javascript to update HTML
567
- interface.load = """
568
- function(downloaded_dois, failed_dois){
569
- let downloaded_html = '';
570
- downloaded_dois.split('\\n').filter(Boolean).forEach(doi => {
571
- downloaded_html += '[' + doi + ']<br>';
572
- });
573
- document.querySelector("#downloaded-dois").innerHTML = downloaded_html;
574
-
575
- let failed_html = '';
576
- failed_dois.split('\\n').filter(Boolean).forEach(doi => {
577
- failed_html += '[' + doi + ']<br>';
578
- });
579
- document.querySelector("#failed-dois").innerHTML = failed_html;
580
- return [downloaded_html, failed_html];
581
- }
582
- """
583
  return interface
584
 
585
-
586
  def main():
587
  interface = create_gradio_interface()
588
  interface.launch()
 
311
  return None
312
 
313
  async def download_single_doi_async(self, doi, progress_callback):
314
+ """Descargar un único DOI con retroalimentación de progreso"""
315
+ if not doi:
316
+ return None, "Error: DOI no proporcionado", "Error: DOI no proporcionado", ""
317
+ logger.info(f"Starting download process for DOI: {doi}")
318
+ try:
319
+ pdf_content = await self.download_with_retry_async(doi)
320
+ if pdf_content:
321
+ logger.info(f"Downloaded PDF for DOI: {doi}")
322
+ filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
323
+ filepath = os.path.join(self.output_dir, filename)
324
+
325
+ # Escribir contenido del PDF
326
+ loop = asyncio.get_running_loop()
327
+ await loop.run_in_executor(
328
+ self.executor,
329
+ lambda: open(filepath, 'wb').write(pdf_content)
330
+ )
331
+ logger.info(f"Saved PDF to file: {filepath}")
332
+
333
+ logger.info(f"Descarga exitosa: {filename}")
334
+ progress_callback(f"Descargado exitosamente: <a href='https://doi.org/{doi}'>{doi}</a>")
335
+ return filepath, f'<a href="https://doi.org/{doi}">{doi}</a>', "", ""
336
+ else:
337
+ logger.warning(f"No se pudo descargar: {doi}")
338
+ progress_callback(f"No se pudo descargar: <a href='https://doi.org/{doi}'>{doi}</a>")
339
+ return None, f"No se pudo descargar {doi}", f'<a href="https://doi.org/{doi}">{doi}</a>', ""
340
+
341
+ except Exception as e:
342
+ logger.error(f"Error processing {doi}: {e}")
343
+ progress_callback(f"Error procesando {doi}: <a href='https://doi.org/{doi}'>{e}")
344
+ return None, f"Error procesando {doi}: {e}", f"Error processing {doi}: {e}", ""
345
+
346
 
347
  async def download_multiple_dois_async(self, dois_text, progress_callback):
348
  # Validar entrada
349
  if not dois_text:
350
+ return None, "Error: No DOIs provided", "Error: No DOIs provided", ""
351
 
352
  # Sanitizar y filtrar DOIs
353
  # Eliminar líneas vacías, espacios en blanco, y DOIs duplicados
 
355
 
356
  # Validar lista de DOIs
357
  if not dois:
358
+ return None, "Error: No valid DOIs provided", "Error: No valid DOIs provided", ""
359
 
360
  # Listas para rastrear resultados
361
  downloaded_files = [] # Rutas de archivos descargados
 
425
  return (
426
  zip_filename if downloaded_files else None, # Archivo ZIP o None
427
  "\n".join(downloaded_links), # DOIs descargados
428
+ "\n".join(failed_dois), # DOIs fallidos
429
+ ""
430
  )
431
 
432
  async def process_bibtex_async(self, bib_file, progress_callback):
433
+ """Process BibTeX file and download papers with multiple strategies and reports UI updates using a callback"""
434
+ # Read BibTeX file content from the uploaded object
435
+ try:
436
+ with open(bib_file.name, 'r', encoding='utf-8') as f:
437
+ bib_content = f.read()
438
+ except Exception as e:
439
+ logger.error(f"Error reading uploaded file {bib_file.name}: {e}")
440
+ return None, f"Error reading uploaded file {bib_file.name}: {e}", f"Error reading uploaded file {bib_file.name}: {e}", ""
441
 
442
+ # Parse BibTeX data
443
+ try:
444
+ bib_database = bibtexparser.loads(bib_content)
445
+ except Exception as e:
446
+ logger.error(f"Error parsing BibTeX data: {e}")
447
+ return None, f"Error parsing BibTeX data: {e}", f"Error parsing BibTeX data: {e}", ""
448
 
449
+ # Extract DOIs
450
+ dois = [entry.get('doi') for entry in bib_database.entries if entry.get('doi')]
451
+ logger.info(f"Found {len(dois)} DOIs to download")
452
 
453
+ # Result lists
454
+ downloaded_files = []
455
+ failed_dois = []
456
+ downloaded_links = []
457
 
458
+ tasks = [self.download_single_doi_async(doi, progress_callback) for doi in dois]
459
+ results = await asyncio.gather(*tasks)
460
+
461
+ for i, (filepath, success_message, fail_message,_) in enumerate(results):
462
+ if filepath:
463
+ # Unique filename for zip
464
+ filename = f"{str(dois[i]).replace('/', '_').replace('.', '_')}_{i}.pdf"
465
+ filepath_unique = os.path.join(self.output_dir, filename)
466
+ os.rename(filepath, filepath_unique)
467
+ downloaded_files.append(filepath_unique)
468
+ downloaded_links.append(f'<a href="https://doi.org/{dois[i]}">{dois[i]}</a>')
469
+ else:
470
+ failed_dois.append(f'<a href="https://doi.org/{dois[i]}">{dois[i]}</a>')
471
+
472
+ if downloaded_files:
473
+ zip_filename = 'papers.zip'
474
+ loop = asyncio.get_running_loop()
475
+ await loop.run_in_executor(self.executor, lambda: self.create_zip(zip_filename,downloaded_files))
476
+ logger.info(f"ZIP file created: {zip_filename}")
477
 
478
+ return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois), ""
479
 
480
  def create_zip(self, zip_filename, files):
481
  """Crea un archivo zip con los pdfs descargados"""
 
483
  for file in files:
484
  zf.write(file, os.path.basename(file))
485
 
 
486
  def create_gradio_interface():
487
  """Create Gradio interface for Paper Downloader"""
488
  downloader = PaperDownloader()
489
 
490
def update_progress(message, log_message=""):
    """Build two HTML component updates: the status message and the log text.

    The log text is wrapped in a ``<pre>`` element; the message is used as-is.
    """
    status_update = gr.HTML.update(value=f"{message}")
    log_update = gr.HTML.update(value=f"<pre>{log_message}</pre>")
    return status_update, log_update
492
+
493
async def download_papers(bib_file, doi_input, dois_input):
    """Dispatch the submitted inputs to the matching downloader coroutine.

    The first non-empty input wins, in this order: BibTeX file, single DOI,
    list of DOIs.

    Returns:
        A 5-tuple ``(zip_path, downloaded_html, failed_html, logs_html,
        single_pdf_path)``. ``logs_html`` holds the progress messages
        reported during this call, or ``""`` when there were none.
        NOTE(review): the fourth slot is presumably the "Logs" HTML
        output; the outputs list is only partly visible here, so confirm.
    """
    progress_log = []

    def custom_progress_callback(message):
        # Record each message so it can be returned in the logs slot.
        # Building component-update objects here had no effect: the
        # downloader code visible in this file discards the callback's
        # return value.
        logger.info(f"Callback message: {message}")
        progress_log.append(str(message))

    def render_logs():
        # One message per line inside <pre>.
        if not progress_log:
            return ""
        return "<pre>" + "\n".join(progress_log) + "</pre>"

    if bib_file:
        # Reject anything that is not a .bib upload.
        if not bib_file.name.lower().endswith('.bib'):
            return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", "", None

        zip_path, downloaded_dois, failed_dois, _ = await downloader.process_bibtex_async(bib_file, custom_progress_callback)
        return zip_path, downloaded_dois, failed_dois, render_logs(), None

    if doi_input:
        filepath, message, failed_doi, _ = await downloader.download_single_doi_async(doi_input, custom_progress_callback)
        return None, message, failed_doi, render_logs(), filepath

    if dois_input:
        zip_path, downloaded_dois, failed_dois, _ = await downloader.download_multiple_dois_async(dois_input, custom_progress_callback)
        return zip_path, downloaded_dois, failed_dois, render_logs(), None

    return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", "", None
518
+
519
 
520
  # Gradio Interface
521
  interface = gr.Interface(
 
524
  gr.File(file_types=['.bib'], label="Upload BibTeX File"),
525
  gr.Textbox(label="Enter Single DOI", placeholder="10.xxxx/xxxx"),
526
  gr.Textbox(label="Enter Multiple DOIs (one per line)", placeholder="10.xxxx/xxxx\n10.yyyy/yyyy\n...")
 
527
  ],
528
  outputs=[
529
  gr.File(label="Download Papers (ZIP) or Single PDF"),
 
543
  <div id="failed-dois"></div>
544
  </div>
545
  """),
546
+ gr.HTML(label="""
547
+ <div style='padding-bottom: 5px; font-weight: bold;'>
548
+ Logs
549
+ </div>
550
+ <div style='border: 1px solid #ddd; padding: 5px; border-radius: 5px; max-height: 150px; overflow-y: auto;white-space: pre-line;'>
551
+
552
+ </div>
553
+
554
+ """,),
555
+ gr.File(label="Downloaded Single PDF")
556
  ],
557
  title="🔬 Academic Paper Batch Downloader",
558
  description="Upload a BibTeX file or enter DOIs to download PDFs. We'll attempt to fetch PDFs from multiple sources like Sci-Hub, Libgen, Google Scholar and Crossref. You can use any of the three inputs at any moment.",
 
578
  .output-text a {
579
  color: #007bff; /* Blue color for hyperlinks */
580
  }
581
+ .logs_box {
582
+
583
+ }
584
  """,
585
  cache_examples=False,
586
  )
587
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
588
  return interface
589
 
 
590
  def main():
591
  interface = create_gradio_interface()
592
  interface.launch()