C2MV committed
Commit 565c23e (verified)
Parent: aaba5e2

Update app.py

Files changed (1)
app.py +145 -173
app.py CHANGED
@@ -161,7 +161,6 @@ class PaperDownloader:
                 text, headers = await self.fetch_with_headers(session, scihub_url, timeout=15)
                 if not text:
                     continue
-
 
                 # Search for multiple PDF URL patterns
                 pdf_patterns = [
@@ -169,24 +168,25 @@ class PaperDownloader:
                     r'(https?://[^\s<>"]+?download/[^\s<>"]+)',
                     r'(https?://[^\s<>"]+?\/pdf\/[^\s<>"]+)',
                 ]
-
 
                 pdf_urls = []
                 for pattern in pdf_patterns:
                     pdf_urls.extend(re.findall(pattern, text))
-
 
-                # Try downloading from found URLs, but iterate over ALL
+                # Try downloading from found URLs
                 for pdf_url in pdf_urls:
-                    pdf_content = await self.fetch_pdf_content(session, pdf_url)
-                    if pdf_content:
-                        logger.debug(f"Found PDF from: {pdf_url}")
-                        return pdf_content
-
+                    try:
+                        pdf_response = await session.get(pdf_url, headers=self.headers, timeout=10)
+                        # Verify that it is a PDF
+                        if 'application/pdf' in pdf_response.headers.get('Content-Type', ''):
+                            logger.debug(f"Found PDF from: {pdf_url}")
+                            return await pdf_response.read()
+                    except Exception as e:
+                        logger.debug(f"Error downloading PDF from {pdf_url}: {e}")
+
             except Exception as e:
-                logger.debug(f"Error trying to download {doi} from {base_url}: {e}")
+                logger.debug(f"Error trying to download {doi} from {base_url}: {e}")
 
-
         return None
 
     async def download_paper_libgen_async(self, session, doi):
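
Note: the pattern this commit adopts across all sources is the same: issue the GET, check the Content-Type header, and only then read the body. A minimal standalone sketch of that pattern (the function name and example URL are illustrative, not from app.py):

    import asyncio
    import aiohttp

    async def fetch_pdf_if_pdf(url, headers=None, timeout=10):
        """Return the response body only when the server labels it as a PDF."""
        async with aiohttp.ClientSession() as session:
            try:
                async with session.get(url, headers=headers,
                                       timeout=aiohttp.ClientTimeout(total=timeout)) as resp:
                    # Same check the diff uses: inspect the Content-Type header.
                    if 'application/pdf' in resp.headers.get('Content-Type', ''):
                        return await resp.read()
            except Exception:
                return None
        return None

    # pdf_bytes = asyncio.run(fetch_pdf_if_pdf('https://example.org/paper.pdf'))

Servers can mislabel responses, so a stricter variant would also verify the leading %PDF- magic bytes.
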
@@ -210,10 +210,10 @@ class PaperDownloader:
                 if links:
                     link = links[0]
                     pdf_url = link['href']
-                    pdf_content = await self.fetch_pdf_content(session, pdf_url)
-                    if pdf_content:
+                    pdf_response = await session.get(pdf_url, headers=self.headers, allow_redirects=True, timeout=10)
+                    if 'application/pdf' in pdf_response.headers.get('Content-Type', ''):
                         logger.debug(f"Found PDF from: {pdf_url}")
-                        return pdf_content
+                        return await pdf_response.read()
         except Exception as e:
             logger.debug(f"Error trying to download {doi} from libgen: {e}")
         return None
@@ -239,99 +239,77 @@ class PaperDownloader:
 
                 if links:
                     pdf_url = links[0]['href']
-                    pdf_content = await self.fetch_pdf_content(session, pdf_url)
-                    if pdf_content:
-                        logger.debug(f"Found PDF from: {pdf_url}")
-                        return pdf_content
-
+                    pdf_response = await session.get(pdf_url, headers=self.headers, timeout=10)
+                    if 'application/pdf' in pdf_response.headers.get('Content-Type', ''):
+                        logger.debug(f"Found PDF from: {pdf_url}")
+                        return await pdf_response.read()
         except Exception as e:
             logger.debug(f"Google Scholar error for {doi}: {e}")
 
         return None
-
-    async def download_paper_crossref_async(self, session, doi):
-        """Alternative search method using Crossref"""
-        if not doi:
-            return None
-
-        try:
-            # Search for open access link
-            url = f"https://api.crossref.org/works/{doi}"
-            response = await session.get(url, headers=self.headers, timeout=10)
-
-            if response.status == 200:
-                data = await response.json()
-                work = data.get('message', {})
-
-                # Search for open access links
-                links = work.get('link', [])
-                for link in links:
-                    if link.get('content-type') == 'application/pdf':
-                        pdf_url = link.get('URL')
-                        if pdf_url:
-                            pdf_content = await self.fetch_pdf_content(session, pdf_url)
-                            if pdf_content:
-                                logger.debug(f"Found PDF from: {pdf_url}")
-                                return pdf_content
-        except Exception as e:
-            logger.debug(f"Crossref error for {doi}: {e}")
-        return None
 
-    async def download_with_retry_async(self, doi, max_retries=3):
-        """
-        Attempts to download a paper using multiple strategies and a limited number of retries.
-
-        Args:
-            doi (str): DOI of the paper to download
-            max_retries (int): Maximum number of retries
-
-        Returns:
-            bytes or None: PDF content, or None if it cannot be downloaded
-        """
+    async def download_paper_crossref_async(self, session, doi):
+        """Alternative search method using Crossref"""
         if not doi:
-            logger.warning("DOI not provided")
             return None
-
-        # Download strategies in order of preference
-        download_strategies = [
-            self.download_paper_direct_doi_async,
-            self.download_paper_scihub_async,
-            self.download_paper_libgen_async,
-            self.download_paper_google_scholar_async,
-            self.download_paper_crossref_async
-        ]
-
-        async with aiohttp.ClientSession() as session:
-            for retry in range(max_retries):
-                logger.info(f"Download attempt {retry + 1} for DOI: {doi}")
-
-                # Try each download strategy
-                for strategy in download_strategies:
-                    try:
-                        logger.info(f"Trying strategy {strategy.__name__} for DOI {doi}")  # ADDED
-                        pdf_content = await strategy(session, doi)
-                        if pdf_content:
-                            logger.info(f"Successfully downloaded {doi} using {strategy.__name__}")
-                            return pdf_content
-                    except CancelledError:
-                        logger.info(f"Download cancelled on strategy: {strategy.__name__} with DOI {doi}")
-                        return None  # return here in order to stop retrying
 
-                    except Exception as e:
-                        logger.debug(f"Error in strategy {strategy.__name__} for {doi}: {e}")  # ADDED
-
-
-                # If no strategy worked, wait a little before retrying
-                await asyncio.sleep(1)  # Short pause between retries
-
-                if retry == max_retries - 1:  # log a final failure once max retries is reached
-                    logger.warning(f"FINAL FAILURE: Could not download DOI {doi} after {max_retries} attempts")
+        try:
+            # Search for open access link
+            url = f"https://api.crossref.org/works/{doi}"
+            response = await session.get(url, headers=self.headers, timeout=10)
+
+            if response.status == 200:
+                data = await response.json()
+                work = data.get('message', {})
+
+                # Search for open access links
+                links = work.get('link', [])
+                for link in links:
+                    if link.get('content-type') == 'application/pdf':
+                        pdf_url = link.get('URL')
+                        if pdf_url:
+                            pdf_response = await session.get(pdf_url, headers=self.headers)
+                            if 'application/pdf' in pdf_response.headers.get('Content-Type', ''):
+                                logger.debug(f"Found PDF from: {pdf_url}")
+                                return await pdf_response.read()
 
+        except Exception as e:
+            logger.debug(f"Crossref error for {doi}: {e}")
 
-                # If all retries are exhausted
-                return None
+        return None
+
+    async def download_with_retry_async(self, doi, max_retries=3, initial_delay=2):
+        """Downloads a paper using multiple strategies with exponential backoff and async requests"""
+        pdf_content = None
+        retries = 0
+        delay = initial_delay
 
-    async def _download_single_doi(self, doi, cancel_event):
+        async with aiohttp.ClientSession() as session:
+            while retries < max_retries and not pdf_content:
+                try:
+                    pdf_content = (
+                        await self.download_paper_direct_doi_async(session, doi) or
+                        await self.download_paper_scihub_async(session, doi) or
+                        await self.download_paper_libgen_async(session, doi) or
+                        await self.download_paper_google_scholar_async(session, doi) or
+                        await self.download_paper_crossref_async(session, doi)
+
+                    )
+                    if pdf_content:
+                        return pdf_content
+                except Exception as e:
+                    logger.error(f"Error in download attempt {retries + 1} for DOI {doi}: {e}")
+
+                if not pdf_content:
+                    retries += 1
+                    logger.warning(f"Retry attempt {retries} for DOI: {doi} after {delay} seconds")
+                    await asyncio.sleep(delay)
+                    delay *= 2  # Exponential backoff
+
+        return None
+
+
+    async def _download_single_doi(self, doi):
         """Download a single DOI with progress feedback"""
         if not doi:
             return None, "Error: DOI not provided", "Error: DOI not provided"
@@ -348,7 +326,7 @@ class PaperDownloader:
                 # Write the PDF content
 
                 with open(filepath, 'wb') as f:
-                    f.write(pdf_content)
+                    f.write(pdf_content)
 
                 logger.info(f"Saved PDF to file: {filepath}")
 
@@ -357,13 +335,12 @@ class PaperDownloader:
                     return filepath, f"Successfully downloaded: <a href='https://doi.org/{doi}'>{doi}</a>", ""
 
                 else:
-                    logger.warning(f"Could not download: {doi}")
-                    return None, f"Could not download {doi}", f'<a href="https://doi.org/{doi}">{doi}</a>'
+                    logger.warning(f"Could not download: {doi}")
+                    return None, f"Could not download {doi}", f'<a href="https://doi.org/{doi}">{doi}</a>'
 
         except CancelledError:
             logger.info(f"Download Cancelled DOI: {doi}")
             return None, f"Download cancelled {doi}", "Download Cancelled"
-
         except Exception as e:
             logger.error(f"Error processing {doi}: {e}")
             return None, f"Error processing {doi}: {e}", f"Error processing {doi}: {e}"
@@ -380,7 +357,7 @@ class PaperDownloader:
 
         # Validate the DOI list
         if not dois:
-            return None, "Error: No valid DOIs provided", "Error: No valid DOIs provided", ""
+            return None, "Error: No valid DOIs provided", "Error: No valid DOIs provided", ""
 
         # Lists to track results
         downloaded_files = []  # Paths of downloaded files
@@ -389,44 +366,47 @@ class PaperDownloader:
 
         for i, doi in enumerate(dois):
             result = await self._download_single_doi(doi, cancel_event)
+
             if cancel_event.is_set():
                 logger.info("Downloads cancelled on multiple dois download")
-                return None, "Downloads cancelled", "Downloads cancelled", ""  # early return on cancelled
+                return None, "Downloads cancelled", "Downloads cancelled", ""  # early return on cancelled
+
             if result is None:
-                continue
+                continue
 
             if isinstance(result, Exception):
                 # Unexpected exception
-                error_msg = f"Unexpected error: {str(result)}"
-                logger.error(f"Error downloading {doi}: {error_msg}")
-                failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
-
+                error_msg = f"Unexpected error: {str(result)}"
+                logger.error(f"Error downloading {doi}: {error_msg}")
+                failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
+
             elif result[0] is None:
                 # Failed download (result from download_single_doi_async)
                 error_msg = result[1]
                 logger.warning(f"Failed to download {doi}: {error_msg}")
                 failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
-
+
             else:
                 # Successful download
                 filepath = result[0]
-
+
                 # Generate a unique filename
-                filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"  # fixed indentation
-                filepath_unique = os.path.join(self.output_dir, filename)  # fixed indentation
+                filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"  # fixed indentation
+                filepath_unique = os.path.join(self.output_dir, filename)  # fixed indentation
 
                 try:
-                    # Rename the file
-                    os.rename(filepath, filepath_unique)  # fixed indentation
+                    # Rename the file
+                    os.rename(filepath, filepath_unique)  # fixed indentation
 
                     # Add to the list of downloaded files
-                    downloaded_files.append(filepath_unique)  # fixed indentation
-                    downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')  # fixed indentation
+                    downloaded_files.append(filepath_unique)  # fixed indentation
+                    downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')  # fixed indentation
 
                 except Exception as rename_error:
-                    logger.error(f"Error renaming file for {doi}: {rename_error}")
-                    failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - Error saving file')  # fixed indentation
-
+                    logger.error(f"Error renaming file for {doi}: {rename_error}")
+                    failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - Error saving file')  # fixed indentation
+
+
         # Create a ZIP file if any files were downloaded
         zip_filename = None
         if downloaded_files:
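
Note: the unique-filename step flattens a DOI into a filesystem-safe name by replacing '/' and '.'. For example (the DOI is illustrative):

    doi = "10.1000/xyz123.v2"
    filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
    print(filename)  # 10_1000_xyz123_v2.pdf
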
@@ -439,8 +419,9 @@ class PaperDownloader:
                 lambda: self.create_zip(zip_filename, downloaded_files)
             )
             logger.info(f"ZIP file created: {zip_filename}")
-        return zip_filename if downloaded_files else None, "\n".join(downloaded_links), "\n".join(failed_dois), ""
 
+        return zip_filename if downloaded_files else None, "\n".join(downloaded_links), "\n".join(failed_dois), ""
+
     async def process_bibtex(self, bib_file, cancel_event):
         """Process BibTeX file and download papers with multiple strategies and reports UI updates using a callback"""
         # Read BibTeX file content from the uploaded object
@@ -449,7 +430,7 @@ class PaperDownloader:
             bib_content = f.read()
         except Exception as e:
             logger.error(f"Error reading uploaded file {bib_file.name}: {e}")
-            return None, f"Error reading uploaded file {bib_file.name}: {e}", f"Error reading uploaded file {bib_file.name}: {e}", ""
+            return None, f"Error reading uploaded file {bib_file.name}: {e}", f"Error reading uploaded file {bib_file.name}: {e}", ""
 
         # Parse BibTeX data
         try:
@@ -466,47 +447,46 @@ class PaperDownloader:
         downloaded_files = []
         failed_dois = []
         downloaded_links = []
-
         for i, doi in enumerate(dois):
-            result = await self._download_single_doi(doi, cancel_event)
+            result = await self._download_single_doi(doi, cancel_event)
 
-            if cancel_event.is_set():
+            if cancel_event.is_set():
                 logger.info("Download Cancelled in bibtex mode")
-                return None, "Download Cancelled", "Download Cancelled", ""  # cancel if requested
-
-            if result is None:
-                continue
-
-            if isinstance(result, Exception):
-                # Unexpected exception
-                error_msg = f"Unexpected error: {str(result)}"
-                logger.error(f"Error downloading {doi}: {error_msg}")
-                failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
+                return None, "Download Cancelled", "Download Cancelled", ""  # cancel if requested
+            if result is None:
+                continue
+
+            if isinstance(result, Exception):
+                # Unexpected exception
+                error_msg = f"Unexpected error: {str(result)}"
+                logger.error(f"Error downloading {doi}: {error_msg}")
+                failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
 
-            elif result[0] is None:
+            elif result[0] is None:
                 # Failed download (result from download_single_doi_async)
-                error_msg = result[1]
-                logger.warning(f"Failed to download {doi}: {error_msg}")
-                failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
-
-            else:
-                # Successful download
+                error_msg = result[1]
+                logger.warning(f"Failed to download {doi}: {error_msg}")
+                failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
+
+            else:
+                # Successful download
                 filepath = result[0]
-
+
                 # Unique filename for zip
-                filename = f"{str(doi).replace('/', '_').replace('.', '_')}_{i}.pdf"
+                filename = f"{str(doi).replace('/', '_').replace('.', '_')}_{i}.pdf"  # fixed indentation
                 filepath_unique = os.path.join(self.output_dir, filename)
                 os.rename(filepath, filepath_unique)
                 downloaded_files.append(filepath_unique)
                 downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
 
+
         if downloaded_files:
             zip_filename = 'papers.zip'
             loop = asyncio.get_running_loop()
             loop.run_in_executor(self.executor, lambda: self.create_zip(zip_filename, downloaded_files))
             logger.info(f"ZIP file created: {zip_filename}")
-
-        return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois), ""
+
+        return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois), ""
 
     def create_zip(self, zip_filename, files):
         """Create a zip archive with the downloaded PDFs"""
@@ -529,32 +509,25 @@ def create_gradio_interface():
         return gr.Textbox.update(value=f"{message}"), gr.Textbox.update(value=f"<pre>{logs}</pre>")
 
 
-    async def download_papers(bib_file, doi_input, dois_input, output_file, downloaded_dois_textbox, failed_dois_textbox, logs, single_file):
-        cancel_event = asyncio.Event()  # Create cancellation event for every submission.
-        downloader.cancel_event = cancel_event  # store the event so that it is available to stop the process
-        if bib_file:
-            # Check file type
-            if not bib_file.name.lower().endswith('.bib'):
-                return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", "", None  # added for consistent return values
-
-            zip_file, downloaded_dois, failed_dois, logs_text = await downloader.process_bibtex(bib_file, cancel_event)
-
-            return zip_file, downloaded_dois, failed_dois, logs_text, None  # return a 5-tuple, as the outputs require
-
-        elif doi_input:
-
-            filepath, message, error = await downloader._download_single_doi(doi_input, cancel_event)
-            return None, message, error, "", filepath  # correct returns for single execution
-
-        elif dois_input:
-
-            zip_file, downloaded_dois, failed_dois, logs_text = await downloader.download_multiple_dois(dois_input, cancel_event)
-
-            return zip_file, downloaded_dois, failed_dois, logs_text, None  # correctly send None
-
-        else:
-            return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", "", None  # must also return five values to satisfy the Gradio outputs
-
+    async def download_papers(bib_file, doi_input, dois_input):
+        cancel_event = asyncio.Event()  # Create cancellation event for every submission.
+        downloader.cancel_event = cancel_event  # store the event so that it is available to stop the process
+        if bib_file:
+            # Check file type
+            if not bib_file.name.lower().endswith('.bib'):
+                return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", "", None  # fixed with default value
+
+            zip_file, downloaded_dois, failed_dois, logs_text = await downloader.process_bibtex(bib_file, cancel_event)
+            return zip_file, downloaded_dois, failed_dois, logs_text, None
+        elif doi_input:
+            filepath, message, error = await downloader._download_single_doi(doi_input, cancel_event)
+            return None, message, error, "", filepath  # correct returns
+
+        elif dois_input:
+            zip_file, downloaded_dois, failed_dois, logs_text = await downloader.download_multiple_dois(dois_input, cancel_event)
+            return zip_file, downloaded_dois, failed_dois, logs_text, None
+        else:
+            return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", "", None
 
     with gr.Blocks(theme="Hev832/Applio", css="""
         .gradio-container {
@@ -615,12 +588,11 @@ def create_gradio_interface():
     submit_button.click(
         download_papers,
         inputs=[bib_file, doi_input, dois_input],
-        outputs=[output_file, downloaded_dois_textbox, failed_dois_textbox, logs, single_file]  # the new output should be a tuple and we output logs too for debugging.
+        outputs=[output_file, downloaded_dois_textbox, failed_dois_textbox, logs, single_file],  # the new output should be a tuple and we output logs too for debugging.
     )
-
+
     interface.title = "🔬 Academic Paper Batch Downloader"
     interface.description = "Upload a BibTeX file or enter DOIs to download PDFs. We'll attempt to fetch PDFs from multiple sources like Sci-Hub, Libgen, Google Scholar and Crossref. You can use any of the three inputs at any moment."
-
     return interface
 
 def main():
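
Note: every branch of download_papers now returns exactly five values, one per component wired into outputs. A minimal sketch of that contract (component names here are illustrative):

    import gradio as gr

    def handler(text):
        # Gradio maps the returned tuple positionally onto `outputs`,
        # so every branch must return exactly len(outputs) values.
        if not text:
            return None, 'no input', 'no input', '', None
        return None, f'got: {text}', '', '', None

    with gr.Blocks() as demo:
        inp = gr.Textbox()
        out_file = gr.File()
        ok_box = gr.Textbox()
        err_box = gr.Textbox()
        logs_box = gr.Textbox()
        single = gr.File()
        gr.Button('Run').click(handler, inputs=[inp],
                               outputs=[out_file, ok_box, err_box, logs_box, single])
    # demo.launch()
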
 