C2MV committed
Commit 707bdc5 · verified · 1 Parent(s): 64cb1ff

Update app.py

Files changed (1)
  1. app.py +58 -324
app.py CHANGED
@@ -12,7 +12,6 @@ from bs4 import BeautifulSoup
  import io
  import asyncio
  import aiohttp
- from playwright.async_api import async_playwright
 
  # Configure logging
  logging.basicConfig(level=logging.INFO,
@@ -42,51 +41,7 @@ class PaperDownloader:
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
  'Accept-Language': 'en-US,en;q=0.9',
  }
-
- self.playwright_browser = None
- self.playwright_lock = asyncio.Lock() # Added lock
-
-
- async def initialize_playwright(self):
- """Initialize the playwright browser instance to be used by the tool."""
- async with self.playwright_lock:
- if not self.playwright_browser:
- try:
- playwright = await async_playwright().start()
- self.playwright_browser = await playwright.chromium.launch()
- except Exception as e:
- logger.error(f"Error initializing Playwright browser: {e}")
-
- async def close_playwright(self):
- """Closes the playwright browser, must be called at the end of the execution."""
- async with self.playwright_lock:
- if self.playwright_browser:
- try:
- await self.playwright_browser.close()
- self.playwright_browser = None
- except Exception as e:
- logger.error(f"Error closing Playwright browser: {e}")
-
- async def get_html_with_playwright(self, doi_url):
- """Utility function to fetch content with playwright with try-catch."""
- if not self.playwright_browser:
- await self.initialize_playwright()
-
- if not self.playwright_browser:
- logger.error(f"Playwright browser is not initialized for url: {doi_url}")
- return None
- page = None
- try:
- page = await self.playwright_browser.new_page()
- await page.goto(doi_url, timeout=30000)
- return await page.content()
- except Exception as e:
- logger.debug(f"Error navigating or getting content for url: {doi_url}: {e}")
- return None
- finally:
- if page:
- await page.close()
-
+
  def clean_doi(self, doi):
  """Clean and encode DOI for URL"""
  if not isinstance(doi, str):
@@ -102,43 +57,43 @@ class PaperDownloader:
  except Exception as e:
  logger.debug(f"Error fetching {url}: {e}")
  return None, None
-
+
 
  async def download_paper_direct_doi_async(self, session, doi):
- """Attempt to download the pdf from the landing page of the doi, now with javascript rendering"""
- if not doi:
+ """Attempt to download the pdf from the landing page of the doi"""
+ if not doi:
  return None
-
- try:
- doi_url = f"https://doi.org/{self.clean_doi(doi)}"
-
- html_content = await self.get_html_with_playwright(doi_url)
-
- if not html_content:
- return None
-
- pdf_patterns = [
- r'(https?://[^\s<>"]+?\.pdf)',
- r'(https?://[^\s<>"]+?download/[^\s<>"]+)',
- r'(https?://[^\s<>"]+?\/pdf\/[^\s<>"]+)',
- ]
-
- pdf_urls = []
- for pattern in pdf_patterns:
- pdf_urls.extend(re.findall(pattern, html_content))
-
- for pdf_url in pdf_urls:
- try:
- pdf_response = await session.get(pdf_url, headers=self.headers, timeout=10)
- if 'application/pdf' in pdf_response.headers.get('Content-Type', ''):
- logger.debug(f"Found PDF from: {pdf_url}")
- return await pdf_response.read()
- except Exception as e:
- logger.debug(f"Error downloading PDF from {pdf_url}: {e}")
-
- except Exception as e:
- logger.debug(f"Error trying to get the PDF from {doi}: {e}")
- return None
+
+ try:
+ doi_url = f"https://doi.org/{self.clean_doi(doi)}"
+ text, headers = await self.fetch_with_headers(session, doi_url, timeout=15)
+ if not text:
+ return None
+
+ pdf_patterns = [
+ r'(https?://[^\s<>"]+?\.pdf)',
+ r'(https?://[^\s<>"]+?download/[^\s<>"]+)',
+ r'(https?://[^\s<>"]+?\/pdf\/[^\s<>"]+)',
+ ]
+
+ pdf_urls = []
+ for pattern in pdf_patterns:
+ pdf_urls.extend(re.findall(pattern, text))
+
+ for pdf_url in pdf_urls:
+ try:
+ pdf_response = await session.get(pdf_url, headers=self.headers, timeout=10)
+ if 'application/pdf' in pdf_response.headers.get('Content-Type', ''):
+ logger.debug(f"Found PDF from: {pdf_url}")
+ return await pdf_response.read()
+ except Exception as e:
+ logger.debug(f"Error downloading PDF from {pdf_url}: {e}")
+
+
+ except Exception as e:
+ logger.debug(f"Error trying to get the PDF from {doi}: {e}")
+
+ return None
 
  async def download_paper_scihub_async(self, session, doi):
  """Improved method to download paper from Sci-Hub using async requests"""
@@ -259,10 +214,13 @@ class PaperDownloader:
  if link.get('content-type') == 'application/pdf':
  pdf_url = link.get('URL')
  if pdf_url:
- pdf_response = await session.get(pdf_url, headers=self.headers)
- if 'application/pdf' in pdf_response.headers.get('Content-Type', ''):
- logger.debug(f"Found PDF from: {pdf_url}")
- return await pdf_response.read()
+ try:
+ pdf_response = await session.get(pdf_url, headers=self.headers, timeout=10)
+ if 'application/pdf' in pdf_response.headers.get('Content-Type', ''):
+ logger.debug(f"Found PDF from: {pdf_url}")
+ return await pdf_response.read()
+ except Exception as e:
+ logger.debug(f"Error fetching from {pdf_url}")
 
  except Exception as e:
  logger.debug(f"Crossref error for {doi}: {e}")
@@ -279,12 +237,13 @@ class PaperDownloader:
  while retries < max_retries and not pdf_content:
  try:
  pdf_content = (
- await self.download_paper_direct_doi_async(session, doi) or
+ await self.download_paper_direct_doi_async(session, doi) or
  await self.download_paper_scihub_async(session, doi) or
  await self.download_paper_libgen_async(session, doi) or
  await self.download_paper_google_scholar_async(session, doi) or
  await self.download_paper_crossref_async(session, doi)
- )
+
+ )
  if pdf_content:
  return pdf_content
  except Exception as e:
@@ -298,178 +257,14 @@ class PaperDownloader:
 
  return None
 
- def download_paper_scihub(self, doi):
- """Improved method to download paper from Sci-Hub"""
- if not doi:
- logger.warning("DOI not provided")
- return None
-
- for base_url in self.download_sources:
- try:
- scihub_url = f"{base_url}{self.clean_doi(doi)}"
-
- # Request with more tolerance
- response = requests.get(scihub_url,
- headers=self.headers,
- allow_redirects=True,
- timeout=15)
-
- # Search for multiple PDF URL patterns
- pdf_patterns = [
- r'(https?://[^\s<>"]+?\.pdf)',
- r'(https?://[^\s<>"]+?download/[^\s<>"]+)',
- r'(https?://[^\s<>"]+?\/pdf\/[^\s<>"]+)',
- ]
-
- pdf_urls = []
- for pattern in pdf_patterns:
- pdf_urls.extend(re.findall(pattern, response.text))
-
- # Try downloading from found URLs
- for pdf_url in pdf_urls:
- try:
- pdf_response = requests.get(pdf_url,
- headers=self.headers,
- timeout=10)
-
- # Verify if it's a PDF
- if 'application/pdf' in pdf_response.headers.get('Content-Type', ''):
- logger.debug(f"Found PDF from: {pdf_url}")
- return pdf_response.content
- except Exception as e:
- logger.debug(f"Error downloading PDF from {pdf_url}: {e}")
-
- except Exception as e:
- logger.debug(f"Error trying to download {doi} from {base_url}: {e}")
-
- return None
-
- def download_paper_libgen(self, doi):
- """Download from Libgen, handles the query and the redirection"""
- if not doi:
- return None
-
- base_url = 'https://libgen.rs/scimag/'
- try:
- search_url = f"{base_url}?q={self.clean_doi(doi)}"
- response = requests.get(search_url, headers=self.headers, allow_redirects=True, timeout=10)
- response.raise_for_status()
-
- if "No results" in response.text:
- logger.debug(f"No results for DOI: {doi} on libgen")
- return None
-
- soup = BeautifulSoup(response.text, 'html.parser')
-
- # Find the link using a specific selector
- links = soup.select('table.c > tbody > tr:nth-child(2) > td:nth-child(1) > a')
-
- if links:
- link = links[0]
- pdf_url = link['href']
- pdf_response = requests.get(pdf_url, headers=self.headers, allow_redirects=True, timeout=10)
- if 'application/pdf' in pdf_response.headers.get('Content-Type', ''):
- logger.debug(f"Found PDF from: {pdf_url}")
- return pdf_response.content
-
- except Exception as e:
- logger.debug(f"Error trying to download {doi} from libgen: {e}")
- return None
-
- def download_paper_google_scholar(self, doi):
- """Search google scholar to find an article with the given doi, try to get the pdf"""
- if not doi:
- return None
-
- try:
- query = f'doi:"{doi}"'
- params = {'q': query}
- url = f'https://scholar.google.com/scholar?{urlencode(params)}'
-
- response = requests.get(url, headers=self.headers, timeout=10)
- response.raise_for_status()
-
- soup = BeautifulSoup(response.text, 'html.parser')
-
- # Find any links with [PDF]
- links = soup.find_all('a', string=re.compile(r'\[PDF\]', re.IGNORECASE))
-
- if links:
- pdf_url = links[0]['href']
- pdf_response = requests.get(pdf_url, headers=self.headers, timeout=10)
- if 'application/pdf' in pdf_response.headers.get('Content-Type', ''):
- logger.debug(f"Found PDF from: {pdf_url}")
- return pdf_response.content
- except Exception as e:
- logger.debug(f"Google Scholar error for {doi}: {e}")
-
- return None
-
- def download_paper_crossref(self, doi):
- """Alternative search method using Crossref"""
- if not doi:
- return None
-
- try:
- # Search for open access link
- url = f"https://api.crossref.org/works/{doi}"
- response = requests.get(url, headers=self.headers, timeout=10)
-
- if response.status_code == 200:
- data = response.json()
- work = data.get('message', {})
-
- # Search for open access links
- links = work.get('link', [])
- for link in links:
- if link.get('content-type') == 'application/pdf':
- pdf_url = link.get('URL')
- if pdf_url:
- pdf_response = requests.get(pdf_url, headers=self.headers)
- if 'application/pdf' in pdf_response.headers.get('Content-Type', ''):
- logger.debug(f"Found PDF from: {pdf_url}")
- return pdf_response.content
-
- except Exception as e:
- logger.debug(f"Crossref error for {doi}: {e}")
-
- return None
-
- def download_with_retry(self, doi, max_retries=3, initial_delay=2):
- """Downloads a paper using multiple strategies with exponential backoff"""
- pdf_content = None
- retries = 0
- delay = initial_delay
-
- while retries < max_retries and not pdf_content:
- try:
- pdf_content = (
- self.download_paper_scihub(doi) or
- self.download_paper_libgen(doi) or
- self.download_paper_google_scholar(doi) or
- self.download_paper_crossref(doi)
- )
-
- if pdf_content:
- return pdf_content
- except Exception as e:
- logger.error(f"Error in download attempt {retries + 1} for DOI {doi}: {e}")
-
- if not pdf_content:
- retries += 1
- logger.warning(f"Retry attempt {retries} for DOI: {doi} after {delay} seconds")
- time.sleep(delay)
- delay *= 2 # Exponential backoff
-
- return None
-
- def download_single_doi(self, doi):
+
+ async def download_single_doi_async(self, doi):
  """Downloads a single paper using a DOI"""
  if not doi:
  return None, "Error: DOI not provided", "Error: DOI not provided"
 
  try:
- pdf_content = self.download_with_retry(doi)
+ pdf_content = await self.download_with_retry_async(doi)
 
  if pdf_content:
  if doi is None:
@@ -488,7 +283,8 @@ class PaperDownloader:
  logger.error(f"Error processing {doi}: {e}")
  return None, f"Error processing {doi}: {e}", f"Error processing {doi}: {e}"
 
- def download_multiple_dois(self, dois_text):
+
+ async def download_multiple_dois_async(self, dois_text):
  """Downloads multiple papers from a list of DOIs"""
  if not dois_text:
  return None, "Error: No DOIs provided", "Error: No DOIs provided"
@@ -501,9 +297,9 @@ class PaperDownloader:
  failed_dois = []
  downloaded_links = []
  for i, doi in enumerate(tqdm(dois, desc="Downloading papers")):
- filepath, success_message, fail_message = self.download_single_doi(doi)
+ filepath, success_message, fail_message = await self.download_single_doi_async(doi)
  if filepath:
- # Unique filename for zip
+ # Unique filename for zip
  filename = f"{str(doi).replace('/', '_').replace('.', '_')}_{i}.pdf"
  filepath_unique = os.path.join(self.output_dir, filename)
  os.rename(filepath, filepath_unique)
@@ -522,67 +318,6 @@ class PaperDownloader:
 
  return zip_filename if downloaded_files else None, "\n".join(downloaded_links), "\n".join(failed_dois)
 
- def process_bibtex(self, bib_file):
- """Process BibTeX file and download papers with multiple strategies"""
- # Read BibTeX file content from the uploaded object
- try:
- with open(bib_file.name, 'r', encoding='utf-8') as f:
- bib_content = f.read()
- except Exception as e:
- logger.error(f"Error reading uploaded file {bib_file.name}: {e}")
- return None, f"Error reading uploaded file {bib_file.name}: {e}", f"Error reading uploaded file {bib_file.name}: {e}", None
-
- # Parse BibTeX data
- try:
- bib_database = bibtexparser.loads(bib_content)
- except Exception as e:
- logger.error(f"Error parsing BibTeX data: {e}")
- return None, f"Error parsing BibTeX data: {e}", f"Error parsing BibTeX data: {e}", None
-
- # Extract DOIs
- dois = [entry.get('doi') for entry in bib_database.entries if entry.get('doi')]
- logger.info(f"Found {len(dois)} DOIs to download")
-
- # Result lists
- downloaded_files = []
- failed_dois = []
- downloaded_links = []
-
- # Download PDFs
- for doi in tqdm(dois, desc="Downloading papers"):
- try:
- # Try to download with multiple methods with retries
- pdf_content = self.download_with_retry(doi)
-
- # Save PDF
- if pdf_content:
- if doi is None:
- return None, "Error: DOI not provided", "Error: DOI not provided", None
- filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
- filepath = os.path.join(self.output_dir, filename)
-
- with open(filepath, 'wb') as f:
- f.write(pdf_content)
-
- downloaded_files.append(filepath)
- downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
- logger.info(f"Successfully downloaded: {filename}")
- else:
- failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
-
- except Exception as e:
- failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
- logger.error(f"Error processing {doi}: {e}")
-
- # Create ZIP of downloaded papers
- if downloaded_files:
- zip_filename = 'papers.zip'
- with zipfile.ZipFile(zip_filename, 'w') as zipf:
- for file_path in downloaded_files:
- zipf.write(file_path, arcname=os.path.basename(file_path))
- logger.info(f"ZIP file created: {zip_filename}")
-
- return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois), None
 
  async def process_bibtex_async(self, bib_file):
  """Process BibTeX file and download papers with multiple strategies"""
@@ -611,7 +346,7 @@ class PaperDownloader:
  downloaded_links = []
 
  # Download PDFs
- for doi in tqdm(dois, desc="Downloading papers"):
+ for i,doi in enumerate(tqdm(dois, desc="Downloading papers")):
  try:
  # Try to download with multiple methods with retries
  pdf_content = await self.download_with_retry_async(doi)
@@ -620,7 +355,7 @@ class PaperDownloader:
  if pdf_content:
  if doi is None:
  return None, "Error: DOI not provided", "Error: DOI not provided", None
- filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
+ filename = f"{str(doi).replace('/', '_').replace('.', '_')}_{i}.pdf"
  filepath = os.path.join(self.output_dir, filename)
 
  with open(filepath, 'wb') as f:
@@ -643,11 +378,10 @@ class PaperDownloader:
  for file_path in downloaded_files:
  zipf.write(file_path, arcname=os.path.basename(file_path))
  logger.info(f"ZIP file created: {zip_filename}")
-
- await self.close_playwright()
 
  return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois), None
 
+
  def create_gradio_interface():
  """Create Gradio interface for Paper Downloader"""
  downloader = PaperDownloader()
@@ -658,13 +392,13 @@ def create_gradio_interface():
  if not bib_file.name.lower().endswith('.bib'):
  return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", None
 
- zip_path, downloaded_dois, failed_dois, _ = await downloader.process_bibtex_async(bib_file)
+ zip_path, downloaded_dois, failed_dois, = await downloader.process_bibtex_async(bib_file)
  return zip_path, downloaded_dois, failed_dois, None
  elif doi_input:
- filepath, message, failed_doi = downloader.download_single_doi(doi_input)
+ filepath, message, failed_doi = await downloader.download_single_doi_async(doi_input)
  return None, message, failed_doi, filepath
  elif dois_input:
- zip_path, downloaded_dois, failed_dois = downloader.download_multiple_dois(dois_input)
+ zip_path, downloaded_dois, failed_dois = await downloader.download_multiple_dois_async(dois_input)
  return zip_path, downloaded_dois, failed_dois, None
  else:
  return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", None
 