Update app.py

app.py (CHANGED)
@@ -283,7 +283,6 @@ class PaperDownloader:
             logger.error(f"Error processing {doi}: {e}")
             return None, f"Error processing {doi}: {e}", f"Error processing {doi}: {e}"
 
-
     async def download_multiple_dois_async(self, dois_text):
         """Downloads multiple papers from a list of DOIs"""
         if not dois_text:
@@ -299,13 +298,12 @@ class PaperDownloader:
         for i, doi in enumerate(tqdm(dois, desc="Downloading papers")):
             filepath, success_message, fail_message = await self.download_single_doi_async(doi)
             if filepath:
-
+                # Unique filename for zip
                 filename = f"{str(doi).replace('/', '_').replace('.', '_')}_{i}.pdf"
                 filepath_unique = os.path.join(self.output_dir, filename)
                 os.rename(filepath, filepath_unique)
                 downloaded_files.append(filepath_unique)
                 downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
-
             else:
                 failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
 
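The hunk above tidies blank lines and adds a comment over the renaming step: the DOI is made filesystem-safe by replacing '/' and '.' with '_', and the loop index i is appended so repeated DOIs get distinct names inside the zip. A minimal standalone sketch of that sanitization (the helper name sanitize_doi_filename is ours for illustration, not part of app.py):

    def sanitize_doi_filename(doi, index):
        # Same expression as the inline code in app.py: '/' and '.' become '_'
        # so the DOI is a safe filename; the index keeps names unique per batch.
        return f"{str(doi).replace('/', '_').replace('.', '_')}_{index}.pdf"

    assert sanitize_doi_filename("10.1000/xyz123", 0) == "10_1000_xyz123_0.pdf"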
@@ -318,7 +316,6 @@ class PaperDownloader:
 
         return zip_filename if downloaded_files else None, "\n".join(downloaded_links), "\n".join(failed_dois)
 
-
     async def process_bibtex_async(self, bib_file):
         """Process BibTeX file and download papers with multiple strategies"""
         # Read BibTeX file content from the uploaded object
@@ -327,14 +324,14 @@ class PaperDownloader:
                 bib_content = f.read()
         except Exception as e:
             logger.error(f"Error reading uploaded file {bib_file.name}: {e}")
-            return None, f"Error reading uploaded file {bib_file.name}: {e}", f"Error reading uploaded file {bib_file.name}: {e}"
+            return None, f"Error reading uploaded file {bib_file.name}: {e}", f"Error reading uploaded file {bib_file.name}: {e}"
 
         # Parse BibTeX data
         try:
             bib_database = bibtexparser.loads(bib_content)
         except Exception as e:
             logger.error(f"Error parsing BibTeX data: {e}")
-            return None, f"Error parsing BibTeX data: {e}", f"Error parsing BibTeX data: {e}"
+            return None, f"Error parsing BibTeX data: {e}", f"Error parsing BibTeX data: {e}"
 
         # Extract DOIs
         dois = [entry.get('doi') for entry in bib_database.entries if entry.get('doi')]
@@ -346,7 +343,7 @@ class PaperDownloader:
         downloaded_links = []
 
         # Download PDFs
-        for i,doi in enumerate(tqdm(dois, desc="Downloading papers")):
+        for i, doi in enumerate(tqdm(dois, desc="Downloading papers")):
             try:
                 # Try to download with multiple methods with retries
                 pdf_content = await self.download_with_retry_async(doi)
@@ -354,7 +351,7 @@ class PaperDownloader:
                 # Save PDF
                 if pdf_content:
                     if doi is None:
-                        return None, "Error: DOI not provided", "Error: DOI not provided"
+                        return None, "Error: DOI not provided", "Error: DOI not provided"
                     filename = f"{str(doi).replace('/', '_').replace('.', '_')}_{i}.pdf"
                     filepath = os.path.join(self.output_dir, filename)
 
@@ -379,8 +376,7 @@ class PaperDownloader:
                 zipf.write(file_path, arcname=os.path.basename(file_path))
         logger.info(f"ZIP file created: {zip_filename}")
 
-        return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois)
-
+        return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois)
 
 def create_gradio_interface():
     """Create Gradio interface for Paper Downloader"""
@@ -392,7 +388,7 @@ def create_gradio_interface():
             if not bib_file.name.lower().endswith('.bib'):
                 return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", None
 
-            zip_path, downloaded_dois, failed_dois
+            zip_path, downloaded_dois, failed_dois = await downloader.process_bibtex_async(bib_file)
             return zip_path, downloaded_dois, failed_dois, None
         elif doi_input:
             filepath, message, failed_doi = await downloader.download_single_doi_async(doi_input)
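This hunk carries the substantive fix of the commit. The old line was a bare tuple expression: Python evaluates it by reading three names that were never assigned, so the line raised NameError and downloader.process_bibtex_async was never called. The new line awaits the coroutine and binds its three results. A reduced illustration (handle_bib is a hypothetical wrapper, not from app.py):

    async def handle_bib(downloader, bib_file):
        # Old code was the bare expression below; reading the unbound names
        # raised NameError before any download could start:
        #     zip_path, downloaded_dois, failed_dois
        # Fixed code: actually await the coroutine and bind the results.
        zip_path, downloaded_dois, failed_dois = await downloader.process_bibtex_async(bib_file)
        return zip_path, downloaded_dois, failed_dois, None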
@@ -403,6 +399,7 @@ def create_gradio_interface():
         else:
             return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", None
 
+
     # Gradio Interface
     interface = gr.Interface(
         fn=download_papers,
@@ -429,7 +426,7 @@ def create_gradio_interface():
             <div id="failed-dois"></div>
         </div>
         """),
-
+        gr.File(label="Downloaded Single PDF")
        ],
        title="🔬 Academic Paper Batch Downloader",
        description="Upload a BibTeX file or enter DOIs to download PDFs. We'll attempt to fetch PDFs from multiple sources like Sci-Hub, Libgen, Google Scholar and Crossref. You can use any of the three inputs at any moment.",
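The diff stops before the part of app.py that starts the app. Assuming the file follows the usual Gradio pattern, the entry point would look roughly like this (the __main__ block is an assumption; it is not shown in this commit):

    # Hypothetical entry point; not visible in the diff above.
    if __name__ == "__main__":
        interface = create_gradio_interface()
        interface.launch()  # gr.Interface.launch() serves the web UI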