FreeBibTec2

Sleeping

App Files Files Community

C2MV committed on Dec 14, 2024

Commit

f732808

verified ·

1 Parent(s): 565c23e

Update app.py

Browse files

Files changed (1) hide show

app.py +121 -126

app.py CHANGED Viewed

@@ -307,57 +307,48 @@ class PaperDownloader:
                     delay *= 2  # Exponential backoff
         return None
     async def _download_single_doi(self, doi):
-            """Descargar un único DOI con retroalimentación de progreso"""
-            if not doi:
-                return None, "Error: DOI no proporcionado", "Error: DOI no proporcionado"
-            logger.info(f"Starting download process for DOI: {doi}")
-            try:
-                pdf_content = await self.download_with_retry_async(doi)
-                if pdf_content:
                     logger.info(f"Downloaded PDF for DOI: {doi}")
                     filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
                     filepath = os.path.join(self.output_dir, filename)
-                    # Escribir contenido del PDF
                     with open(filepath, 'wb') as f:
                         f.write(pdf_content)
                     logger.info(f"Saved PDF to file: {filepath}")
                     logger.info(f"Descarga exitosa: {filename}")
                     return filepath, f"Descargado exitosamente: <a href='https://doi.org/{doi}'>{doi}</a>", ""
-                else:
-                     logger.warning(f"No se pudo descargar: {doi}")
-                     return None, f"No se pudo descargar {doi}", f'<a href="https://doi.org/{doi}">{doi}</a>'
-            except CancelledError:
-                logger.info(f"Download Cancelled DOI: {doi}")
-                return None, f"Download cancelled {doi}","Download Cancelled"
-            except Exception as e:
-                logger.error(f"Error processing {doi}: {e}")
-                return None, f"Error processing {doi}: {e}", f"Error processing {doi}: {e}"
     async def download_multiple_dois(self, dois_text, cancel_event):
             """Download multiple DOIs"""
             if not dois_text:
                return None, "Error: No DOIs provided", "Error: No DOIs provided", ""
             # Sanitizar y filtrar DOIs
             # Eliminar líneas vacías, espacios en blanco, y DOIs duplicados
             dois = list(set([doi.strip() for doi in dois_text.split('\n') if doi.strip()]))
             # Validar lista de DOIs
             if not dois:
-                return None, "Error: No valid DOIs provided", "Error: No valid DOIs provided", ""
             # Listas para rastrear resultados
             downloaded_files = []      # Rutas de archivos descargados
@@ -365,46 +356,44 @@ class PaperDownloader:
             downloaded_links = []      # Links de DOIs descargados
             for i, doi in enumerate(dois):
-                    result = await self._download_single_doi(doi,cancel_event)
                     if cancel_event.is_set():
-                        logger.info("Downloads cancelled on multiple dois download")
-                        return None, "Downloads cancelled","Downloads cancelled", "" # early return on cancelled
                     if result is None:
-                       continue;
                     if isinstance(result, Exception):
                         # Excepción inesperada
-                         error_msg = f"Unexpected error: {str(result)}"
-                         logger.error(f"Error downloading {doi}: {error_msg}")
-                         failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
                     elif result[0] is None:
-                        # Descarga fallida (resultado de download_single_doi_async)
                         error_msg = result[1]
                         logger.warning(f"Failed to download {doi}: {error_msg}")
                         failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
                     else:
-                         # Descarga exitosa
                         filepath = result[0]
                          # Generar nombre de archivo único
-                        filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"  # indent problem corrected
-                        filepath_unique = os.path.join(self.output_dir, filename) #Fixed identation.
                         try:
-                              # Renombrar archivo
-                             os.rename(filepath, filepath_unique)  #Fixed identation.
-                            # Añadir a lista de archivos descargados
-                             downloaded_files.append(filepath_unique) #Fixed identation.
-                             downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')#Fixed identation.
                          except Exception as rename_error:
-                               logger.error(f"Error renaming file for {doi}: {rename_error}")
-                               failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - Error saving file')#Fixed identation.
             # Crear archivo ZIP si hay archivos descargados
@@ -419,75 +408,78 @@ class PaperDownloader:
                     lambda: self.create_zip(zip_filename, downloaded_files)
                 )
                 logger.info(f"ZIP file created: {zip_filename}")
-            return  zip_filename if downloaded_files else None, "\n".join(downloaded_links),"\n".join(failed_dois), ""
-    async def process_bibtex(self, bib_file, cancel_event):
-            """Process BibTeX file and download papers with multiple strategies and reports UI updates using a callback"""
-            # Read BibTeX file content from the uploaded object
-            try:
-                with open(bib_file.name, 'r', encoding='utf-8') as f:
-                    bib_content = f.read()
-            except Exception as e:
-                logger.error(f"Error reading uploaded file {bib_file.name}: {e}")
-                return None, f"Error reading uploaded file {bib_file.name}: {e}", f"Error reading uploaded file {bib_file.name}: {e}", ""
-            # Parse BibTeX data
-            try:
-                bib_database = bibtexparser.loads(bib_content)
-            except Exception as e:
-                logger.error(f"Error parsing BibTeX data: {e}")
-                return None,f"Error parsing BibTeX data: {e}", f"Error parsing BibTeX data: {e}",""
-            # Extract DOIs
-            dois = [entry.get('doi') for entry in bib_database.entries if entry.get('doi')]
-            logger.info(f"Found {len(dois)} DOIs to download")
-             # Result lists
-            downloaded_files = []
-            failed_dois = []
-            downloaded_links = []
-            for i,doi in enumerate(dois):
-                result = await self._download_single_doi(doi, cancel_event)
                 if cancel_event.is_set():
-                       logger.info("Download Cancelled in bibtex mode")
-                       return  None, "Download Cancelled", "Download Cancelled", ""#cancel if requested
                 if result is None:
-                    continue
                 if isinstance(result, Exception):
-                     # Excepción inesperada
                      error_msg = f"Unexpected error: {str(result)}"
                      logger.error(f"Error downloading {doi}: {error_msg}")
                      failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
-                 elif result[0] is None:
-                     # Descarga fallida (resultado de download_single_doi_async)
-                    error_msg = result[1]
-                    logger.warning(f"Failed to download {doi}: {error_msg}")
-                    failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
-                 else:
-                     # Descarga exitosa
-                     filepath = result[0]
                     # Unique filename for zip
-                     filename = f"{str(doi).replace('/', '_').replace('.', '_')}_{i}.pdf" #indent fixed
-                     filepath_unique = os.path.join(self.output_dir, filename)
-                     os.rename(filepath, filepath_unique)
-                     downloaded_files.append(filepath_unique)
-                     downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
-            if downloaded_files:
-                zip_filename = 'papers.zip'
-                loop = asyncio.get_running_loop()
-                loop.run_in_executor(self.executor, lambda:  self.create_zip(zip_filename,downloaded_files))
-                logger.info(f"ZIP file created: {zip_filename}")
-            return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois), ""
     def create_zip(self, zip_filename, files):
         """Crea un archivo zip con los pdfs descargados"""
         with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zf:
@@ -504,30 +496,32 @@ def create_gradio_interface():
     """Create Gradio interface for Paper Downloader"""
     downloader = PaperDownloader()
     def update_progress( message="", logs=""):
-       return  gr.Textbox.update(value=f"{message}"),gr.Textbox.update(value=f"<pre>{logs}</pre>")
     async def download_papers(bib_file, doi_input, dois_input):
            cancel_event = asyncio.Event() # Create cancellation event for every submission.
            downloader.cancel_event = cancel_event # store the event so that it is available to stop the process
            if bib_file:
               # Check file type
-               if not bib_file.name.lower().endswith('.bib'):
-                     return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file",  "", None #Fixed with default value
-               zip_file, downloaded_dois, failed_dois, logs_text=  await downloader.process_bibtex(bib_file, cancel_event)
-               return zip_file, downloaded_dois, failed_dois, logs_text, None
            elif doi_input:
-                 filepath, message, error =  await downloader._download_single_doi(doi_input, cancel_event)
-                 return None, message, error,"", filepath# correct returns.
            elif dois_input:
-                  zip_file, downloaded_dois, failed_dois, logs_text=  await downloader.download_multiple_dois(dois_input, cancel_event)
-                  return  zip_file, downloaded_dois, failed_dois, logs_text, None
            else:
-                 return   None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", "", None
     with gr.Blocks(theme="Hev832/Applio", css="""
         .gradio-container {
@@ -569,10 +563,10 @@ def create_gradio_interface():
                         output_file = gr.File(label="Download Papers (ZIP) or Single PDF")
-                        downloaded_dois_textbox =  gr.Textbox(label="""
                            Found DOIs
                          """,)
-                        failed_dois_textbox=gr.Textbox(label="""
                           Missed DOIs
                          """,)
                         logs =  gr.Textbox(label="""
@@ -590,14 +584,15 @@ def create_gradio_interface():
                 inputs=[bib_file, doi_input, dois_input],
                   outputs=[output_file, downloaded_dois_textbox, failed_dois_textbox,logs, single_file ],  # the new output should be a tuple and we output logs too for debugging.
             )
     interface.title="🔬 Academic Paper Batch Downloader"
     interface.description="Upload a BibTeX file or enter DOIs to download PDFs. We'll attempt to fetch PDFs from multiple sources like Sci-Hub, Libgen, Google Scholar and Crossref. You can use any of the three inputs at any moment."
     return interface
 def main():
     interface = create_gradio_interface()
-    interface.launch()
 if __name__ == "__main__":
     main()

                     delay *= 2  # Exponential backoff
         return None
     async def _download_single_doi(self, doi, cancel_event=None):
         """Download the PDF for one DOI and save it under ``self.output_dir``.

         Args:
             doi: DOI string to fetch.
             cancel_event: Optional object exposing ``is_set()``. When it is
                 already set, no download is attempted and the cancelled
                 result is returned.

         Returns:
             A ``(filepath, message, error)`` tuple. ``filepath`` is ``None``
             whenever no file was written; ``error`` is ``""`` on success.
         """
         if not doi:
             return None, "Error: DOI no proporcionado", "Error: DOI no proporcionado"

         # Skip the download entirely when cancellation was already requested.
         if cancel_event is not None and cancel_event.is_set():
             logger.info(f"Download Cancelled DOI: {doi}")
             return None, f"Download cancelled {doi}", "Download Cancelled"

         logger.info(f"Starting download process for DOI: {doi}")
         try:
             pdf_content = await self.download_with_retry_async(doi)
             if not pdf_content:
                 logger.warning(f"No se pudo descargar: {doi}")
                 return None, f"No se pudo descargar {doi}", f'<a href="https://doi.org/{doi}">{doi}</a>'

             logger.info(f"Downloaded PDF for DOI: {doi}")
             # '/' and '.' are replaced so the DOI can be used as a file name.
             filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
             filepath = os.path.join(self.output_dir, filename)
             with open(filepath, 'wb') as f:
                 f.write(pdf_content)
             logger.info(f"Saved PDF to file: {filepath}")
             logger.info(f"Descarga exitosa: {filename}")
             return filepath, f"Descargado exitosamente: <a href='https://doi.org/{doi}'>{doi}</a>", ""
         except CancelledError:
             logger.info(f"Download Cancelled DOI: {doi}")
             return None, f"Download cancelled {doi}", "Download Cancelled"
         except Exception as e:
             # Any other failure (network, file write) is reported, not raised.
             logger.error(f"Error processing {doi}: {e}")
             return None, f"Error processing {doi}: {e}", f"Error processing {doi}: {e}"
     async def download_multiple_dois(self, dois_text, cancel_event):
             """Download multiple DOIs"""
             if not dois_text:
                return None, "Error: No DOIs provided", "Error: No DOIs provided", ""
             # Sanitizar y filtrar DOIs
             # Eliminar líneas vacías, espacios en blanco, y DOIs duplicados
             dois = list(set([doi.strip() for doi in dois_text.split('\n') if doi.strip()]))
             # Validar lista de DOIs
             if not dois:
+               return None, "Error: No valid DOIs provided", "Error: No valid DOIs provided", ""
             # Listas para rastrear resultados
             downloaded_files = []      # Rutas de archivos descargados
             downloaded_links = []      # Links de DOIs descargados
             for i, doi in enumerate(dois):
+                    result = await self._download_single_doi(doi)
                     if cancel_event.is_set():
+                       logger.info("Downloads cancelled on multiple dois download")
+                       return  None,"Downloads cancelled","Downloads cancelled", ""
                     if result is None:
+                        continue
                     if isinstance(result, Exception):
                         # Excepción inesperada
+                        error_msg = f"Unexpected error: {str(result)}"
+                        logger.error(f"Error downloading {doi}: {error_msg}")
+                        failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
                     elif result[0] is None:
+                       # Descarga fallida (resultado de download_single_doi_async)
                         error_msg = result[1]
                         logger.warning(f"Failed to download {doi}: {error_msg}")
                         failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
                     else:
+                       # Descarga exitosa
                         filepath = result[0]
                          # Generar nombre de archivo único
+                        filename = f"{str(doi).replace('/', '_').replace('.', '_')}_{i}.pdf" # indent fix.
+                        filepath_unique = os.path.join(self.output_dir, filename)
                         try:
+                             # Renombrar archivo
+                             os.rename(filepath, filepath_unique) #Fixed ident
+                             # Añadir a lista de archivos descargados
+                             downloaded_files.append(filepath_unique) #Fixed ident
+                             downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')#Fixed ident
                          except Exception as rename_error:
+                                logger.error(f"Error renaming file for {doi}: {rename_error}")
+                                failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - Error saving file')#Fixed ident
             # Crear archivo ZIP si hay archivos descargados
                     lambda: self.create_zip(zip_filename, downloaded_files)
                 )
                 logger.info(f"ZIP file created: {zip_filename}")
+            return zip_filename if downloaded_files else None, "\n".join(downloaded_links),"\n".join(failed_dois),""
+    async def process_bibtex(self, bib_file, cancel_event):
+        """Process BibTeX file and download papers with multiple strategies and reports UI updates using a callback"""
+        # Read BibTeX file content from the uploaded object
+        try:
+            with open(bib_file.name, 'r', encoding='utf-8') as f:
+                bib_content = f.read()
+        except Exception as e:
+            logger.error(f"Error reading uploaded file {bib_file.name}: {e}")
+            return None, f"Error reading uploaded file {bib_file.name}: {e}", f"Error reading uploaded file {bib_file.name}: {e}", ""
+        # Parse BibTeX data
+        try:
+            bib_database = bibtexparser.loads(bib_content)
+        except Exception as e:
+            logger.error(f"Error parsing BibTeX data: {e}")
+            return None,f"Error parsing BibTeX data: {e}", f"Error parsing BibTeX data: {e}",""
+        # Extract DOIs
+        dois = [entry.get('doi') for entry in bib_database.entries if entry.get('doi')]
+        logger.info(f"Found {len(dois)} DOIs to download")
+         # Result lists
+        downloaded_files = []
+        failed_dois = []
+        downloaded_links = []
+        for i, doi in enumerate(dois):
+                result = await self._download_single_doi(doi, cancel_event) # now its async directly here
                 if cancel_event.is_set():
+                    logger.info("Download Cancelled in bibtex mode")
+                    return None, "Download Cancelled", "Download Cancelled", ""
                 if result is None:
+                   continue
                 if isinstance(result, Exception):
+                    # Excepción inesperada
                      error_msg = f"Unexpected error: {str(result)}"
                      logger.error(f"Error downloading {doi}: {error_msg}")
                      failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
+                elif result[0] is None:
+                      # Descarga fallida (resultado de download_single_doi_async)
+                      error_msg = result[1]
+                      logger.warning(f"Failed to download {doi}: {error_msg}")
+                      failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
+                else:
+                    # Descarga exitosa
+                    filepath = result[0]
                     # Unique filename for zip
+                    filename = f"{str(doi).replace('/', '_').replace('.', '_')}_{i}.pdf"
+                    filepath_unique = os.path.join(self.output_dir, filename)
+                    os.rename(filepath, filepath_unique)
+                    downloaded_files.append(filepath_unique)
+                    downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
+        if downloaded_files:
+            zip_filename = 'papers.zip'
+            loop = asyncio.get_running_loop()
+            loop.run_in_executor(self.executor, lambda:  self.create_zip(zip_filename,downloaded_files))
+            logger.info(f"ZIP file created: {zip_filename}")
+        return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois),""
     def create_zip(self, zip_filename, files):
         """Crea un archivo zip con los pdfs descargados"""
         with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zf:
     """Create Gradio interface for Paper Downloader"""
     downloader = PaperDownloader()
     def update_progress(message="", logs=""):
         """Return two component value updates.

         The first carries ``message`` as text; the second carries ``logs``
         wrapped in a ``<pre>`` element.

         ``gr.update`` is used instead of the per-component ``Textbox.update``
         classmethod, which is not available in Gradio 4 and later.
         """
         return gr.update(value=f"{message}"), gr.update(value=f"<pre>{logs}</pre>")
     async def download_papers(bib_file, doi_input, dois_input):
         """Dispatch a submission to the BibTeX, single-DOI or DOI-list path.

         The first non-empty input wins, in the order ``bib_file``,
         ``doi_input``, ``dois_input``.

         Returns:
             A 5-tuple ``(zip_file, downloaded, failed, logs, single_file)``.
             ``single_file`` is only set on the single-DOI path; ``zip_file``
             is only set on the two batch paths.
         """
         # A fresh cancellation event per submission, stored on the downloader
         # so that it can be reached from outside this call.
         cancel_event = asyncio.Event()
         downloader.cancel_event = cancel_event

         if bib_file:
             # Only .bib uploads are accepted.
             if not bib_file.name.lower().endswith('.bib'):
                 return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", "", None
             zip_file, downloaded_dois, failed_dois, logs_text = await downloader.process_bibtex(bib_file, cancel_event)
             return zip_file, downloaded_dois, failed_dois, logs_text, None

         # Strip first so a whitespace-only box does not shadow the DOI list.
         single_doi = (doi_input or "").strip()
         if single_doi:
             # The DOI is the only argument the helper requires.
             filepath, message, error = await downloader._download_single_doi(single_doi)
             return None, message, error, "", filepath

         if dois_input:
             zip_file, downloaded_dois, failed_dois, logs_text = await downloader.download_multiple_dois(dois_input, cancel_event)
             return zip_file, downloaded_dois, failed_dois, logs_text, None

         return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", "", None
     with gr.Blocks(theme="Hev832/Applio", css="""
         .gradio-container {
                         output_file = gr.File(label="Download Papers (ZIP) or Single PDF")
+                        downloaded_dois_textbox =  gr.HTML(label="""
                            Found DOIs
                          """,)
+                        failed_dois_textbox=gr.HTML(label="""
                           Missed DOIs
                          """,)
                         logs =  gr.Textbox(label="""
                 inputs=[bib_file, doi_input, dois_input],
                   outputs=[output_file, downloaded_dois_textbox, failed_dois_textbox,logs, single_file ],  # the new output should be a tuple and we output logs too for debugging.
             )
     interface.title="🔬 Academic Paper Batch Downloader"
     interface.description="Upload a BibTeX file or enter DOIs to download PDFs. We'll attempt to fetch PDFs from multiple sources like Sci-Hub, Libgen, Google Scholar and Crossref. You can use any of the three inputs at any moment."
     return interface
 def main():
     """Build the Gradio interface and launch it with ``share=True``."""
     create_gradio_interface().launch(share=True)


 if __name__ == "__main__":
     main()