FreeBibTec2

Sleeping

App Files Files Community

C2MV committed on Dec 14, 2024

Commit

bc356d0

verified ·

1 Parent(s): 434b119

Update app.py

Browse files

Files changed (1) hide show

app.py +102 -130

app.py CHANGED Viewed

@@ -43,7 +43,7 @@ class PaperDownloader:
         }
         self.executor = ThreadPoolExecutor(max_workers=4)
         self.download_task = None # Added attribute
     def clean_doi(self, doi):
         """Clean and encode DOI for URL"""
         if not isinstance(doi, str):
@@ -331,11 +331,11 @@ class PaperDownloader:
             # Si se agotan todos los reintentos
             return None
-    def _download_single_doi(self, doi, progress_callback, cancel_event): # removed async keyword
             """Descargar un único DOI con retroalimentación de progreso"""
             if not doi:
-                progress_callback(None, "Error: DOI no proporcionado", "Error: DOI no proporcionado","" , None)
-                return None
             logger.info(f"Starting download process for DOI: {doi}")
             try:
@@ -363,24 +363,23 @@ class PaperDownloader:
                         logger.warning(f"No se pudo descargar: {doi}")
                         return None, f"No se pudo descargar {doi}", f'<a href="https://doi.org/{doi}">{doi}</a>'
-                filepath, message, error =  asyncio.run(call_async()) #added the loop event here
-                progress_callback(filepath, message, error, None )  # call this once the callback is made
             except CancelledError:
                logger.info(f"Download Cancelled DOI: {doi}")
-               progress_callback(None, f"Download cancelled {doi}","Download Cancelled",  None ) #send proper types with null values
             except Exception as e:
                logger.error(f"Error processing {doi}: {e}")
-               progress_callback(None, f"Error processing {doi}: {e}", f"Error processing {doi}: {e}", None ) #send proper type of results
-    def download_multiple_dois(self, dois_text, progress_callback, cancel_event): #removed async here
             """Download multiple DOIs"""
              # Validar entrada
             if not dois_text:
-                progress_callback(None, "Error: No DOIs provided", "Error: No DOIs provided","" , None)
-                return None
             # Sanitizar y filtrar DOIs
             # Eliminar líneas vacías, espacios en blanco, y DOIs duplicados
@@ -388,60 +387,56 @@ class PaperDownloader:
             # Validar lista de DOIs
             if not dois:
-                progress_callback(None, "Error: No valid DOIs provided", "Error: No valid DOIs provided", "" , None)
-                return None
             # Listas para rastrear resultados
             downloaded_files = []      # Rutas de archivos descargados
             failed_dois = []           # DOIs que no se pudieron descargar
             downloaded_links = []      # Links de DOIs descargados
             for doi in dois:
-                 filepath, message, error=  self._download_single_doi(doi, lambda a,b,c,d : progress_callback(a,b,c,d), cancel_event )
-                 if cancel_event.is_set():
-                       logger.info("Downloads cancelled on multiple dois download")
-                       progress_callback(None,  "Downloads cancelled","Downloads cancelled",  None) # early return on cancelled
-                       return None #break here when is cancelled
-                 result = self.results_dict.get(doi, (None,None,"", None)) # obtain from self.results
-                 if result is None: #when errors happen results are none
-                     continue;
-                    # Manejar diferentes tipos de resultados
-                 if isinstance(result, Exception):
-                        # Excepción inesperada
                         error_msg = f"Unexpected error: {str(result)}"
                         logger.error(f"Error downloading {doi}: {error_msg}")
                         failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
-                 elif result[0] is None:
-                     # Descarga fallida (resultado de download_single_doi_async)
                         error_msg = result[1]
                         logger.warning(f"Failed to download {doi}: {error_msg}")
                         failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
-                 else:
-                      # Descarga exitosa
-                     filepath = result[0]
-                      # Generar nombre de archivo único
-                     filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
-                     filepath_unique = os.path.join(self.output_dir, filename)
-                     try:
-                          # Renombrar archivo
-                         os.rename(filepath, filepath_unique)
-                         # Añadir a lista de archivos descargados
-                         downloaded_files.append(filepath_unique)
-                         downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
-                     except Exception as rename_error:
-                            logger.error(f"Error renaming file for {doi}: {rename_error}")
-                            failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - Error saving file')
             # Crear archivo ZIP si hay archivos descargados
@@ -456,13 +451,11 @@ class PaperDownloader:
                     lambda: self.create_zip(zip_filename, downloaded_files)
                 )
                 logger.info(f"ZIP file created: {zip_filename}")
-            # Devolver resultados
-            progress_callback( zip_filename if downloaded_files else None, "\n".join(downloaded_links),"\n".join(failed_dois),"" ,  None) # send empty values on callback to not break it.
-            return
-    def process_bibtex(self, bib_file, progress_callback, cancel_event):# removed async here
             """Process BibTeX file and download papers with multiple strategies and reports UI updates using a callback"""
             # Read BibTeX file content from the uploaded object
             try:
@@ -470,16 +463,14 @@ class PaperDownloader:
                     bib_content = f.read()
             except Exception as e:
                 logger.error(f"Error reading uploaded file {bib_file.name}: {e}")
-                progress_callback(None, f"Error reading uploaded file {bib_file.name}: {e}", f"Error reading uploaded file {bib_file.name}: {e}", None )
-                return None
             # Parse BibTeX data
             try:
                 bib_database = bibtexparser.loads(bib_content)
             except Exception as e:
                 logger.error(f"Error parsing BibTeX data: {e}")
-                progress_callback(None,f"Error parsing BibTeX data: {e}", f"Error parsing BibTeX data: {e}", None)
-                return None
             # Extract DOIs
             dois = [entry.get('doi') for entry in bib_database.entries if entry.get('doi')]
@@ -489,24 +480,22 @@ class PaperDownloader:
             downloaded_files = []
             failed_dois = []
             downloaded_links = []
             for doi in dois:
-                 filepath, message, error= self._download_single_doi(doi, lambda a,b,c,d: progress_callback(a,b,c,d), cancel_event ) # added lambda for params handling.
-                 if cancel_event.is_set():
-                       logger.info("Download Cancelled in bibtex mode")
-                       progress_callback(None, "Download Cancelled", "Download Cancelled", None)
-                       return None #cancel if requested
-                 result = self.results_dict.get(doi, (None,None,"",None)) # obtain from self.results
-                 if result is None:
-                    continue # skips for a None type results when callback fails
                  if isinstance(result, Exception):
                     # Excepción inesperada
-                    error_msg = f"Unexpected error: {str(result)}"
-                    logger.error(f"Error downloading {doi}: {error_msg}")
-                    failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
                  elif result[0] is None:
                     # Descarga fallida (resultado de download_single_doi_async)
@@ -515,7 +504,7 @@ class PaperDownloader:
                     failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
                  else:
-                    # Descarga exitosa
                     filepath = result[0]
                     # Unique filename for zip
@@ -524,16 +513,15 @@ class PaperDownloader:
                     os.rename(filepath, filepath_unique)
                     downloaded_files.append(filepath_unique)
                     downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
             if downloaded_files:
                 zip_filename = 'papers.zip'
                 loop = asyncio.get_running_loop()
                 loop.run_in_executor(self.executor, lambda:  self.create_zip(zip_filename,downloaded_files))
                 logger.info(f"ZIP file created: {zip_filename}")
-            progress_callback(zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois),"")  # send "", None to conform output
-            return
     def create_zip(self, zip_filename, files):
         """Crea un archivo zip con los pdfs descargados"""
@@ -551,66 +539,50 @@ def create_gradio_interface():
     """Create Gradio interface for Paper Downloader"""
     downloader = PaperDownloader()
-    downloader.results_dict = {}  #shared results dict, since it runs on different threads
     def update_progress( message="", logs=""):
          return  gr.Textbox.update(value=f"{message}"),gr.Textbox.update(value=f"<pre>{logs}</pre>")
     def download_papers(bib_file, doi_input, dois_input, output_file, downloaded_dois_textbox,failed_dois_textbox,logs, single_file):
-            cancel_event = asyncio.Event() # Create cancellation event for every submission.
-            downloader.cancel_event = cancel_event # store the event so that it is available to stop the process
-            def custom_progress_callback(filepath, message, fail_message, doi=None): #new callback to send to the execution function
-                    logger.info(f"Callback message: {message}") # log each callback msg
-                    #store data for single or multiple mode on download_papers execution.
-                    if doi:
-                          downloader.results_dict[doi] = (filepath, message,fail_message, "")
-                    return  update_progress(message, fail_message)  # send return values only with results
-            if bib_file:
-                 # Check file type
-                 if not bib_file.name.lower().endswith('.bib'):
-                    return  None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", "",  None # must return all 5 results at each possibility
-                 downloader.download_task = downloader.executor.submit(
-                       downloader.process_bibtex,
-                         bib_file,
-                      lambda a,b,c, d: update_progress(a,f"{b}<br>{c}"),  #convert for ui output, the return data will contain the HTML
-                        cancel_event # Added cancelllation event.
                  )
-                 return  None, "","",  "", None #must be None ,str , str, str, None  ( five params)
-            elif doi_input:
-                 downloader.download_task = downloader.executor.submit( #changed async execution method
-                      downloader._download_single_doi,
-                       doi_input,
-                          lambda a,b,c,d: update_progress(a,f"{b}<br>{c}"),  #callback function, format output and send html info, removed lambda from executor calls
-                           cancel_event  # Add cancellation event.
-                   )
-                 return   None, "","", "", None  #must be None ,str , str, str, None  ( five params)
-            elif dois_input:
-                  downloader.download_task = downloader.executor.submit( #changed async execution method
-                        downloader.download_multiple_dois,
-                          dois_input,
-                         lambda a,b,c,d: update_progress(a,f"{b}<br>{c}"), #callback function, return simple values
-                        cancel_event #Add cancellation event.
                     )
-                  return   None, "","", "", None #must be None ,str , str, str, None  ( five params)
-            else:
-                  return  None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs",  "", None #must be None ,str , str, str, None  ( five params)
     with gr.Blocks(theme="Hev832/Applio", css="""
         .gradio-container {
@@ -670,14 +642,14 @@ def create_gradio_interface():
             submit_button.click(
                  download_papers,
-                 inputs=[bib_file, doi_input, dois_input],
                  outputs=[output_file, downloaded_dois_textbox, failed_dois_textbox,logs, single_file ], # the new output should be a tuple and we output logs too for debugging.
             )
     interface.title="🔬 Academic Paper Batch Downloader"
     interface.description="Upload a BibTeX file or enter DOIs to download PDFs. We'll attempt to fetch PDFs from multiple sources like Sci-Hub, Libgen, Google Scholar and Crossref. You can use any of the three inputs at any moment."
     return interface

         }
         self.executor = ThreadPoolExecutor(max_workers=4)
         self.download_task = None # Added attribute
+        self.results_dict = {}
     def clean_doi(self, doi):
         """Clean and encode DOI for URL"""
         if not isinstance(doi, str):
             # Si se agotan todos los reintentos
             return None
+    def _download_single_doi(self, doi, cancel_event): # removed async keyword
             """Descargar un único DOI con retroalimentación de progreso"""
             if not doi:
+                return None, "Error: DOI no proporcionado", "Error: DOI no proporcionado"
             logger.info(f"Starting download process for DOI: {doi}")
             try:
                         logger.warning(f"No se pudo descargar: {doi}")
                         return None, f"No se pudo descargar {doi}", f'<a href="https://doi.org/{doi}">{doi}</a>'
+                filepath, message, error = asyncio.run(call_async()) #added the loop event here
+                return filepath, message, error
             except CancelledError:
                logger.info(f"Download Cancelled DOI: {doi}")
+               return None, f"Download cancelled {doi}","Download Cancelled"
             except Exception as e:
                logger.error(f"Error processing {doi}: {e}")
+               return None, f"Error processing {doi}: {e}", f"Error processing {doi}: {e}"
+    def download_multiple_dois(self, dois_text, cancel_event): #removed async here
             """Download multiple DOIs"""
              # Validar entrada
             if not dois_text:
+                return None, "Error: No DOIs provided", "Error: No DOIs provided",""
             # Sanitizar y filtrar DOIs
             # Eliminar líneas vacías, espacios en blanco, y DOIs duplicados
             # Validar lista de DOIs
             if not dois:
+                return None, "Error: No valid DOIs provided", "Error: No valid DOIs provided", ""
             # Listas para rastrear resultados
             downloaded_files = []      # Rutas de archivos descargados
             failed_dois = []           # DOIs que no se pudieron descargar
             downloaded_links = []      # Links de DOIs descargados
             for doi in dois:
+                    result = self._download_single_doi(doi,cancel_event)
+                    if cancel_event.is_set():
+                        logger.info("Downloads cancelled on multiple dois download")
+                        return  None,"Downloads cancelled","Downloads cancelled","" # early return on cancelled
+                    if result is None: #when errors happen results are none
+                         continue
+                   # Manejar diferentes tipos de resultados
+                    if isinstance(result, Exception):
+                         # Excepción inesperada
                         error_msg = f"Unexpected error: {str(result)}"
                         logger.error(f"Error downloading {doi}: {error_msg}")
                         failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
+                    elif result[0] is None:
+                         # Descarga fallida (resultado de download_single_doi_async)
                         error_msg = result[1]
                         logger.warning(f"Failed to download {doi}: {error_msg}")
                         failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
+                    else:
+                         # Descarga exitosa
+                         filepath = result[0]
+                        # Generar nombre de archivo único
+                         filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
+                         filepath_unique = os.path.join(self.output_dir, filename)
+                         try:
+                              # Renombrar archivo
+                              os.rename(filepath, filepath_unique)
+                              # Añadir a lista de archivos descargados
+                              downloaded_files.append(filepath_unique)
+                              downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
+                         except Exception as rename_error:
+                              logger.error(f"Error renaming file for {doi}: {rename_error}")
+                              failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - Error saving file')
             # Crear archivo ZIP si hay archivos descargados
                     lambda: self.create_zip(zip_filename, downloaded_files)
                 )
                 logger.info(f"ZIP file created: {zip_filename}")
+            return  zip_filename if downloaded_files else None, "\n".join(downloaded_links),"\n".join(failed_dois), "" # returning only results here and not in lambda
+    def process_bibtex(self, bib_file, cancel_event):# removed async here
             """Process BibTeX file and download papers with multiple strategies and reports UI updates using a callback"""
             # Read BibTeX file content from the uploaded object
             try:
                     bib_content = f.read()
             except Exception as e:
                 logger.error(f"Error reading uploaded file {bib_file.name}: {e}")
+                return None, f"Error reading uploaded file {bib_file.name}: {e}", f"Error reading uploaded file {bib_file.name}: {e}",""
             # Parse BibTeX data
             try:
                 bib_database = bibtexparser.loads(bib_content)
             except Exception as e:
                 logger.error(f"Error parsing BibTeX data: {e}")
+                return None,f"Error parsing BibTeX data: {e}", f"Error parsing BibTeX data: {e}",""
             # Extract DOIs
             dois = [entry.get('doi') for entry in bib_database.entries if entry.get('doi')]
             downloaded_files = []
             failed_dois = []
             downloaded_links = []
             for doi in dois:
+                result = self._download_single_doi(doi, cancel_event) # removed lambda call from executor
+                if cancel_event.is_set():
+                     logger.info("Download Cancelled in bibtex mode")
+                     return  None, "Download Cancelled", "Download Cancelled" ,"" #cancel if requested
+                if result is None: #skips if it contains null values as a results.
+                      continue
                  if isinstance(result, Exception):
                     # Excepción inesperada
+                     error_msg = f"Unexpected error: {str(result)}"
+                     logger.error(f"Error downloading {doi}: {error_msg}")
+                     failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
                  elif result[0] is None:
                     # Descarga fallida (resultado de download_single_doi_async)
                     failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a> - {error_msg}')
                  else:
+                     # Descarga exitosa
                     filepath = result[0]
                     # Unique filename for zip
                     os.rename(filepath, filepath_unique)
                     downloaded_files.append(filepath_unique)
                     downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
             if downloaded_files:
                 zip_filename = 'papers.zip'
                 loop = asyncio.get_running_loop()
                 loop.run_in_executor(self.executor, lambda:  self.create_zip(zip_filename,downloaded_files))
                 logger.info(f"ZIP file created: {zip_filename}")
+            return  zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois),"" #send all results
     def create_zip(self, zip_filename, files):
         """Crea un archivo zip con los pdfs descargados"""
     """Create Gradio interface for Paper Downloader"""
     downloader = PaperDownloader()
     def update_progress( message="", logs=""):
          return  gr.Textbox.update(value=f"{message}"),gr.Textbox.update(value=f"<pre>{logs}</pre>")
     def download_papers(bib_file, doi_input, dois_input, output_file, downloaded_dois_textbox,failed_dois_textbox,logs, single_file):
+        cancel_event = asyncio.Event() # Create cancellation event for every submission.
+        downloader.cancel_event = cancel_event # store the event so that it is available to stop the process
+        if bib_file:
+             # Check file type
+            if not bib_file.name.lower().endswith('.bib'):
+               return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", "",  None
+            downloader.download_task = downloader.executor.submit(
+                 downloader.process_bibtex,
+                bib_file,
+                cancel_event # Added cancelllation event.
                  )
+            zip_file, downloaded_dois, failed_dois, logs_text =  downloader.download_task.result() # gets results from method
+            return zip_file, downloaded_dois, failed_dois, logs_text, None #  we use this method because all outputs values were already done inside the callback
+        elif doi_input:
+            downloader.download_task = downloader.executor.submit( #changed async execution method
+                 downloader._download_single_doi,
+                  doi_input,
+                     cancel_event
+                 )
+            filepath, message, error=   downloader.download_task.result()
+            return None, message, error, "", filepath
+        elif dois_input:
+              downloader.download_task = downloader.executor.submit( #changed async execution method
+                    downloader.download_multiple_dois,
+                       dois_input,
+                     cancel_event
                     )
+              zip_file, downloaded_dois, failed_dois, logs_text=   downloader.download_task.result()
+              return zip_file, downloaded_dois, failed_dois, logs_text, None #we use the direct assignments with returns as before on multi.
+        else:
+           return  None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs",  "", None # all parameters must have return data, to prevent gradio to crash on incomplete data for block
     with gr.Blocks(theme="Hev832/Applio", css="""
         .gradio-container {
             submit_button.click(
                  download_papers,
+                inputs=[bib_file, doi_input, dois_input],
                  outputs=[output_file, downloaded_dois_textbox, failed_dois_textbox,logs, single_file ], # the new output should be a tuple and we output logs too for debugging.
             )
     interface.title="🔬 Academic Paper Batch Downloader"
     interface.description="Upload a BibTeX file or enter DOIs to download PDFs. We'll attempt to fetch PDFs from multiple sources like Sci-Hub, Libgen, Google Scholar and Crossref. You can use any of the three inputs at any moment."
     return interface