FreeBibTec2

Sleeping

App Files Files Community

C2MV committed on Dec 14, 2024

Commit

20214ca

verified ·

1 Parent(s): ff70361

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -8

app.py CHANGED Viewed

@@ -70,7 +70,8 @@ class PaperDownloader:
             try:
                 while retry_count <= max_retries:
                     try:
-                        async with session.get(current_url, headers=self.headers, timeout=10, allow_redirects=False) as response:
                             if response.status in [301, 302, 307, 308]:
                                 current_url = response.headers['Location']
                                 redirect_count += 1
@@ -80,6 +81,7 @@ class PaperDownloader:
                             response.raise_for_status()
                             if 'application/pdf' in response.headers.get('Content-Type', ''):
                                 return await response.read()
                             else:
                                 logger.debug(f"Content type not PDF for {current_url}: {response.headers.get('Content-Type', '')}")
@@ -293,12 +295,13 @@ class PaperDownloader:
                 # Probar cada estrategia de descarga
                 for strategy in download_strategies:
                     try:
                         pdf_content = await strategy(session, doi)
                         if pdf_content:
                             logger.info(f"Descarga exitosa de {doi} usando {strategy.__name__}")
                             return pdf_content
                     except Exception as e:
-                        logger.debug(f"Error en estrategia {strategy.__name__} para {doi}: {e}")
                 # Si ninguna estrategia funcionó, esperar un poco antes de reintentar
                 await asyncio.sleep(1)  # Pequeña pausa entre reintentos
@@ -311,31 +314,32 @@ class PaperDownloader:
         """Descargar un único DOI con retroalimentación de progreso"""
         if not doi:
             return None, "Error: DOI no proporcionado", "Error: DOI no proporcionado"
         try:
             pdf_content = await self.download_with_retry_async(doi)
             if pdf_content:
                 filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
                 filepath = os.path.join(self.output_dir, filename)
                 # Escribir contenido del PDF
                 loop = asyncio.get_running_loop()
                 await loop.run_in_executor(
-                    self.executor,
                     lambda: open(filepath, 'wb').write(pdf_content)
                 )
                 logger.info(f"Descarga exitosa: {filename}")
                 progress_callback(f"Descargado exitosamente: <a href='https://doi.org/{doi}'>{doi}</a>")
                 return filepath, f'<a href="https://doi.org/{doi}">{doi}</a>', ""
             else:
-                logger.warning(f"No se pudo descargar: {doi}")
                 progress_callback(f"No se pudo descargar: <a href='https://doi.org/{doi}'>{doi}</a>")
                 return None, f"No se pudo descargar {doi}", f'<a href="https://doi.org/{doi}">{doi}</a>'
         except Exception as e:
-            logger.error(f"Error procesando {doi}: {e}")
             progress_callback(f"Error procesando {doi}: <a href='https://doi.org/{doi}'>{doi}</a> {e}")
             return None, f"Error procesando {doi}: {e}", f"Error procesando {doi}: {e}"

             try:
                 while retry_count <= max_retries:
                     try:
+                         logger.debug(f"Fetching PDF from {current_url} - Retry {retry_count + 1}")#ADDED
+                         async with session.get(current_url, headers=self.headers, timeout=10, allow_redirects=False) as response:
                             if response.status in [301, 302, 307, 308]:
                                 current_url = response.headers['Location']
                                 redirect_count += 1
                             response.raise_for_status()
                             if 'application/pdf' in response.headers.get('Content-Type', ''):
+                                logger.debug(f"Successfully fetched PDF from {current_url}")#ADDED
                                 return await response.read()
                             else:
                                 logger.debug(f"Content type not PDF for {current_url}: {response.headers.get('Content-Type', '')}")
                 # Probar cada estrategia de descarga
                 for strategy in download_strategies:
                     try:
+                        logger.info(f"Trying strategy {strategy.__name__} for DOI {doi}") # ADDED
                         pdf_content = await strategy(session, doi)
                         if pdf_content:
                             logger.info(f"Descarga exitosa de {doi} usando {strategy.__name__}")
                             return pdf_content
                     except Exception as e:
+                        logger.debug(f"Error en estrategia {strategy.__name__} para {doi}: {e}") #ADDED
                 # Si ninguna estrategia funcionó, esperar un poco antes de reintentar
                 await asyncio.sleep(1)  # Pequeña pausa entre reintentos
         """Descargar un único DOI con retroalimentación de progreso"""
         if not doi:
             return None, "Error: DOI no proporcionado", "Error: DOI no proporcionado"
+        logger.info(f"Starting download process for DOI: {doi}") # ADDED
         try:
             pdf_content = await self.download_with_retry_async(doi)
             if pdf_content:
+                logger.info(f"Downloaded PDF for DOI: {doi}") # ADDED
                 filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
                 filepath = os.path.join(self.output_dir, filename)
                 # Escribir contenido del PDF
                 loop = asyncio.get_running_loop()
                 await loop.run_in_executor(
+                    self.executor,
                     lambda: open(filepath, 'wb').write(pdf_content)
                 )
+                logger.info(f"Saved PDF to file: {filepath}") # ADDED
                 logger.info(f"Descarga exitosa: {filename}")
                 progress_callback(f"Descargado exitosamente: <a href='https://doi.org/{doi}'>{doi}</a>")
                 return filepath, f'<a href="https://doi.org/{doi}">{doi}</a>', ""
             else:
+                logger.warning(f"No se pudo descargar: {doi}") # ADDED
                 progress_callback(f"No se pudo descargar: <a href='https://doi.org/{doi}'>{doi}</a>")
                 return None, f"No se pudo descargar {doi}", f'<a href="https://doi.org/{doi}">{doi}</a>'
         except Exception as e:
+            logger.error(f"Error processing {doi}: {e}") # ADDED
             progress_callback(f"Error procesando {doi}: <a href='https://doi.org/{doi}'>{doi}</a> {e}")
             return None, f"Error procesando {doi}: {e}", f"Error procesando {doi}: {e}"