C2MV committed on
Commit
20214ca
verified
1 Parent(s): ff70361

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -8
app.py CHANGED
@@ -70,7 +70,8 @@ class PaperDownloader:
70
  try:
71
  while retry_count <= max_retries:
72
  try:
73
- async with session.get(current_url, headers=self.headers, timeout=10, allow_redirects=False) as response:
 
74
  if response.status in [301, 302, 307, 308]:
75
  current_url = response.headers['Location']
76
  redirect_count += 1
@@ -80,6 +81,7 @@ class PaperDownloader:
80
  response.raise_for_status()
81
 
82
  if 'application/pdf' in response.headers.get('Content-Type', ''):
 
83
  return await response.read()
84
  else:
85
  logger.debug(f"Content type not PDF for {current_url}: {response.headers.get('Content-Type', '')}")
@@ -293,12 +295,13 @@ class PaperDownloader:
293
  # Probar cada estrategia de descarga
294
  for strategy in download_strategies:
295
  try:
 
296
  pdf_content = await strategy(session, doi)
297
  if pdf_content:
298
  logger.info(f"Descarga exitosa de {doi} usando {strategy.__name__}")
299
  return pdf_content
300
  except Exception as e:
301
- logger.debug(f"Error en estrategia {strategy.__name__} para {doi}: {e}")
302
 
303
  # Si ninguna estrategia funcionó, esperar un poco antes de reintentar
304
  await asyncio.sleep(1) # Pequeña pausa entre reintentos
@@ -311,31 +314,32 @@ class PaperDownloader:
311
  """Descargar un único DOI con retroalimentación de progreso"""
312
  if not doi:
313
  return None, "Error: DOI no proporcionado", "Error: DOI no proporcionado"
314
-
315
  try:
316
  pdf_content = await self.download_with_retry_async(doi)
317
-
318
  if pdf_content:
 
319
  filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
320
  filepath = os.path.join(self.output_dir, filename)
321
 
322
  # Escribir contenido del PDF
323
  loop = asyncio.get_running_loop()
324
  await loop.run_in_executor(
325
- self.executor,
326
  lambda: open(filepath, 'wb').write(pdf_content)
327
  )
328
-
 
329
  logger.info(f"Descarga exitosa: {filename}")
330
  progress_callback(f"Descargado exitosamente: <a href='https://doi.org/{doi}'>{doi}</a>")
331
  return filepath, f'<a href="https://doi.org/{doi}">{doi}</a>', ""
332
  else:
333
- logger.warning(f"No se pudo descargar: {doi}")
334
  progress_callback(f"No se pudo descargar: <a href='https://doi.org/{doi}'>{doi}</a>")
335
  return None, f"No se pudo descargar {doi}", f'<a href="https://doi.org/{doi}">{doi}</a>'
336
 
337
  except Exception as e:
338
- logger.error(f"Error procesando {doi}: {e}")
339
  progress_callback(f"Error procesando {doi}: <a href='https://doi.org/{doi}'>{doi}</a> {e}")
340
  return None, f"Error procesando {doi}: {e}", f"Error procesando {doi}: {e}"
341
 
 
70
  try:
71
  while retry_count <= max_retries:
72
  try:
73
+ logger.debug(f"Fetching PDF from {current_url} - Retry {retry_count + 1}")#ADDED
74
+ async with session.get(current_url, headers=self.headers, timeout=10, allow_redirects=False) as response:
75
  if response.status in [301, 302, 307, 308]:
76
  current_url = response.headers['Location']
77
  redirect_count += 1
 
81
  response.raise_for_status()
82
 
83
  if 'application/pdf' in response.headers.get('Content-Type', ''):
84
+ logger.debug(f"Successfully fetched PDF from {current_url}")#ADDED
85
  return await response.read()
86
  else:
87
  logger.debug(f"Content type not PDF for {current_url}: {response.headers.get('Content-Type', '')}")
 
295
  # Probar cada estrategia de descarga
296
  for strategy in download_strategies:
297
  try:
298
+ logger.info(f"Trying strategy {strategy.__name__} for DOI {doi}") # ADDED
299
  pdf_content = await strategy(session, doi)
300
  if pdf_content:
301
  logger.info(f"Descarga exitosa de {doi} usando {strategy.__name__}")
302
  return pdf_content
303
  except Exception as e:
304
+ logger.debug(f"Error en estrategia {strategy.__name__} para {doi}: {e}") #ADDED
305
 
306
  # Si ninguna estrategia funcionó, esperar un poco antes de reintentar
307
  await asyncio.sleep(1) # Pequeña pausa entre reintentos
 
314
  """Descargar un único DOI con retroalimentación de progreso"""
315
  if not doi:
316
  return None, "Error: DOI no proporcionado", "Error: DOI no proporcionado"
317
+ logger.info(f"Starting download process for DOI: {doi}") # ADDED
318
  try:
319
  pdf_content = await self.download_with_retry_async(doi)
 
320
  if pdf_content:
321
+ logger.info(f"Downloaded PDF for DOI: {doi}") # ADDED
322
  filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
323
  filepath = os.path.join(self.output_dir, filename)
324
 
325
  # Escribir contenido del PDF
326
  loop = asyncio.get_running_loop()
327
  await loop.run_in_executor(
328
+ self.executor,
329
  lambda: open(filepath, 'wb').write(pdf_content)
330
  )
331
+ logger.info(f"Saved PDF to file: {filepath}") # ADDED
332
+
333
  logger.info(f"Descarga exitosa: {filename}")
334
  progress_callback(f"Descargado exitosamente: <a href='https://doi.org/{doi}'>{doi}</a>")
335
  return filepath, f'<a href="https://doi.org/{doi}">{doi}</a>', ""
336
  else:
337
+ logger.warning(f"No se pudo descargar: {doi}") # ADDED
338
  progress_callback(f"No se pudo descargar: <a href='https://doi.org/{doi}'>{doi}</a>")
339
  return None, f"No se pudo descargar {doi}", f'<a href="https://doi.org/{doi}">{doi}</a>'
340
 
341
  except Exception as e:
342
+ logger.error(f"Error processing {doi}: {e}") # ADDED
343
  progress_callback(f"Error procesando {doi}: <a href='https://doi.org/{doi}'>{doi}</a> {e}")
344
  return None, f"Error procesando {doi}: {e}", f"Error procesando {doi}: {e}"
345