Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -70,7 +70,8 @@ class PaperDownloader:
|
|
70 |
try:
|
71 |
while retry_count <= max_retries:
|
72 |
try:
|
73 |
-
|
|
|
74 |
if response.status in [301, 302, 307, 308]:
|
75 |
current_url = response.headers['Location']
|
76 |
redirect_count += 1
|
@@ -80,6 +81,7 @@ class PaperDownloader:
|
|
80 |
response.raise_for_status()
|
81 |
|
82 |
if 'application/pdf' in response.headers.get('Content-Type', ''):
|
|
|
83 |
return await response.read()
|
84 |
else:
|
85 |
logger.debug(f"Content type not PDF for {current_url}: {response.headers.get('Content-Type', '')}")
|
@@ -293,12 +295,13 @@ class PaperDownloader:
|
|
293 |
# Probar cada estrategia de descarga
|
294 |
for strategy in download_strategies:
|
295 |
try:
|
|
|
296 |
pdf_content = await strategy(session, doi)
|
297 |
if pdf_content:
|
298 |
logger.info(f"Descarga exitosa de {doi} usando {strategy.__name__}")
|
299 |
return pdf_content
|
300 |
except Exception as e:
|
301 |
-
logger.debug(f"Error en estrategia {strategy.__name__} para {doi}: {e}")
|
302 |
|
303 |
# Si ninguna estrategia funcionó, esperar un poco antes de reintentar
|
304 |
await asyncio.sleep(1) # Pequeña pausa entre reintentos
|
@@ -311,31 +314,32 @@ class PaperDownloader:
|
|
311 |
"""Descargar un único DOI con retroalimentación de progreso"""
|
312 |
if not doi:
|
313 |
return None, "Error: DOI no proporcionado", "Error: DOI no proporcionado"
|
314 |
-
|
315 |
try:
|
316 |
pdf_content = await self.download_with_retry_async(doi)
|
317 |
-
|
318 |
if pdf_content:
|
|
|
319 |
filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
|
320 |
filepath = os.path.join(self.output_dir, filename)
|
321 |
|
322 |
# Escribir contenido del PDF
|
323 |
loop = asyncio.get_running_loop()
|
324 |
await loop.run_in_executor(
|
325 |
-
self.executor,
|
326 |
lambda: open(filepath, 'wb').write(pdf_content)
|
327 |
)
|
328 |
-
|
|
|
329 |
logger.info(f"Descarga exitosa: {filename}")
|
330 |
progress_callback(f"Descargado exitosamente: <a href='https://doi.org/{doi}'>{doi}</a>")
|
331 |
return filepath, f'<a href="https://doi.org/{doi}">{doi}</a>', ""
|
332 |
else:
|
333 |
-
logger.warning(f"No se pudo descargar: {doi}")
|
334 |
progress_callback(f"No se pudo descargar: <a href='https://doi.org/{doi}'>{doi}</a>")
|
335 |
return None, f"No se pudo descargar {doi}", f'<a href="https://doi.org/{doi}">{doi}</a>'
|
336 |
|
337 |
except Exception as e:
|
338 |
-
logger.error(f"Error
|
339 |
progress_callback(f"Error procesando {doi}: <a href='https://doi.org/{doi}'>{doi}</a> {e}")
|
340 |
return None, f"Error procesando {doi}: {e}", f"Error procesando {doi}: {e}"
|
341 |
|
|
|
70 |
try:
|
71 |
while retry_count <= max_retries:
|
72 |
try:
|
73 |
+
logger.debug(f"Fetching PDF from {current_url} - Retry {retry_count + 1}")#ADDED
|
74 |
+
async with session.get(current_url, headers=self.headers, timeout=10, allow_redirects=False) as response:
|
75 |
if response.status in [301, 302, 307, 308]:
|
76 |
current_url = response.headers['Location']
|
77 |
redirect_count += 1
|
|
|
81 |
response.raise_for_status()
|
82 |
|
83 |
if 'application/pdf' in response.headers.get('Content-Type', ''):
|
84 |
+
logger.debug(f"Successfully fetched PDF from {current_url}")#ADDED
|
85 |
return await response.read()
|
86 |
else:
|
87 |
logger.debug(f"Content type not PDF for {current_url}: {response.headers.get('Content-Type', '')}")
|
|
|
295 |
# Probar cada estrategia de descarga
|
296 |
for strategy in download_strategies:
|
297 |
try:
|
298 |
+
logger.info(f"Trying strategy {strategy.__name__} for DOI {doi}") # ADDED
|
299 |
pdf_content = await strategy(session, doi)
|
300 |
if pdf_content:
|
301 |
logger.info(f"Descarga exitosa de {doi} usando {strategy.__name__}")
|
302 |
return pdf_content
|
303 |
except Exception as e:
|
304 |
+
logger.debug(f"Error en estrategia {strategy.__name__} para {doi}: {e}") #ADDED
|
305 |
|
306 |
# Si ninguna estrategia funcionó, esperar un poco antes de reintentar
|
307 |
await asyncio.sleep(1) # Pequeña pausa entre reintentos
|
|
|
314 |
"""Descargar un único DOI con retroalimentación de progreso"""
|
315 |
if not doi:
|
316 |
return None, "Error: DOI no proporcionado", "Error: DOI no proporcionado"
|
317 |
+
logger.info(f"Starting download process for DOI: {doi}") # ADDED
|
318 |
try:
|
319 |
pdf_content = await self.download_with_retry_async(doi)
|
|
|
320 |
if pdf_content:
|
321 |
+
logger.info(f"Downloaded PDF for DOI: {doi}") # ADDED
|
322 |
filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
|
323 |
filepath = os.path.join(self.output_dir, filename)
|
324 |
|
325 |
# Escribir contenido del PDF
|
326 |
loop = asyncio.get_running_loop()
|
327 |
await loop.run_in_executor(
|
328 |
+
self.executor,
|
329 |
lambda: open(filepath, 'wb').write(pdf_content)
|
330 |
)
|
331 |
+
logger.info(f"Saved PDF to file: {filepath}") # ADDED
|
332 |
+
|
333 |
logger.info(f"Descarga exitosa: {filename}")
|
334 |
progress_callback(f"Descargado exitosamente: <a href='https://doi.org/{doi}'>{doi}</a>")
|
335 |
return filepath, f'<a href="https://doi.org/{doi}">{doi}</a>', ""
|
336 |
else:
|
337 |
+
logger.warning(f"No se pudo descargar: {doi}") # ADDED
|
338 |
progress_callback(f"No se pudo descargar: <a href='https://doi.org/{doi}'>{doi}</a>")
|
339 |
return None, f"No se pudo descargar {doi}", f'<a href="https://doi.org/{doi}">{doi}</a>'
|
340 |
|
341 |
except Exception as e:
|
342 |
+
logger.error(f"Error processing {doi}: {e}") # ADDED
|
343 |
progress_callback(f"Error procesando {doi}: <a href='https://doi.org/{doi}'>{doi}</a> {e}")
|
344 |
return None, f"Error procesando {doi}: {e}", f"Error procesando {doi}: {e}"
|
345 |
|