FreeBibTec2

Sleeping

App Files Files Community

C2MV committed on Dec 14, 2024

Commit

4d6b47a

verified ·

1 Parent(s): 8bb51fd

Update app.py

Browse files

Files changed (1) hide show

app.py +118 -114

app.py CHANGED Viewed

@@ -311,42 +311,43 @@ class PaperDownloader:
             return None
     async def download_single_doi_async(self, doi, progress_callback):
-        """Descargar un único DOI con retroalimentación de progreso"""
-        if not doi:
-            return None, "Error: DOI no proporcionado", "Error: DOI no proporcionado"
-        logger.info(f"Starting download process for DOI: {doi}") # ADDED
-        try:
-            pdf_content = await self.download_with_retry_async(doi)
-            if pdf_content:
-                logger.info(f"Downloaded PDF for DOI: {doi}") # ADDED
-                filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
-                filepath = os.path.join(self.output_dir, filename)
-                # Escribir contenido del PDF
-                loop = asyncio.get_running_loop()
-                await loop.run_in_executor(
-                    self.executor,
-                    lambda: open(filepath, 'wb').write(pdf_content)
-                )
-                logger.info(f"Saved PDF to file: {filepath}") # ADDED
-                logger.info(f"Descarga exitosa: {filename}")
-                progress_callback("test") # CHANGED for debug purposes
-                return filepath, f'<a href="https://doi.org/{doi}">{doi}</a>', ""
-            else:
-                logger.warning(f"No se pudo descargar: {doi}") # ADDED
-                progress_callback(f"No se pudo descargar: <a href='https://doi.org/{doi}'>{doi}</a>")
-                return None, f"No se pudo descargar {doi}", f'<a href="https://doi.org/{doi}">{doi}</a>'
-        except Exception as e:
-            logger.error(f"Error processing {doi}: {e}") # ADDED
-            progress_callback(f"Error procesando {doi}: <a href='https://doi.org/{doi}'>{e}")
-            return None, f"Error procesando {doi}: {e}", f"Error procesando {doi}: {e}"
     async def download_multiple_dois_async(self, dois_text, progress_callback):
         # Validar entrada
         if not dois_text:
-            return None, "Error: No DOIs provided", "Error: No DOIs provided"
         # Sanitizar y filtrar DOIs
         # Eliminar líneas vacías, espacios en blanco, y DOIs duplicados
@@ -354,7 +355,7 @@ class PaperDownloader:
         # Validar lista de DOIs
         if not dois:
-            return None, "Error: No valid DOIs provided", "Error: No valid DOIs provided"
         # Listas para rastrear resultados
         downloaded_files = []      # Rutas de archivos descargados
@@ -424,56 +425,57 @@ class PaperDownloader:
         return (
             zip_filename if downloaded_files else None,  # Archivo ZIP o None
             "\n".join(downloaded_links),                # DOIs descargados
-            "\n".join(failed_dois)                      # DOIs fallidos
         )
     async def process_bibtex_async(self, bib_file, progress_callback):
-            """Process BibTeX file and download papers with multiple strategies and reports UI updates using a callback"""
-            # Read BibTeX file content from the uploaded object
-            try:
-                with open(bib_file.name, 'r', encoding='utf-8') as f:
-                    bib_content = f.read()
-            except Exception as e:
-                logger.error(f"Error reading uploaded file {bib_file.name}: {e}")
-                return None, f"Error reading uploaded file {bib_file.name}: {e}", f"Error reading uploaded file {bib_file.name}: {e}"
-            # Parse BibTeX data
-            try:
-                bib_database = bibtexparser.loads(bib_content)
-            except Exception as e:
-                logger.error(f"Error parsing BibTeX data: {e}")
-                return None, f"Error parsing BibTeX data: {e}", f"Error parsing BibTeX data: {e}"
-            # Extract DOIs
-            dois = [entry.get('doi') for entry in bib_database.entries if entry.get('doi')]
-            logger.info(f"Found {len(dois)} DOIs to download")
-            # Result lists
-            downloaded_files = []
-            failed_dois = []
-            downloaded_links = []
-            tasks = [self.download_single_doi_async(doi, progress_callback) for doi in dois]
-            results = await asyncio.gather(*tasks)
-            for i, (filepath, success_message, fail_message) in enumerate(results):
-                    if filepath:
-                        # Unique filename for zip
-                        filename = f"{str(dois[i]).replace('/', '_').replace('.', '_')}_{i}.pdf"
-                        filepath_unique = os.path.join(self.output_dir, filename)
-                        os.rename(filepath, filepath_unique)
-                        downloaded_files.append(filepath_unique)
-                        downloaded_links.append(f'<a href="https://doi.org/{dois[i]}">{dois[i]}</a>')
-                    else:
-                        failed_dois.append(f'<a href="https://doi.org/{dois[i]}">{dois[i]}</a>')
-            if downloaded_files:
-                zip_filename = 'papers.zip'
-                loop = asyncio.get_running_loop()
-                await loop.run_in_executor(self.executor, lambda:  self.create_zip(zip_filename,downloaded_files))
-                logger.info(f"ZIP file created: {zip_filename}")
-            return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois)
     def create_zip(self, zip_filename, files):
         """Crea un archivo zip con los pdfs descargados"""
@@ -481,30 +483,39 @@ class PaperDownloader:
             for file in files:
                 zf.write(file, os.path.basename(file))
 def create_gradio_interface():
     """Create Gradio interface for Paper Downloader"""
     downloader = PaperDownloader()
-    async def download_papers(bib_file, doi_input, dois_input, progress=gr.Progress()):
-            if bib_file:
-                # Check file type
-                if not bib_file.name.lower().endswith('.bib'):
-                    return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", None
-                zip_path, downloaded_dois, failed_dois = await downloader.process_bibtex_async(bib_file, progress.update)
-                return zip_path, downloaded_dois, failed_dois, None
-            elif doi_input:
-                filepath, message, failed_doi = await downloader.download_single_doi_async(doi_input,progress.update)
-                return None, message, failed_doi, filepath
-            elif dois_input:
-                 zip_path, downloaded_dois, failed_dois = await downloader.download_multiple_dois_async(dois_input, progress.update)
-                 return zip_path, downloaded_dois, failed_dois, None
-            else:
-                 return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", None
     # Gradio Interface
     interface = gr.Interface(
@@ -513,7 +524,6 @@ def create_gradio_interface():
             gr.File(file_types=['.bib'], label="Upload BibTeX File"),
             gr.Textbox(label="Enter Single DOI", placeholder="10.xxxx/xxxx"),
             gr.Textbox(label="Enter Multiple DOIs (one per line)", placeholder="10.xxxx/xxxx\n10.yyyy/yyyy\n...")
         ],
         outputs=[
             gr.File(label="Download Papers (ZIP) or Single PDF"),
@@ -533,7 +543,16 @@ def create_gradio_interface():
                     <div id="failed-dois"></div>
                 </div>
             """),
-           gr.File(label="Downloaded Single PDF")
         ],
         title="🔬 Academic Paper Batch Downloader",
         description="Upload a BibTeX file or enter DOIs to download PDFs. We'll attempt to fetch PDFs from multiple sources like Sci-Hub, Libgen, Google Scholar and Crossref. You can use any of the three inputs at any moment.",
@@ -559,30 +578,15 @@ def create_gradio_interface():
            .output-text a {
                color: #007bff; /* Blue color for hyperlinks */
             }
         """,
         cache_examples=False,
     )
-    # Add Javascript to update HTML
-    interface.load = """
-        function(downloaded_dois, failed_dois){
-             let downloaded_html = '';
-             downloaded_dois.split('\\n').filter(Boolean).forEach(doi => {
-                downloaded_html +=  '[' + doi + ']<br>';
-            });
-            document.querySelector("#downloaded-dois").innerHTML = downloaded_html;
-             let failed_html = '';
-             failed_dois.split('\\n').filter(Boolean).forEach(doi => {
-                failed_html += '[' + doi + ']<br>';
-            });
-            document.querySelector("#failed-dois").innerHTML = failed_html;
-            return [downloaded_html, failed_html];
-        }
-    """
     return interface
 def main():
     interface = create_gradio_interface()
     interface.launch()

             return None
     async def download_single_doi_async(self, doi, progress_callback):
+        """Download the PDF for a single DOI and report progress via a callback.
+
+        Args:
+            doi: DOI to fetch. A falsy value returns an error tuple immediately.
+            progress_callback: Callable invoked with one HTML status message
+                describing the outcome (success, not found, or error).
+
+        Returns:
+            A 4-tuple ``(filepath, success_message, failure_message, log)``.
+            ``filepath`` is the saved PDF path, or ``None`` when nothing was
+            saved; ``log`` is always an empty string.
+        """
+        if not doi:
+            return None, "Error: DOI no proporcionado", "Error: DOI no proporcionado", ""
+        logger.info(f"Starting download process for DOI: {doi}")
+
+        def write_pdf(path, content):
+            # The context manager closes the handle even if the write fails,
+            # instead of leaving an open file object for the garbage collector.
+            with open(path, 'wb') as pdf_file:
+                pdf_file.write(content)
+
+        try:
+            pdf_content = await self.download_with_retry_async(doi)
+            if pdf_content:
+                logger.info(f"Downloaded PDF for DOI: {doi}")
+                filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
+                filepath = os.path.join(self.output_dir, filename)
+                # Hand the blocking disk write to the executor so the event
+                # loop stays free while the PDF is written.
+                loop = asyncio.get_running_loop()
+                await loop.run_in_executor(self.executor, write_pdf, filepath, pdf_content)
+                logger.info(f"Saved PDF to file: {filepath}")
+                logger.info(f"Descarga exitosa: {filename}")
+                progress_callback(f"Descargado exitosamente: <a href='https://doi.org/{doi}'>{doi}</a>")
+                return filepath, f'<a href="https://doi.org/{doi}">{doi}</a>', "", ""
+            else:
+                logger.warning(f"No se pudo descargar: {doi}")
+                progress_callback(f"No se pudo descargar: <a href='https://doi.org/{doi}'>{doi}</a>")
+                return None, f"No se pudo descargar {doi}", f'<a href="https://doi.org/{doi}">{doi}</a>', ""
+        except Exception as e:
+            logger.error(f"Error processing {doi}: {e}")
+            # The anchor wraps the DOI and is explicitly closed so the message
+            # is well-formed HTML; the exception text follows the link.
+            progress_callback(f"Error procesando <a href='https://doi.org/{doi}'>{doi}</a>: {e}")
+            return None, f"Error procesando {doi}: {e}", f"Error processing {doi}: {e}", ""
     async def download_multiple_dois_async(self, dois_text, progress_callback):
         # Validar entrada
         if not dois_text:
+            return None, "Error: No DOIs provided", "Error: No DOIs provided", ""
         # Sanitizar y filtrar DOIs
         # Eliminar líneas vacías, espacios en blanco, y DOIs duplicados
         # Validar lista de DOIs
         if not dois:
+            return None, "Error: No valid DOIs provided", "Error: No valid DOIs provided", ""
         # Listas para rastrear resultados
         downloaded_files = []      # Rutas de archivos descargados
         return (
             zip_filename if downloaded_files else None,  # Archivo ZIP o None
             "\n".join(downloaded_links),                # DOIs descargados
+            "\n".join(failed_dois),                      # DOIs fallidos
+             ""
         )
     async def process_bibtex_async(self, bib_file, progress_callback):
+        """Download the papers for every DOI found in an uploaded BibTeX file.
+
+        Args:
+            bib_file: Uploaded file object; only its ``name`` attribute is
+                read, and it is opened as a UTF-8 text file.
+            progress_callback: Passed unchanged to
+                ``download_single_doi_async`` for each DOI.
+
+        Returns:
+            A 4-tuple ``(zip_filename, downloaded_links, failed_links, log)``.
+            ``zip_filename`` is ``None`` when no PDF was downloaded. The two
+            link fields are newline-joined HTML anchors, and ``log`` is always
+            an empty string. If the file cannot be read or parsed, the tuple
+            is ``(None, error_message, error_message, "")``.
+        """
+        # Read BibTeX file content from the uploaded object
+        try:
+            with open(bib_file.name, 'r', encoding='utf-8') as f:
+                bib_content = f.read()
+        except Exception as e:
+            error_message = f"Error reading uploaded file {bib_file.name}: {e}"
+            logger.error(error_message)
+            return None, error_message, error_message, ""
+        # Parse BibTeX data
+        try:
+            bib_database = bibtexparser.loads(bib_content)
+        except Exception as e:
+            error_message = f"Error parsing BibTeX data: {e}"
+            logger.error(error_message)
+            return None, error_message, error_message, ""
+        # Extract DOIs, skipping entries without one
+        dois = [entry.get('doi') for entry in bib_database.entries if entry.get('doi')]
+        logger.info(f"Found {len(dois)} DOIs to download")
+        # Result lists
+        downloaded_files = []
+        failed_dois = []
+        downloaded_links = []
+        tasks = [self.download_single_doi_async(doi, progress_callback) for doi in dois]
+        results = await asyncio.gather(*tasks)
+        # gather() preserves task order, so results line up with ``dois``.
+        for i, (doi, (filepath, _success, _failure, _log)) in enumerate(zip(dois, results)):
+            link = f'<a href="https://doi.org/{doi}">{doi}</a>'
+            if filepath:
+                # Suffix the index so every archive member has a unique name
+                filename = f"{str(doi).replace('/', '_').replace('.', '_')}_{i}.pdf"
+                filepath_unique = os.path.join(self.output_dir, filename)
+                os.rename(filepath, filepath_unique)
+                downloaded_files.append(filepath_unique)
+                downloaded_links.append(link)
+            else:
+                failed_dois.append(link)
+        # Bind the name up front: when nothing was downloaded the branch below
+        # is skipped and the return statement must still find a value.
+        zip_filename = None
+        if downloaded_files:
+            zip_filename = 'papers.zip'
+            loop = asyncio.get_running_loop()
+            await loop.run_in_executor(self.executor, self.create_zip, zip_filename, downloaded_files)
+            logger.info(f"ZIP file created: {zip_filename}")
+        return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois), ""
     def create_zip(self, zip_filename, files):
         """Crea un archivo zip con los pdfs descargados"""
             for file in files:
                 zf.write(file, os.path.basename(file))
 def create_gradio_interface():
     """Create Gradio interface for Paper Downloader"""
     downloader = PaperDownloader()
+    def update_progress(message, log_message=""):
+        """Build the pair of HTML updates for the status and log components.
+
+        Returns a 2-tuple: an update carrying ``message`` and an update
+        carrying ``log_message`` wrapped in a ``<pre>`` element.
+        """
+        # NOTE(review): relies on ``gr.HTML.update`` being available in the
+        # installed Gradio version - confirm against the Space's requirements.
+        status_update = gr.HTML.update(value=f"{message}")
+        log_update = gr.HTML.update(value=f"<pre>{log_message}</pre>")
+        return status_update, log_update
+    async def download_papers(bib_file, doi_input, dois_input):
+        """Run the download for the first input that was provided.
+
+        Inputs are checked in priority order: BibTeX file, single DOI, then
+        the multi-line DOI list.
+
+        Returns:
+            A 5-tuple ``(zip_path, downloaded_html, failed_html, logs_html,
+            single_pdf_path)``, in the order of the interface outputs.
+            ``logs_html`` holds the progress messages reported during the
+            run, joined with ``<br>``.
+        """
+        log_messages = []
+
+        def custom_progress_callback(message):
+            # A component update returned from a callback is not rendered, so
+            # each message is logged and collected here, then returned through
+            # the logs slot of the result tuple.
+            logger.info(f"Callback message: {message}")
+            log_messages.append(message)
+
+        def collected_logs():
+            return "<br>".join(log_messages)
+
+        if bib_file:
+            # Check file type
+            if not bib_file.name.lower().endswith('.bib'):
+                return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", "", None
+            zip_path, downloaded_dois, failed_dois, _ = await downloader.process_bibtex_async(bib_file, custom_progress_callback)
+            return zip_path, downloaded_dois, failed_dois, collected_logs(), None
+        elif doi_input:
+            filepath, message, failed_doi, _ = await downloader.download_single_doi_async(doi_input, custom_progress_callback)
+            return None, message, failed_doi, collected_logs(), filepath
+        elif dois_input:
+            zip_path, downloaded_dois, failed_dois, _ = await downloader.download_multiple_dois_async(dois_input, custom_progress_callback)
+            return zip_path, downloaded_dois, failed_dois, collected_logs(), None
+        else:
+            return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", "", None
     # Gradio Interface
     interface = gr.Interface(
             gr.File(file_types=['.bib'], label="Upload BibTeX File"),
             gr.Textbox(label="Enter Single DOI", placeholder="10.xxxx/xxxx"),
             gr.Textbox(label="Enter Multiple DOIs (one per line)", placeholder="10.xxxx/xxxx\n10.yyyy/yyyy\n...")
         ],
         outputs=[
             gr.File(label="Download Papers (ZIP) or Single PDF"),
                     <div id="failed-dois"></div>
                 </div>
             """),
+            gr.HTML(label="""
+            <div style='padding-bottom: 5px; font-weight: bold;'>
+                Logs
+            </div>
+                <div style='border: 1px solid #ddd; padding: 5px; border-radius: 5px; max-height: 150px; overflow-y: auto;white-space: pre-line;'>
+                </div>
+            """,),
+            gr.File(label="Downloaded Single PDF")
         ],
         title="🔬 Academic Paper Batch Downloader",
         description="Upload a BibTeX file or enter DOIs to download PDFs. We'll attempt to fetch PDFs from multiple sources like Sci-Hub, Libgen, Google Scholar and Crossref. You can use any of the three inputs at any moment.",
            .output-text a {
                color: #007bff; /* Blue color for hyperlinks */
             }
+            .logs_box {
+            }
         """,
         cache_examples=False,
     )
     return interface
 def main():
     """Build the Gradio interface and launch it."""
     create_gradio_interface().launch()