Update app.py
app.py (CHANGED)
@@ -17,9 +17,9 @@ import gc
 from diskcache import Cache
 import time
 
-# Configure logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(
+# Configure logging with a more specific logger name to avoid conflicts
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+logger = logging.getLogger("ClinicalOversightApp")
 
 # Persistent directory
 persistent_dir = "/data/hf_cache"
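For reference, the new logging setup can be exercised on its own. This is a minimal sketch using only names visible in the diff; the sample output line and the page numbers are illustrative:

    import logging

    # Same configuration as new lines 20-22: timestamped records tagged with an
    # app-specific logger name instead of the root logger.
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger = logging.getLogger("ClinicalOversightApp")

    logger.info("Processed %d/%d pages for %s", 10, 42, "report.pdf")
    # e.g. 2024-05-01 12:00:00,123 - ClinicalOversightApp - INFO - Processed 10/42 pages for report.pdf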
@@ -61,7 +61,7 @@ def extract_all_pages(file_path: str, progress_callback=None) -> str:
 with pdfplumber.open(file_path) as pdf:
     total_pages = len(pdf.pages)
     if total_pages == 0:
-        logger.error("No pages found in PDF")
+        logger.error("No pages found in PDF: %s", file_path)
         return ""
 
     batch_size = 10
@@ -89,13 +89,13 @@ def extract_all_pages(file_path: str, progress_callback=None) -> str:
 processed_pages += batch_size
 if progress_callback:
     progress_callback(min(processed_pages, total_pages), total_pages)
-logger.info("Processed %d/%d pages", min(processed_pages, total_pages), total_pages)
+logger.info("Processed %d/%d pages for %s", min(processed_pages, total_pages), total_pages, file_path)
 
 extracted_text = "\n\n".join(filter(None, text_chunks))
-logger.info("Extracted %d pages, total length: %d chars", total_pages, len(extracted_text))
+logger.info("Extracted %d pages from %s, total length: %d chars", total_pages, file_path, len(extracted_text))
 return extracted_text
 except Exception as e:
-    logger.error("PDF processing error: %s", e)
+    logger.error("PDF processing error for %s: %s", file_path, e, exc_info=True)
     return f"PDF processing error: {str(e)}"
 
 def convert_file_to_json(file_path: str, file_type: str, progress_callback=None) -> str:
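The error handlers in this and the later hunks now pass exc_info=True. A small self-contained sketch of what that changes; the failing call is a stand-in, not the app's real pdfplumber code:

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger("ClinicalOversightApp")

    def parse_pdf(path: str) -> str:
        try:
            raise ValueError("simulated parser failure")  # stand-in for a real pdfplumber error
        except Exception as e:
            # exc_info=True attaches the full traceback to the log record,
            # not just the formatted message.
            logger.error("PDF processing error for %s: %s", path, e, exc_info=True)
            return f"PDF processing error: {str(e)}"

    parse_pdf("broken.pdf")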
@@ -103,8 +103,10 @@ def convert_file_to_json(file_path: str, file_type: str, progress_callback=None)
 file_h = file_hash(file_path)
 cache_key = f"{file_h}_{file_type}"
 if cache_key in cache:
-    logger.info("
+    logger.info("Cache hit for %s (key: %s)", file_path, cache_key)
     return cache[cache_key]
+else:
+    logger.info("Cache miss for %s (key: %s), performing fresh extraction", file_path, cache_key)
 
 if file_type == "pdf":
     text = extract_all_pages(file_path, progress_callback)
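The cache hit/miss logging above sits on top of a diskcache keyed by a content hash. A minimal sketch of that pattern, assuming file_hash simply digests the file bytes (the app's real helper may differ) and using an illustrative cache directory:

    import hashlib
    from diskcache import Cache

    cache = Cache("/tmp/extraction_cache")  # illustrative path, not the app's

    def file_hash(path: str) -> str:
        # Assumed behaviour of the app's file_hash(): digest the file contents,
        # so the key changes whenever the file changes.
        with open(path, "rb") as fh:
            return hashlib.md5(fh.read()).hexdigest()

    def cached_convert(path: str, file_type: str) -> str:
        key = f"{file_hash(path)}_{file_type}"
        if key in cache:          # hit: reuse the earlier extraction
            return cache[key]
        result = f"extracted text for {path}"  # placeholder for the real extraction
        cache[key] = result       # miss: store the fresh result for next time
        return result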
@@ -128,7 +130,7 @@ def convert_file_to_json(file_path: str, file_type: str, progress_callback=None)
 logger.info("Cached extraction for %s, size: %d bytes", file_path, len(result))
 return result
 except Exception as e:
-    logger.error("Error processing %s: %s", os.path.basename(file_path), e)
+    logger.error("Error processing %s: %s", os.path.basename(file_path), e, exc_info=True)
     return json.dumps({"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"})
 
 def log_system_usage(tag=""):
@@ -259,28 +261,42 @@ Patient Record Excerpt (Chunk {0} of {1}):
 yield history, None, ""
 logger.info("Starting analysis for message: %s", message[:100])
 
+if not files:
+    logger.error("No files uploaded for analysis")
+    history.append({"role": "assistant", "content": "❌ Please upload a file to analyze."})
+    yield history, None, "### Summary of Clinical Oversights\nNo file uploaded for analysis."
+    return
+
 extracted = ""
 file_hash_value = ""
- [16 lines removed here; their content was not captured in this rendering]
+logger.info("Processing %d uploaded files", len(files))
+for f in files:
+    logger.info("Processing file: %s", f.name)
+
+def update_extraction_progress(current, total):
+    progress(current / total, desc=f"Extracting text... Page {current}/{total}")
+    return history, None, ""
+
+with ThreadPoolExecutor(max_workers=6) as executor:
+    futures = [executor.submit(convert_file_to_json, f.name, f.name.split(".")[-1].lower(), update_extraction_progress) for f in files]
+    results = [sanitize_utf8(f.result()) for f in as_completed(futures)]
+extracted = "\n".join(results)
+file_hash_value = file_hash(files[0].name) if files else ""
+logger.info("Extraction complete for %d files", len(files))
+history.append({"role": "assistant", "content": "✅ Text extraction complete."})
+yield history, None, ""
 
 logger.info("Extracted text length: %d chars", len(extracted))
+if len(extracted.strip()) == 0:
+    logger.error("Extracted text is empty")
+    history.append({"role": "assistant", "content": "❌ Extracted text is empty. Please ensure the file contains readable content."})
+    yield history, None, "### Summary of Clinical Oversights\nExtracted text is empty."
+    return
+
 chunk_size = 6000
 chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
+if not chunks:
+    chunks = [""]
 logger.info("Created %d chunks", len(chunks))
 combined_response = ""
 batch_size = 2
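The rewritten extraction block fans per-file work out to a thread pool. A self-contained sketch of that pattern with simplified stand-ins for sanitize_utf8 and convert_file_to_json; note that as_completed yields futures in completion order, so the joined results are not guaranteed to follow the upload order:

    from concurrent.futures import ThreadPoolExecutor, as_completed

    def sanitize_utf8(text: str) -> str:
        # Simplified stand-in for the app's helper.
        return text.encode("utf-8", "ignore").decode("utf-8")

    def convert_file_to_json(path: str, file_type: str) -> str:
        # Placeholder for the real extraction pipeline.
        return f'{{"file": "{path}", "type": "{file_type}"}}'

    def extract_all(paths):
        with ThreadPoolExecutor(max_workers=6) as executor:
            futures = [executor.submit(convert_file_to_json, p, p.split(".")[-1].lower())
                       for p in paths]
            results = [sanitize_utf8(f.result()) for f in as_completed(futures)]
        return "\n".join(results)

    print(extract_all(["notes.pdf", "labs.csv"]))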
@@ -337,7 +353,7 @@ Patient Record Excerpt (Chunk {0} of {1}):
 yield history, report_path if report_path and os.path.exists(report_path) else None, summary
 
 except Exception as e:
-    logger.error("Analysis error: %s", e)
+    logger.error("Analysis error: %s", e, exc_info=True)
     history.append({"role": "assistant", "content": f"❌ Error occurred: {str(e)}"})
     yield history, None, f"### Summary of Clinical Oversights\nError occurred during analysis: {str(e)}"
 