CPS-Test-Mobile

Paused

App Files Community

Ali2206 committed on Apr 17

Commit

67dd49b

verified ·

1 Parent(s): a58b5f7

Update app.py

Browse files

Files changed (1) hide show

app.py +87 -75

app.py CHANGED Viewed

@@ -14,6 +14,11 @@ import subprocess
 import multiprocessing
 from functools import partial
 import time
 # Persistent directory
 persistent_dir = "/data/hf_cache"
@@ -47,8 +52,8 @@ def file_hash(path: str) -> str:
     with open(path, "rb") as f:
         return hashlib.md5(f.read()).hexdigest()
-def chunk_hash(chunk: str) -> str:
-    return hashlib.md5(chunk.encode("utf-8")).hexdigest()
 def extract_page_range(file_path: str, start_page: int, end_page: int) -> str:
     """Extract text from a range of PDF pages."""
@@ -59,7 +64,8 @@ def extract_page_range(file_path: str, start_page: int, end_page: int) -> str:
                 page_text = page.extract_text() or ""
                 text_chunks.append(f"=== Page {start_page + pdf.pages.index(page) + 1} ===\n{page_text.strip()}")
         return "\n\n".join(text_chunks)
-    except Exception:
         return ""
 def extract_all_pages(file_path: str, progress_callback=None) -> str:
@@ -90,6 +96,7 @@ def extract_all_pages(file_path: str, progress_callback=None) -> str:
         return "\n\n".join(filter(None, results))
     except Exception as e:
         return f"PDF processing error: {str(e)}"
 def convert_file_to_json(file_path: str, file_type: str, progress_callback=None) -> str:
@@ -121,22 +128,23 @@ def convert_file_to_json(file_path: str, file_type: str, progress_callback=None)
             f.write(result)
         return result
     except Exception as e:
         return json.dumps({"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"})
 def log_system_usage(tag=""):
     try:
         cpu = psutil.cpu_percent(interval=1)
         mem = psutil.virtual_memory()
-        print(f"[{tag}] CPU: {cpu}% | RAM: {mem.used // (1024**2)}MB / {mem.total // (1024**2)}MB")
         result = subprocess.run(
             ["nvidia-smi", "--query-gpu=memory.used,memory.total,utilization.gpu", "--format=csv,nounits,noheader"],
             capture_output=True, text=True
         )
         if result.returncode == 0:
             used, total, util = result.stdout.strip().split(", ")
-            print(f"[{tag}] GPU: {used}MB / {total}MB | Utilization: {util}%")
     except Exception as e:
-        print(f"[{tag}] GPU/CPU monitor failed: {e}")
 def clean_response(text: str) -> str:
     """Clean TxAgent response to group findings under tool-derived headings."""
@@ -191,7 +199,7 @@ def clean_response(text: str) -> str:
     return text
 def init_agent():
-    print("🔁 Initializing model...")
     log_system_usage("Before Load")
     default_tool_path = os.path.abspath("data/new_tool.json")
     target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
@@ -204,25 +212,75 @@ def init_agent():
         tool_files_dict={"new_tool": target_tool_path},
         force_finish=True,
         enable_checker=True,
-        step_rag_num=2,  # Reduced for speed
         seed=100,
         additional_default_tools=[],
     )
     agent.init_model()
     log_system_usage("After Load")
-    print("✅ Agent Ready")
     return agent
-def process_chunk(agent, chunk: str, chunk_idx: int, total_chunks: int, cache_path: str) -> str:
-    """Process a single chunk and cache the result."""
-    chunk_id = chunk_hash(chunk)
-    chunk_cache_path = os.path.join(file_cache_dir, f"chunk_{chunk_id}.txt")
     if os.path.exists(chunk_cache_path):
         with open(chunk_cache_path, "r", encoding="utf-8") as f:
-            return f.read()
-    prompt_template = """
 You are a medical analysis assistant. Analyze the following patient record excerpt for clinical oversights and provide a concise, evidence-based summary in markdown format. Group findings under appropriate headings based on the tool used (e.g., drug-related findings under 'Drugs'). For each finding, include:
 - Clinical context (why the issue was missed or relevant details from the record).
 - Potential risks if unaddressed (e.g., disease progression, adverse events).
@@ -243,45 +301,6 @@ Example Output:
 Patient Record Excerpt (Chunk {0} of {1}):
 {chunk}
 """
-    prompt = prompt_template.format(chunk_idx, total_chunks, chunk=chunk[:2000])  # Truncate to avoid token limits
-    chunk_response = ""
-    for chunk_output in agent.run_gradio_chat(
-        message=prompt,
-        history=[],
-        temperature=0.2,
-        max_new_tokens=512,  # Reduced for speed
-        max_token=2048,      # Reduced for speed
-        call_agent=False,
-        conversation=[],
-    ):
-        if chunk_output is None:
-            continue
-        if isinstance(chunk_output, list):
-            for m in chunk_output:
-                if hasattr(m, 'content') and m.content:
-                    cleaned = clean_response(m.content)
-                    if cleaned and re.search(r"###\s*\w+", cleaned):
-                        chunk_response += cleaned + "\n\n"
-        elif isinstance(chunk_output, str) and chunk_output.strip():
-            cleaned = clean_response(chunk_output)
-            if cleaned and re.search(r"###\s*\w+", cleaned):
-                chunk_response += cleaned + "\n\n"
-    if chunk_response:
-        with open(chunk_cache_path, "w", encoding="utf-8") as f:
-            f.write(chunk_response)
-    return chunk_response
-def create_ui(agent):
-    with gr.Blocks(theme=gr.themes.Soft()) as demo:
-        gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
-        chatbot = gr.Chatbot(label="Analysis", height=600, type="messages")
-        file_upload = gr.File(file_types=[".pdf", ".csv", ".xls", ".xlsx"], file_count="multiple")
-        max_chunks_input = gr.Slider(minimum=1, maximum=50, value=10, step=1, label="Max Chunks to Analyze")
-        msg_input = gr.Textbox(placeholder="Ask about potential oversights...", show_label=False)
-        send_btn = gr.Button("Analyze", variant="primary")
-        download_output = gr.File(label="Download Full Report")
         def analyze(message: str, history: List[dict], files: List, max_chunks: int):
             history.append({"role": "user", "content": message})
@@ -311,7 +330,7 @@ def create_ui(agent):
             history.append({"role": "assistant", "content": "✅ Text extraction complete."})
             yield history, None
-            chunk_size = 2000  # Reduced for speed
             chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
             chunks = chunks[:max_chunks]  # Limit to max_chunks
             total_chunks = len(chunks)
@@ -323,24 +342,17 @@ def create_ui(agent):
                 return
             try:
-                with ThreadPoolExecutor(max_workers=4) as executor:  # Parallel processing
-                    futures = []
-                    for chunk_idx, chunk in enumerate(chunks, 1):
-                        futures.append(executor.submit(process_chunk, agent, chunk, chunk_idx, total_chunks, file_cache_dir))
-                    for idx, future in enumerate(as_completed(futures)):
-                        chunk_response = future.result()
-                        animation = ["🔍", "📊", "🧠", "🔎"][(int(time.time() * 2) % 4)]
-                        history.append({"role": "assistant", "content": f"Analyzing chunks... {animation} {idx + 1}/{total_chunks}"})
-                        yield history, None
-                        if chunk_response:
-                            combined_response += f"--- Analysis for Chunk {idx + 1} ---\n{chunk_response}\n"
-                        else:
-                            combined_response += f"--- Analysis for Chunk {idx + 1} ---\nNo oversights identified for this chunk.\n\n"
-                        history[-1] = {"role": "assistant", "content": combined_response.strip()}
-                        yield history, None
                 if combined_response.strip() and not all("No oversights identified" in chunk for chunk in combined_response.split("--- Analysis for Chunk")):
                     history[-1]["content"] = combined_response.strip()
@@ -354,7 +366,7 @@ def create_ui(agent):
                 yield history, report_path if report_path and os.path.exists(report_path) else None
             except Exception as e:
-                print("🚨 ERROR:", e)
                 history.append({"role": "assistant", "content": f"❌ Error occurred: {str(e)}"})
                 yield history, None
@@ -363,7 +375,7 @@ def create_ui(agent):
     return demo
 if __name__ == "__main__":
-    print("🚀 Launching app...")
     agent = init_agent()
     demo = create_ui(agent)
     demo.queue(api_open=False).launch(

 import multiprocessing
 from functools import partial
 import time
+import logging
+# Setup logging
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger(__name__)
 # Persistent directory
 persistent_dir = "/data/hf_cache"
     with open(path, "rb") as f:
         return hashlib.md5(f.read()).hexdigest()
+def chunk_hash(chunk: str, prompt: str) -> str:
+    return hashlib.md5((chunk + prompt).encode("utf-8")).hexdigest()
 def extract_page_range(file_path: str, start_page: int, end_page: int) -> str:
     """Extract text from a range of PDF pages."""
                 page_text = page.extract_text() or ""
                 text_chunks.append(f"=== Page {start_page + pdf.pages.index(page) + 1} ===\n{page_text.strip()}")
         return "\n\n".join(text_chunks)
+    except Exception as e:
+        logger.error(f"Error extracting pages {start_page}-{end_page}: {e}")
         return ""
 def extract_all_pages(file_path: str, progress_callback=None) -> str:
         return "\n\n".join(filter(None, results))
     except Exception as e:
+        logger.error(f"PDF processing error: {e}")
         return f"PDF processing error: {str(e)}"
 def convert_file_to_json(file_path: str, file_type: str, progress_callback=None) -> str:
             f.write(result)
         return result
     except Exception as e:
+        logger.error(f"Error processing {file_path}: {e}")
         return json.dumps({"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"})
 def log_system_usage(tag=""):
     try:
         cpu = psutil.cpu_percent(interval=1)
         mem = psutil.virtual_memory()
+        logger.info(f"[{tag}] CPU: {cpu}% | RAM: {mem.used // (1024**2)}MB / {mem.total // (1024**2)}MB")
         result = subprocess.run(
             ["nvidia-smi", "--query-gpu=memory.used,memory.total,utilization.gpu", "--format=csv,nounits,noheader"],
             capture_output=True, text=True
         )
         if result.returncode == 0:
             used, total, util = result.stdout.strip().split(", ")
+            logger.info(f"[{tag}] GPU: {used}MB / {total}MB | Utilization: {util}%")
     except Exception as e:
+        logger.error(f"[{tag}] GPU/CPU monitor failed: {e}")
 def clean_response(text: str) -> str:
     """Clean TxAgent response to group findings under tool-derived headings."""
     return text
 def init_agent():
+    logger.info("Initializing model...")
     log_system_usage("Before Load")
     default_tool_path = os.path.abspath("data/new_tool.json")
     target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
         tool_files_dict={"new_tool": target_tool_path},
         force_finish=True,
         enable_checker=True,
+        step_rag_num=2,
         seed=100,
         additional_default_tools=[],
     )
     agent.init_model()
     log_system_usage("After Load")
+    logger.info("Agent Ready")
     return agent
+def process_chunk(agent, chunk: str, chunk_idx: int, total_chunks: int, cache_path: str, prompt_template: str) -> tuple:
+    """Process a single chunk with error handling and caching."""
+    if not chunk.strip():
+        logger.warning(f"Chunk {chunk_idx} is empty, skipping...")
+        return chunk_idx, f"--- Analysis for Chunk {chunk_idx} ---\nNo oversights identified for this chunk.\n\n"
+    chunk_id = chunk_hash(chunk, prompt_template)
+    chunk_cache_path = os.path.join(cache_path, f"chunk_{chunk_id}.txt")
     if os.path.exists(chunk_cache_path):
         with open(chunk_cache_path, "r", encoding="utf-8") as f:
+            logger.info(f"Cache hit for chunk {chunk_idx}")
+            return chunk_idx, f.read()
+    prompt = prompt_template.format(chunk_idx, total_chunks, chunk=chunk[:1000])  # Truncate to avoid token limits
+    chunk_response = ""
+    try:
+        for chunk_output in agent.run_gradio_chat(
+            message=prompt,
+            history=[],
+            temperature=0.2,
+            max_new_tokens=512,
+            max_token=2048,
+            call_agent=False,
+            conversation=[],
+        ):
+            if chunk_output is None:
+                continue
+            if isinstance(chunk_output, list):
+                for m in chunk_output:
+                    if hasattr(m, 'content') and m.content:
+                        cleaned = clean_response(m.content)
+                        if cleaned and re.search(r"###\s*\w+", cleaned):
+                            chunk_response += cleaned + "\n\n"
+            elif isinstance(chunk_output, str) and chunk_output.strip():
+                cleaned = clean_response(chunk_output)
+                if cleaned and re.search(r"###\s*\w+", cleaned):
+                    chunk_response += cleaned + "\n\n"
+    except Exception as e:
+        logger.error(f"Error processing chunk {chunk_idx}: {e}")
+        return chunk_idx, f"--- Analysis for Chunk {chunk_idx} ---\nError occurred: {str(e)}\n\n"
+    if chunk_response:
+        with open(chunk_cache_path, "w", encoding="utf-8") as f:
+            f.write(chunk_response)
+        return chunk_idx, f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response}\n"
+    return chunk_idx, f"--- Analysis for Chunk {chunk_idx} ---\nNo oversights identified for this chunk.\n\n"
+def create_ui(agent):
+    with gr.Blocks(theme=gr.themes.Soft()) as demo:
+        gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
+        chatbot = gr.Chatbot(label="Analysis", height=600, type="messages")
+        file_upload = gr.File(file_types=[".pdf", ".csv", ".xls", ".xlsx"], file_count="multiple")
+        max_chunks_input = gr.Slider(minimum=1, maximum=50, value=5, step=1, label="Max Chunks to Analyze")
+        msg_input = gr.Textbox(placeholder="Ask about potential oversights...", show_label=False)
+        send_btn = gr.Button("Analyze", variant="primary")
+        download_output = gr.File(label="Download Full Report")
+        prompt_template = """
 You are a medical analysis assistant. Analyze the following patient record excerpt for clinical oversights and provide a concise, evidence-based summary in markdown format. Group findings under appropriate headings based on the tool used (e.g., drug-related findings under 'Drugs'). For each finding, include:
 - Clinical context (why the issue was missed or relevant details from the record).
 - Potential risks if unaddressed (e.g., disease progression, adverse events).
 Patient Record Excerpt (Chunk {0} of {1}):
 {chunk}
 """
         def analyze(message: str, history: List[dict], files: List, max_chunks: int):
             history.append({"role": "user", "content": message})
             history.append({"role": "assistant", "content": "✅ Text extraction complete."})
             yield history, None
+            chunk_size = 1000  # Reduced for speed
             chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
             chunks = chunks[:max_chunks]  # Limit to max_chunks
             total_chunks = len(chunks)
                 return
             try:
+                # Sequential processing to avoid vLLM error
+                for chunk_idx, chunk in enumerate(chunks, 1):
+                    animation = ["🔍", "📊", "🧠", "🔎"][(int(time.time() * 2) % 4)]
+                    history.append({"role": "assistant", "content": f"Analyzing chunk {chunk_idx}/{total_chunks}... {animation}"})
+                    yield history, None
+                    _, chunk_response = process_chunk(agent, chunk, chunk_idx, total_chunks, file_cache_dir, prompt_template)
+                    combined_response += chunk_response
+                    history[-1] = {"role": "assistant", "content": combined_response.strip()}
+                    yield history, None
                 if combined_response.strip() and not all("No oversights identified" in chunk for chunk in combined_response.split("--- Analysis for Chunk")):
                     history[-1]["content"] = combined_response.strip()
                 yield history, report_path if report_path and os.path.exists(report_path) else None
             except Exception as e:
+                logger.error(f"Analysis error: {e}")
                 history.append({"role": "assistant", "content": f"❌ Error occurred: {str(e)}"})
                 yield history, None
     return demo
 if __name__ == "__main__":
+    logger.info("Launching app...")
     agent = init_agent()
     demo = create_ui(agent)
     demo.queue(api_open=False).launch(