Update app.py
Browse files
app.py
CHANGED
@@ -55,7 +55,6 @@ TARGET_CHUNK_TOKENS = 1200
|
|
55 |
PROMPT_RESERVE = 100
|
56 |
MEDICAL_SECTION_HEADER = "=== MEDICAL SECTION ==="
|
57 |
|
58 |
-
|
59 |
def log_system_usage(tag=""):
|
60 |
try:
|
61 |
cpu = psutil.cpu_percent(interval=1)
|
@@ -71,7 +70,6 @@ def log_system_usage(tag=""):
|
|
71 |
except Exception as e:
|
72 |
print(f"[{tag}] GPU/CPU monitor failed: {e}")
|
73 |
|
74 |
-
|
75 |
def sanitize_utf8(text: str) -> str:
|
76 |
return text.encode("utf-8", "ignore").decode("utf-8")
|
77 |
|
@@ -83,7 +81,6 @@ def count_tokens(text: str) -> int:
|
|
83 |
encoding = tiktoken.get_encoding(TOKENIZER)
|
84 |
return len(encoding.encode(text))
|
85 |
|
86 |
-
|
87 |
def extract_all_pages_with_token_count(file_path: str) -> Tuple[str, int, int]:
|
88 |
try:
|
89 |
text_chunks = []
|
@@ -103,7 +100,6 @@ def extract_all_pages_with_token_count(file_path: str) -> Tuple[str, int, int]:
|
|
103 |
except Exception as e:
|
104 |
return f"PDF processing error: {str(e)}", 0, 0
|
105 |
|
106 |
-
|
107 |
def convert_file_to_json(file_path: str, file_type: str) -> str:
|
108 |
try:
|
109 |
h = file_hash(file_path)
|
@@ -151,18 +147,18 @@ def convert_file_to_json(file_path: str, file_type: str) -> str:
|
|
151 |
except Exception as e:
|
152 |
return json.dumps({"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"})
|
153 |
|
154 |
-
|
155 |
def clean_response(text: str) -> str:
|
156 |
text = sanitize_utf8(text)
|
157 |
patterns = [
|
158 |
-
r"\[TOOL_CALLS\].*",
|
159 |
-
r"
|
|
|
|
|
160 |
]
|
161 |
for pat in patterns:
|
162 |
text = re.sub(pat, "", text, flags=re.DOTALL)
|
163 |
return re.sub(r"\n{3,}", "\n\n", text).strip()
|
164 |
|
165 |
-
|
166 |
def format_final_report(analysis_results: List[str], filename: str) -> str:
|
167 |
report = [
|
168 |
"COMPREHENSIVE CLINICAL OVERSIGHT ANALYSIS",
|
@@ -197,7 +193,6 @@ def format_final_report(analysis_results: List[str], filename: str) -> str:
|
|
197 |
report.append("END OF REPORT")
|
198 |
return "\n".join(report)
|
199 |
|
200 |
-
|
201 |
def split_content_by_tokens(content: str, max_tokens: int) -> List[str]:
|
202 |
paragraphs = re.split(r"\n\s*\n", content)
|
203 |
chunks, current, curr_toks = [], [], 0
|
@@ -222,7 +217,6 @@ def split_content_by_tokens(content: str, max_tokens: int) -> List[str]:
|
|
222 |
chunks.append("\n\n".join(current))
|
223 |
return chunks
|
224 |
|
225 |
-
|
226 |
def init_agent():
|
227 |
print("🔁 Initializing model...")
|
228 |
log_system_usage("Before Load")
|
@@ -240,12 +234,11 @@ def init_agent():
|
|
240 |
seed=100,
|
241 |
additional_default_tools=[]
|
242 |
)
|
243 |
-
agent.init_model(
|
244 |
log_system_usage("After Load")
|
245 |
print("✅ Agent Ready")
|
246 |
return agent
|
247 |
|
248 |
-
|
249 |
def analyze_complete_document(content: str, filename: str, agent: TxAgent, temperature: float = 0.3) -> str:
|
250 |
base_prompt = (
|
251 |
"Analyze for:\n1. Critical\n2. Missed DX\n3. Med issues\n4. Gaps\n5. Follow-up\n\nContent:\n"
|
@@ -279,7 +272,6 @@ def analyze_complete_document(content: str, filename: str, agent: TxAgent, tempe
|
|
279 |
print(f"Error processing chunk {i}: {e}")
|
280 |
return format_final_report(results, filename)
|
281 |
|
282 |
-
|
283 |
def create_ui(agent):
|
284 |
with gr.Blocks(title="Clinical Oversight Assistant") as demo:
|
285 |
gr.Markdown("""
|
@@ -303,7 +295,6 @@ def create_ui(agent):
|
|
303 |
yield "", None, "⚠️ Please upload files.", None
|
304 |
return
|
305 |
yield "", None, "⏳ Processing...", None
|
306 |
-
# convert files
|
307 |
previews = []
|
308 |
contents = []
|
309 |
for f in files:
|
@@ -328,7 +319,12 @@ if __name__ == "__main__":
|
|
328 |
try:
|
329 |
import tiktoken
|
330 |
except ImportError:
|
331 |
-
subprocess.run([sys.executable, "-m", "pip", "install", "tiktoken"]
|
332 |
agent = init_agent()
|
333 |
demo = create_ui(agent)
|
334 |
-
demo.queue(api_open=False, max_size=20).launch(
|
|
|
|
|
|
|
|
|
|
|
|
55 |
PROMPT_RESERVE = 100
|
56 |
MEDICAL_SECTION_HEADER = "=== MEDICAL SECTION ==="
|
57 |
|
|
|
58 |
def log_system_usage(tag=""):
|
59 |
try:
|
60 |
cpu = psutil.cpu_percent(interval=1)
|
|
|
70 |
except Exception as e:
|
71 |
print(f"[{tag}] GPU/CPU monitor failed: {e}")
|
72 |
|
|
|
73 |
def sanitize_utf8(text: str) -> str:
|
74 |
return text.encode("utf-8", "ignore").decode("utf-8")
|
75 |
|
|
|
81 |
encoding = tiktoken.get_encoding(TOKENIZER)
|
82 |
return len(encoding.encode(text))
|
83 |
|
|
|
84 |
def extract_all_pages_with_token_count(file_path: str) -> Tuple[str, int, int]:
|
85 |
try:
|
86 |
text_chunks = []
|
|
|
100 |
except Exception as e:
|
101 |
return f"PDF processing error: {str(e)}", 0, 0
|
102 |
|
|
|
103 |
def convert_file_to_json(file_path: str, file_type: str) -> str:
|
104 |
try:
|
105 |
h = file_hash(file_path)
|
|
|
147 |
except Exception as e:
|
148 |
return json.dumps({"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"})
|
149 |
|
|
|
150 |
def clean_response(text: str) -> str:
|
151 |
text = sanitize_utf8(text)
|
152 |
patterns = [
|
153 |
+
r"\[TOOL_CALLS\].*",
|
154 |
+
r"\['get_[^\]]+\']\n?",
|
155 |
+
r"\{'meta':\s*\{.*?\}\s*,\s*'results':\s*\[.*?\]\}\n?",
|
156 |
+
r"To analyze the medical records for clinical oversights.*?\n"
|
157 |
]
|
158 |
for pat in patterns:
|
159 |
text = re.sub(pat, "", text, flags=re.DOTALL)
|
160 |
return re.sub(r"\n{3,}", "\n\n", text).strip()
|
161 |
|
|
|
162 |
def format_final_report(analysis_results: List[str], filename: str) -> str:
|
163 |
report = [
|
164 |
"COMPREHENSIVE CLINICAL OVERSIGHT ANALYSIS",
|
|
|
193 |
report.append("END OF REPORT")
|
194 |
return "\n".join(report)
|
195 |
|
|
|
196 |
def split_content_by_tokens(content: str, max_tokens: int) -> List[str]:
|
197 |
paragraphs = re.split(r"\n\s*\n", content)
|
198 |
chunks, current, curr_toks = [], [], 0
|
|
|
217 |
chunks.append("\n\n".join(current))
|
218 |
return chunks
|
219 |
|
|
|
220 |
def init_agent():
|
221 |
print("🔁 Initializing model...")
|
222 |
log_system_usage("Before Load")
|
|
|
234 |
seed=100,
|
235 |
additional_default_tools=[]
|
236 |
)
|
237 |
+
agent.init_model()
|
238 |
log_system_usage("After Load")
|
239 |
print("✅ Agent Ready")
|
240 |
return agent
|
241 |
|
|
|
242 |
def analyze_complete_document(content: str, filename: str, agent: TxAgent, temperature: float = 0.3) -> str:
|
243 |
base_prompt = (
|
244 |
"Analyze for:\n1. Critical\n2. Missed DX\n3. Med issues\n4. Gaps\n5. Follow-up\n\nContent:\n"
|
|
|
272 |
print(f"Error processing chunk {i}: {e}")
|
273 |
return format_final_report(results, filename)
|
274 |
|
|
|
275 |
def create_ui(agent):
|
276 |
with gr.Blocks(title="Clinical Oversight Assistant") as demo:
|
277 |
gr.Markdown("""
|
|
|
295 |
yield "", None, "⚠️ Please upload files.", None
|
296 |
return
|
297 |
yield "", None, "⏳ Processing...", None
|
|
|
298 |
previews = []
|
299 |
contents = []
|
300 |
for f in files:
|
|
|
319 |
try:
|
320 |
import tiktoken
|
321 |
except ImportError:
|
322 |
+
subprocess.run([sys.executable, "-m", "pip", "install", "tiktoken"])
|
323 |
agent = init_agent()
|
324 |
demo = create_ui(agent)
|
325 |
+
demo.queue(api_open=False, max_size=20).launch(
|
326 |
+
server_name="0.0.0.0",
|
327 |
+
server_port=7860,
|
328 |
+
show_error=True,
|
329 |
+
share=False
|
330 |
+
)
|