Update app.py
Browse files
app.py
CHANGED
@@ -33,18 +33,15 @@ MAX_CHUNK_TOKENS = 8192
|
|
33 |
MAX_NEW_TOKENS = 2048
|
34 |
PROMPT_OVERHEAD = 500
|
35 |
|
36 |
-
|
37 |
def clean_response(text: str) -> str:
    """Tidy raw model output for display.

    Strips bracketed ``[...]`` spans and literal "None" tokens, collapses
    runs of three or more newlines to a single blank line, and drops any
    character outside a small markdown-friendly whitelist.
    """
    substitutions = (
        (r"\[.*?\]|\bNone\b", "", re.DOTALL),   # bracketed spans / stray "None"
        (r"\n{3,}", "\n\n", 0),                 # collapse blank-line runs
        (r"[^\n#\-\*\w\s\.,:\(\)]+", "", 0),    # whitelist of allowed characters
    )
    cleaned = text
    for pattern, replacement, flags in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned, flags=flags)
    return cleaned.strip()
|
42 |
|
43 |
-
|
44 |
def estimate_tokens(text: str) -> int:
    """Roughly estimate the token count of *text*.

    Uses the ~3.5-characters-per-token heuristic; the ``+ 1`` guarantees a
    positive count even for empty input.
    """
    # int(...) keeps the declared return type honest: the original
    # `len(text) // 3.5` produced a float because one operand is a float,
    # while floor(len/3.5) is numerically identical for non-negative lengths.
    return int(len(text) / 3.5) + 1
|
46 |
|
47 |
-
|
48 |
def extract_text_from_excel(file_path: str) -> str:
|
49 |
all_text = []
|
50 |
xls = pd.ExcelFile(file_path)
|
@@ -55,7 +52,6 @@ def extract_text_from_excel(file_path: str) -> str:
|
|
55 |
all_text.extend(sheet_text)
|
56 |
return "\n".join(all_text)
|
57 |
|
58 |
-
|
59 |
def split_text_into_chunks(text: str, max_tokens: int = MAX_CHUNK_TOKENS, max_chunks: int = 30) -> List[str]:
|
60 |
effective_max = max_tokens - PROMPT_OVERHEAD
|
61 |
lines, chunks, curr_chunk, curr_tokens = text.split("\n"), [], [], 0
|
@@ -74,7 +70,6 @@ def split_text_into_chunks(text: str, max_tokens: int = MAX_CHUNK_TOKENS, max_ch
|
|
74 |
chunks.append("\n".join(curr_chunk))
|
75 |
return chunks
|
76 |
|
77 |
-
|
78 |
def build_prompt_from_text(chunk: str) -> str:
|
79 |
return f"""
|
80 |
### Unstructured Clinical Records
|
@@ -94,7 +89,6 @@ Analyze the following clinical notes and provide a detailed, concise summary foc
|
|
94 |
Respond in well-structured bullet points with medical reasoning.
|
95 |
"""
|
96 |
|
97 |
-
|
98 |
def init_agent():
|
99 |
tool_path = os.path.join(tool_cache_dir, "new_tool.json")
|
100 |
if not os.path.exists(tool_path):
|
@@ -111,7 +105,6 @@ def init_agent():
|
|
111 |
agent.init_model()
|
112 |
return agent
|
113 |
|
114 |
-
|
115 |
def process_final_report(agent, file, chatbot_state: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], Union[str, None]]:
|
116 |
messages = chatbot_state if chatbot_state else []
|
117 |
if file is None or not hasattr(file, "name"):
|
@@ -140,7 +133,7 @@ def process_final_report(agent, file, chatbot_state: List[Dict[str, str]]) -> Tu
|
|
140 |
response += r.content
|
141 |
return i, clean_response(response)
|
142 |
|
143 |
-
with ThreadPoolExecutor(max_workers=
|
144 |
futures = [executor.submit(analyze_chunk, i, c) for i, c in enumerate(chunks)]
|
145 |
for f in as_completed(futures):
|
146 |
i, result = f.result()
|
@@ -173,7 +166,6 @@ def process_final_report(agent, file, chatbot_state: List[Dict[str, str]]) -> Tu
|
|
173 |
messages.append({"role": "assistant", "content": f"✅ Report generated and saved: {os.path.basename(report_path)}"})
|
174 |
return messages, report_path
|
175 |
|
176 |
-
|
177 |
def create_ui(agent):
|
178 |
with gr.Blocks(css="""
|
179 |
body {
|
@@ -233,7 +225,6 @@ Upload clinical Excel records below and click **Analyze** to generate a medical
|
|
233 |
|
234 |
return demo
|
235 |
|
236 |
-
|
237 |
if __name__ == "__main__":
|
238 |
try:
|
239 |
agent = init_agent()
|
|
|
33 |
MAX_NEW_TOKENS = 2048
|
34 |
PROMPT_OVERHEAD = 500
|
35 |
|
|
|
36 |
def clean_response(text: str) -> str:
    """Sanitize generated text before rendering.

    Deletes ``[...]`` spans and standalone "None" words, limits consecutive
    newlines to at most two, removes characters outside the allowed set,
    and trims surrounding whitespace.
    """
    without_noise = re.sub(r"\[.*?\]|\bNone\b", "", text, flags=re.DOTALL)
    compact = re.sub(r"\n{3,}", "\n\n", without_noise)
    filtered = re.sub(r"[^\n#\-\*\w\s\.,:\(\)]+", "", compact)
    return filtered.strip()
|
41 |
|
|
|
42 |
def estimate_tokens(text: str) -> int:
    """Estimate how many tokens *text* spans (~3.5 chars per token).

    Always returns at least 1 so downstream chunk budgeting never sees a
    zero-token chunk.
    """
    # Wrap in int(): floor-dividing by the float 3.5 yields a float,
    # which would violate the annotated `-> int` return type.
    return int(len(text) / 3.5) + 1
|
44 |
|
|
|
45 |
def extract_text_from_excel(file_path: str) -> str:
|
46 |
all_text = []
|
47 |
xls = pd.ExcelFile(file_path)
|
|
|
52 |
all_text.extend(sheet_text)
|
53 |
return "\n".join(all_text)
|
54 |
|
|
|
55 |
def split_text_into_chunks(text: str, max_tokens: int = MAX_CHUNK_TOKENS, max_chunks: int = 30) -> List[str]:
|
56 |
effective_max = max_tokens - PROMPT_OVERHEAD
|
57 |
lines, chunks, curr_chunk, curr_tokens = text.split("\n"), [], [], 0
|
|
|
70 |
chunks.append("\n".join(curr_chunk))
|
71 |
return chunks
|
72 |
|
|
|
73 |
def build_prompt_from_text(chunk: str) -> str:
|
74 |
return f"""
|
75 |
### Unstructured Clinical Records
|
|
|
89 |
Respond in well-structured bullet points with medical reasoning.
|
90 |
"""
|
91 |
|
|
|
92 |
def init_agent():
|
93 |
tool_path = os.path.join(tool_cache_dir, "new_tool.json")
|
94 |
if not os.path.exists(tool_path):
|
|
|
105 |
agent.init_model()
|
106 |
return agent
|
107 |
|
|
|
108 |
def process_final_report(agent, file, chatbot_state: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], Union[str, None]]:
|
109 |
messages = chatbot_state if chatbot_state else []
|
110 |
if file is None or not hasattr(file, "name"):
|
|
|
133 |
response += r.content
|
134 |
return i, clean_response(response)
|
135 |
|
136 |
+
with ThreadPoolExecutor(max_workers=1) as executor:
|
137 |
futures = [executor.submit(analyze_chunk, i, c) for i, c in enumerate(chunks)]
|
138 |
for f in as_completed(futures):
|
139 |
i, result = f.result()
|
|
|
166 |
messages.append({"role": "assistant", "content": f"✅ Report generated and saved: {os.path.basename(report_path)}"})
|
167 |
return messages, report_path
|
168 |
|
|
|
169 |
def create_ui(agent):
|
170 |
with gr.Blocks(css="""
|
171 |
body {
|
|
|
225 |
|
226 |
return demo
|
227 |
|
|
|
228 |
if __name__ == "__main__":
|
229 |
try:
|
230 |
agent = init_agent()
|