Ali2206 committed on
Commit
28560cd
·
verified ·
1 Parent(s): 51620dd

Update ui/ui_core.py

Browse files
Files changed (1) hide show
  1. ui/ui_core.py +128 -65
ui/ui_core.py CHANGED
@@ -4,13 +4,36 @@ import pandas as pd
4
  import pdfplumber
5
  import gradio as gr
6
  from tabulate import tabulate
 
7
 
8
  # ✅ Add src to Python path
9
- sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "src")))
10
  from txagent.txagent import TxAgent
11
 
12
- def extract_all_text_from_csv_or_excel(file_path, progress=None, index=0, total=1):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  try:
 
 
 
14
  if file_path.endswith(".csv"):
15
  df = pd.read_csv(file_path, encoding="utf-8", errors="replace", low_memory=False)
16
  elif file_path.endswith((".xls", ".xlsx")):
@@ -21,38 +44,57 @@ def extract_all_text_from_csv_or_excel(file_path, progress=None, index=0, total=
21
  if progress:
22
  progress((index + 1) / total, desc=f"Processed table: {os.path.basename(file_path)}")
23
 
24
- if "Booking Number" in df.columns:
25
- groups = df.groupby("Booking Number")
26
- elif "Form Name" in df.columns:
27
- groups = df.groupby("Form Name")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  else:
29
  return tabulate(df, headers="keys", tablefmt="github", showindex=False)
30
 
31
- result = []
32
- for group_name, group_df in groups:
33
- result.append(f"\n### Group: {group_name}\n")
34
- result.append(tabulate(group_df, headers="keys", tablefmt="github", showindex=False))
35
- return "\n".join(result)
36
-
37
  except Exception as e:
38
- return f"Error parsing file: {e}"
39
 
40
- def extract_all_text_from_pdf(file_path, progress=None, index=0, total=1):
 
41
  extracted = []
42
  try:
 
 
 
43
  with pdfplumber.open(file_path) as pdf:
44
- num_pages = len(pdf.pages)
45
- for i, page in enumerate(pdf.pages):
46
- tables = page.extract_tables()
47
- for table in tables:
48
- for row in table:
49
- if any(row):
50
- extracted.append("\t".join([cell or "" for cell in row]))
51
- if progress:
52
- progress((index + i / num_pages) / total, desc=f"Parsing PDF: {os.path.basename(file_path)} ({i+1}/{num_pages})")
53
- return "\n".join(extracted)
 
 
 
 
 
54
  except Exception as e:
55
- return f"Error parsing PDF: {e}"
56
 
57
  def create_ui(agent: TxAgent):
58
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
@@ -68,7 +110,8 @@ def create_ui(agent: TxAgent):
68
  send_button = gr.Button("Send", variant="primary")
69
  conversation_state = gr.State([])
70
 
71
- def handle_chat(message, history, conversation, uploaded_files, progress=gr.Progress()):
 
72
  context = (
73
  "You are an expert clinical AI assistant reviewing medical form or interview data. "
74
  "Your job is to analyze this data and reason about any information or red flags that a human doctor might have overlooked. "
@@ -78,53 +121,73 @@ def create_ui(agent: TxAgent):
78
  "End with a section labeled '🧠 Final Analysis' where you summarize key findings the doctor may have missed."
79
  )
80
 
81
- if uploaded_files:
82
  extracted_text = ""
83
- total_files = len(uploaded_files)
84
-
85
- for index, file in enumerate(uploaded_files):
86
- path = file.name
87
- if path.endswith((".csv", ".xls", ".xlsx")):
88
- extracted_text += extract_all_text_from_csv_or_excel(path, progress, index, total_files) + "\n"
89
- elif path.endswith(".pdf"):
90
- extracted_text += extract_all_text_from_pdf(path, progress, index, total_files) + "\n"
91
- else:
92
- extracted_text += f"(Uploaded file: {os.path.basename(path)})\n"
93
- if progress:
94
- progress((index + 1) / total_files, desc=f"Skipping unsupported file: {os.path.basename(path)}")
95
-
96
- message = f"{context}\n\n---\n{extracted_text.strip()}\n---\n\nBegin your reasoning."
97
-
98
- final_response = None
99
- generator = agent.run_gradio_chat(
100
- message=message,
101
- history=history,
102
- temperature=0.3,
103
- max_new_tokens=1024,
104
- max_token=8192,
105
- call_agent=False,
106
- conversation=conversation,
107
- uploaded_files=uploaded_files,
108
- max_round=30
109
- )
110
- for update in generator:
111
- if isinstance(update, list):
112
- cleaned = [msg for msg in update if not (msg.role == "assistant" and msg.content.strip().startswith("🧰"))]
113
- if cleaned:
114
- final_response = cleaned
115
- yield cleaned
116
- else:
117
- if isinstance(update, str) and not update.strip().startswith("🧰"):
118
- yield update.encode("utf-8", "replace").decode("utf-8")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
 
120
  inputs = [message_input, chatbot, conversation_state, file_upload]
121
  send_button.click(fn=handle_chat, inputs=inputs, outputs=chatbot)
122
  message_input.submit(fn=handle_chat, inputs=inputs, outputs=chatbot)
123
 
124
  gr.Examples([
125
- ["Upload your medical form and ask what the doctor mightve missed."],
126
  ["This patient was treated with antibiotics for UTI. What else should we check?"],
127
  ["Is there anything abnormal in the attached blood work report?"]
128
  ], inputs=message_input)
129
 
130
- return demo
 
4
  import pdfplumber
5
  import gradio as gr
6
  from tabulate import tabulate
7
+ from typing import List, Optional
8
 
9
  # ✅ Add src to Python path
10
# Make ../src importable so the `txagent` package resolves when running from ui/.
# (The original line was missing a closing parenthesis -> SyntaxError on import.)
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "src")))
11
  from txagent.txagent import TxAgent
12
 
13
def safe_extract_table_data(table: List[List[str]]) -> List[str]:
    """Flatten a PDF table into tab-separated text lines.

    Each list-typed row becomes one ``"\t"``-joined string; ``None`` cells
    are rendered as empty strings. Rows that are empty, not lists, or
    contain no text at all are dropped. Any malformed input yields an
    empty result instead of raising.
    """
    lines: List[str] = []
    if not isinstance(table, list) or not table:
        return lines
    for raw_row in table:
        if not isinstance(raw_row, list) or not raw_row:
            continue
        try:
            cells = ["" if cell is None else str(cell) for cell in raw_row]
        except Exception as err:
            # Defensive: an exotic cell object whose str() raises.
            print(f"Error processing table row: {err}")
            continue
        if any(cells):
            lines.append("\t".join(cells))
    return lines
30
+
31
+ def extract_all_text_from_csv_or_excel(file_path: str, progress=None, index=0, total=1) -> str:
32
+ """Safely extract text from CSV or Excel files with error handling."""
33
  try:
34
+ if not os.path.exists(file_path):
35
+ return f"File not found: {file_path}"
36
+
37
  if file_path.endswith(".csv"):
38
  df = pd.read_csv(file_path, encoding="utf-8", errors="replace", low_memory=False)
39
  elif file_path.endswith((".xls", ".xlsx")):
 
44
  if progress:
45
  progress((index + 1) / total, desc=f"Processed table: {os.path.basename(file_path)}")
46
 
47
+ # Safely check for grouping columns
48
+ group_column = None
49
+ for col in ["Booking Number", "Form Name"]:
50
+ if col in df.columns:
51
+ group_column = col
52
+ break
53
+
54
+ if group_column:
55
+ try:
56
+ groups = df.groupby(group_column)
57
+ result = []
58
+ for group_name, group_df in groups:
59
+ if group_name is None:
60
+ continue
61
+ result.append(f"\n### Group: {group_name}\n")
62
+ result.append(tabulate(group_df, headers="keys", tablefmt="github", showindex=False))
63
+ return "\n".join(result) if result else tabulate(df, headers="keys", tablefmt="github", showindex=False)
64
+ except Exception as e:
65
+ print(f"Error during grouping: {e}")
66
+ return tabulate(df, headers="keys", tablefmt="github", showindex=False)
67
  else:
68
  return tabulate(df, headers="keys", tablefmt="github", showindex=False)
69
 
 
 
 
 
 
 
70
  except Exception as e:
71
+ return f"Error parsing file {os.path.basename(file_path)}: {str(e)}"
72
 
73
def extract_all_text_from_pdf(file_path: str, progress=None, index=0, total=1) -> str:
    """Extract tabular content from a PDF as tab-separated text lines.

    Args:
        file_path: Path to the PDF on disk.
        progress: Optional Gradio-style callable ``progress(fraction, desc=...)``.
        index: Position of this file within a batch (for progress reporting).
        total: Number of files in the batch.

    Returns:
        The extracted text, or a human-readable notice/error string.
        Never raises: all failures are folded into the return value.
    """
    extracted = []
    try:
        if not os.path.exists(file_path):
            return f"PDF file not found: {file_path}"

        with pdfplumber.open(file_path) as pdf:
            # pdfplumber PDFs always expose .pages and pages expose
            # .extract_tables(); the hasattr guards in the previous version
            # were dead code that obscured the logic.
            num_pages = len(pdf.pages)
            for i, page in enumerate(pdf.pages):
                try:
                    for table in page.extract_tables() or []:
                        extracted.extend(safe_extract_table_data(table))

                    if progress and num_pages > 0:
                        progress((index + (i / num_pages)) / total,
                                 desc=f"Parsing PDF: {os.path.basename(file_path)} ({i+1}/{num_pages})")
                except Exception as page_error:
                    # Keep going: one bad page should not lose the whole document.
                    print(f"Error processing page {i+1}: {page_error}")
                    continue

        return "\n".join(extracted) if extracted else f"No extractable content found in {os.path.basename(file_path)}"
    except Exception as e:
        return f"Error parsing PDF {os.path.basename(file_path)}: {str(e)}"
98
 
99
  def create_ui(agent: TxAgent):
100
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
110
  send_button = gr.Button("Send", variant="primary")
111
  conversation_state = gr.State([])
112
 
113
+ def handle_chat(message: str, history: list, conversation: list, uploaded_files: list, progress=gr.Progress()):
114
+ """Handle chat with comprehensive error handling for file processing."""
115
  context = (
116
  "You are an expert clinical AI assistant reviewing medical form or interview data. "
117
  "Your job is to analyze this data and reason about any information or red flags that a human doctor might have overlooked. "
 
121
  "End with a section labeled '🧠 Final Analysis' where you summarize key findings the doctor may have missed."
122
  )
123
 
124
+ try:
125
  extracted_text = ""
126
+ if uploaded_files and isinstance(uploaded_files, list):
127
+ total_files = len(uploaded_files)
128
+ for index, file in enumerate(uploaded_files):
129
+ if not hasattr(file, 'name'):
130
+ continue
131
+
132
+ path = file.name
133
+ try:
134
+ if path.endswith((".csv", ".xls", ".xlsx")):
135
+ extracted_text += extract_all_text_from_csv_or_excel(path, progress, index, total_files) + "\n"
136
+ elif path.endswith(".pdf"):
137
+ extracted_text += extract_all_text_from_pdf(path, progress, index, total_files) + "\n"
138
+ else:
139
+ extracted_text += f"(Uploaded file: {os.path.basename(path)})\n"
140
+ if progress:
141
+ progress((index + 1) / total_files, desc=f"Skipping unsupported file: {os.path.basename(path)}")
142
+ except Exception as file_error:
143
+ print(f"Error processing file {path}: {file_error}")
144
+ extracted_text += f"\n[Error processing file: {os.path.basename(path)}]\n"
145
+ continue
146
+
147
+ message = f"{context}\n\n---\n{extracted_text.strip()}\n---\n\nBegin your reasoning."
148
+
149
+ final_response = None
150
+ generator = agent.run_gradio_chat(
151
+ message=message,
152
+ history=history,
153
+ temperature=0.3,
154
+ max_new_tokens=1024,
155
+ max_token=8192,
156
+ call_agent=False,
157
+ conversation=conversation,
158
+ uploaded_files=uploaded_files,
159
+ max_round=30
160
+ )
161
+
162
+ for update in generator:
163
+ try:
164
+ if isinstance(update, list):
165
+ cleaned = [msg for msg in update if (hasattr(msg, 'role') and
166
+ not (msg.role == "assistant" and
167
+ hasattr(msg, 'content') and
168
+ msg.content.strip().startswith("🧰"))]
169
+ if cleaned:
170
+ final_response = cleaned
171
+ yield cleaned
172
+ else:
173
+ if isinstance(update, str) and not update.strip().startswith("🧰"):
174
+ yield update.encode("utf-8", "replace").decode("utf-8")
175
+ except Exception as update_error:
176
+ print(f"Error processing update: {update_error}")
177
+ continue
178
+
179
+ except Exception as chat_error:
180
+ print(f"Chat handling error: {chat_error}")
181
+ yield "An error occurred while processing your request. Please try again."
182
 
183
  inputs = [message_input, chatbot, conversation_state, file_upload]
184
  send_button.click(fn=handle_chat, inputs=inputs, outputs=chatbot)
185
  message_input.submit(fn=handle_chat, inputs=inputs, outputs=chatbot)
186
 
187
  gr.Examples([
188
+ ["Upload your medical form and ask what the doctor might've missed."],
189
  ["This patient was treated with antibiotics for UTI. What else should we check?"],
190
  ["Is there anything abnormal in the attached blood work report?"]
191
  ], inputs=message_input)
192
 
193
+ return demo