Ali2206 committed on
Commit
c8ac86a
·
verified ·
1 Parent(s): 67de838

Update ui/ui_core.py

Browse files
Files changed (1) hide show
  1. ui/ui_core.py +15 -13
ui/ui_core.py CHANGED
@@ -12,6 +12,7 @@ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..",
12
  from txagent.txagent import TxAgent
13
 
14
  def sanitize_utf8(text: str) -> str:
 
15
  clean_text = re.sub(r'[\ud800-\udfff]', '', text)
16
  return clean_text.encode("utf-8", "ignore").decode("utf-8", "ignore")
17
 
@@ -35,10 +36,11 @@ def extract_all_text_from_csv_or_excel(file_path: str, progress=None, index=0, t
35
  line = " | ".join(str(cell) for cell in row if pd.notna(cell))
36
  if line:
37
  lines.append(line)
38
- return f"\ud83d\udcc4 {os.path.basename(file_path)}\n\n" + "\n".join(lines)
 
39
 
40
  except Exception as e:
41
- return f"[Error reading {os.path.basename(file_path)}]: {str(e)}"
42
 
43
  def extract_all_text_from_pdf(file_path: str, progress=None, index=0, total=1) -> str:
44
  try:
@@ -56,14 +58,15 @@ def extract_all_text_from_pdf(file_path: str, progress=None, index=0, total=1) -
56
  progress((index + (i / num_pages)) / total, desc=f"Reading PDF: {os.path.basename(file_path)} ({i+1}/{num_pages})")
57
  except Exception as e:
58
  extracted.append(f"[Error reading page {i+1}]: {str(e)}")
59
- return f"\ud83d\udcc4 {os.path.basename(file_path)}\n\n" + "\n\n".join(extracted)
 
60
 
61
  except Exception as e:
62
- return f"[Error reading PDF {os.path.basename(file_path)}]: {str(e)}"
63
 
64
  def create_ui(agent: TxAgent):
65
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
66
- gr.Markdown("<h1 style='text-align: center;'>\ud83d\udccb CPS: Clinical Patient Support System</h1>")
67
  chatbot = gr.Chatbot(label="CPS Assistant", height=600, type="messages")
68
 
69
  file_upload = gr.File(
@@ -100,14 +103,13 @@ def create_ui(agent: TxAgent):
100
  elif path.endswith(".pdf"):
101
  extracted_text += extract_all_text_from_pdf(path, progress, index, total_files) + "\n"
102
  else:
103
- extracted_text += f"(Uploaded file: {os.path.basename(path)})\n"
104
  except Exception as file_error:
105
- extracted_text += f"[Error processing file: {os.path.basename(path)}] — {str(file_error)}\n"
106
  continue
107
 
108
- sanitized = sanitize_utf8(extracted_text.strip())
109
- message = (
110
- f"{context}\n\n--- Uploaded File Content ---\n\n{sanitized}\n\n--- End of File ---\n\nNow begin your reasoning:"
111
  )
112
 
113
  generator = agent.run_gradio_chat(
@@ -131,13 +133,13 @@ def create_ui(agent: TxAgent):
131
  and not (
132
  msg.role == "assistant"
133
  and hasattr(msg, 'content')
134
- and msg.content.strip().startswith("\ud83e\udde0")
135
  )
136
  ]
137
  if cleaned:
138
  yield cleaned
139
- elif isinstance(update, str) and not update.strip().startswith("\ud83e\udde0"):
140
- yield sanitize_utf8(update.encode("utf-8", "replace").decode("utf-8"))
141
  except Exception as update_error:
142
  print(f"Error processing update: {update_error}")
143
  continue
 
12
  from txagent.txagent import TxAgent
13
 
14
  def sanitize_utf8(text: str) -> str:
15
+ # Remove surrogate pairs and re-encode safely
16
  clean_text = re.sub(r'[\ud800-\udfff]', '', text)
17
  return clean_text.encode("utf-8", "ignore").decode("utf-8", "ignore")
18
 
 
36
  line = " | ".join(str(cell) for cell in row if pd.notna(cell))
37
  if line:
38
  lines.append(line)
39
+ content = f"📄 {os.path.basename(file_path)}\n\n" + "\n".join(lines)
40
+ return sanitize_utf8(content)
41
 
42
  except Exception as e:
43
+ return sanitize_utf8(f"[Error reading {os.path.basename(file_path)}]: {str(e)}")
44
 
45
  def extract_all_text_from_pdf(file_path: str, progress=None, index=0, total=1) -> str:
46
  try:
 
58
  progress((index + (i / num_pages)) / total, desc=f"Reading PDF: {os.path.basename(file_path)} ({i+1}/{num_pages})")
59
  except Exception as e:
60
  extracted.append(f"[Error reading page {i+1}]: {str(e)}")
61
+ content = f"📄 {os.path.basename(file_path)}\n\n" + "\n\n".join(extracted)
62
+ return sanitize_utf8(content)
63
 
64
  except Exception as e:
65
+ return sanitize_utf8(f"[Error reading PDF {os.path.basename(file_path)}]: {str(e)}")
66
 
67
  def create_ui(agent: TxAgent):
68
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
69
+ gr.Markdown("<h1 style='text-align: center;'>📋 CPS: Clinical Patient Support System</h1>")
70
  chatbot = gr.Chatbot(label="CPS Assistant", height=600, type="messages")
71
 
72
  file_upload = gr.File(
 
103
  elif path.endswith(".pdf"):
104
  extracted_text += extract_all_text_from_pdf(path, progress, index, total_files) + "\n"
105
  else:
106
+ extracted_text += sanitize_utf8(f"(Uploaded file: {os.path.basename(path)})\n")
107
  except Exception as file_error:
108
+ extracted_text += sanitize_utf8(f"[Error processing file: {os.path.basename(path)}] — {str(file_error)}\n")
109
  continue
110
 
111
+ message = sanitize_utf8(
112
+ f"{context}\n\n--- Uploaded File Content ---\n\n{extracted_text.strip()}\n\n--- End of File ---\n\nNow begin your reasoning:"
 
113
  )
114
 
115
  generator = agent.run_gradio_chat(
 
133
  and not (
134
  msg.role == "assistant"
135
  and hasattr(msg, 'content')
136
+ and sanitize_utf8(msg.content.strip()).startswith("🧠")
137
  )
138
  ]
139
  if cleaned:
140
  yield cleaned
141
+ elif isinstance(update, str) and not sanitize_utf8(update.strip()).startswith("🧠"):
142
+ yield sanitize_utf8(update)
143
  except Exception as update_error:
144
  print(f"Error processing update: {update_error}")
145
  continue