Ali2206 committed on
Commit
1b3a021
·
verified ·
1 Parent(s): 3325f59

Update ui/ui_core.py

Browse files
Files changed (1) hide show
  1. ui/ui_core.py +28 -35
ui/ui_core.py CHANGED
@@ -12,25 +12,7 @@ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..",
12
  from txagent.txagent import TxAgent
13
 
14
  def sanitize_utf8(text: str) -> str:
15
- return re.sub(r'[\ud800-\udfff]', '', text)
16
-
17
def chunk_text(text: str, max_tokens: int = 8000) -> List[str]:
    """Split *text* into newline-joined chunks of at most ``max_tokens`` words.

    Lines are never split: the token count of a line is its number of
    whitespace-separated words, and whole lines are accumulated until adding
    the next one would exceed the budget.

    Args:
        text: The text to split.
        max_tokens: Maximum number of whitespace-separated words per chunk.
            A single line larger than the budget is kept intact in its own
            chunk.

    Returns:
        The chunks in original order; joining them with ``"\n"`` reproduces
        *text*.
    """
    chunks = []
    current_chunk = []
    current_tokens = 0
    for line in text.split("\n"):
        line_tokens = len(line.split())
        # Only flush when something has been accumulated; otherwise an
        # oversized first line would emit a spurious empty chunk.
        if current_chunk and current_tokens + line_tokens > max_tokens:
            chunks.append("\n".join(current_chunk))
            current_chunk = [line]
            current_tokens = line_tokens
        else:
            current_chunk.append(line)
            current_tokens += line_tokens
    if current_chunk:
        chunks.append("\n".join(current_chunk))
    return chunks
34
 
35
  def extract_all_text_from_csv_or_excel(file_path: str, progress=None, index=0, total=1) -> str:
36
  try:
@@ -78,6 +60,23 @@ def extract_all_text_from_pdf(file_path: str, progress=None, index=0, total=1) -
78
  except Exception as e:
79
  return f"[Error reading PDF {os.path.basename(file_path)}]: {str(e)}"
80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  def create_ui(agent: TxAgent):
82
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
83
  gr.Markdown("<h1 style='text-align: center;'>📋 CPS: Clinical Patient Support System</h1>")
@@ -109,6 +108,7 @@ def create_ui(agent: TxAgent):
109
  for index, file in enumerate(uploaded_files):
110
  if not hasattr(file, 'name'):
111
  continue
 
112
  path = file.name
113
  try:
114
  if path.endswith((".csv", ".xls", ".xlsx")):
@@ -122,15 +122,14 @@ def create_ui(agent: TxAgent):
122
  continue
123
 
124
  sanitized = sanitize_utf8(extracted_text.strip())
125
- chunks = chunk_text(sanitized, max_tokens=8000)
126
 
127
  for i, chunk in enumerate(chunks):
128
- chunked_prompt = (
129
- f"{context}\n\n--- Uploaded File Content (Chunk {i+1}/{len(chunks)}) ---\n\n{chunk}\n\n--- End of Chunk ---\n\nNow begin your reasoning:"
130
  )
131
-
132
  generator = agent.run_gradio_chat(
133
- message=chunked_prompt,
134
  history=history,
135
  temperature=0.3,
136
  max_new_tokens=1024,
@@ -142,20 +141,14 @@ def create_ui(agent: TxAgent):
142
  )
143
 
144
  for update in generator:
145
- try:
146
- if isinstance(update, list):
147
- cleaned = [msg for msg in update if hasattr(msg, 'role') and hasattr(msg, 'content')]
148
- if cleaned:
149
- yield cleaned
150
- elif isinstance(update, str):
151
- yield sanitize_utf8(update.encode("utf-8", "replace").decode("utf-8"))
152
- except Exception as update_error:
153
- print(f"Error processing update: {update_error}")
154
- continue
155
 
156
  except Exception as chat_error:
157
  print(f"Chat handling error: {chat_error}")
158
- yield "An error occurred while processing your request. Please try again."
159
 
160
  inputs = [message_input, chatbot, conversation_state, file_upload]
161
  send_button.click(fn=handle_chat, inputs=inputs, outputs=chatbot)
 
12
  from txagent.txagent import TxAgent
13
 
14
  def sanitize_utf8(text: str) -> str:
15
+ return text.encode("utf-8", "ignore").decode("utf-8")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  def extract_all_text_from_csv_or_excel(file_path: str, progress=None, index=0, total=1) -> str:
18
  try:
 
60
  except Exception as e:
61
  return f"[Error reading PDF {os.path.basename(file_path)}]: {str(e)}"
62
 
63
def chunk_text(text: str, max_tokens: int = 8192) -> List[str]:
    """Split *text* into space-joined chunks within an estimated token budget.

    The text is split on whitespace. Each word is charged an estimated cost
    of ``len(word) // 4 + 1`` tokens, and words are accumulated until adding
    the next one would push the running estimate above ``max_tokens``.

    Args:
        text: The text to split.
        max_tokens: Upper bound on the estimated token cost of a chunk. A
            single word whose cost exceeds the budget is kept intact in its
            own chunk.

    Returns:
        The non-empty chunks in original order; an empty list when *text*
        contains no words.
    """
    chunks = []
    current = []
    token_count = 0
    for word in text.split():
        cost = len(word) // 4 + 1
        # Only flush when something has been accumulated; otherwise an
        # oversized first word would emit a spurious empty chunk.
        if current and token_count + cost > max_tokens:
            chunks.append(" ".join(current))
            current = [word]
            token_count = cost
        else:
            current.append(word)
            token_count += cost
    if current:
        chunks.append(" ".join(current))
    return chunks
79
+
80
  def create_ui(agent: TxAgent):
81
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
82
  gr.Markdown("<h1 style='text-align: center;'>📋 CPS: Clinical Patient Support System</h1>")
 
108
  for index, file in enumerate(uploaded_files):
109
  if not hasattr(file, 'name'):
110
  continue
111
+
112
  path = file.name
113
  try:
114
  if path.endswith((".csv", ".xls", ".xlsx")):
 
122
  continue
123
 
124
  sanitized = sanitize_utf8(extracted_text.strip())
125
+ chunks = chunk_text(sanitized, max_tokens=8192)
126
 
127
  for i, chunk in enumerate(chunks):
128
+ full_message = (
129
+ f"{context}\n\n--- Uploaded File Chunk {i+1}/{len(chunks)} ---\n\n{chunk}\n\n--- End of Chunk ---\n\nNow begin your reasoning:"
130
  )
 
131
  generator = agent.run_gradio_chat(
132
+ message=full_message,
133
  history=history,
134
  temperature=0.3,
135
  max_new_tokens=1024,
 
141
  )
142
 
143
  for update in generator:
144
+ if isinstance(update, list):
145
+ yield update
146
+ elif isinstance(update, str):
147
+ yield [("assistant", sanitize_utf8(update))]
 
 
 
 
 
 
148
 
149
  except Exception as chat_error:
150
  print(f"Chat handling error: {chat_error}")
151
+ yield [("assistant", "An error occurred while processing your request. Please try again.")]
152
 
153
  inputs = [message_input, chatbot, conversation_state, file_upload]
154
  send_button.click(fn=handle_chat, inputs=inputs, outputs=chatbot)