Ali2206 committed on
Commit
5ff2c92
·
verified ·
1 Parent(s): ec09abd

Update ui/ui_core.py

Browse files
Files changed (1) hide show
  1. ui/ui_core.py +67 -45
ui/ui_core.py CHANGED
@@ -12,9 +12,11 @@ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..",
12
  from txagent.txagent import TxAgent
13
 
14
  def sanitize_utf8(text: str) -> str:
15
- # Remove surrogate pairs and re-encode safely
16
- clean_text = re.sub(r'[\ud800-\udfff]', '', text)
17
- return clean_text.encode("utf-8", "ignore").decode("utf-8", "ignore")
 
 
18
 
19
  def extract_all_text_from_csv_or_excel(file_path: str, progress=None, index=0, total=1) -> str:
20
  try:
@@ -36,11 +38,10 @@ def extract_all_text_from_csv_or_excel(file_path: str, progress=None, index=0, t
36
  line = " | ".join(str(cell) for cell in row if pd.notna(cell))
37
  if line:
38
  lines.append(line)
39
- content = f"📄 {os.path.basename(file_path)}\n\n" + "\n".join(lines)
40
- return sanitize_utf8(content)
41
 
42
  except Exception as e:
43
- return sanitize_utf8(f"[Error reading {os.path.basename(file_path)}]: {str(e)}")
44
 
45
  def extract_all_text_from_pdf(file_path: str, progress=None, index=0, total=1) -> str:
46
  try:
@@ -58,11 +59,10 @@ def extract_all_text_from_pdf(file_path: str, progress=None, index=0, total=1) -
58
  progress((index + (i / num_pages)) / total, desc=f"Reading PDF: {os.path.basename(file_path)} ({i+1}/{num_pages})")
59
  except Exception as e:
60
  extracted.append(f"[Error reading page {i+1}]: {str(e)}")
61
- content = f"📄 {os.path.basename(file_path)}\n\n" + "\n\n".join(extracted)
62
- return sanitize_utf8(content)
63
 
64
  except Exception as e:
65
- return sanitize_utf8(f"[Error reading PDF {os.path.basename(file_path)}]: {str(e)}")
66
 
67
  def create_ui(agent: TxAgent):
68
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
@@ -103,46 +103,68 @@ def create_ui(agent: TxAgent):
103
  elif path.endswith(".pdf"):
104
  extracted_text += extract_all_text_from_pdf(path, progress, index, total_files) + "\n"
105
  else:
106
- extracted_text += sanitize_utf8(f"(Uploaded file: {os.path.basename(path)})\n")
107
  except Exception as file_error:
108
- extracted_text += sanitize_utf8(f"[Error processing file: {os.path.basename(path)}] — {str(file_error)}\n")
109
  continue
110
 
111
- message = sanitize_utf8(
112
- f"{context}\n\n--- Uploaded File Content ---\n\n{extracted_text.strip()}\n\n--- End of File ---\n\nNow begin your reasoning:"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  )
114
 
115
- generator = agent.run_gradio_chat(
116
- message=message,
117
- history=history,
118
- temperature=0.3,
119
- max_new_tokens=1024,
120
- max_token=8192,
121
- call_agent=False,
122
- conversation=conversation,
123
- uploaded_files=uploaded_files,
124
- max_round=30
125
- )
126
-
127
- for update in generator:
128
- try:
129
- if isinstance(update, list):
130
- cleaned = [
131
- msg for msg in update
132
- if hasattr(msg, 'role')
133
- and not (
134
- msg.role == "assistant"
135
- and hasattr(msg, 'content')
136
- and sanitize_utf8(msg.content.strip()).startswith("🧠")
137
- )
138
- ]
139
- if cleaned:
140
- yield cleaned
141
- elif isinstance(update, str) and not sanitize_utf8(update.strip()).startswith("🧠"):
142
- yield sanitize_utf8(update)
143
- except Exception as update_error:
144
- print(f"Error processing update: {update_error}")
145
- continue
146
 
147
  except Exception as chat_error:
148
  print(f"Chat handling error: {chat_error}")
@@ -158,4 +180,4 @@ def create_ui(agent: TxAgent):
158
  ["Is there anything abnormal in the attached blood work report?"]
159
  ], inputs=message_input)
160
 
161
- return demo
 
12
  from txagent.txagent import TxAgent
13
 
14
  def sanitize_utf8(text: str) -> str:
15
+ return re.sub(r'[\ud800-\udfff]', '', text)
16
+
17
+ def chunk_text(text: str, chunk_size: int = 8000) -> List[str]:
18
+ words = text.split()
19
+ return [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]
20
 
21
  def extract_all_text_from_csv_or_excel(file_path: str, progress=None, index=0, total=1) -> str:
22
  try:
 
38
  line = " | ".join(str(cell) for cell in row if pd.notna(cell))
39
  if line:
40
  lines.append(line)
41
+ return f"📄 {os.path.basename(file_path)}\n\n" + "\n".join(lines)
 
42
 
43
  except Exception as e:
44
+ return f"[Error reading {os.path.basename(file_path)}]: {str(e)}"
45
 
46
  def extract_all_text_from_pdf(file_path: str, progress=None, index=0, total=1) -> str:
47
  try:
 
59
  progress((index + (i / num_pages)) / total, desc=f"Reading PDF: {os.path.basename(file_path)} ({i+1}/{num_pages})")
60
  except Exception as e:
61
  extracted.append(f"[Error reading page {i+1}]: {str(e)}")
62
+ return f"📄 {os.path.basename(file_path)}\n\n" + "\n\n".join(extracted)
 
63
 
64
  except Exception as e:
65
+ return f"[Error reading PDF {os.path.basename(file_path)}]: {str(e)}"
66
 
67
  def create_ui(agent: TxAgent):
68
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
103
  elif path.endswith(".pdf"):
104
  extracted_text += extract_all_text_from_pdf(path, progress, index, total_files) + "\n"
105
  else:
106
+ extracted_text += f"(Uploaded file: {os.path.basename(path)})\n"
107
  except Exception as file_error:
108
+ extracted_text += f"[Error processing file: {os.path.basename(path)}] — {str(file_error)}\n"
109
  continue
110
 
111
+ sanitized_text = sanitize_utf8(extracted_text.strip())
112
+ chunks = chunk_text(sanitized_text)
113
+
114
+ for i, chunk in enumerate(chunks):
115
+ message_chunk = (
116
+ f"{context}\n\n--- Uploaded File Chunk {i + 1} ---\n\n{chunk}\n\n--- End of Chunk ---\n\nNow begin your reasoning:"
117
+ )
118
+ generator = agent.run_gradio_chat(
119
+ message=message_chunk,
120
+ history=history,
121
+ temperature=0.3,
122
+ max_new_tokens=1024,
123
+ max_token=8192,
124
+ call_agent=False,
125
+ conversation=conversation,
126
+ uploaded_files=uploaded_files,
127
+ max_round=30
128
+ )
129
+
130
+ for update in generator:
131
+ try:
132
+ if isinstance(update, list):
133
+ cleaned = [
134
+ msg for msg in update
135
+ if hasattr(msg, 'role') and not (
136
+ msg.role == "assistant"
137
+ and hasattr(msg, 'content')
138
+ and msg.content.strip().startswith("🧠")
139
+ )
140
+ ]
141
+ if cleaned:
142
+ yield cleaned
143
+ elif isinstance(update, str) and not update.strip().startswith("🧠"):
144
+ yield sanitize_utf8(update.encode("utf-8", "replace").decode("utf-8"))
145
+ except Exception as update_error:
146
+ print(f"Error processing update: {update_error}")
147
+ continue
148
+
149
+ else:
150
+ # Fallback for message-only interactions
151
+ generator = agent.run_gradio_chat(
152
+ message=message,
153
+ history=history,
154
+ temperature=0.3,
155
+ max_new_tokens=1024,
156
+ max_token=8192,
157
+ call_agent=False,
158
+ conversation=conversation,
159
+ uploaded_files=uploaded_files,
160
+ max_round=30
161
  )
162
 
163
+ for update in generator:
164
+ if isinstance(update, str):
165
+ yield sanitize_utf8(update.encode("utf-8", "replace").decode("utf-8"))
166
+ else:
167
+ yield update
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
 
169
  except Exception as chat_error:
170
  print(f"Chat handling error: {chat_error}")
 
180
  ["Is there anything abnormal in the attached blood work report?"]
181
  ], inputs=message_input)
182
 
183
+ return demo