Ali2206 committed
Commit c87fc4e (verified)
1 Parent(s): f09f5b5

Update ui/ui_core.py

Files changed (1)
  1. ui/ui_core.py +37 -50
ui/ui_core.py CHANGED
@@ -14,9 +14,23 @@ from txagent.txagent import TxAgent
 def sanitize_utf8(text: str) -> str:
     return re.sub(r'[\ud800-\udfff]', '', text)
 
-def chunk_text(text: str, chunk_size: int = 8000) -> List[str]:
-    words = text.split()
-    return [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]
+def chunk_text(text: str, max_tokens=8000) -> List[str]:
+    chunks = []
+    lines = text.split("\n")
+    current_chunk = []
+    current_tokens = 0
+    for line in lines:
+        line_tokens = len(line.split())
+        if current_tokens + line_tokens > max_tokens:
+            chunks.append("\n".join(current_chunk))
+            current_chunk = [line]
+            current_tokens = line_tokens
+        else:
+            current_chunk.append(line)
+            current_tokens += line_tokens
+    if current_chunk:
+        chunks.append("\n".join(current_chunk))
+    return chunks
 
 def extract_all_text_from_csv_or_excel(file_path: str, progress=None, index=0, total=1) -> str:
     try:
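The replacement chunk_text keeps whole lines together instead of slicing a flat word list, counting whitespace-separated words as a rough token estimate, so a spreadsheet row is never split across chunks. A minimal standalone sketch of the same logic; the sample rows and the tiny max_tokens budget below are invented purely to show the splitting behaviour:

from typing import List

def chunk_text(text: str, max_tokens: int = 8000) -> List[str]:
    # Group whole lines into chunks whose word count stays under max_tokens.
    chunks: List[str] = []
    current_chunk: List[str] = []
    current_tokens = 0
    for line in text.split("\n"):
        line_tokens = len(line.split())
        if current_tokens + line_tokens > max_tokens:
            # Budget exceeded: close the current chunk and start a new one.
            chunks.append("\n".join(current_chunk))
            current_chunk = [line]
            current_tokens = line_tokens
        else:
            current_chunk.append(line)
            current_tokens += line_tokens
    if current_chunk:
        chunks.append("\n".join(current_chunk))
    return chunks

# Illustrative only: a tiny budget forces several chunks out of ten short rows.
sample = "\n".join(f"row {i}, value {i * 10}" for i in range(10))
for i, chunk in enumerate(chunk_text(sample, max_tokens=12)):
    print(f"chunk {i + 1}: {len(chunk.splitlines())} lines")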
@@ -95,7 +109,6 @@ def create_ui(agent: TxAgent):
         for index, file in enumerate(uploaded_files):
             if not hasattr(file, 'name'):
                 continue
-
             path = file.name
             try:
                 if path.endswith((".csv", ".xls", ".xlsx")):
@@ -108,48 +121,16 @@ def create_ui(agent: TxAgent):
                 extracted_text += f"[Error processing file: {os.path.basename(path)}] — {str(file_error)}\n"
                 continue
 
-            sanitized_text = sanitize_utf8(extracted_text.strip())
-            chunks = chunk_text(sanitized_text)
-
-            for i, chunk in enumerate(chunks):
-                message_chunk = (
-                    f"{context}\n\n--- Uploaded File Chunk {i + 1} ---\n\n{chunk}\n\n--- End of Chunk ---\n\nNow begin your reasoning:"
-                )
-                generator = agent.run_gradio_chat(
-                    message=message_chunk,
-                    history=history,
-                    temperature=0.3,
-                    max_new_tokens=1024,
-                    max_token=8192,
-                    call_agent=False,
-                    conversation=conversation,
-                    uploaded_files=uploaded_files,
-                    max_round=30
-                )
-
-                for update in generator:
-                    try:
-                        if isinstance(update, list):
-                            cleaned = [
-                                msg for msg in update
-                                if hasattr(msg, 'role') and not (
-                                    msg.role == "assistant"
-                                    and hasattr(msg, 'content')
-                                    and msg.content.strip().startswith("🧠")
-                                )
-                            ]
-                            if cleaned:
-                                yield cleaned
-                        elif isinstance(update, str) and not update.strip().startswith("🧠"):
-                            yield sanitize_utf8(update.encode("utf-8", "replace").decode("utf-8"))
-                    except Exception as update_error:
-                        print(f"Error processing update: {update_error}")
-                        continue
-
-        else:
-            # Fallback for message-only interactions
+            sanitized = sanitize_utf8(extracted_text.strip())
+            chunks = chunk_text(sanitized, max_tokens=8000)
+
+            for i, chunk in enumerate(chunks):
+                chunked_prompt = (
+                    f"{context}\n\n--- Uploaded File Content (Chunk {i+1}/{len(chunks)}) ---\n\n{chunk}\n\n--- End of Chunk ---\n\nNow begin your reasoning:"
+                )
+
             generator = agent.run_gradio_chat(
-                message=message,
+                message=chunked_prompt,
                 history=history,
                 temperature=0.3,
                 max_new_tokens=1024,
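The upload path now builds one labelled prompt per chunk and feeds it to agent.run_gradio_chat with the same sampling settings (temperature 0.3, max_new_tokens 1024). A rough sketch of that per-chunk flow; fake_run_gradio_chat is an invented stand-in for the real TxAgent method, whose full signature is only partially visible in this diff:

from typing import Iterator, List

def fake_run_gradio_chat(message: str, **kwargs) -> Iterator[str]:
    # Invented stand-in for agent.run_gradio_chat: the real method streams
    # partial chat updates; here we just acknowledge the prompt size.
    yield f"received a prompt of {len(message.split())} words"

def stream_file_chunks(context: str, chunks: List[str]) -> Iterator[str]:
    # Mirrors the new per-chunk prompting: each chunk gets its own labelled
    # prompt and its own streamed agent call.
    for i, chunk in enumerate(chunks):
        chunked_prompt = (
            f"{context}\n\n--- Uploaded File Content (Chunk {i+1}/{len(chunks)}) ---\n\n"
            f"{chunk}\n\n--- End of Chunk ---\n\nNow begin your reasoning:"
        )
        yield from fake_run_gradio_chat(message=chunked_prompt, temperature=0.3, max_new_tokens=1024)

if __name__ == "__main__":
    demo_chunks = ["col_a,col_b\n1,2", "3,4\n5,6"]  # illustrative CSV fragments
    for update in stream_file_chunks("Patient record review:", demo_chunks):
        print(update)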
@@ -161,10 +142,16 @@ def create_ui(agent: TxAgent):
             )
 
             for update in generator:
-                if isinstance(update, str):
-                    yield sanitize_utf8(update.encode("utf-8", "replace").decode("utf-8"))
-                else:
-                    yield update
+                try:
+                    if isinstance(update, list):
+                        cleaned = [msg for msg in update if hasattr(msg, 'role') and hasattr(msg, 'content')]
+                        if cleaned:
+                            yield cleaned
+                    elif isinstance(update, str):
+                        yield sanitize_utf8(update.encode("utf-8", "replace").decode("utf-8"))
+                except Exception as update_error:
+                    print(f"Error processing update: {update_error}")
+                    continue
 
         except Exception as chat_error:
             print(f"Chat handling error: {chat_error}")
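The rewritten update loop drops the old filter that hid assistant messages starting with the 🧠 marker; it now forwards any structured message that has both a role and a content attribute, and scrubs plain-string updates with an encode/decode round trip (errors="replace") followed by sanitize_utf8. A small self-contained sketch of that cleaning step; DummyMessage is invented here only to stand in for the chat message objects:

import re
from dataclasses import dataclass

def sanitize_utf8(text: str) -> str:
    # Same idea as ui_core.sanitize_utf8: strip lone UTF-16 surrogates.
    return re.sub(r'[\ud800-\udfff]', '', text)

@dataclass
class DummyMessage:  # invented stand-in for the chat message objects
    role: str
    content: str

def clean_update(update):
    # Mirrors the new loop body: keep structured messages that carry both
    # a role and a content attribute; sanitize plain-string updates.
    if isinstance(update, list):
        cleaned = [msg for msg in update if hasattr(msg, 'role') and hasattr(msg, 'content')]
        return cleaned or None
    if isinstance(update, str):
        return sanitize_utf8(update.encode("utf-8", "replace").decode("utf-8"))
    return None

print(clean_update([DummyMessage("assistant", "🧠 thinking..."), "not a message"]))
print(clean_update("partial answer\udfff"))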
@@ -180,4 +167,4 @@ def create_ui(agent: TxAgent):
             ["Is there anything abnormal in the attached blood work report?"]
         ], inputs=message_input)
 
-    return demo
+    return demo
 