Update ui/ui_core.py
Browse files- ui/ui_core.py +37 -50
ui/ui_core.py
CHANGED
@@ -14,9 +14,23 @@ from txagent.txagent import TxAgent
|
|
14 |
def sanitize_utf8(text: str) -> str:
    """Strip lone UTF-16 surrogate code points (U+D800–U+DFFF) from *text*.

    Lone surrogates cannot be encoded as UTF-8 and would make any
    downstream encode step raise; dropping them yields a string that is
    safe to serialize.
    """
    return "".join(ch for ch in text if not 0xD800 <= ord(ch) <= 0xDFFF)
|
16 |
|
17 |
-
def chunk_text(text: str,
|
18 |
-
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
|
21 |
def extract_all_text_from_csv_or_excel(file_path: str, progress=None, index=0, total=1) -> str:
|
22 |
try:
|
@@ -95,7 +109,6 @@ def create_ui(agent: TxAgent):
|
|
95 |
for index, file in enumerate(uploaded_files):
|
96 |
if not hasattr(file, 'name'):
|
97 |
continue
|
98 |
-
|
99 |
path = file.name
|
100 |
try:
|
101 |
if path.endswith((".csv", ".xls", ".xlsx")):
|
@@ -108,48 +121,16 @@ def create_ui(agent: TxAgent):
|
|
108 |
extracted_text += f"[Error processing file: {os.path.basename(path)}] — {str(file_error)}\n"
|
109 |
continue
|
110 |
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
message=message_chunk,
|
120 |
-
history=history,
|
121 |
-
temperature=0.3,
|
122 |
-
max_new_tokens=1024,
|
123 |
-
max_token=8192,
|
124 |
-
call_agent=False,
|
125 |
-
conversation=conversation,
|
126 |
-
uploaded_files=uploaded_files,
|
127 |
-
max_round=30
|
128 |
-
)
|
129 |
-
|
130 |
-
for update in generator:
|
131 |
-
try:
|
132 |
-
if isinstance(update, list):
|
133 |
-
cleaned = [
|
134 |
-
msg for msg in update
|
135 |
-
if hasattr(msg, 'role') and not (
|
136 |
-
msg.role == "assistant"
|
137 |
-
and hasattr(msg, 'content')
|
138 |
-
and msg.content.strip().startswith("🧠")
|
139 |
-
)
|
140 |
-
]
|
141 |
-
if cleaned:
|
142 |
-
yield cleaned
|
143 |
-
elif isinstance(update, str) and not update.strip().startswith("🧠"):
|
144 |
-
yield sanitize_utf8(update.encode("utf-8", "replace").decode("utf-8"))
|
145 |
-
except Exception as update_error:
|
146 |
-
print(f"Error processing update: {update_error}")
|
147 |
-
continue
|
148 |
-
|
149 |
-
else:
|
150 |
-
# Fallback for message-only interactions
|
151 |
generator = agent.run_gradio_chat(
|
152 |
-
message=
|
153 |
history=history,
|
154 |
temperature=0.3,
|
155 |
max_new_tokens=1024,
|
@@ -161,10 +142,16 @@ def create_ui(agent: TxAgent):
|
|
161 |
)
|
162 |
|
163 |
for update in generator:
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
168 |
|
169 |
except Exception as chat_error:
|
170 |
print(f"Chat handling error: {chat_error}")
|
@@ -180,4 +167,4 @@ def create_ui(agent: TxAgent):
|
|
180 |
["Is there anything abnormal in the attached blood work report?"]
|
181 |
], inputs=message_input)
|
182 |
|
183 |
-
return demo
|
|
|
14 |
def sanitize_utf8(text: str) -> str:
|
15 |
return re.sub(r'[\ud800-\udfff]', '', text)
|
16 |
|
17 |
+
def chunk_text(text: str, max_tokens=8000) -> List[str]:
    """Split *text* into chunks of at most ~``max_tokens`` whitespace tokens.

    Token counts are approximated by whitespace-splitting each line.
    Lines are never broken apart, so a single line whose own token count
    exceeds ``max_tokens`` still becomes one (oversized) chunk.

    Args:
        text: Text to split on newline boundaries.
        max_tokens: Approximate upper bound on tokens per chunk.

    Returns:
        List of chunk strings. For empty input this is ``[""]`` (one
        empty chunk), matching the original behavior.
    """
    chunks: List[str] = []
    current_chunk: List[str] = []
    current_tokens = 0
    for line in text.split("\n"):
        line_tokens = len(line.split())
        if current_tokens + line_tokens > max_tokens:
            # Flush the running chunk before starting a new one.
            # Guard: if the very first line already exceeds the budget,
            # current_chunk is empty — the original appended a spurious
            # "" chunk here.
            if current_chunk:
                chunks.append("\n".join(current_chunk))
            current_chunk = [line]
            current_tokens = line_tokens
        else:
            current_chunk.append(line)
            current_tokens += line_tokens
    if current_chunk:
        chunks.append("\n".join(current_chunk))
    return chunks
|
34 |
|
35 |
def extract_all_text_from_csv_or_excel(file_path: str, progress=None, index=0, total=1) -> str:
|
36 |
try:
|
|
|
109 |
for index, file in enumerate(uploaded_files):
|
110 |
if not hasattr(file, 'name'):
|
111 |
continue
|
|
|
112 |
path = file.name
|
113 |
try:
|
114 |
if path.endswith((".csv", ".xls", ".xlsx")):
|
|
|
121 |
extracted_text += f"[Error processing file: {os.path.basename(path)}] — {str(file_error)}\n"
|
122 |
continue
|
123 |
|
124 |
+
sanitized = sanitize_utf8(extracted_text.strip())
|
125 |
+
chunks = chunk_text(sanitized, max_tokens=8000)
|
126 |
+
|
127 |
+
for i, chunk in enumerate(chunks):
|
128 |
+
chunked_prompt = (
|
129 |
+
f"{context}\n\n--- Uploaded File Content (Chunk {i+1}/{len(chunks)}) ---\n\n{chunk}\n\n--- End of Chunk ---\n\nNow begin your reasoning:"
|
130 |
+
)
|
131 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
132 |
generator = agent.run_gradio_chat(
|
133 |
+
message=chunked_prompt,
|
134 |
history=history,
|
135 |
temperature=0.3,
|
136 |
max_new_tokens=1024,
|
|
|
142 |
)
|
143 |
|
144 |
for update in generator:
|
145 |
+
try:
|
146 |
+
if isinstance(update, list):
|
147 |
+
cleaned = [msg for msg in update if hasattr(msg, 'role') and hasattr(msg, 'content')]
|
148 |
+
if cleaned:
|
149 |
+
yield cleaned
|
150 |
+
elif isinstance(update, str):
|
151 |
+
yield sanitize_utf8(update.encode("utf-8", "replace").decode("utf-8"))
|
152 |
+
except Exception as update_error:
|
153 |
+
print(f"Error processing update: {update_error}")
|
154 |
+
continue
|
155 |
|
156 |
except Exception as chat_error:
|
157 |
print(f"Chat handling error: {chat_error}")
|
|
|
167 |
["Is there anything abnormal in the attached blood work report?"]
|
168 |
], inputs=message_input)
|
169 |
|
170 |
+
return demo
|