Update ui/ui_core.py
Browse files- ui/ui_core.py +37 -50
ui/ui_core.py
CHANGED
@@ -14,9 +14,23 @@ from txagent.txagent import TxAgent
|
|
14 |
def sanitize_utf8(text: str) -> str:
    """Strip lone UTF-16 surrogate code points (U+D800–U+DFFF) from *text*.

    Lone surrogates cannot be encoded as UTF-8 and would make any
    downstream encode step raise; dropping them yields a string that is
    safe to serialize.
    """
    return "".join(ch for ch in text if not 0xD800 <= ord(ch) <= 0xDFFF)
|
16 |
|
17 |
-
def chunk_text(text: str,
|
18 |
-
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
|
21 |
def extract_all_text_from_csv_or_excel(file_path: str, progress=None, index=0, total=1) -> str:
|
22 |
try:
|
@@ -95,7 +109,6 @@ def create_ui(agent: TxAgent):
|
|
95 |
for index, file in enumerate(uploaded_files):
|
96 |
if not hasattr(file, 'name'):
|
97 |
continue
|
98 |
-
|
99 |
path = file.name
|
100 |
try:
|
101 |
if path.endswith((".csv", ".xls", ".xlsx")):
|
@@ -108,48 +121,16 @@ def create_ui(agent: TxAgent):
|
|
108 |
extracted_text += f"[Error processing file: {os.path.basename(path)}] — {str(file_error)}\n"
|
109 |
continue
|
110 |
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
message=message_chunk,
|
120 |
-
history=history,
|
121 |
-
temperature=0.3,
|
122 |
-
max_new_tokens=1024,
|
123 |
-
max_token=8192,
|
124 |
-
call_agent=False,
|
125 |
-
conversation=conversation,
|
126 |
-
uploaded_files=uploaded_files,
|
127 |
-
max_round=30
|
128 |
-
)
|
129 |
-
|
130 |
-
for update in generator:
|
131 |
-
try:
|
132 |
-
if isinstance(update, list):
|
133 |
-
cleaned = [
|
134 |
-
msg for msg in update
|
135 |
-
if hasattr(msg, 'role') and not (
|
136 |
-
msg.role == "assistant"
|
137 |
-
and hasattr(msg, 'content')
|
138 |
-
and msg.content.strip().startswith("🧠")
|
139 |
-
)
|
140 |
-
]
|
141 |
-
if cleaned:
|
142 |
-
yield cleaned
|
143 |
-
elif isinstance(update, str) and not update.strip().startswith("🧠"):
|
144 |
-
yield sanitize_utf8(update.encode("utf-8", "replace").decode("utf-8"))
|
145 |
-
except Exception as update_error:
|
146 |
-
print(f"Error processing update: {update_error}")
|
147 |
-
continue
|
148 |
-
|
149 |
-
else:
|
150 |
-
# Fallback for message-only interactions
|
151 |
generator = agent.run_gradio_chat(
|
152 |
-
message=
|
153 |
history=history,
|
154 |
temperature=0.3,
|
155 |
max_new_tokens=1024,
|
@@ -161,10 +142,16 @@ def create_ui(agent: TxAgent):
|
|
161 |
)
|
162 |
|
163 |
for update in generator:
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
168 |
|
169 |
except Exception as chat_error:
|
170 |
print(f"Chat handling error: {chat_error}")
|
@@ -180,4 +167,4 @@ def create_ui(agent: TxAgent):
|
|
180 |
["Is there anything abnormal in the attached blood work report?"]
|
181 |
], inputs=message_input)
|
182 |
|
183 |
-
return demo
|
|
|
14 |
def sanitize_utf8(text: str) -> str:
|
15 |
return re.sub(r'[\ud800-\udfff]', '', text)
|
16 |
|
17 |
+
def chunk_text(text: str, max_tokens=8000) -> List[str]:
    """Split *text* into chunks of at most ~``max_tokens`` whitespace tokens.

    Token counts are approximated by whitespace-splitting each line.
    Lines are never broken apart, so a single line whose own token count
    exceeds ``max_tokens`` still becomes one (oversized) chunk.

    Args:
        text: Text to split on newline boundaries.
        max_tokens: Approximate upper bound on tokens per chunk.

    Returns:
        List of chunk strings. For empty input this is ``[""]`` (one
        empty chunk), matching the original behavior.
    """
    chunks: List[str] = []
    current_chunk: List[str] = []
    current_tokens = 0
    for line in text.split("\n"):
        line_tokens = len(line.split())
        if current_tokens + line_tokens > max_tokens:
            # Flush the running chunk before starting a new one.
            # Guard: if the very first line already exceeds the budget,
            # current_chunk is empty — the original appended a spurious
            # "" chunk here.
            if current_chunk:
                chunks.append("\n".join(current_chunk))
            current_chunk = [line]
            current_tokens = line_tokens
        else:
            current_chunk.append(line)
            current_tokens += line_tokens
    if current_chunk:
        chunks.append("\n".join(current_chunk))
    return chunks
|
34 |
|
35 |
def extract_all_text_from_csv_or_excel(file_path: str, progress=None, index=0, total=1) -> str:
|
36 |
try:
|
|
|
109 |
for index, file in enumerate(uploaded_files):
|
110 |
if not hasattr(file, 'name'):
|
111 |
continue
|
|
|
112 |
path = file.name
|
113 |
try:
|
114 |
if path.endswith((".csv", ".xls", ".xlsx")):
|
|
|
121 |
extracted_text += f"[Error processing file: {os.path.basename(path)}] — {str(file_error)}\n"
|
122 |
continue
|
123 |
|
124 |
+
sanitized = sanitize_utf8(extracted_text.strip())
|
125 |
+
chunks = chunk_text(sanitized, max_tokens=8000)
|
126 |
+
|
127 |
+
for i, chunk in enumerate(chunks):
|
128 |
+
chunked_prompt = (
|
129 |
+
f"{context}\n\n--- Uploaded File Content (Chunk {i+1}/{len(chunks)}) ---\n\n{chunk}\n\n--- End of Chunk ---\n\nNow begin your reasoning:"
|
130 |
+
)
|
131 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
132 |
generator = agent.run_gradio_chat(
|
133 |
+
message=chunked_prompt,
|
134 |
history=history,
|
135 |
temperature=0.3,
|
136 |
max_new_tokens=1024,
|
|
|
142 |
)
|
143 |
|
144 |
for update in generator:
|
145 |
+
try:
|
146 |
+
if isinstance(update, list):
|
147 |
+
cleaned = [msg for msg in update if hasattr(msg, 'role') and hasattr(msg, 'content')]
|
148 |
+
if cleaned:
|
149 |
+
yield cleaned
|
150 |
+
elif isinstance(update, str):
|
151 |
+
yield sanitize_utf8(update.encode("utf-8", "replace").decode("utf-8"))
|
152 |
+
except Exception as update_error:
|
153 |
+
print(f"Error processing update: {update_error}")
|
154 |
+
continue
|
155 |
|
156 |
except Exception as chat_error:
|
157 |
print(f"Chat handling error: {chat_error}")
|
|
|
167 |
["Is there anything abnormal in the attached blood work report?"]
|
168 |
], inputs=message_input)
|
169 |
|
170 |
+
return demo
|