Update ui/ui_core.py
Browse files- ui/ui_core.py +15 -13
ui/ui_core.py
CHANGED
@@ -12,6 +12,7 @@ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..",
|
|
12 |
from txagent.txagent import TxAgent
|
13 |
|
14 |
def sanitize_utf8(text: str) -> str:
|
|
|
15 |
clean_text = re.sub(r'[\ud800-\udfff]', '', text)
|
16 |
return clean_text.encode("utf-8", "ignore").decode("utf-8", "ignore")
|
17 |
|
@@ -35,10 +36,11 @@ def extract_all_text_from_csv_or_excel(file_path: str, progress=None, index=0, t
|
|
35 |
line = " | ".join(str(cell) for cell in row if pd.notna(cell))
|
36 |
if line:
|
37 |
lines.append(line)
|
38 |
-
|
|
|
39 |
|
40 |
except Exception as e:
|
41 |
-
return f"[Error reading {os.path.basename(file_path)}]: {str(e)}"
|
42 |
|
43 |
def extract_all_text_from_pdf(file_path: str, progress=None, index=0, total=1) -> str:
|
44 |
try:
|
@@ -56,14 +58,15 @@ def extract_all_text_from_pdf(file_path: str, progress=None, index=0, total=1) -
|
|
56 |
progress((index + (i / num_pages)) / total, desc=f"Reading PDF: {os.path.basename(file_path)} ({i+1}/{num_pages})")
|
57 |
except Exception as e:
|
58 |
extracted.append(f"[Error reading page {i+1}]: {str(e)}")
|
59 |
-
|
|
|
60 |
|
61 |
except Exception as e:
|
62 |
-
return f"[Error reading PDF {os.path.basename(file_path)}]: {str(e)}"
|
63 |
|
64 |
def create_ui(agent: TxAgent):
|
65 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
66 |
-
gr.Markdown("<h1 style='text-align: center;'
|
67 |
chatbot = gr.Chatbot(label="CPS Assistant", height=600, type="messages")
|
68 |
|
69 |
file_upload = gr.File(
|
@@ -100,14 +103,13 @@ def create_ui(agent: TxAgent):
|
|
100 |
elif path.endswith(".pdf"):
|
101 |
extracted_text += extract_all_text_from_pdf(path, progress, index, total_files) + "\n"
|
102 |
else:
|
103 |
-
extracted_text += f"(Uploaded file: {os.path.basename(path)})\n"
|
104 |
except Exception as file_error:
|
105 |
-
extracted_text += f"[Error processing file: {os.path.basename(path)}] โ {str(file_error)}\n"
|
106 |
continue
|
107 |
|
108 |
-
|
109 |
-
|
110 |
-
f"{context}\n\n--- Uploaded File Content ---\n\n{sanitized}\n\n--- End of File ---\n\nNow begin your reasoning:"
|
111 |
)
|
112 |
|
113 |
generator = agent.run_gradio_chat(
|
@@ -131,13 +133,13 @@ def create_ui(agent: TxAgent):
|
|
131 |
and not (
|
132 |
msg.role == "assistant"
|
133 |
and hasattr(msg, 'content')
|
134 |
-
and msg.content.strip().startswith("
|
135 |
)
|
136 |
]
|
137 |
if cleaned:
|
138 |
yield cleaned
|
139 |
-
elif isinstance(update, str) and not update.strip().startswith("
|
140 |
-
yield sanitize_utf8(update
|
141 |
except Exception as update_error:
|
142 |
print(f"Error processing update: {update_error}")
|
143 |
continue
|
|
|
12 |
from txagent.txagent import TxAgent
|
13 |
|
14 |
def sanitize_utf8(text: str) -> str:
|
15 |
+
# Remove surrogate pairs and re-encode safely
|
16 |
clean_text = re.sub(r'[\ud800-\udfff]', '', text)
|
17 |
return clean_text.encode("utf-8", "ignore").decode("utf-8", "ignore")
|
18 |
|
|
|
36 |
line = " | ".join(str(cell) for cell in row if pd.notna(cell))
|
37 |
if line:
|
38 |
lines.append(line)
|
39 |
+
content = f"๐ {os.path.basename(file_path)}\n\n" + "\n".join(lines)
|
40 |
+
return sanitize_utf8(content)
|
41 |
|
42 |
except Exception as e:
|
43 |
+
return sanitize_utf8(f"[Error reading {os.path.basename(file_path)}]: {str(e)}")
|
44 |
|
45 |
def extract_all_text_from_pdf(file_path: str, progress=None, index=0, total=1) -> str:
|
46 |
try:
|
|
|
58 |
progress((index + (i / num_pages)) / total, desc=f"Reading PDF: {os.path.basename(file_path)} ({i+1}/{num_pages})")
|
59 |
except Exception as e:
|
60 |
extracted.append(f"[Error reading page {i+1}]: {str(e)}")
|
61 |
+
content = f"๐ {os.path.basename(file_path)}\n\n" + "\n\n".join(extracted)
|
62 |
+
return sanitize_utf8(content)
|
63 |
|
64 |
except Exception as e:
|
65 |
+
return sanitize_utf8(f"[Error reading PDF {os.path.basename(file_path)}]: {str(e)}")
|
66 |
|
67 |
def create_ui(agent: TxAgent):
|
68 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
69 |
+
gr.Markdown("<h1 style='text-align: center;'>๐ CPS: Clinical Patient Support System</h1>")
|
70 |
chatbot = gr.Chatbot(label="CPS Assistant", height=600, type="messages")
|
71 |
|
72 |
file_upload = gr.File(
|
|
|
103 |
elif path.endswith(".pdf"):
|
104 |
extracted_text += extract_all_text_from_pdf(path, progress, index, total_files) + "\n"
|
105 |
else:
|
106 |
+
extracted_text += sanitize_utf8(f"(Uploaded file: {os.path.basename(path)})\n")
|
107 |
except Exception as file_error:
|
108 |
+
extracted_text += sanitize_utf8(f"[Error processing file: {os.path.basename(path)}] โ {str(file_error)}\n")
|
109 |
continue
|
110 |
|
111 |
+
message = sanitize_utf8(
|
112 |
+
f"{context}\n\n--- Uploaded File Content ---\n\n{extracted_text.strip()}\n\n--- End of File ---\n\nNow begin your reasoning:"
|
|
|
113 |
)
|
114 |
|
115 |
generator = agent.run_gradio_chat(
|
|
|
133 |
and not (
|
134 |
msg.role == "assistant"
|
135 |
and hasattr(msg, 'content')
|
136 |
+
and sanitize_utf8(msg.content.strip()).startswith("🧠")
|
137 |
)
|
138 |
]
|
139 |
if cleaned:
|
140 |
yield cleaned
|
141 |
+
elif isinstance(update, str) and not sanitize_utf8(update.strip()).startswith("🧠"):
|
142 |
+
yield sanitize_utf8(update)
|
143 |
except Exception as update_error:
|
144 |
print(f"Error processing update: {update_error}")
|
145 |
continue
|