Update app.py
Browse files
app.py
CHANGED
@@ -3,7 +3,7 @@ import os
|
|
3 |
import pandas as pd
|
4 |
import json
|
5 |
import gradio as gr
|
6 |
-
from typing import List, Tuple, Union, Generator
|
7 |
import hashlib
|
8 |
import shutil
|
9 |
import re
|
@@ -25,7 +25,7 @@ for d in [model_cache_dir, tool_cache_dir, file_cache_dir, report_dir]:
|
|
25 |
os.environ["HF_HOME"] = model_cache_dir
|
26 |
os.environ["TRANSFORMERS_CACHE"] = model_cache_dir
|
27 |
|
28 |
-
# Make the local "src" package importable; fixed missing closing parenthesis (was a SyntaxError).
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "src")))
|
29 |
from txagent.txagent import TxAgent
|
30 |
|
31 |
MAX_MODEL_TOKENS = 32768
|
@@ -42,17 +42,30 @@ def clean_response(text: str) -> str:
|
|
42 |
def estimate_tokens(text: str) -> int:
    """Return a rough token-count estimate for *text* (~3.5 chars per token).

    Bug fix: ``len(text) // 3.5 + 1`` floor-divides by a float and therefore
    returns a ``float``, contradicting the ``-> int`` annotation; convert
    explicitly so callers comparing against token limits get a true int.
    """
    return int(len(text) / 3.5) + 1
|
44 |
|
45 |
-
def extract_text_from_excel(file_obj: Union[str, os.PathLike,
|
|
|
46 |
all_text = []
|
47 |
try:
|
48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
except Exception as e:
|
50 |
raise ValueError(f"❌ Error reading Excel file: {e}")
|
|
|
51 |
for sheet_name in xls.sheet_names:
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
|
|
|
|
|
|
|
|
|
|
56 |
return "\n".join(all_text)
|
57 |
|
58 |
def split_text_into_chunks(text: str, max_tokens: int = MAX_CHUNK_TOKENS, max_chunks: int = 30) -> List[str]:
|
@@ -95,7 +108,16 @@ Respond in well-structured bullet points with medical reasoning.
|
|
95 |
def init_agent():
|
96 |
tool_path = os.path.join(tool_cache_dir, "new_tool.json")
|
97 |
if not os.path.exists(tool_path):
|
98 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
agent = TxAgent(
|
100 |
model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
|
101 |
rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
|
@@ -108,20 +130,14 @@ def init_agent():
|
|
108 |
agent.init_model()
|
109 |
return agent
|
110 |
|
111 |
-
def stream_report(agent, input_file: Union[str,
|
112 |
accumulated_text = ""
|
113 |
try:
|
114 |
if input_file is None:
|
115 |
yield "❌ Please upload a valid Excel file.", None, ""
|
116 |
return
|
117 |
|
118 |
-
|
119 |
-
text = extract_text_from_excel(input_file)
|
120 |
-
elif isinstance(input_file, str) and os.path.exists(input_file):
|
121 |
-
text = extract_text_from_excel(input_file)
|
122 |
-
else:
|
123 |
-
raise ValueError("❌ Invalid or missing file.")
|
124 |
-
|
125 |
chunks = split_text_into_chunks(text)
|
126 |
|
127 |
for i, chunk in enumerate(chunks):
|
@@ -221,7 +237,13 @@ if __name__ == "__main__":
|
|
221 |
try:
|
222 |
agent = init_agent()
|
223 |
demo = create_ui(agent)
|
224 |
-
demo.launch(
|
|
|
|
|
|
|
|
|
|
|
|
|
225 |
except Exception as e:
|
226 |
-
print(f"Error: {str(e)}")
|
227 |
-
sys.exit(1)
|
|
|
3 |
import pandas as pd
|
4 |
import json
|
5 |
import gradio as gr
|
6 |
+
from typing import List, Tuple, Union, Generator, BinaryIO
|
7 |
import hashlib
|
8 |
import shutil
|
9 |
import re
|
|
|
25 |
os.environ["HF_HOME"] = model_cache_dir
|
26 |
os.environ["TRANSFORMERS_CACHE"] = model_cache_dir
|
27 |
|
28 |
+
# Make the local "src" package importable; fixed missing closing parenthesis (was a SyntaxError).
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "src")))
|
29 |
from txagent.txagent import TxAgent
|
30 |
|
31 |
MAX_MODEL_TOKENS = 32768
|
|
|
42 |
def estimate_tokens(text: str) -> int:
    """Return a rough token-count estimate for *text* (~3.5 chars per token).

    Bug fix: ``len(text) // 3.5 + 1`` floor-divides by a float and therefore
    returns a ``float``, contradicting the ``-> int`` annotation; convert
    explicitly so callers comparing against token limits get a true int.
    """
    return int(len(text) / 3.5) + 1
|
44 |
|
45 |
+
def extract_text_from_excel(file_obj: Union[str, os.PathLike, BinaryIO]) -> str:
    """Extract all cell text from every sheet of an Excel workbook.

    Args:
        file_obj: A filesystem path, an ``os.PathLike``, a binary file
            object, or a Gradio upload object exposing a ``.name`` attribute.

    Returns:
        Non-empty rows joined by newlines; each row is prefixed with its
        ``[sheet_name]`` and cells are joined with ``" | "``.

    Raises:
        ValueError: If the workbook cannot be opened at all. Individual
            unparsable sheets are skipped with a warning (best-effort).
    """
    # Gradio upload objects carry the temp-file path in .name; hasattr never raises.
    file_path = file_obj.name if hasattr(file_obj, "name") else file_obj

    try:
        xls = pd.ExcelFile(file_path)
    except Exception as e:
        raise ValueError(f"❌ Error reading Excel file: {e}") from e

    all_text = []
    try:
        for sheet_name in xls.sheet_names:
            try:
                # Bug fix: fillna() must run BEFORE astype(str) — the old
                # order stringified NaN into the literal "nan" first, so
                # fillna("") never matched and "nan" leaked into the report.
                df = xls.parse(sheet_name).fillna("").astype(str)
                rows = df.apply(
                    lambda row: " | ".join(cell for cell in row if cell.strip()),
                    axis=1,
                )
                all_text.extend(f"[{sheet_name}] {line}" for line in rows if line.strip())
            except Exception as e:
                # Best-effort: skip a broken sheet rather than failing the whole file.
                print(f"Warning: Could not parse sheet {sheet_name}: {e}")
    finally:
        # Bug fix: release the underlying file handle (the old code leaked it).
        xls.close()

    return "\n".join(all_text)
|
70 |
|
71 |
def split_text_into_chunks(text: str, max_tokens: int = MAX_CHUNK_TOKENS, max_chunks: int = 30) -> List[str]:
|
|
|
108 |
def init_agent():
|
109 |
tool_path = os.path.join(tool_cache_dir, "new_tool.json")
|
110 |
if not os.path.exists(tool_path):
|
111 |
+
# Create default tool file if it doesn't exist
|
112 |
+
default_tool = {
|
113 |
+
"name": "new_tool",
|
114 |
+
"description": "Default tool configuration",
|
115 |
+
"version": "1.0",
|
116 |
+
"tools": []
|
117 |
+
}
|
118 |
+
with open(tool_path, 'w') as f:
|
119 |
+
json.dump(default_tool, f)
|
120 |
+
|
121 |
agent = TxAgent(
|
122 |
model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
|
123 |
rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
|
|
|
130 |
agent.init_model()
|
131 |
return agent
|
132 |
|
133 |
+
def stream_report(agent, input_file: Union[str, BinaryIO], full_output: str) -> Generator[Tuple[str, Union[str, None], str], None, None]:
|
134 |
accumulated_text = ""
|
135 |
try:
|
136 |
if input_file is None:
|
137 |
yield "❌ Please upload a valid Excel file.", None, ""
|
138 |
return
|
139 |
|
140 |
+
text = extract_text_from_excel(input_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
141 |
chunks = split_text_into_chunks(text)
|
142 |
|
143 |
for i, chunk in enumerate(chunks):
|
|
|
237 |
# Entry point: build the agent, assemble the Gradio UI, and serve it.
# NOTE(review): in the full file this appears under `if __name__ == "__main__":`.
try:
    ui = create_ui(init_agent())
    launch_cfg = dict(
        server_name="0.0.0.0",   # bind all interfaces (container/Space friendly)
        server_port=7860,
        allowed_paths=["/data/hf_cache/reports"],  # let Gradio serve generated reports
        share=True,
        show_error=True,
    )
    ui.launch(**launch_cfg)
except Exception as err:
    # Report startup failure on stderr and exit non-zero.
    print(f"Error: {str(err)}", file=sys.stderr)
    sys.exit(1)
|