Ali2206 committed on
Commit
7061d83
·
verified ·
1 Parent(s): ca6d5de

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -20
app.py CHANGED
@@ -3,7 +3,7 @@ import os
3
  import pandas as pd
4
  import json
5
  import gradio as gr
6
- from typing import List, Tuple, Union, Generator
7
  import hashlib
8
  import shutil
9
  import re
@@ -25,7 +25,7 @@ for d in [model_cache_dir, tool_cache_dir, file_cache_dir, report_dir]:
25
  os.environ["HF_HOME"] = model_cache_dir
26
  os.environ["TRANSFORMERS_CACHE"] = model_cache_dir
27
 
28
- sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "src")))
29
  from txagent.txagent import TxAgent
30
 
31
  MAX_MODEL_TOKENS = 32768
@@ -42,17 +42,30 @@ def clean_response(text: str) -> str:
42
def estimate_tokens(text: str) -> int:
    """Roughly estimate the token count of *text* (~3.5 characters/token).

    Always returns at least 1, so empty input still counts as one token.
    """
    # int() truncation equals floor for the non-negative len(), and actually
    # returns an int as the annotation promises (`// 3.5` yielded a float).
    return int(len(text) / 3.5) + 1
44
 
45
- def extract_text_from_excel(file_obj: Union[str, os.PathLike, 'file']) -> str:
 
46
  all_text = []
47
  try:
48
- xls = pd.ExcelFile(file_obj)
 
 
 
 
 
 
49
  except Exception as e:
50
  raise ValueError(f"❌ Error reading Excel file: {e}")
 
51
  for sheet_name in xls.sheet_names:
52
- df = xls.parse(sheet_name).astype(str).fillna("")
53
- rows = df.apply(lambda row: " | ".join([cell for cell in row if cell.strip()]), axis=1)
54
- sheet_text = [f"[{sheet_name}] {line}" for line in rows if line.strip()]
55
- all_text.extend(sheet_text)
 
 
 
 
 
56
  return "\n".join(all_text)
57
 
58
  def split_text_into_chunks(text: str, max_tokens: int = MAX_CHUNK_TOKENS, max_chunks: int = 30) -> List[str]:
@@ -95,7 +108,16 @@ Respond in well-structured bullet points with medical reasoning.
95
  def init_agent():
96
  tool_path = os.path.join(tool_cache_dir, "new_tool.json")
97
  if not os.path.exists(tool_path):
98
- shutil.copy(os.path.abspath("data/new_tool.json"), tool_path)
 
 
 
 
 
 
 
 
 
99
  agent = TxAgent(
100
  model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
101
  rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
@@ -108,20 +130,14 @@ def init_agent():
108
  agent.init_model()
109
  return agent
110
 
111
- def stream_report(agent, input_file: Union[str, 'file'], full_output: str) -> Generator[Tuple[str, Union[str, None], str], None, None]:
112
  accumulated_text = ""
113
  try:
114
  if input_file is None:
115
  yield "❌ Please upload a valid Excel file.", None, ""
116
  return
117
 
118
- if hasattr(input_file, "read"):
119
- text = extract_text_from_excel(input_file)
120
- elif isinstance(input_file, str) and os.path.exists(input_file):
121
- text = extract_text_from_excel(input_file)
122
- else:
123
- raise ValueError("❌ Invalid or missing file.")
124
-
125
  chunks = split_text_into_chunks(text)
126
 
127
  for i, chunk in enumerate(chunks):
@@ -221,7 +237,13 @@ if __name__ == "__main__":
221
  try:
222
  agent = init_agent()
223
  demo = create_ui(agent)
224
- demo.launch(server_name="0.0.0.0", server_port=7860, allowed_paths=["/data/hf_cache/reports"], share=True)
 
 
 
 
 
 
225
  except Exception as e:
226
- print(f"Error: {str(e)}")
227
- sys.exit(1)
 
3
  import pandas as pd
4
  import json
5
  import gradio as gr
6
+ from typing import List, Tuple, Union, Generator, BinaryIO
7
  import hashlib
8
  import shutil
9
  import re
 
25
  os.environ["HF_HOME"] = model_cache_dir
26
  os.environ["TRANSFORMERS_CACHE"] = model_cache_dir
27
 
28
+ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "src")))
29
  from txagent.txagent import TxAgent
30
 
31
  MAX_MODEL_TOKENS = 32768
 
42
def estimate_tokens(text: str) -> int:
    """Roughly estimate the token count of *text* (~3.5 characters/token).

    Always returns at least 1, so empty input still counts as one token.
    """
    # int() truncation equals floor for the non-negative len(), and actually
    # returns an int as the annotation promises (`// 3.5` yielded a float).
    return int(len(text) / 3.5) + 1
44
 
45
def extract_text_from_excel(file_obj: Union[str, os.PathLike, BinaryIO]) -> str:
    """Extract row text from an Excel workbook.

    Accepts a filesystem path, an ``os.PathLike``, a binary file object, or a
    Gradio upload object (which exposes its temp path via ``.name``).

    Returns:
        One line per non-empty row, prefixed with ``[sheet_name]`` and with
        the non-blank cells of the row joined by `` | ``.

    Raises:
        ValueError: if the workbook cannot be opened at all.  Individual
            unreadable sheets are skipped with a warning instead.
    """
    all_text = []
    # Gradio file objects carry the on-disk temp path in their .name attribute.
    file_path = file_obj.name if hasattr(file_obj, "name") else file_obj
    try:
        xls = pd.ExcelFile(file_path)
    except Exception as e:
        raise ValueError(f"❌ Error reading Excel file: {e}")

    for sheet_name in xls.sheet_names:
        try:
            # fillna BEFORE astype(str): the other order turns NaN into the
            # literal string "nan" first, making fillna("") a silent no-op.
            df = xls.parse(sheet_name).fillna("").astype(str)
            rows = df.apply(
                lambda row: " | ".join(cell for cell in row if cell.strip()),
                axis=1,
            )
            all_text.extend(f"[{sheet_name}] {line}" for line in rows if line.strip())
        except Exception as e:
            # Best-effort: one bad sheet should not fail the whole file.
            print(f"Warning: Could not parse sheet {sheet_name}: {e}")
            continue

    return "\n".join(all_text)
70
 
71
  def split_text_into_chunks(text: str, max_tokens: int = MAX_CHUNK_TOKENS, max_chunks: int = 30) -> List[str]:
 
108
  def init_agent():
109
  tool_path = os.path.join(tool_cache_dir, "new_tool.json")
110
  if not os.path.exists(tool_path):
111
+ # Create default tool file if it doesn't exist
112
+ default_tool = {
113
+ "name": "new_tool",
114
+ "description": "Default tool configuration",
115
+ "version": "1.0",
116
+ "tools": []
117
+ }
118
+ with open(tool_path, 'w') as f:
119
+ json.dump(default_tool, f)
120
+
121
  agent = TxAgent(
122
  model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
123
  rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
 
130
  agent.init_model()
131
  return agent
132
 
133
+ def stream_report(agent, input_file: Union[str, BinaryIO], full_output: str) -> Generator[Tuple[str, Union[str, None], str], None, None]:
134
  accumulated_text = ""
135
  try:
136
  if input_file is None:
137
  yield "❌ Please upload a valid Excel file.", None, ""
138
  return
139
 
140
+ text = extract_text_from_excel(input_file)
 
 
 
 
 
 
141
  chunks = split_text_into_chunks(text)
142
 
143
  for i, chunk in enumerate(chunks):
 
237
  try:
238
  agent = init_agent()
239
  demo = create_ui(agent)
240
+ demo.launch(
241
+ server_name="0.0.0.0",
242
+ server_port=7860,
243
+ allowed_paths=["/data/hf_cache/reports"],
244
+ share=True,
245
+ show_error=True
246
+ )
247
  except Exception as e:
248
+ print(f"Error: {str(e)}", file=sys.stderr)
249
+ sys.exit(1)