Ali2206 committed
Commit 4bfbcac · verified · 1 Parent(s): dc9cc58

Update app.py

Files changed (1): app.py (+5, -5)
app.py CHANGED
@@ -24,7 +24,7 @@ report_dir = os.path.join(persistent_dir, "reports")
 for directory in [model_cache_dir, tool_cache_dir, file_cache_dir, report_dir]:
     os.makedirs(directory, exist_ok=True)
 
-os.environ["HF_HOME"] = model_cache_dir # Using HF_HOME as specified
+os.environ["HF_HOME"] = model_cache_dir
 
 current_dir = os.path.dirname(os.path.abspath(__file__))
 src_path = os.path.abspath(os.path.join(current_dir, "src"))
@@ -37,7 +37,7 @@ MAX_MODEL_TOKENS = 131072 # TxAgent's max token limit
 MAX_CHUNK_TOKENS = 32768 # Larger chunks to reduce number of chunks
 MAX_NEW_TOKENS = 512 # Optimized for fast generation
 PROMPT_OVERHEAD = 500 # Estimated tokens for prompt template
-MAX_CONCURRENT = 8 # High concurrency for A100 80GB
+MAX_CONCURRENT = 4 # Reduced concurrency to avoid vLLM socket issues
 
 # Initialize tokenizer for precise token counting
 try:
@@ -64,7 +64,7 @@ def estimate_tokens(text: str) -> int:
     """Estimate tokens using tokenizer if available, else fall back to heuristic."""
     if tokenizer:
         return len(tokenizer.encode(text, add_special_tokens=False))
-    return len(text) // 3.5 + 1 # Consistent with your heuristic
+    return len(text) // 3.5 + 1
 
 def extract_text_from_excel(file_path: str) -> str:
     """Extract text from all sheets in an Excel file."""
@@ -153,7 +153,7 @@ def init_agent():
     return agent
 
 async def process_chunk(agent, chunk: str, chunk_index: int, total_chunks: int) -> Tuple[int, str, str]:
-    """Process a single chunk and return index, response, and status message."""
+    """Process a single chunk with enhanced error handling."""
     logger.info(f"Processing chunk {chunk_index+1}/{total_chunks}")
     prompt = build_prompt_from_text(chunk)
     prompt_tokens = estimate_tokens(prompt)
@@ -280,7 +280,7 @@ async def process_final_report(agent, file, chatbot_state: List[Dict[str, str]])
         message=final_prompt,
         history=[],
         temperature=0.2,
-        max_new_tokens=MAX_NEW_TOKENS * 2, # Allow more tokens for summary, as in your code
+        max_new_tokens=MAX_NEW_TOKENS * 2,
         max_token=MAX_MODEL_TOKENS,
         call_agent=False,
         conversation=[],
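
The functional change in this commit is MAX_CONCURRENT dropping from 8 to 4 to avoid vLLM socket issues. The diff does not show where the limit is enforced, so the following is only a minimal sketch of one way it could gate the process_chunk calls defined in app.py, assuming an asyncio.Semaphore; process_all_chunks is a hypothetical wrapper, not code from this repo.

import asyncio

MAX_CONCURRENT = 4  # value set by this commit

async def process_all_chunks(agent, chunks):
    # Bound how many chunks hit the vLLM backend at once; lowering this
    # bound from 8 to 4 is the point of the change.
    semaphore = asyncio.Semaphore(MAX_CONCURRENT)

    async def guarded(index, chunk):
        async with semaphore:
            return await process_chunk(agent, chunk, index, len(chunks))

    return await asyncio.gather(*(guarded(i, c) for i, c in enumerate(chunks)))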
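
The estimate_tokens fallback keeps the roughly 3.5-characters-per-token heuristic, now without the stray comment. A standalone sketch of that heuristic is below; estimate_tokens_fallback is a hypothetical name, and the explicit cast is only needed because len(text) // 3.5 evaluates to a float in Python while the function is annotated -> int.

def estimate_tokens_fallback(text: str) -> int:
    # Roughly 3.5 characters per token; the cast keeps the return type an int.
    return int(len(text) // 3.5) + 1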