Ali2206 committed
Commit 4bfbcac · verified · 1 Parent(s): dc9cc58

Update app.py

Files changed (1): app.py (+5, -5)
app.py CHANGED
@@ -24,7 +24,7 @@ report_dir = os.path.join(persistent_dir, "reports")
 for directory in [model_cache_dir, tool_cache_dir, file_cache_dir, report_dir]:
     os.makedirs(directory, exist_ok=True)
 
-os.environ["HF_HOME"] = model_cache_dir # Using HF_HOME as specified
+os.environ["HF_HOME"] = model_cache_dir
 
 current_dir = os.path.dirname(os.path.abspath(__file__))
 src_path = os.path.abspath(os.path.join(current_dir, "src"))
@@ -37,7 +37,7 @@ MAX_MODEL_TOKENS = 131072 # TxAgent's max token limit
 MAX_CHUNK_TOKENS = 32768 # Larger chunks to reduce number of chunks
 MAX_NEW_TOKENS = 512 # Optimized for fast generation
 PROMPT_OVERHEAD = 500 # Estimated tokens for prompt template
-MAX_CONCURRENT = 8 # High concurrency for A100 80GB
+MAX_CONCURRENT = 4 # Reduced concurrency to avoid vLLM socket issues
 
 # Initialize tokenizer for precise token counting
 try:
@@ -64,7 +64,7 @@ def estimate_tokens(text: str) -> int:
     """Estimate tokens using tokenizer if available, else fall back to heuristic."""
     if tokenizer:
         return len(tokenizer.encode(text, add_special_tokens=False))
-    return len(text) // 3.5 + 1 # Consistent with your heuristic
+    return len(text) // 3.5 + 1
 
 def extract_text_from_excel(file_path: str) -> str:
     """Extract text from all sheets in an Excel file."""
@@ -153,7 +153,7 @@ def init_agent():
     return agent
 
 async def process_chunk(agent, chunk: str, chunk_index: int, total_chunks: int) -> Tuple[int, str, str]:
-    """Process a single chunk and return index, response, and status message."""
+    """Process a single chunk with enhanced error handling."""
     logger.info(f"Processing chunk {chunk_index+1}/{total_chunks}")
     prompt = build_prompt_from_text(chunk)
     prompt_tokens = estimate_tokens(prompt)
@@ -280,7 +280,7 @@ async def process_final_report(agent, file, chatbot_state: List[Dict[str, str]])
         message=final_prompt,
         history=[],
         temperature=0.2,
-        max_new_tokens=MAX_NEW_TOKENS * 2, # Allow more tokens for summary, as in your code
+        max_new_tokens=MAX_NEW_TOKENS * 2,
         max_token=MAX_MODEL_TOKENS,
         call_agent=False,
         conversation=[],
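
The functional change in this commit is MAX_CONCURRENT dropping from 8 to 4 to avoid vLLM socket issues. The diff does not show where the limit is enforced, so the following is only a minimal sketch of one way it could gate the process_chunk calls defined in app.py, assuming an asyncio.Semaphore; process_all_chunks is a hypothetical wrapper, not code from this repo.

import asyncio

MAX_CONCURRENT = 4  # value set by this commit

async def process_all_chunks(agent, chunks):
    # Bound how many chunks hit the vLLM backend at once; lowering this
    # bound from 8 to 4 is the point of the change.
    semaphore = asyncio.Semaphore(MAX_CONCURRENT)

    async def guarded(index, chunk):
        async with semaphore:
            return await process_chunk(agent, chunk, index, len(chunks))

    return await asyncio.gather(*(guarded(i, c) for i, c in enumerate(chunks)))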
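
The estimate_tokens fallback keeps the roughly 3.5-characters-per-token heuristic, now without the stray comment. A standalone sketch of that heuristic is below; estimate_tokens_fallback is a hypothetical name, and the explicit cast is only needed because len(text) // 3.5 evaluates to a float in Python while the function is annotated -> int.

def estimate_tokens_fallback(text: str) -> int:
    # Roughly 3.5 characters per token; the cast keeps the return type an int.
    return int(len(text) // 3.5) + 1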