Update app.py
app.py CHANGED
@@ -24,7 +24,7 @@ report_dir = os.path.join(persistent_dir, "reports")
 for directory in [model_cache_dir, tool_cache_dir, file_cache_dir, report_dir]:
     os.makedirs(directory, exist_ok=True)

-os.environ["HF_HOME"] = model_cache_dir
+os.environ["HF_HOME"] = model_cache_dir

 current_dir = os.path.dirname(os.path.abspath(__file__))
 src_path = os.path.abspath(os.path.join(current_dir, "src"))
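The first hunk keeps `HF_HOME` pointed at the persistent model cache. For the redirect to take effect, the variable generally has to be set before any Hugging Face library is imported, since the cache location is resolved from the environment when those libraries initialise. A minimal sketch of that ordering (the `/data` mount point and the `gpt2` model id are placeholders, not values from app.py):

```python
import os

# Point the Hugging Face cache at persistent storage *before* importing
# transformers; the cache location is resolved from the environment when
# the library is first imported.
persistent_dir = "/data"                                  # placeholder mount point
model_cache_dir = os.path.join(persistent_dir, "models")
os.makedirs(model_cache_dir, exist_ok=True)
os.environ["HF_HOME"] = model_cache_dir

from transformers import AutoTokenizer                    # imported only after HF_HOME is set

tokenizer = AutoTokenizer.from_pretrained("gpt2")          # downloads land under model_cache_dir
```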
@@ -37,7 +37,7 @@ MAX_MODEL_TOKENS = 131072 # TxAgent's max token limit
 MAX_CHUNK_TOKENS = 32768 # Larger chunks to reduce number of chunks
 MAX_NEW_TOKENS = 512 # Optimized for fast generation
 PROMPT_OVERHEAD = 500 # Estimated tokens for prompt template
-MAX_CONCURRENT =
+MAX_CONCURRENT = 4 # Reduced concurrency to avoid vLLM socket issues

 # Initialize tokenizer for precise token counting
 try:
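The second hunk pins `MAX_CONCURRENT = 4` so fewer requests reach the vLLM backend at once. The diff does not show how the limit is enforced; one common pattern, assuming the chunks are dispatched with asyncio (an assumption, since that code is outside this hunk), is a semaphore wrapped around each chunk task. The `process_all_chunks` driver below is hypothetical; only `process_chunk` appears in app.py:

```python
import asyncio
from typing import List, Tuple

MAX_CONCURRENT = 4  # cap on simultaneous requests to the inference backend

async def process_all_chunks(agent, chunks: List[str]) -> List[Tuple[int, str, str]]:
    """Hypothetical driver: run process_chunk (defined in app.py) for every
    chunk while keeping at most MAX_CONCURRENT calls in flight."""
    semaphore = asyncio.Semaphore(MAX_CONCURRENT)

    async def bounded(index: int, chunk: str):
        async with semaphore:                     # waits while 4 tasks are already running
            return await process_chunk(agent, chunk, index, len(chunks))

    return await asyncio.gather(*(bounded(i, c) for i, c in enumerate(chunks)))
```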
@@ -64,7 +64,7 @@ def estimate_tokens(text: str) -> int:
     """Estimate tokens using tokenizer if available, else fall back to heuristic."""
     if tokenizer:
         return len(tokenizer.encode(text, add_special_tokens=False))
-    return len(text) // 3.5 + 1
+    return len(text) // 3.5 + 1

 def extract_text_from_excel(file_path: str) -> str:
     """Extract text from all sheets in an Excel file."""
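The `estimate_tokens` hunk keeps the fallback of roughly 3.5 characters per token when no tokenizer is available. Note that `len(text) // 3.5` evaluates to a float in Python because one operand is a float, so the line as written does not return an `int` despite the annotation. A standalone version that honours the annotation could look like this; the `gpt2` model id is only a placeholder for whatever tokenizer app.py loads in its `try:` block:

```python
from transformers import AutoTokenizer

try:
    tokenizer = AutoTokenizer.from_pretrained("gpt2")   # placeholder model id
except Exception:
    tokenizer = None                                     # fall back to the heuristic

def estimate_tokens(text: str) -> int:
    """Tokenizer count when available, otherwise ~3.5 characters per token."""
    if tokenizer:
        return len(tokenizer.encode(text, add_special_tokens=False))
    # // with a float operand yields a float, so cast before returning.
    return int(len(text) // 3.5) + 1
```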
@@ -153,7 +153,7 @@ def init_agent():
     return agent

 async def process_chunk(agent, chunk: str, chunk_index: int, total_chunks: int) -> Tuple[int, str, str]:
-    """Process a single chunk
+    """Process a single chunk with enhanced error handling."""
     logger.info(f"Processing chunk {chunk_index+1}/{total_chunks}")
     prompt = build_prompt_from_text(chunk)
     prompt_tokens = estimate_tokens(prompt)
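The docstring now promises "enhanced error handling". The body of `process_chunk` is not part of this hunk, but the `Tuple[int, str, str]` return type suggests an (index, text, error) triple; a sketch of that shape, with `agent.chat` standing in for whatever call the app actually makes, is:

```python
import logging
from typing import Tuple

logger = logging.getLogger(__name__)

async def process_chunk(agent, chunk: str, chunk_index: int, total_chunks: int) -> Tuple[int, str, str]:
    """Sketch only: return (index, text, error) so one failing chunk is
    reported instead of aborting the whole batch."""
    logger.info(f"Processing chunk {chunk_index + 1}/{total_chunks}")
    try:
        response = await agent.chat(chunk)        # stand-in for the real agent call
        return chunk_index, response, ""
    except Exception as exc:                      # e.g. transient vLLM socket errors
        logger.error(f"Chunk {chunk_index + 1} failed: {exc}")
        return chunk_index, "", str(exc)
```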
@@ -280,7 +280,7 @@ async def process_final_report(agent, file, chatbot_state: List[Dict[str, str]])
         message=final_prompt,
         history=[],
         temperature=0.2,
-        max_new_tokens=MAX_NEW_TOKENS * 2,
+        max_new_tokens=MAX_NEW_TOKENS * 2,
         max_token=MAX_MODEL_TOKENS,
         call_agent=False,
         conversation=[],
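The last hunk doubles the generation budget for the final report (`MAX_NEW_TOKENS * 2`, i.e. 1024 tokens) while capping the request at the model's 131,072-token window. A hypothetical pre-flight check along those lines (the helper name is not from app.py):

```python
MAX_MODEL_TOKENS = 131072   # TxAgent's max token limit (see the constants hunk above)
MAX_NEW_TOKENS = 512
PROMPT_OVERHEAD = 500

def fits_in_context(prompt_tokens: int) -> bool:
    """Hypothetical check: prompt plus template overhead plus the doubled
    generation budget must stay inside the model's context window."""
    return prompt_tokens + PROMPT_OVERHEAD + MAX_NEW_TOKENS * 2 <= MAX_MODEL_TOKENS
```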