Ali2206 committed on
Commit 9569e68 · verified · 1 Parent(s): 083dc3a

Update app.py

Files changed (1): app.py (+50 -6)
app.py CHANGED
@@ -30,6 +30,7 @@ MAX_TOKENS = 1800
 BATCH_SIZE = 2
 MAX_WORKERS = 4
 CHUNK_SIZE = 10 # For PDF processing
+MODEL_MAX_TOKENS = 131072 # Model's maximum token limit
 
 # Persistent directory setup
 persistent_dir = "/data/hf_cache"
@@ -190,13 +191,41 @@ def process_file_cached(file_path: str, file_type: str) -> List[Dict]:
         return [{"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"}]
 
 def tokenize_and_chunk(text: str, max_tokens: int = MAX_TOKENS) -> List[str]:
-    """Optimized tokenization and chunking"""
+    """Optimized tokenization and chunking with strict token limit enforcement"""
     tokenizer = get_tokenizer()
     tokens = tokenizer.encode(text, add_special_tokens=False)
-    return [
-        tokenizer.decode(tokens[i:i + max_tokens])
-        for i in range(0, len(tokens), max_tokens)
-    ]
+    chunks = []
+    current_chunk = []
+    current_length = 0
+
+    for token in tokens:
+        if current_length + 1 > max_tokens:
+            chunks.append(tokenizer.decode(current_chunk))
+            current_chunk = [token]
+            current_length = 1
+        else:
+            current_chunk.append(token)
+            current_length += 1
+
+    if current_chunk:
+        chunks.append(tokenizer.decode(current_chunk))
+
+    # Validate total tokens
+    total_tokens = sum(len(tokenizer.encode(chunk, add_special_tokens=False)) for chunk in chunks)
+    if total_tokens > MODEL_MAX_TOKENS:
+        logger.warning(f"Total tokens ({total_tokens}) exceed model limit ({MODEL_MAX_TOKENS}). Truncating.")
+        truncated_chunks = []
+        current_tokens = 0
+        for chunk in chunks:
+            chunk_tokens = len(tokenizer.encode(chunk, add_special_tokens=False))
+            if current_tokens + chunk_tokens <= MODEL_MAX_TOKENS:
+                truncated_chunks.append(chunk)
+                current_tokens += chunk_tokens
+            else:
+                break
+        chunks = truncated_chunks
+
+    return chunks
 
 def log_system_usage(tag=""):
     """Optimized system monitoring"""
@@ -402,7 +431,14 @@ Patient Record Excerpt (Chunk {0} of {1}):
         del extracted
         gc.collect()
 
-        chunks = tokenize_and_chunk(text_content)
+        try:
+            chunks = tokenize_and_chunk(text_content)
+        except Exception as e:
+            logger.error(f"Tokenization error: {e}")
+            history.append({"role": "assistant", "content": f"❌ Error: Input too large to process. Please upload a smaller file."})
+            yield history, None, f"Error: Input too large to process."
+            return
+
         del text_content
         gc.collect()
 
@@ -450,6 +486,10 @@ Patient Record Excerpt (Chunk {0} of {1}):
                         seen_responses.add(quick_response)
                         history[-1] = {"role": "assistant", "content": combined_response.strip()}
                     yield history, None, ""
+                except Exception as e:
+                    logger.error(f"Quick summary error for chunk {batch_idx + chunk_idx + 1}: {e}")
+                    history[-1] = {"role": "assistant", "content": f"Error processing chunk {batch_idx + chunk_idx + 1}: {str(e)}"}
+                    yield history, None, ""
                 finally:
                     del future
                     torch.cuda.empty_cache()
@@ -475,6 +515,10 @@ Patient Record Excerpt (Chunk {0} of {1}):
                     combined_response += clean_response(msg.content) + "\n"
                     history[-1] = {"role": "assistant", "content": combined_response.strip()}
                     yield history, report_path, ""
+                except Exception as e:
+                    logger.error(f"Detailed analysis error for chunk {batch_idx + chunk_idx + 1}: {e}")
+                    history[-1] = {"role": "assistant", "content": f"Error in detailed analysis for chunk {batch_idx + chunk_idx + 1}: {str(e)}"}
+                    yield history, None, ""
                 finally:
                     del future
                     torch.cuda.empty_cache()
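
For context, a minimal, self-contained sketch of the chunking strategy this commit introduces: split the token stream into chunks of at most MAX_TOKENS tokens, then keep whole chunks only until the model-wide MODEL_MAX_TOKENS budget is spent. The helper name chunk_by_token_budget and the "gpt2" tokenizer are illustrative placeholders, not part of app.py, which presumably obtains its tokenizer via get_tokenizer().

from typing import List

from transformers import AutoTokenizer

MAX_TOKENS = 1800          # per-chunk budget, mirroring app.py
MODEL_MAX_TOKENS = 131072  # overall cap added by this commit

def chunk_by_token_budget(text: str, tokenizer, max_tokens: int = MAX_TOKENS) -> List[str]:
    """Split text into chunks of at most max_tokens tokens, capped at MODEL_MAX_TOKENS overall."""
    tokens = tokenizer.encode(text, add_special_tokens=False)
    # Fixed-size slices of the token stream, decoded back to text.
    chunks = [tokenizer.decode(tokens[i:i + max_tokens]) for i in range(0, len(tokens), max_tokens)]
    # Enforce the model-wide budget: keep whole chunks until it is spent.
    kept: List[str] = []
    used = 0
    for chunk in chunks:
        n = len(tokenizer.encode(chunk, add_special_tokens=False))
        if used + n > MODEL_MAX_TOKENS:
            break
        kept.append(chunk)
        used += n
    return kept

if __name__ == "__main__":
    tok = AutoTokenizer.from_pretrained("gpt2")  # placeholder tokenizer for this sketch
    parts = chunk_by_token_budget("lorem ipsum " * 4000, tok)
    print(len(parts), "chunks; sizes:", [len(tok.encode(p, add_special_tokens=False)) for p in parts])

Slicing the token list in steps of max_tokens produces the same chunk boundaries as the commit's token-by-token loop; re-encoding a decoded chunk can shift its count slightly at the boundaries, which is presumably why the committed code re-measures each chunk when enforcing the global cap.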