Update app.py
app.py CHANGED
@@ -30,6 +30,7 @@ MAX_TOKENS = 1800
 BATCH_SIZE = 2
 MAX_WORKERS = 4
 CHUNK_SIZE = 10  # For PDF processing
+MODEL_MAX_TOKENS = 131072  # Model's maximum token limit
 
 # Persistent directory setup
 persistent_dir = "/data/hf_cache"
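Note: MAX_TOKENS (1800, visible in the hunk header above) is the per-chunk budget, while the new MODEL_MAX_TOKENS is the total budget across all chunks. A quick illustrative calculation of how the two interact, assuming those values; it is not part of the commit:

MAX_TOKENS = 1800          # per-chunk budget (from the hunk header)
MODEL_MAX_TOKENS = 131072  # total budget added in this commit

full_chunks = MODEL_MAX_TOKENS // MAX_TOKENS             # 72 full chunks fit
headroom = MODEL_MAX_TOKENS - full_chunks * MAX_TOKENS   # 1472 tokens left over
print(full_chunks, headroom)

So at these settings the truncation branch added to tokenize_and_chunk below only fires once an upload produces more than roughly 72 full-size chunks.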
@@ -190,13 +191,41 @@ def process_file_cached(file_path: str, file_type: str) -> List[Dict]:
         return [{"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"}]
 
 def tokenize_and_chunk(text: str, max_tokens: int = MAX_TOKENS) -> List[str]:
-    """Optimized tokenization and chunking"""
+    """Optimized tokenization and chunking with strict token limit enforcement"""
     tokenizer = get_tokenizer()
     tokens = tokenizer.encode(text, add_special_tokens=False)
-
-
-
-
+    chunks = []
+    current_chunk = []
+    current_length = 0
+
+    for token in tokens:
+        if current_length + 1 > max_tokens:
+            chunks.append(tokenizer.decode(current_chunk))
+            current_chunk = [token]
+            current_length = 1
+        else:
+            current_chunk.append(token)
+            current_length += 1
+
+    if current_chunk:
+        chunks.append(tokenizer.decode(current_chunk))
+
+    # Validate total tokens
+    total_tokens = sum(len(tokenizer.encode(chunk, add_special_tokens=False)) for chunk in chunks)
+    if total_tokens > MODEL_MAX_TOKENS:
+        logger.warning(f"Total tokens ({total_tokens}) exceed model limit ({MODEL_MAX_TOKENS}). Truncating.")
+        truncated_chunks = []
+        current_tokens = 0
+        for chunk in chunks:
+            chunk_tokens = len(tokenizer.encode(chunk, add_special_tokens=False))
+            if current_tokens + chunk_tokens <= MODEL_MAX_TOKENS:
+                truncated_chunks.append(chunk)
+                current_tokens += chunk_tokens
+            else:
+                break
+        chunks = truncated_chunks
+
+    return chunks
 
 def log_system_usage(tag=""):
     """Optimized system monitoring"""
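A minimal smoke test for the rewritten tokenize_and_chunk, assuming get_tokenizer() returns a Hugging Face-style tokenizer with the usual encode/decode interface; the long input text is made up, and the snippet is illustrative rather than part of the commit:

tokenizer = get_tokenizer()
text = "Patient presents with intermittent chest pain. " * 5000  # arbitrary long input

chunks = tokenize_and_chunk(text, max_tokens=MAX_TOKENS)

# Each chunk should re-encode to roughly MAX_TOKENS tokens at most,
# and the total should respect the new MODEL_MAX_TOKENS budget.
for i, chunk in enumerate(chunks):
    n = len(tokenizer.encode(chunk, add_special_tokens=False))
    print(i, n)

total = sum(len(tokenizer.encode(c, add_special_tokens=False)) for c in chunks)
assert total <= MODEL_MAX_TOKENS

The per-token loop in the new implementation is equivalent to slicing the token list into max_tokens-sized windows, just written incrementally. Decoding a token slice and re-encoding the resulting string is not always an exact round trip, which is presumably why the validation step re-encodes each chunk rather than reusing the original token counts.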
@@ -402,7 +431,14 @@ Patient Record Excerpt (Chunk {0} of {1}):
         del extracted
         gc.collect()
 
-
+        try:
+            chunks = tokenize_and_chunk(text_content)
+        except Exception as e:
+            logger.error(f"Tokenization error: {e}")
+            history.append({"role": "assistant", "content": f"❌ Error: Input too large to process. Please upload a smaller file."})
+            yield history, None, f"Error: Input too large to process."
+            return
+
         del text_content
         gc.collect()
 
@@ -450,6 +486,10 @@ Patient Record Excerpt (Chunk {0} of {1}):
                     seen_responses.add(quick_response)
                     history[-1] = {"role": "assistant", "content": combined_response.strip()}
                     yield history, None, ""
+                except Exception as e:
+                    logger.error(f"Quick summary error for chunk {batch_idx + chunk_idx + 1}: {e}")
+                    history[-1] = {"role": "assistant", "content": f"Error processing chunk {batch_idx + chunk_idx + 1}: {str(e)}"}
+                    yield history, None, ""
                 finally:
                     del future
                     torch.cuda.empty_cache()
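The except block added above, together with the matching one in the final hunk below, completes a try/except/finally around each future's result, so one failed chunk is reported in the chat instead of aborting the whole stream. A stripped-down sketch of that pattern, with hypothetical stand-ins (process_chunk, stream_chunks) for the app's actual pipeline and with the torch.cuda.empty_cache() call reduced to a comment:

from concurrent.futures import ThreadPoolExecutor

MAX_WORKERS = 4  # mirrors the constant at the top of app.py

def process_chunk(chunk: str) -> str:
    # Stand-in for the real model call.
    return f"summary of chunk starting with {chunk[:20]!r}"

def stream_chunks(chunks, history):
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as pool:
        futures = [pool.submit(process_chunk, c) for c in chunks]
        for idx, future in enumerate(futures):
            try:
                history[-1] = {"role": "assistant", "content": future.result()}
                yield history
            except Exception as e:
                # A failing chunk becomes an in-line error message rather than
                # an unhandled exception that kills the generator.
                history[-1] = {"role": "assistant", "content": f"Error processing chunk {idx + 1}: {e}"}
                yield history
            finally:
                del future  # drop the reference promptly; app.py also empties the CUDA cache here

for update in stream_chunks(["chunk one", "chunk two"], [{"role": "assistant", "content": ""}]):
    print(update[-1]["content"])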
@@ -475,6 +515,10 @@ Patient Record Excerpt (Chunk {0} of {1}):
                     combined_response += clean_response(msg.content) + "\n"
                     history[-1] = {"role": "assistant", "content": combined_response.strip()}
                     yield history, report_path, ""
+                except Exception as e:
+                    logger.error(f"Detailed analysis error for chunk {batch_idx + chunk_idx + 1}: {e}")
+                    history[-1] = {"role": "assistant", "content": f"Error in detailed analysis for chunk {batch_idx + chunk_idx + 1}: {str(e)}"}
+                    yield history, None, ""
                 finally:
                     del future
                     torch.cuda.empty_cache()