MAX_CHARS_PER_CHUNK = 10000  # Approximate 8k tokens limit (very rough estimate)

# Marker separating the code section from the summary section. It is shared by
# the formatter and the truncator so the two can never drift apart.
_SUMMARY_SEPARATOR = "\nSUMMARY:\n"


def format_code_summary(code: str, summary: str) -> str:
    """Format code and summary into a single string.

    Args:
        code: Source code placed under the ``CODE:`` header.
        summary: Text placed under the ``SUMMARY:`` header.

    Returns:
        The string ``"CODE:\\n<code>\\n\\nSUMMARY:\\n<summary>"``.
    """
    return f"CODE:\n{code}\n{_SUMMARY_SEPARATOR}{summary}"


def truncate_chunk(text: str, max_chars: int = MAX_CHARS_PER_CHUNK) -> str:
    """Truncate text to at most ``max_chars`` characters, preserving structure.

    When ``text`` contains exactly one ``"\\nSUMMARY:\\n"`` separator and the
    budget is large enough to hold it, the code and summary sections are each
    shortened in proportion to their original lengths and re-joined with the
    separator. Otherwise the text is simply cut at ``max_chars``.

    Args:
        text: The text to shorten, typically produced by
            ``format_code_summary``.
        max_chars: Maximum length of the returned string. Negative values are
            treated as 0.

    Returns:
        ``text`` unchanged if it already fits, otherwise a string no longer
        than ``max_chars`` characters.
    """
    if len(text) <= max_chars:
        return text

    # A negative limit would make ``text[:max_chars]`` slice from the end and
    # return almost the whole text; treat it as "no room at all" instead.
    max_chars = max(max_chars, 0)

    # Split into CODE and SUMMARY sections
    parts = text.split(_SUMMARY_SEPARATOR)
    if len(parts) != 2:
        # Structure not found (or ambiguous): just truncate
        return text[:max_chars]

    # The separator is re-inserted below, so it must be paid for out of the
    # budget; otherwise the result would overshoot max_chars by its length.
    budget = max_chars - len(_SUMMARY_SEPARATOR)
    if budget <= 0:
        # Not enough room to keep both sections around the separator.
        return text[:max_chars]

    code, summary = parts

    # Here len(text) > max_chars, so total_len > budget > 0 and the division
    # below cannot hit zero.
    total_len = len(code) + len(summary)

    # Allocate characters proportionally; integer arithmetic avoids float
    # rounding surprises.
    code_chars = budget * len(code) // total_len
    summary_chars = budget - code_chars

    return f"{code[:code_chars]}{_SUMMARY_SEPARATOR}{summary[:summary_chars]}"