File size: 1,118 Bytes
028eb6e
 
 
 
3a5efa8
 
 
 
028eb6e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33

# Default character budget for a single chunk; truncate_chunk uses it as the
# default value of its max_chars parameter.
MAX_CHARS_PER_CHUNK = 10000  # Approximates an 8k-token limit (very rough estimate)


def format_code_summary(code: str, summary: str) -> str:
    """Combine a code snippet and its summary into one labelled text block.

    The result is a ``CODE:`` header followed by the code, a blank line,
    then a ``SUMMARY:`` header followed by the summary.
    """
    code_section = f"CODE:\n{code}\n"
    summary_section = f"SUMMARY:\n{summary}"
    # Joining with a newline yields the blank line between the two sections.
    return "\n".join((code_section, summary_section))

def truncate_chunk(text: str, max_chars: int = MAX_CHARS_PER_CHUNK) -> str:
    """Truncate text to at most ``max_chars`` characters, preserving structure.

    Text that already fits is returned unchanged. Text that contains a
    ``SUMMARY:`` marker line is split at the first such marker, and the part
    before it and the part after it are each shortened in proportion to
    their lengths, so the result keeps the marker and a share of both
    sections. Any other text is simply cut at ``max_chars``.

    Args:
        text: The chunk to shorten.
        max_chars: Maximum length of the returned string, in characters.

    Returns:
        A string no longer than ``max_chars``; the empty string when
        ``text`` does not fit and ``max_chars`` is not positive.
    """
    if len(text) <= max_chars:
        return text
    if max_chars <= 0:
        # A negative slice bound would count from the end of the string
        # rather than limit the length, so handle this case explicitly.
        return ""

    separator = "\nSUMMARY:\n"
    # Split on the first marker only, so text that repeats the marker still
    # gets structured truncation instead of a blind cut.
    code, found, summary = text.partition(separator)

    # The separator is re-inserted below, so it has to come out of the budget;
    # otherwise the result would overshoot max_chars by len(separator).
    budget = max_chars - len(separator)
    if not found or budget <= 0:
        # No structure to preserve, or no room to preserve it.
        return text[:max_chars]

    # len(text) > max_chars guarantees total_len > budget >= 1 here, so the
    # division is safe and neither share exceeds its section's length.
    total_len = len(code) + len(summary)
    code_chars = budget * len(code) // total_len
    summary_chars = budget - code_chars

    return f"{code[:code_chars]}{separator}{summary[:summary_chars]}"