File size: 4,402 Bytes
d3cd5c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import time
import tracemalloc

from PIL import Image

import moondream as md
from moondream.preprocess import create_patches

MODEL_PATH = "../../onnx/out/moondream-latest-int4.bin"


class Colors:
    """ANSI escape sequences used to colour and embolden terminal output."""

    # Text attributes
    BOLD = "\033[1m"
    ENDC = "\033[0m"  # Resets all attributes

    # Bright foreground colours
    RED = "\033[91m"
    GREEN = "\033[92m"
    YELLOW = "\033[93m"
    BLUE = "\033[94m"
    HEADER = "\033[95m"  # Purple


def format_memory(memory_mb):
    """Format a memory size given in MB, switching to GB for large values.

    Args:
        memory_mb: Size in megabytes. May be negative (e.g. a memory
            delta after memory has been released).

    Returns:
        The value rendered with two decimals and an "MB" suffix when its
        magnitude is below 1024, otherwise converted to "GB".
    """
    # Choose the unit from the magnitude so that negative deltas of a
    # gigabyte or more are also shown in GB rather than as a large MB figure.
    if abs(memory_mb) < 1024:
        return f"{memory_mb:.2f} MB"
    return f"{memory_mb / 1024:.2f} GB"


def print_section(title):
    """Print `title` as a bold header line, centred between runs of dashes.

    The line targets a width of 65 characters; when the padding cannot be
    split evenly the extra dash goes on the right-hand side.
    """
    line_width = 65
    # The title is flanked by one space on each side.
    dashes_needed = line_width - (len(title) + 2)
    left, remainder = divmod(dashes_needed, 2)
    right = left + remainder

    left_rule = "-" * left
    right_rule = "-" * right
    print(
        f"\n{Colors.HEADER}{Colors.BOLD}{left_rule} {title} {right_rule}{Colors.ENDC}"
    )


def print_metric(label, value, color=Colors.BLUE):
    """Print one "| label: value" row, colouring only the label with `color`."""
    tinted_label = f"{color}{label}{Colors.ENDC}"
    print(f"| {tinted_label}: {value}")


def log_memory_and_time(operation_name, start_time, start_memory):
    """Print a "Stats" block covering the work done since the given baselines.

    Args:
        operation_name: Label of the measured step. Accepted for the
            caller's benefit; it is not included in the printed output.
        start_time: `time.time()` reading taken before the step.
        start_memory: Memory reading in MB taken before the step.

    Returns:
        A `(end_time, current_memory)` tuple holding the readings taken
        when this function was called.
    """
    finished_at = time.time()
    memory_now = get_memory_usage()
    elapsed = finished_at - start_time
    growth = memory_now - start_memory

    # Severity colour for the increase: below 10 MB is green, below 100 MB
    # is yellow, anything larger is red.
    if growth < 10:
        severity = Colors.GREEN
    elif growth < 100:
        severity = Colors.YELLOW
    else:
        severity = Colors.RED

    print("\nStats")
    print_metric("Time", f"{elapsed:.2f} seconds")
    print_metric("Memory usage", format_memory(memory_now))
    print_metric("Memory increase", format_memory(growth), severity)

    return finished_at, memory_now


def get_memory_usage():
    """Return the memory currently traced by tracemalloc, in MB (1024 * 1024 bytes)."""
    bytes_per_mb = 1024 * 1024
    traced_bytes = tracemalloc.get_traced_memory()[0]  # (current, peak) -> current
    return traced_bytes / bytes_per_mb


def _stream_and_count(chunks):
    """Print each streamed chunk as it arrives and return how many were seen."""
    count = 0
    for chunk in chunks:
        print(chunk, end="", flush=True)
        count += 1
    print()
    return count


def _print_token_speed(token_count, elapsed_seconds):
    """Print the generation speed, tolerating a zero-length interval."""
    # time.time() can return the same value twice on coarse clocks; avoid a
    # ZeroDivisionError in that case instead of aborting the benchmark.
    if elapsed_seconds > 0:
        speed = f"{token_count / elapsed_seconds:.2f} tok/s"
    else:
        speed = "n/a"
    print_metric("Token generation speed", speed)


# Start tracking memory
tracemalloc.start()

# Initial memory measurement
initial_memory = get_memory_usage()
print_section("Initial State")
print_metric("Initial memory usage", format_memory(initial_memory))

# Load image
# NOTE: Pillow's Image.open is documented as lazy (pixel data is decoded on
# first use), so this step mostly measures opening/identifying the file.
print_section("Image Loading")
start_time = time.time()
start_memory = get_memory_usage()
image = Image.open("../../assets/demo-1.jpg")
log_memory_and_time("Image Loading", start_time, start_memory)

# Initialize model
print_section("Model Initialization")
start_time = time.time()
start_memory = get_memory_usage()
model = md.VL(MODEL_PATH)
log_memory_and_time("Model Initialization", start_time, start_memory)

# Encode image
print_section("Image Encoding")
start_time = time.time()
start_memory = get_memory_usage()
encoded_image = model.encode_image(image)
log_memory_and_time("Image Encoding", start_time, start_memory)

# Generate caption (streamed, so each chunk is printed as soon as it arrives)
print_section("Caption Generation")
print(f"{Colors.BOLD}Caption:{Colors.ENDC}", end="", flush=True)
start_time = time.time()
start_memory = get_memory_usage()
tokens = _stream_and_count(model.caption(encoded_image, stream=True)["caption"])
end_time, end_memory = log_memory_and_time("Caption Stats", start_time, start_memory)
_print_token_speed(tokens, end_time - start_time)

# Generate answer to question
question = "How many people are in this image? Answer briefly."
print_section("Question Answering")
print(f"{Colors.BOLD}Question:{Colors.ENDC} {question}")
print(f"{Colors.BOLD}Answer:{Colors.ENDC}", end="", flush=True)
start_time = time.time()
start_memory = get_memory_usage()
tokens = _stream_and_count(
    model.query(encoded_image, question, stream=True)["answer"]
)
end_time, end_memory = log_memory_and_time(
    "Question Answering Stats", start_time, start_memory
)
_print_token_speed(tokens, end_time - start_time)

# Final summary
print_section("Final Summary")
final_memory = get_memory_usage()
current, peak = tracemalloc.get_traced_memory()

print_metric("Final memory usage", format_memory(final_memory))
print_metric("Total memory increase", format_memory(final_memory - initial_memory))
print_metric("Peak memory usage", format_memory(peak / 1024 / 1024))

# Stop tracking memory
tracemalloc.stop()