"""Benchmark a moondream vision-language model: time and traced memory per stage.

The script loads an image, initializes the model, encodes the image, streams a
caption and a short answer, and prints timing / memory statistics after each
stage followed by a final summary.

NOTE(review): memory figures come from ``tracemalloc``, which traces memory
blocks allocated through Python's allocators. Memory allocated natively by the
model runtime may not show up in these numbers -- confirm whether
process-level (RSS) figures are wanted instead.
"""
import time
import tracemalloc
from typing import Iterable, Tuple

from PIL import Image

import moondream as md
from moondream.preprocess import create_patches

MODEL_PATH = "../../onnx/out/moondream-latest-int4.bin"
IMAGE_PATH = "../../assets/demo-1.jpg"

# Number of bytes in one mebibyte; tracemalloc reports sizes in bytes.
_BYTES_PER_MB = 1024 * 1024


class Colors:
    """ANSI escape sequences used to colorize terminal output."""

    HEADER = "\033[95m"  # Purple
    BLUE = "\033[94m"
    GREEN = "\033[92m"
    YELLOW = "\033[93m"
    RED = "\033[91m"
    ENDC = "\033[0m"  # Reset all attributes
    BOLD = "\033[1m"


def format_memory(memory_mb: float) -> str:
    """Format a memory size given in MB, switching to GB at 1024 MB.

    The unit is chosen from the magnitude of the value so that a large
    negative delta (memory released) is also reported in GB.

    Args:
        memory_mb: Size (or size difference) in megabytes; may be negative.

    Returns:
        The value with two decimals and a ``MB`` or ``GB`` suffix.
    """
    if abs(memory_mb) < 1024:
        return f"{memory_mb:.2f} MB"
    return f"{memory_mb / 1024:.2f} GB"


def print_section(title: str, total_width: int = 65) -> None:
    """Print a section header with the title centered between dashes.

    Args:
        title: Text shown in the header.
        total_width: Target width of the header line in characters. Titles
            longer than this are printed without dash padding.
    """
    text_length = len(title) + 2  # Add 2 for the spaces around the title
    total_padding = total_width - text_length
    left_padding = total_padding // 2
    # Any odd leftover character goes to the right-hand side.
    right_padding = total_padding - left_padding
    print(
        f"\n{Colors.HEADER}{Colors.BOLD}{'-'*left_padding} {title} {'-'*right_padding}{Colors.ENDC}"
    )


def print_metric(label: str, value: object, color: str = Colors.BLUE) -> None:
    """Print one ``| label: value`` line with the label in the given color."""
    print(f"| {color}{label}{Colors.ENDC}: {value}")


def get_memory_usage() -> float:
    """Return the memory currently traced by tracemalloc, in MB."""
    current, _ = tracemalloc.get_traced_memory()
    return current / _BYTES_PER_MB


def log_memory_and_time(
    operation_name: str, start_time: float, start_memory: float
) -> Tuple[float, float]:
    """Print elapsed time and memory statistics for a finished operation.

    Args:
        operation_name: Name of the measured operation. Currently not
            included in the output; kept so existing call sites stay valid.
        start_time: ``time.perf_counter()`` reading taken before the operation.
        start_memory: ``get_memory_usage()`` reading (MB) taken before the
            operation.

    Returns:
        ``(end_time, current_memory)``: the ``time.perf_counter()`` reading
        and the traced memory in MB at the moment of logging.
    """
    # perf_counter is monotonic, so the interval cannot be distorted by
    # system clock adjustments the way time.time() differences can.
    end_time = time.perf_counter()
    current_memory = get_memory_usage()

    time_diff = end_time - start_time
    memory_diff = current_memory - start_memory

    print("\nStats")
    print_metric("Time", f"{time_diff:.2f} seconds")
    print_metric("Memory usage", format_memory(current_memory))

    # Color-code the memory increase based on significance (thresholds in MB).
    if memory_diff < 10:
        color = Colors.GREEN
    elif memory_diff < 100:
        color = Colors.YELLOW
    else:
        color = Colors.RED
    print_metric("Memory increase", format_memory(memory_diff), color)

    return end_time, current_memory


def _print_stream(chunks: Iterable[str]) -> int:
    """Echo streamed chunks as they arrive and return how many were received."""
    count = 0
    for chunk in chunks:
        print(chunk, end="", flush=True)
        count += 1
    print()  # Terminate the streamed line
    return count


def _print_generation_speed(tokens: int, elapsed: float) -> None:
    """Print the tokens-per-second metric, guarding against a zero interval."""
    speed = tokens / elapsed if elapsed > 0 else 0.0
    print_metric("Token generation speed", f"{speed:.2f} tok/s")


def _run_benchmark() -> None:
    """Run every benchmark stage; assumes tracemalloc is already tracing."""
    # Initial memory measurement
    initial_memory = get_memory_usage()
    print_section("Initial State")
    print_metric("Initial memory usage", format_memory(initial_memory))

    # Load image
    # NOTE(review): Pillow documents Image.open as lazy (pixel data is read on
    # first use), so decoding cost may be attributed to a later stage -- confirm
    # whether an explicit image.load() is wanted here.
    print_section("Image Loading")
    start_time = time.perf_counter()
    start_memory = get_memory_usage()
    image = Image.open(IMAGE_PATH)
    log_memory_and_time("Image Loading", start_time, start_memory)

    # Initialize model
    print_section("Model Initialization")
    start_time = time.perf_counter()
    start_memory = get_memory_usage()
    model = md.VL(MODEL_PATH)
    log_memory_and_time("Model Initialization", start_time, start_memory)

    # Encode image
    print_section("Image Encoding")
    start_time = time.perf_counter()
    start_memory = get_memory_usage()
    encoded_image = model.encode_image(image)
    log_memory_and_time("Image Encoding", start_time, start_memory)

    # Generate caption
    print_section("Caption Generation")
    print(f"{Colors.BOLD}Caption:{Colors.ENDC}", end="", flush=True)
    start_time = time.perf_counter()
    start_memory = get_memory_usage()
    tokens = _print_stream(model.caption(encoded_image, stream=True)["caption"])
    end_time, _ = log_memory_and_time("Caption Stats", start_time, start_memory)
    _print_generation_speed(tokens, end_time - start_time)

    # Generate answer to question
    question = "How many people are in this image? Answer briefly."
    print_section("Question Answering")
    print(f"{Colors.BOLD}Question:{Colors.ENDC} {question}")
    print(f"{Colors.BOLD}Answer:{Colors.ENDC}", end="", flush=True)
    start_time = time.perf_counter()
    start_memory = get_memory_usage()
    tokens = _print_stream(
        model.query(encoded_image, question, stream=True)["answer"]
    )
    end_time, _ = log_memory_and_time(
        "Question Answering Stats", start_time, start_memory
    )
    _print_generation_speed(tokens, end_time - start_time)

    # Final summary
    print_section("Final Summary")
    final_memory = get_memory_usage()
    _, peak = tracemalloc.get_traced_memory()
    print_metric("Final memory usage", format_memory(final_memory))
    print_metric("Total memory increase", format_memory(final_memory - initial_memory))
    print_metric("Peak memory usage", format_memory(peak / _BYTES_PER_MB))


def main() -> None:
    """Run the benchmark with tracemalloc tracing enabled for its duration."""
    # Start tracking memory
    tracemalloc.start()
    try:
        _run_benchmark()
    finally:
        # Stop tracking memory even if a stage fails.
        tracemalloc.stop()


if __name__ == "__main__":
    main()