Spaces:
Running
Running
import time | |
import tracemalloc | |
from PIL import Image | |
import moondream as md | |
from moondream.preprocess import create_patches | |
# Location of the model file that is handed to md.VL() further down
MODEL_PATH = "../../onnx/out/moondream-latest-int4.bin"
class Colors:
    """ANSI escape sequences used to style the benchmark's terminal output."""

    # Text attributes
    BOLD = "\033[1m"
    ENDC = "\033[0m"  # Ends any styling started by the codes in this class

    # Foreground colors
    HEADER = "\033[95m"  # Purple
    BLUE = "\033[94m"
    GREEN = "\033[92m"
    YELLOW = "\033[93m"
    RED = "\033[91m"
def format_memory(memory_mb):
    """Format a memory size given in megabytes as a human-readable string.

    Args:
        memory_mb: Size in MB. May be negative, e.g. a memory delta measured
            after allocations were freed.

    Returns:
        The value with two decimals, suffixed with "MB" when its magnitude
        is below 1024 and converted to "GB" otherwise.
    """
    # Choose the unit from the magnitude so that large negative deltas are
    # also reported in GB instead of as thousands of MB
    if abs(memory_mb) < 1024:
        return f"{memory_mb:.2f} MB"
    return f"{memory_mb / 1024:.2f} GB"
def print_section(title):
    """Print *title* as a banner centered within a 65-character line of dashes."""
    banner_width = 65
    # The title is shown with one space on each side; dashes fill what is left
    dash_count = banner_width - (len(title) + 2)
    left_dashes = "-" * (dash_count // 2)
    # Any odd leftover dash goes to the right-hand side
    right_dashes = "-" * (dash_count - dash_count // 2)
    banner = f"{left_dashes} {title} {right_dashes}"
    print(f"\n{Colors.HEADER}{Colors.BOLD}{banner}{Colors.ENDC}")
def print_metric(label, value, color=Colors.BLUE):
    """Print one "| label: value" line with only the label colorized.

    Args:
        label: Name of the metric.
        value: Text shown after the colon, printed without styling.
        color: Escape sequence placed before the label (blue by default).
    """
    styled_label = f"{color}{label}{Colors.ENDC}"
    print(f"| {styled_label}: {value}")
def log_memory_and_time(operation_name, start_time, start_memory):
    """Print elapsed time and memory statistics for an operation that just ended.

    Args:
        operation_name: Label of the operation (accepted but not printed).
        start_time: ``time.time()`` reading taken before the operation.
        start_memory: Memory usage in MB taken before the operation.

    Returns:
        A ``(end_time, current_memory)`` tuple sampled when this function
        was called, so callers can derive further metrics from it.
    """
    finished_at = time.time()
    memory_now = get_memory_usage()
    elapsed = finished_at - start_time
    memory_delta = memory_now - start_memory

    print("\nStats")
    print_metric("Time", f"{elapsed:.2f} seconds")
    print_metric("Memory usage", format_memory(memory_now))

    # Highlight the increase: green below 10 MB, yellow below 100 MB, red otherwise
    if memory_delta < 10:
        delta_color = Colors.GREEN
    elif memory_delta < 100:
        delta_color = Colors.YELLOW
    else:
        delta_color = Colors.RED
    print_metric("Memory increase", format_memory(memory_delta), delta_color)

    return finished_at, memory_now
def get_memory_usage():
    """Return the amount of memory currently traced by ``tracemalloc``, in MB."""
    # get_traced_memory() yields (current, peak) in bytes; only current is needed
    traced_bytes = tracemalloc.get_traced_memory()[0]
    return traced_bytes / 1024 / 1024
def _stream_and_count(chunks):
    """Print streamed text chunks as they arrive and return how many were received."""
    count = 0
    for chunk in chunks:
        print(chunk, end="", flush=True)
        count += 1
    print()  # Terminate the streamed line
    return count


def _tokens_per_second(token_count, elapsed_seconds):
    """Return the generation rate, or 0.0 when no measurable time has elapsed."""
    # Guard against a ZeroDivisionError when the clock did not advance
    return token_count / elapsed_seconds if elapsed_seconds > 0 else 0.0


# Start tracking memory.
# NOTE(review): tracemalloc only traces allocations made through Python's
# memory allocators; memory the model runtime may allocate natively is not
# reflected in the figures below -- confirm this is the intended metric.
tracemalloc.start()

# Initial memory measurement
initial_memory = get_memory_usage()
print_section("Initial State")
print_metric("Initial memory usage", format_memory(initial_memory))

# Load image
print_section("Image Loading")
start_time = time.time()
start_memory = get_memory_usage()
image = Image.open("../../assets/demo-1.jpg")
# Image.open() is lazy and only identifies the file; force the decode here so
# its cost is attributed to this stage instead of to image encoding
image.load()
log_memory_and_time("Image Loading", start_time, start_memory)

# Initialize model
print_section("Model Initialization")
start_time = time.time()
start_memory = get_memory_usage()
model = md.VL(MODEL_PATH)
log_memory_and_time("Model Initialization", start_time, start_memory)

# Encode image
print_section("Image Encoding")
start_time = time.time()
start_memory = get_memory_usage()
encoded_image = model.encode_image(image)
log_memory_and_time("Image Encoding", start_time, start_memory)

# Generate caption
print_section("Caption Generation")
print(f"{Colors.BOLD}Caption:{Colors.ENDC}", end="", flush=True)
start_time = time.time()
start_memory = get_memory_usage()
tokens = _stream_and_count(model.caption(encoded_image, stream=True)["caption"])
end_time, _ = log_memory_and_time("Caption Stats", start_time, start_memory)
print_metric(
    "Token generation speed",
    f"{_tokens_per_second(tokens, end_time - start_time):.2f} tok/s",
)

# Generate answer to question
question = "How many people are in this image? Answer briefly."
print_section("Question Answering")
print(f"{Colors.BOLD}Question:{Colors.ENDC} {question}")
print(f"{Colors.BOLD}Answer:{Colors.ENDC}", end="", flush=True)
start_time = time.time()
start_memory = get_memory_usage()
tokens = _stream_and_count(
    model.query(encoded_image, question, stream=True)["answer"]
)
end_time, _ = log_memory_and_time(
    "Question Answering Stats", start_time, start_memory
)
print_metric(
    "Token generation speed",
    f"{_tokens_per_second(tokens, end_time - start_time):.2f} tok/s",
)

# Final summary
print_section("Final Summary")
final_memory = get_memory_usage()
# Only the peak is needed from this snapshot; current usage was read above
_, peak = tracemalloc.get_traced_memory()
print_metric("Final memory usage", format_memory(final_memory))
print_metric("Total memory increase", format_memory(final_memory - initial_memory))
print_metric("Peak memory usage", format_memory(peak / 1024 / 1024))

# Stop tracking memory
tracemalloc.stop()