lvwerra HF staff committed on
Commit
1c2f6af
·
verified ·
1 Parent(s): 41efdb8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -0
app.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import matplotlib.pyplot as plt
3
+ import matplotlib.patches as patches
4
+ import math
5
+
6
# Default matplotlib colour cycle for the whole app. The "C<n>" colour names
# used when drawing GPUs are looked up in this cycle.
_CLUSTER_PALETTE = [
    "#FF6600",
    "#FFBE00",
    "#496767",
    "#87A19E",
    "#FF9200",
    "#0F3538",
    "#F8E08E",
    "#0F2021",
    "#FAFAF0",
]
plt.rcParams['axes.prop_cycle'] = plt.cycler(color=_CLUSTER_PALETTE)
7
+
8
def visualize_cluster(nodes, tp, pp, node_spacing=0.5, gpu_spacing=0.1, nodes_per_row=8):
    """Draw a cluster of 8-GPU nodes coloured by its TP/PP/DP layout.

    Each node is drawn as a grid of 4 rows x 2 columns of unit squares (one per
    GPU). GPUs are numbered node by node, row by row, and assigned to model
    instances of ``tp * pp`` consecutive GPUs. Every model instance gets its
    own colour from the matplotlib colour cycle; within an instance, each group
    of ``tp`` consecutive GPUs forms one pipeline stage, shown by the opacity
    of the square and (when ``pp > 1``) by a stage number. GPUs left over once
    no further complete model instance fits are drawn in black.

    Args:
        nodes: Number of compute nodes to draw (>= 1).
        tp: Tensor-parallel degree (>= 1).
        pp: Pipeline-parallel degree (>= 1).
        node_spacing: Gap between neighbouring nodes, in GPU-square units.
        gpu_spacing: Gap between neighbouring GPUs inside a node.
        nodes_per_row: Maximum number of nodes per row of the drawing (>= 1).

    Returns:
        A ``(markdown, figure)`` tuple: a markdown summary of the resulting
        configuration and the matplotlib figure.

    Raises:
        ValueError: If ``nodes``, ``tp``, ``pp`` or ``nodes_per_row`` is not
            convertible to an integer or is smaller than 1.
    """
    # UI number widgets may hand over floats (or None for an empty field);
    # range() and the "C<n>" colour names below need real integers.
    try:
        nodes, tp, pp, nodes_per_row = (int(v) for v in (nodes, tp, pp, nodes_per_row))
    except (TypeError, ValueError) as err:
        raise ValueError("nodes, tp, pp and nodes_per_row must be integers") from err
    if min(nodes, tp, pp, nodes_per_row) < 1:
        raise ValueError("nodes, tp, pp and nodes_per_row must all be at least 1")

    gpus_per_row = 2
    gpus_per_column = 4
    gpus_per_node = gpus_per_row * gpus_per_column

    nnodes_x = min(nodes, nodes_per_row)
    nnodes_y = math.ceil(nodes / nodes_per_row)

    # Outer size of one node: its GPU squares plus the gaps between them.
    node_width = gpus_per_row + (gpus_per_row - 1) * gpu_spacing
    node_height = gpus_per_column + (gpus_per_column - 1) * gpu_spacing

    fig, ax = plt.subplots(figsize=(2 * nnodes_x, 2 * nnodes_y), dpi=200)

    # Leave a margin of one node_spacing around the whole grid of nodes.
    ax.set_xlim(-node_spacing, nnodes_x * node_width + (nnodes_x - 1) * node_spacing + node_spacing)
    ax.set_ylim(-node_spacing, nnodes_y * node_height + (nnodes_y - 1) * node_spacing + node_spacing)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.grid(False)
    ax.set_aspect('equal', 'box')
    # Flip the y axis so the first node ends up in the top-left corner.
    ax.invert_yaxis()

    model_instance = tp * pp
    dp = (nodes * gpus_per_node) // model_instance
    # GPUs with an index at or beyond this cannot host a complete model instance.
    max_gpu_usage = dp * model_instance
    gpu_i = 0

    # Draw nodes with spacing
    for node in range(nodes):
        node_x = (node % nodes_per_row) * (node_width + node_spacing)
        node_y = (node // nodes_per_row) * (node_height + node_spacing)

        # Draw GPUs within the node with spacing (4 rows x 2 columns)
        for j in range(gpus_per_column):
            for i in range(gpus_per_row):
                model_instance_group = gpu_i // model_instance
                tp_instance_group = (gpu_i % model_instance) // tp

                x = node_x + i * (1 + gpu_spacing)
                y = node_y + j * (1 + gpu_spacing)

                if gpu_i >= max_gpu_usage:
                    # Unused GPU: solid black.
                    color = "black"
                    alpha = 1
                else:
                    color = f'C{model_instance_group}'
                    # Later pipeline stages are more opaque; equals 1 when pp == 1.
                    alpha = (1 + tp_instance_group) / pp

                rect = patches.Rectangle((x, y), 1, 1, linewidth=1, edgecolor='black', facecolor=color, alpha=alpha)
                ax.add_patch(rect)
                if pp > 1:
                    ax.annotate(f"{tp_instance_group+1}", (x + 0.5, y + 0.5), color='black', weight='bold', fontsize=9, ha='center', va='center')
                gpu_i += 1

    # Lay out this figure explicitly rather than pyplot's global "current" one.
    fig.tight_layout()
    # Drop the figure from pyplot's registry so repeated calls do not pile up
    # open figures; the returned Figure object can still be rendered or saved.
    plt.close(fig)

    md = (
        "## Resulting configuration\n\n"
        f"TP={tp}, PP={pp}, DP={dp}, where one model instance requires {model_instance} GPUs and {nodes * gpus_per_node - max_gpu_usage} GPUs (in black) cannot be utilized in this configuration. Numbers and color shades indicate pipeline stage if PP>1."
    )

    return md, fig
67
+
68
def _render_cluster(tp_value, pp_value, nodes_value, nodes_per_row_value):
    """Map the UI fields (TP, PP, nodes, nodes per row) onto visualize_cluster.

    Gradio passes input values positionally in the order of the ``inputs``
    list, which does not match ``visualize_cluster(nodes, tp, pp,
    node_spacing, gpu_spacing, nodes_per_row)``; this adapter routes each
    field to the parameter it is labelled for.
    """
    return visualize_cluster(
        nodes_value,
        tp_value,
        pp_value,
        nodes_per_row=nodes_per_row_value,
    )


with gr.Blocks() as demo:
    gr.Markdown("# 3D Parallelism")

    gr.Markdown("## 3D Configuration")
    with gr.Row():
        # precision=0 makes the widgets deliver integers instead of floats.
        tp = gr.Number(value=4, precision=0, label="Tensor Parallel")
        pp = gr.Number(value=2, precision=0, label="Pipeline Parallel")
        nodes = gr.Number(value=8, precision=0, label="Number of Compute Nodes")
        nodes_per_row = gr.Number(value=8, precision=0, label="Number Nodes per Row")

    button = gr.Button("Compute!")

    # NOTE(review): the original nesting was lost in extraction; the summary
    # and the plot are assumed to be stacked in one column - confirm.
    with gr.Row():
        with gr.Column():
            md = gr.Markdown("## Resulting configuration:")
            plot = gr.Plot(value=plt)

    cluster_inputs = [tp, pp, nodes, nodes_per_row]
    cluster_outputs = [md, plot]

    # Render on button press and once when the page first loads.
    button.click(fn=_render_cluster, inputs=cluster_inputs, outputs=cluster_outputs)
    demo.load(fn=_render_cluster, inputs=cluster_inputs, outputs=cluster_outputs)

demo.launch()