Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -12,8 +12,8 @@ beta = 0.283
|
|
12 |
Bn = 10**9
|
13 |
|
14 |
G = ((alpha*A)/(beta*B))**(1/(alpha+beta))
|
15 |
-
###
|
16 |
|
|
|
17 |
def to_flops(N, D):
|
18 |
return 6 * N * D
|
19 |
|
@@ -80,7 +80,10 @@ Inference cost fraction:\t {kn*100:.2f}%"""
|
|
80 |
return text, fig
|
81 |
|
82 |
with gr.Blocks() as demo:
|
83 |
-
gr.Markdown("# Harm's law
|
|
|
|
|
|
|
84 |
N = gr.Number(value=1, label="Model size (in B parameters):")
|
85 |
D = gr.Number(value=100, label="Dataset size (in B tokens):")
|
86 |
button = gr.Button("Compute!")
|
|
|
12 |
Bn = 10**9
|
13 |
|
14 |
G = ((alpha*A)/(beta*B))**(1/(alpha+beta))
|
|
|
15 |
|
16 |
+
### FUNCTIONS
|
17 |
def to_flops(N, D):
|
18 |
return 6 * N * D
|
19 |
|
|
|
80 |
return text, fig
|
81 |
|
82 |
with gr.Blocks() as demo:
|
83 |
+
gr.Markdown("# Harm's law\
|
84 |
+
The Chinchilla scaling laws focus on optimally scaling training compute, but often we also care about inference cost.
|
85 |
+
This tool follows [Harm de Vries' blog post](https://www.harmdevries.com/post/model-size-vs-compute-overhead/) and visualizes the tradeoff between training compute and inference cost (i.e. model size).
|
86 |
+
")
|
87 |
N = gr.Number(value=1, label="Model size (in B parameters):")
|
88 |
D = gr.Number(value=100, label="Dataset size (in B tokens):")
|
89 |
button = gr.Button("Compute!")
|