rlasseri committed on
Commit
0333948
·
1 Parent(s): 631d7c5

Update models.py

Browse files
Files changed (1) hide show
  1. models.py +20 -20
models.py CHANGED
@@ -65,19 +65,19 @@ class OpenAIModelGPT4(BaseTCOModel):
65
  self.context_length = gr.Dropdown(["128K"], value="128K", interactive=True,
66
  label="Context size",
67
  visible=False, info="Number of tokens the model considers when processing text")
68
- self.input_tokens_cost_per_token = gr.Number(0.01, visible=False,
69
- label="($) Price/1K input prompt tokens",
70
  interactive=False
71
  )
72
- self.output_tokens_cost_per_token = gr.Number(0.03, visible=False,
73
- label="($) Price/1K output prompt tokens",
74
  interactive=False
75
  )
76
  self.info = gr.Markdown("The cost per input and output tokens values are from OpenAI's [pricing web page](https://openai.com/pricing)", interactive=False, visible=False)
77
  self.context_length.change(define_cost_per_token, inputs=self.context_length, outputs=[self.input_tokens_cost_per_token, self.output_tokens_cost_per_token])
78
 
79
- self.labor = gr.Number(1000, visible=False,
80
- label="($) Labor cost per month",
81
  info="This is an estimate of the labor cost of the AI engineer in charge of deploying the model§/maitenance",
82
  interactive=True
83
  )
@@ -112,17 +112,17 @@ class MistralO(BaseTCOModel):
112
  label="Context size",
113
  visible=False, info="Number of tokens the model considers when processing text")
114
  self.input_tokens_cost_per_token = gr.Number(0.0025, visible=False,
115
- label="($) Price/1K input prompt tokens",
116
  interactive=False
117
  )
118
  self.output_tokens_cost_per_token = gr.Number(0.0075, visible=False,
119
- label="($) Price/1K output prompt tokens",
120
  interactive=False
121
  )
122
  self.info = gr.Markdown("The cost per input and output tokens values are from Mistral API", interactive=False, visible=False)
123
  self.context_length.change(define_cost_per_token, inputs=self.context_length, outputs=[self.input_tokens_cost_per_token, self.output_tokens_cost_per_token])
124
 
125
- self.labor = gr.Number(1000, visible=False,
126
  label="($) Labor cost per month",
127
  info="This is an estimate of the labor cost of the AI engineer in charge of deploying the model/maitenance",
128
  interactive=True
@@ -201,7 +201,7 @@ class DIYLlama2Model70(BaseTCOModel):
201
  visible=False,
202
  label="Instance of VM with GPU",
203
  )
204
- self.vm_cost_per_hour = gr.Number(4.42, label="Instance cost ($) per hour",
205
  interactive=False, visible=False)
206
  self.info_vm = gr.Markdown("This price above is from [CoreWeave's pricing web page](https://www.coreweave.com/gpu-cloud-pricing)", interactive=False, visible=False)
207
  self.maxed_out = gr.Slider(minimum=1, maximum=100, value=65, step=1, label="Maxed out", info="Estimated average percentage of total GPU memory that is used. The instantaneous value can go from very high when many users are using the service to very low when no one does.", visible=False)
@@ -214,19 +214,19 @@ class DIYLlama2Model70(BaseTCOModel):
214
  $TS_{max}$ = Tokens per second when the GPU is maxed out at 100%, <br>
215
  $MO$ = Maxed Out, <br>
216
  """, interactive=False, visible=False)
217
- self.input_tokens_cost_per_token = gr.Number(0.00052, visible=False,
218
- label="($) Price/1K input prompt tokens",
219
  interactive=False
220
  )
221
- self.output_tokens_cost_per_token = gr.Number(0.06656, visible=False,
222
- label="($) Price/1K output prompt tokens",
223
  interactive=False
224
  )
225
  self.maxed_out.change(on_maxed_out_change, inputs=[self.maxed_out, self.input_tokens_cost_per_token, self.output_tokens_cost_per_token], outputs=[self.input_tokens_cost_per_token, self.output_tokens_cost_per_token])
226
 
227
  self.labor = gr.Number(5000, visible=False,
228
- label="($) Labor cost per month",
229
- info="This is an estimate of the labor cost of the AI engineer in charge of deploying the model",
230
  interactive=True
231
  )
232
 
@@ -258,7 +258,7 @@ class DIYLlama2Model(BaseTCOModel):
258
  visible=False,
259
  label="Instance of VM with GPU",
260
  )
261
- self.vm_cost_per_hour = gr.Number(2.58, label="Instance cost ($) per hour",
262
  interactive=False, visible=False)
263
  self.info_vm = gr.Markdown("This price above is from [CoreWeave's pricing web page](https://www.coreweave.com/gpu-cloud-pricing)", interactive=False, visible=False)
264
  self.maxed_out = gr.Slider(minimum=1, maximum=100, value=65, step=1, label="Maxed out", info="Estimated average percentage of total GPU memory that is used. The instantaneous value can go from very high when many users are using the service to very low when no one does.", visible=False)
@@ -271,17 +271,17 @@ class DIYLlama2Model(BaseTCOModel):
271
  $TS_{max}$ = Tokens per second when the GPU is maxed out at 100%, <br>
272
  $MO$ = Maxed Out, <br>
273
  """, interactive=False, visible=False)
274
- self.input_tokens_cost_per_token = gr.Number(0.00032, visible=False,
275
  label="($) Price/1K input prompt tokens",
276
  interactive=False
277
  )
278
- self.output_tokens_cost_per_token = gr.Number(0.02656, visible=False,
279
  label="($) Price/1K output prompt tokens",
280
  interactive=False
281
  )
282
  self.maxed_out.change(on_maxed_out_change, inputs=[self.maxed_out, self.input_tokens_cost_per_token, self.output_tokens_cost_per_token], outputs=[self.input_tokens_cost_per_token, self.output_tokens_cost_per_token])
283
 
284
- self.labor = gr.Number(3000, visible=False,
285
  label="(€) Labor cost per month",
286
  info="This is an estimate of the labor cost of the AI engineer in charge of deploying the model/Maitenance",
287
  interactive=True
 
65
  self.context_length = gr.Dropdown(["128K"], value="128K", interactive=True,
66
  label="Context size",
67
  visible=False, info="Number of tokens the model considers when processing text")
68
+ self.input_tokens_cost_per_token = gr.Number(0.0095, visible=False,
69
+ label="() Price/1K input prompt tokens",
70
  interactive=False
71
  )
72
+ self.output_tokens_cost_per_token = gr.Number(0.028, visible=False,
73
+ label="() Price/1K output prompt tokens",
74
  interactive=False
75
  )
76
  self.info = gr.Markdown("The cost per input and output tokens values are from OpenAI's [pricing web page](https://openai.com/pricing)", interactive=False, visible=False)
77
  self.context_length.change(define_cost_per_token, inputs=self.context_length, outputs=[self.input_tokens_cost_per_token, self.output_tokens_cost_per_token])
78
 
79
+ self.labor = gr.Number(2000, visible=False,
80
+ label="() Labor cost per month",
81
  info="This is an estimate of the labor cost of the AI engineer in charge of deploying the model§/maitenance",
82
  interactive=True
83
  )
 
112
  label="Context size",
113
  visible=False, info="Number of tokens the model considers when processing text")
114
  self.input_tokens_cost_per_token = gr.Number(0.0025, visible=False,
115
+ label="() Price/1K input prompt tokens",
116
  interactive=False
117
  )
118
  self.output_tokens_cost_per_token = gr.Number(0.0075, visible=False,
119
+ label="() Price/1K output prompt tokens",
120
  interactive=False
121
  )
122
  self.info = gr.Markdown("The cost per input and output tokens values are from Mistral API", interactive=False, visible=False)
123
  self.context_length.change(define_cost_per_token, inputs=self.context_length, outputs=[self.input_tokens_cost_per_token, self.output_tokens_cost_per_token])
124
 
125
+ self.labor = gr.Number(2000, visible=False,
126
  label="($) Labor cost per month",
127
  info="This is an estimate of the labor cost of the AI engineer in charge of deploying the model/maitenance",
128
  interactive=True
 
201
  visible=False,
202
  label="Instance of VM with GPU",
203
  )
204
+ self.vm_cost_per_hour = gr.Number(4.05, label="Instance cost () per hour",
205
  interactive=False, visible=False)
206
  self.info_vm = gr.Markdown("This price above is from [CoreWeave's pricing web page](https://www.coreweave.com/gpu-cloud-pricing)", interactive=False, visible=False)
207
  self.maxed_out = gr.Slider(minimum=1, maximum=100, value=65, step=1, label="Maxed out", info="Estimated average percentage of total GPU memory that is used. The instantaneous value can go from very high when many users are using the service to very low when no one does.", visible=False)
 
214
  $TS_{max}$ = Tokens per second when the GPU is maxed out at 100%, <br>
215
  $MO$ = Maxed Out, <br>
216
  """, interactive=False, visible=False)
217
+ self.input_tokens_cost_per_token = gr.Number(0.00046, visible=False,
218
+ label="() Price/1K input prompt tokens",
219
  interactive=False
220
  )
221
+ self.output_tokens_cost_per_token = gr.Number(0.061, visible=False,
222
+ label="() Price/1K output prompt tokens",
223
  interactive=False
224
  )
225
  self.maxed_out.change(on_maxed_out_change, inputs=[self.maxed_out, self.input_tokens_cost_per_token, self.output_tokens_cost_per_token], outputs=[self.input_tokens_cost_per_token, self.output_tokens_cost_per_token])
226
 
227
  self.labor = gr.Number(5000, visible=False,
228
+ label="() Labor cost per month",
229
+ info="This is an estimate of the labor cost of the AI engineer in charge of deploying the model/maitenance",
230
  interactive=True
231
  )
232
 
 
258
  visible=False,
259
  label="Instance of VM with GPU",
260
  )
261
+ self.vm_cost_per_hour = gr.Number(2.37, label="Instance cost () per hour",
262
  interactive=False, visible=False)
263
  self.info_vm = gr.Markdown("This price above is from [CoreWeave's pricing web page](https://www.coreweave.com/gpu-cloud-pricing)", interactive=False, visible=False)
264
  self.maxed_out = gr.Slider(minimum=1, maximum=100, value=65, step=1, label="Maxed out", info="Estimated average percentage of total GPU memory that is used. The instantaneous value can go from very high when many users are using the service to very low when no one does.", visible=False)
 
271
  $TS_{max}$ = Tokens per second when the GPU is maxed out at 100%, <br>
272
  $MO$ = Maxed Out, <br>
273
  """, interactive=False, visible=False)
274
+ self.input_tokens_cost_per_token = gr.Number(0.00029, visible=False,
275
  label="($) Price/1K input prompt tokens",
276
  interactive=False
277
  )
278
+ self.output_tokens_cost_per_token = gr.Number(0.0024, visible=False,
279
  label="($) Price/1K output prompt tokens",
280
  interactive=False
281
  )
282
  self.maxed_out.change(on_maxed_out_change, inputs=[self.maxed_out, self.input_tokens_cost_per_token, self.output_tokens_cost_per_token], outputs=[self.input_tokens_cost_per_token, self.output_tokens_cost_per_token])
283
 
284
+ self.labor = gr.Number(5000, visible=False,
285
  label="(€) Labor cost per month",
286
  info="This is an estimate of the labor cost of the AI engineer in charge of deploying the model/Maitenance",
287
  interactive=True