rlasseri committed on
Commit 2fe1675 · 1 Parent(s): 6573e96

Update models.py

Files changed (1)
  1. models.py +17 -17
models.py CHANGED
@@ -76,9 +76,9 @@ class OpenAIModelGPT4(BaseTCOModel):
         self.info = gr.Markdown("The cost per input and output tokens values are from OpenAI's [pricing web page](https://openai.com/pricing)", interactive=False, visible=False)
         self.context_length.change(define_cost_per_token, inputs=self.context_length, outputs=[self.input_tokens_cost_per_token, self.output_tokens_cost_per_token])
 
-        self.labor = gr.Number(0, visible=False,
+        self.labor = gr.Number(1000, visible=False,
                             label="($) Labor cost per month",
-                            info="This is an estimate of the labor cost of the AI engineer in charge of deploying the model",
+                            info="This is an estimate of the labor cost of the AI engineer in charge of deploying the model/maintenance",
                             interactive=True
                             )
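
This hunk only changes the default labor figure. For intuition on how a flat monthly cost like this would amortize into a per-token overhead, here is a hypothetical helper; the function and the token volume are illustration-only assumptions, not part of this commit:

```python
# Hypothetical helper (not in this commit): spread a flat monthly labor
# cost over a monthly token volume to get a per-1K-token overhead.
def labor_cost_per_1k_tokens(labor_per_month: float, tokens_per_month: int) -> float:
    return labor_per_month / tokens_per_month * 1000

# With the new $1,000/month default and, say, 10M tokens served per month,
# labor adds $0.10 per 1K tokens on top of the API price.
print(labor_cost_per_1k_tokens(1000, 10_000_000))  # 0.1
```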
 
@@ -100,7 +100,7 @@ class MistralO(BaseTCOModel):
 
     def render(self):
         def define_cost_per_token(context_length):
-            if context_length == "4K":
+            if context_length == "32K":
                 cost_per_1k_input_tokens = 0.0015
                 cost_per_1k_output_tokens = 0.002
             else:
@@ -108,23 +108,23 @@ class MistralO(BaseTCOModel):
                 cost_per_1k_output_tokens = 0.004
             return cost_per_1k_input_tokens, cost_per_1k_output_tokens
 
-        self.context_length = gr.Dropdown(choices=["4K", "16K"], value="4K", interactive=True,
+        self.context_length = gr.Dropdown(choices=["32K"], value="32K", interactive=True,
                             label="Context size",
                             visible=False, info="Number of tokens the model considers when processing text")
-        self.input_tokens_cost_per_token = gr.Number(0.0015, visible=False,
+        self.input_tokens_cost_per_token = gr.Number(0.0025, visible=False,
                             label="($) Price/1K input prompt tokens",
                             interactive=False
                             )
-        self.output_tokens_cost_per_token = gr.Number(0.002, visible=False,
+        self.output_tokens_cost_per_token = gr.Number(0.0075, visible=False,
                             label="($) Price/1K output prompt tokens",
                             interactive=False
                             )
-        self.info = gr.Markdown("The cost per input and output tokens values are from OpenAI's [pricing web page](https://openai.com/pricing)", interactive=False, visible=False)
+        self.info = gr.Markdown("The cost per input and output tokens values are from the Mistral API", interactive=False, visible=False)
         self.context_length.change(define_cost_per_token, inputs=self.context_length, outputs=[self.input_tokens_cost_per_token, self.output_tokens_cost_per_token])
 
-        self.labor = gr.Number(0, visible=False,
+        self.labor = gr.Number(1000, visible=False,
                             label="($) Labor cost per month",
-                            info="This is an estimate of the labor cost of the AI engineer in charge of deploying the model",
+                            info="This is an estimate of the labor cost of the AI engineer in charge of deploying the model/maintenance",
                             interactive=True
                             )
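
A quick sanity check on the new Mistral prices. Note that the `"32K"` branch of `define_cost_per_token` still returns the older 0.0015/0.002 figures, so the example below uses the `gr.Number` defaults (0.0025 and 0.0075) that actually render on load; the request sizes are made up for illustration:

```python
# Worked example with the prices introduced in this commit:
# $0.0025 per 1K input tokens, $0.0075 per 1K output tokens.
input_price_per_1k = 0.0025
output_price_per_1k = 0.0075

# Hypothetical request: 1,500 prompt tokens in, 500 completion tokens out.
cost = 1500 / 1000 * input_price_per_1k + 500 / 1000 * output_price_per_1k
print(f"${cost:.5f}")  # $0.00750
```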
 
@@ -239,7 +239,7 @@ class DIYLlama2Model(BaseTCOModel):
 class DIYLlama2Model(BaseTCOModel):
 
     def __init__(self):
-        self.set_name("(Deploy yourself) Llama 2/Mistral (and 7B variant)")
+        self.set_name("(Deploy yourself) Llama 2/Mistral (and 7B or 13B variants)")
         self.set_latency("6s")
         super().__init__()
 
@@ -254,11 +254,11 @@ class DIYLlama2Model(BaseTCOModel):
         self.info = gr.Markdown("The cost per input and output tokens values below are from [these benchmark results](https://www.cursor.so/blog/llama-inference#user-content-fn-llama-paper) that were obtained using the following initial configurations.",
                             interactive=False,
                             visible=False)
-        self.vm = gr.Textbox(value="2x A100 80GB NVLINK",
+        self.vm = gr.Textbox(value="2x A6000",
                             visible=False,
                             label="Instance of VM with GPU",
                             )
-        self.vm_cost_per_hour = gr.Number(4.42, label="Instance cost ($) per hour",
+        self.vm_cost_per_hour = gr.Number(2.58, label="Instance cost ($) per hour",
                             interactive=False, visible=False)
         self.info_vm = gr.Markdown("This price above is from [CoreWeave's pricing web page](https://www.coreweave.com/gpu-cloud-pricing)", interactive=False, visible=False)
         self.maxed_out = gr.Slider(minimum=1, maximum=100, value=65, step=1, label="Maxed out", info="Estimated average percentage of total GPU memory that is used. The instantaneous value can go from very high when many users are using the service to very low when no one does.", visible=False)
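
Swapping 2x A100 80GB ($4.42/h) for 2x A6000 ($2.58/h) roughly halves the fixed compute bill. For scale, the hourly rate converts to a monthly figure as follows; 24/7 uptime and a 30-day month are assumptions for illustration:

```python
# Rough monthly bill for keeping the instance up around the clock.
vm_cost_per_hour = 2.58  # 2x A6000, per the CoreWeave pricing cited above
monthly_cost = vm_cost_per_hour * 24 * 30
print(f"${monthly_cost:,.2f}/month")  # $1,857.60/month
```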
@@ -271,19 +271,19 @@ class DIYLlama2Model(BaseTCOModel):
     $TS_{max}$ = Tokens per second when the GPU is maxed out at 100%, <br>
     $MO$ = Maxed Out, <br>
     """, interactive=False, visible=False)
-        self.input_tokens_cost_per_token = gr.Number(0.00052, visible=False,
+        self.input_tokens_cost_per_token = gr.Number(0.00032, visible=False,
                             label="($) Price/1K input prompt tokens",
                             interactive=False
                             )
-        self.output_tokens_cost_per_token = gr.Number(0.06656, visible=False,
+        self.output_tokens_cost_per_token = gr.Number(0.02656, visible=False,
                             label="($) Price/1K output prompt tokens",
                             interactive=False
                             )
         self.maxed_out.change(on_maxed_out_change, inputs=[self.maxed_out, self.input_tokens_cost_per_token, self.output_tokens_cost_per_token], outputs=[self.input_tokens_cost_per_token, self.output_tokens_cost_per_token])
 
-        self.labor = gr.Number(5000, visible=False,
+        self.labor = gr.Number(3000, visible=False,
                             label="($) Labor cost per month",
-                            info="This is an estimate of the labor cost of the AI engineer in charge of deploying the model",
+                            info="This is an estimate of the labor cost of the AI engineer in charge of deploying the model/maintenance",
                             interactive=True
                             )
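
The markdown above defines $TS_{max}$ (tokens/second at 100% utilization) and $MO$ (maxed out), which suggests a per-token cost on the order of $VM_{cost/h} / (3600 \cdot TS_{max} \cdot MO)$, i.e. the hourly bill spread over the tokens actually served. The `on_maxed_out_change` handler wired to the slider is not shown in this diff; a sketch of what it might look like under that reading, with baselines taken from this commit's defaults (the real handler may differ):

```python
# Sketch only (assumption, not from this commit): per-token cost scales
# inversely with average utilization, because the same hourly bill is
# spread over fewer served tokens when the GPU sits idle more often.
BASE_MO = 65            # matches the slider default above
BASE_INPUT = 0.00032    # $/1K input tokens at 65% maxed out (this commit)
BASE_OUTPUT = 0.02656   # $/1K output tokens at 65% maxed out (this commit)

def on_maxed_out_change(maxed_out, input_cost, output_cost):
    # The current costs are passed in by the .change() wiring above, but
    # recomputing from a fixed baseline avoids compounding scale errors.
    scale = BASE_MO / maxed_out
    return BASE_INPUT * scale, BASE_OUTPUT * scale

# Example: at 100% utilization the per-token prices drop to 65% of baseline.
print(on_maxed_out_change(100, 0.00032, 0.02656))  # (0.000208, 0.017264)
```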
 
 