simone-papicchio committed on
Commit
6b75ebd
·
1 Parent(s): e993e1b

feat: add cost for llama 8b

Browse files
Files changed (1) hide show
  1. prediction.py +12 -1
prediction.py CHANGED
@@ -108,12 +108,23 @@ class ModelPrediction:
108
 
109
  @spaces.GPU
110
  def predict_with_hf(self, prompt, model_name): # -> dict[str, Any | float]:
 
 
111
  outputs = pipeline(
112
  [{"role": "user", "content": prompt}],
113
  max_new_tokens=256,
114
  )
 
 
 
 
 
115
  response = outputs[0]["generated_text"][-1]
116
- return {"response": response, "cost": 0.0}
 
 
 
 
117
 
118
  def _init_model_prediction(self, model_name):
119
  predict_fun = self.predict_with_api
 
108
 
109
  @spaces.GPU
110
  def predict_with_hf(self, prompt, model_name): # -> dict[str, Any | float]:
111
+
112
+ start_time = time.time()
113
  outputs = pipeline(
114
  [{"role": "user", "content": prompt}],
115
  max_new_tokens=256,
116
  )
117
+ end_time = time.time()
118
+ elapsed_time = end_time - start_time
119
+ # inference endpoint costs HF per Hour 3.6$/h -> 0.001 $ per second
120
+ # https://huggingface.co/docs/inference-endpoints/en/pricing?utm_source=chatgpt.com
121
+ cost_per_second=0.001
122
  response = outputs[0]["generated_text"][-1]
123
+ print(response)
124
+ return {
125
+ "response": response,
126
+ "cost": elapsed_time * cost_per_second
127
+ }
128
 
129
  def _init_model_prediction(self, model_name):
130
  predict_fun = self.predict_with_api