simone-papicchio committed on
Commit
a1a2a18
·
1 Parent(s): d4aa01a

feat: add llama-8 with API

Browse files
Files changed (1) hide show
  1. prediction.py +4 -7
prediction.py CHANGED
@@ -18,7 +18,7 @@ else:
18
  return wrapper
19
 
20
  from transformers import pipeline as hf_pipeline
21
-
22
  import litellm
23
 
24
  from tqdm import tqdm
@@ -109,9 +109,8 @@ class ModelPrediction:
109
  "cost": response._hidden_params["response_cost"],
110
  }
111
 
112
- @spaces.GPU
113
  def predict_with_hf(self, prompt, model_name): # -> dict[str, Any | float]:
114
-
115
  start_time = time.time()
116
  outputs = pipeline(
117
  [{"role": "user", "content": prompt}],
@@ -120,10 +119,9 @@ class ModelPrediction:
120
  end_time = time.time()
121
  elapsed_time = end_time - start_time
122
  # inference endpoint costs HF per Hour 3.6$/h -> 0.001 $ per second
123
- # https://huggingface.co/docs/inference-endpoints/en/pricing?utm_source=chatgpt.com
124
  cost_per_second=0.001
125
  response = outputs[0]["generated_text"][-1]['content']
126
- print(response)
127
  return {
128
  "response": response,
129
  "cost": elapsed_time * cost_per_second
@@ -142,8 +140,7 @@ class ModelPrediction:
142
  elif "DeepSeek-R1-Distill-Llama-70B" in model_name:
143
  model_name = "together_ai/deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
144
  elif "llama-8" in model_name:
145
- model_name = "meta-llama/Meta-Llama-3-8B-Instruct"
146
- predict_fun = self.predict_with_hf
147
  else:
148
  raise ValueError("Model forbidden")
149
 
 
18
  return wrapper
19
 
20
  from transformers import pipeline as hf_pipeline
21
+ import torch
22
  import litellm
23
 
24
  from tqdm import tqdm
 
109
  "cost": response._hidden_params["response_cost"],
110
  }
111
 
112
+ @spaces.GPU(duration=20)
113
  def predict_with_hf(self, prompt, model_name): # -> dict[str, Any | float]:
 
114
  start_time = time.time()
115
  outputs = pipeline(
116
  [{"role": "user", "content": prompt}],
 
119
  end_time = time.time()
120
  elapsed_time = end_time - start_time
121
  # inference endpoint costs HF per Hour 3.6$/h -> 0.001 $ per second
 
122
  cost_per_second=0.001
123
  response = outputs[0]["generated_text"][-1]['content']
124
+ # print(response)
125
  return {
126
  "response": response,
127
  "cost": elapsed_time * cost_per_second
 
140
  elif "DeepSeek-R1-Distill-Llama-70B" in model_name:
141
  model_name = "together_ai/deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
142
  elif "llama-8" in model_name:
143
+ model_name = "together_ai/meta-llama/Meta-Llama-3-8B-Instruct"
 
144
  else:
145
  raise ValueError("Model forbidden")
146