Spaces:
Sleeping
Sleeping
Commit
·
a1a2a18
1
Parent(s):
d4aa01a
feat add llama8 with API
Browse files- prediction.py +4 -7
prediction.py
CHANGED
@@ -18,7 +18,7 @@ else:
|
|
18 |
return wrapper
|
19 |
|
20 |
from transformers import pipeline as hf_pipeline
|
21 |
-
|
22 |
import litellm
|
23 |
|
24 |
from tqdm import tqdm
|
@@ -109,9 +109,8 @@ class ModelPrediction:
|
|
109 |
"cost": response._hidden_params["response_cost"],
|
110 |
}
|
111 |
|
112 |
-
@spaces.GPU
|
113 |
def predict_with_hf(self, prompt, model_name): # -> dict[str, Any | float]:
|
114 |
-
|
115 |
start_time = time.time()
|
116 |
outputs = pipeline(
|
117 |
[{"role": "user", "content": prompt}],
|
@@ -120,10 +119,9 @@ class ModelPrediction:
|
|
120 |
end_time = time.time()
|
121 |
elapsed_time = end_time - start_time
|
122 |
# HF inference endpoint is billed per hour: $3.60/h -> $0.001 per second
|
123 |
-
# https://huggingface.co/docs/inference-endpoints/en/pricing?utm_source=chatgpt.com
|
124 |
cost_per_second=0.001
|
125 |
response = outputs[0]["generated_text"][-1]['content']
|
126 |
-
print(response)
|
127 |
return {
|
128 |
"response": response,
|
129 |
"cost": elapsed_time * cost_per_second
|
@@ -142,8 +140,7 @@ class ModelPrediction:
|
|
142 |
elif "DeepSeek-R1-Distill-Llama-70B" in model_name:
|
143 |
model_name = "together_ai/deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
|
144 |
elif "llama-8" in model_name:
|
145 |
-
model_name = "meta-llama/Meta-Llama-3-8B-Instruct"
|
146 |
-
predict_fun = self.predict_with_hf
|
147 |
else:
|
148 |
raise ValueError("Model forbidden")
|
149 |
|
|
|
18 |
return wrapper
|
19 |
|
20 |
from transformers import pipeline as hf_pipeline
|
21 |
+
import torch
|
22 |
import litellm
|
23 |
|
24 |
from tqdm import tqdm
|
|
|
109 |
"cost": response._hidden_params["response_cost"],
|
110 |
}
|
111 |
|
112 |
+
@spaces.GPU(duration=20)
|
113 |
def predict_with_hf(self, prompt, model_name): # -> dict[str, Any | float]:
|
|
|
114 |
start_time = time.time()
|
115 |
outputs = pipeline(
|
116 |
[{"role": "user", "content": prompt}],
|
|
|
119 |
end_time = time.time()
|
120 |
elapsed_time = end_time - start_time
|
121 |
# HF inference endpoint is billed per hour: $3.60/h -> $0.001 per second
|
|
|
122 |
cost_per_second=0.001
|
123 |
response = outputs[0]["generated_text"][-1]['content']
|
124 |
+
# print(response)
|
125 |
return {
|
126 |
"response": response,
|
127 |
"cost": elapsed_time * cost_per_second
|
|
|
140 |
elif "DeepSeek-R1-Distill-Llama-70B" in model_name:
|
141 |
model_name = "together_ai/deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
|
142 |
elif "llama-8" in model_name:
|
143 |
+
model_name = "together_ai/meta-llama/Meta-Llama-3-8B-Instruct"
|
|
|
144 |
else:
|
145 |
raise ValueError("Model forbidden")
|
146 |
|