Spaces:
Sleeping
Sleeping
Commit
·
6b75ebd
1
Parent(s):
e993e1b
feat: add cost for llama 8b
Browse files- prediction.py +12 -1
prediction.py
CHANGED
@@ -108,12 +108,23 @@ class ModelPrediction:
|
|
108 |
|
109 |
@spaces.GPU
|
110 |
def predict_with_hf(self, prompt, model_name): # -> dict[str, Any | float]:
|
|
|
|
|
111 |
outputs = pipeline(
|
112 |
[{"role": "user", "content": prompt}],
|
113 |
max_new_tokens=256,
|
114 |
)
|
|
|
|
|
|
|
|
|
|
|
115 |
response = outputs[0]["generated_text"][-1]
|
116 |
-
|
|
|
|
|
|
|
|
|
117 |
|
118 |
def _init_model_prediction(self, model_name):
|
119 |
predict_fun = self.predict_with_api
|
|
|
108 |
|
109 |
@spaces.GPU
|
110 |
def predict_with_hf(self, prompt, model_name): # -> dict[str, Any | float]:
|
111 |
+
|
112 |
+
start_time = time.time()
|
113 |
outputs = pipeline(
|
114 |
[{"role": "user", "content": prompt}],
|
115 |
max_new_tokens=256,
|
116 |
)
|
117 |
+
end_time = time.time()
|
118 |
+
elapsed_time = end_time - start_time
|
119 |
+
# HF Inference Endpoint pricing: $3.60 per hour -> $0.001 per second
|
120 |
+
# Source: https://huggingface.co/docs/inference-endpoints/en/pricing
|
121 |
+
cost_per_second=0.001
|
122 |
response = outputs[0]["generated_text"][-1]
|
123 |
+
print(response)
|
124 |
+
return {
|
125 |
+
"response": response,
|
126 |
+
"cost": elapsed_time * cost_per_second
|
127 |
+
}
|
128 |
|
129 |
def _init_model_prediction(self, model_name):
|
130 |
predict_fun = self.predict_with_api
|