Commit d4aa01a
Parent(s): 14052a3
fix: remove zerogpu-offload before prediction to avoid OOM on disk
prediction.py CHANGED (+4 -0)
@@ -22,7 +22,10 @@ from transformers import pipeline as hf_pipeline
 import litellm
 
 from tqdm import tqdm
+import subprocess
 
+# https://huggingface.co/spaces/zero-gpu-explorers/README/discussions/132
+subprocess.run("rm -rf /data-nvme/zerogpu-offload/*", env={}, shell=True)
 
 pipeline = hf_pipeline(
     "text-generation",
@@ -120,6 +123,7 @@ class ModelPrediction:
         # https://huggingface.co/docs/inference-endpoints/en/pricing?utm_source=chatgpt.com
         cost_per_second=0.001
         response = outputs[0]["generated_text"][-1]['content']
+        print(response)
         return {
             "response": response,
             "cost": elapsed_time * cost_per_second
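
For context on the first hunk: the ZeroGPU offload directory can fill the Space's disk between runs (see the linked discussion), so the commit clears it before the pipeline loads. The subprocess.run call needs shell=True because the * glob is expanded by the shell, and env={} gives the child an empty environment. Below is a minimal Python-native equivalent, assuming the same /data-nvme/zerogpu-offload path; the shutil/pathlib helper is an illustrative substitute, not the commit's code:

import shutil
from pathlib import Path

def clear_zerogpu_offload(cache_dir: str = "/data-nvme/zerogpu-offload") -> None:
    # Delete the *contents* of the offload cache, mirroring rm -rf <dir>/*.
    # The directory may be absent on non-ZeroGPU hardware, so check first.
    root = Path(cache_dir)
    if not root.exists():
        return
    for entry in root.iterdir():
        if entry.is_dir():
            shutil.rmtree(entry, ignore_errors=True)
        else:
            entry.unlink(missing_ok=True)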
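
The second hunk prices a generation by wall-clock time, but it does not show where elapsed_time is measured. A minimal sketch of the presumed flow, with cost_per_second taken from the diff; the model name, call shape, and perf_counter timing point are assumptions for illustration:

import time
from transformers import pipeline as hf_pipeline

COST_PER_SECOND = 0.001  # from the diff; see the linked Inference Endpoints pricing page

# Assumed model for illustration; the diff does not show which model the Space loads.
pipe = hf_pipeline("text-generation", model="Qwen/Qwen2.5-0.5B-Instruct")

messages = [{"role": "user", "content": "Say hello in one sentence."}]

start = time.perf_counter()
outputs = pipe(messages, max_new_tokens=64)
elapsed_time = time.perf_counter() - start

# Chat-format pipelines return the full message list; the last entry is the
# assistant reply, matching the indexing in the diff.
response = outputs[0]["generated_text"][-1]["content"]
print(response)

result = {"response": response, "cost": elapsed_time * COST_PER_SECOND}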