simone-papicchio committed on
Commit d4aa01a · 1 Parent(s): 14052a3

fix: remove zerogpu-offload before prediction to avoid OOM on disk

Files changed (1)
  1. prediction.py +4 -0
prediction.py CHANGED
@@ -22,7 +22,10 @@ from transformers import pipeline as hf_pipeline
 import litellm
 
 from tqdm import tqdm
+import subprocess
 
+# https://huggingface.co/spaces/zero-gpu-explorers/README/discussions/132
+subprocess.run("rm -rf /data-nvme/zerogpu-offload/*", env={}, shell=True)
 
 pipeline = hf_pipeline(
     "text-generation",
@@ -120,6 +123,7 @@ class ModelPrediction:
         # https://huggingface.co/docs/inference-endpoints/en/pricing?utm_source=chatgpt.com
         cost_per_second=0.001
         response = outputs[0]["generated_text"][-1]['content']
+        print(response)
         return {
             "response": response,
             "cost": elapsed_time * cost_per_second