simone-papicchio committed on
Commit d4aa01a · 1 Parent(s): 14052a3

fix: remove zerogpu-offload before prediction to avoid OOM on disk

Files changed (1)
  1. prediction.py +4 -0
prediction.py CHANGED
@@ -22,7 +22,10 @@ from transformers import pipeline as hf_pipeline
 import litellm
 
 from tqdm import tqdm
+import subprocess
 
+# https://huggingface.co/spaces/zero-gpu-explorers/README/discussions/132
+subprocess.run("rm -rf /data-nvme/zerogpu-offload/*", env={}, shell=True)
 
 pipeline = hf_pipeline(
     "text-generation",
@@ -120,6 +123,7 @@ class ModelPrediction:
         # https://huggingface.co/docs/inference-endpoints/en/pricing?utm_source=chatgpt.com
         cost_per_second=0.001
         response = outputs[0]["generated_text"][-1]['content']
+        print(response)
         return {
             "response": response,
             "cost": elapsed_time * cost_per_second