Spaces:

not-lain
/

gpu-utils

Paused

not-lain committed on Feb 14

Commit

767912f

2 Parent(s): 3aa0053 7b04047

Merge branch 'main' of https://huggingface.co/spaces/not-lain/gpu-utils

Files changed (2) hide show

README.md CHANGED Viewed

@@ -4,9 +4,9 @@ emoji: 🏃
 colorFrom: red
 colorTo: purple
 sdk: gradio
-sdk_version: 5.8.0
 app_file: app.py
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 colorFrom: red
 colorTo: purple
 sdk: gradio
+sdk_version: 5.14.0
 app_file: app.py
 pinned: false
 ---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py CHANGED Viewed

@@ -20,6 +20,12 @@ def float32_high_matmul_precision():
     finally:
         torch.set_float32_matmul_precision("highest")
 pipe = FluxFillPipeline.from_pretrained(
     "black-forest-labs/FLUX.1-Fill-dev", torch_dtype=torch.bfloat16
@@ -135,15 +141,16 @@ def rmbg(image=None, url=None):
 def mask_generation(image=None, d=None):
     d = eval(d)  # convert this to dictionary
-    predictor = SAM2ImagePredictor.from_pretrained("facebook/sam2.1-hiera-large")
-    predictor.set_image(image)
-    input_point = np.array(d["input_points"])
-    input_label = np.array(d["input_labels"])
-    masks, scores, logits = predictor.predict(
-        point_coords=input_point,
-        point_labels=input_label,
-        multimask_output=True,
-    )
     sorted_ind = np.argsort(scores)[::-1]
     masks = masks[sorted_ind]
     scores = scores[sorted_ind]
@@ -165,7 +172,7 @@ def erase(image=None, mask=None):
     return simple_lama(image, mask)
-@spaces.GPU
 def main(*args):
     api_num = args[0]
     args = args[1:]

     finally:
         torch.set_float32_matmul_precision("highest")
+# enter bfloat16 CUDA autocast at import time and leave it on (the context is never exited)
+torch.autocast("cuda", dtype=torch.bfloat16).__enter__()
+# turn on TF32 (TensorFloat-32) for GPUs with compute capability >= 8, i.e. Ampere and newer (https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices)
+if torch.cuda.get_device_properties(0).major >= 8:
+    torch.backends.cuda.matmul.allow_tf32 = True
+    torch.backends.cudnn.allow_tf32 = True
 pipe = FluxFillPipeline.from_pretrained(
     "black-forest-labs/FLUX.1-Fill-dev", torch_dtype=torch.bfloat16
 def mask_generation(image=None, d=None):
     d = eval(d)  # convert this to dictionary
+    with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
+        predictor = SAM2ImagePredictor.from_pretrained("facebook/sam2.1-hiera-large")
+        predictor.set_image(image)
+        input_point = np.array(d["input_points"])
+        input_label = np.array(d["input_labels"])
+        masks, scores, logits = predictor.predict(
+            point_coords=input_point,
+            point_labels=input_label,
+            multimask_output=True,
+        )
     sorted_ind = np.argsort(scores)[::-1]
     masks = masks[sorted_ind]
     scores = scores[sorted_ind]
     return simple_lama(image, mask)
+@spaces.GPU(duration=120)
 def main(*args):
     api_num = args[0]
     args = args[1:]