Spaces:
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -33,21 +33,28 @@ def _gpu_duration_gallery(images: List[Image.Image], *_, **__) -> int:
|
|
| 33 |
return min(600, 45 * n + 60)
|
| 34 |
|
| 35 |
def _load(model_id: str):
|
| 36 |
-
#
|
| 37 |
-
processor = AutoImageProcessor.from_pretrained(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
model = AutoModel.from_pretrained(
|
| 39 |
model_id,
|
| 40 |
torch_dtype=torch.float16,
|
| 41 |
low_cpu_mem_usage=True,
|
| 42 |
token=HF_TOKEN if HF_TOKEN else None,
|
| 43 |
-
)
|
| 44 |
-
model.to("cuda").eval()
|
| 45 |
return processor, model
|
| 46 |
|
| 47 |
def _extract_core(image: Image.Image, model_id: str, pooling: str, want_overlay: bool):
|
| 48 |
t0 = time.time()
|
| 49 |
processor, model = _load(model_id)
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
with torch.cuda.amp.autocast(dtype=torch.float16), torch.inference_mode():
|
| 52 |
out = model(**inputs)
|
| 53 |
|
|
|
|
| 33 |
return min(600, 45 * n + 60)
|
| 34 |
|
| 35 |
def _load(model_id: str):
    """Fetch the image processor and model for *model_id* and stage the model on CUDA.

    Returns:
        (processor, model): the fast image processor and the fp16 model,
        already moved to the "cuda" device and switched to eval mode.
    """
    # NOTE(review): HF_TOKEN is assumed to be a module-level string that may be
    # empty/falsy — confirm; an empty token is normalized to None so anonymous
    # downloads still work.  `token=` is the current Transformers spelling.
    auth_token = HF_TOKEN if HF_TOKEN else None

    processor = AutoImageProcessor.from_pretrained(
        model_id,
        use_fast=True,
        token=auth_token,
    )

    # Half precision + low_cpu_mem_usage keeps the host-RAM and VRAM footprint
    # small while the checkpoint is materialized.
    model = AutoModel.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        low_cpu_mem_usage=True,
        token=auth_token,
    )
    model = model.to("cuda").eval()

    return processor, model
|
| 49 |
|
| 50 |
def _extract_core(image: Image.Image, model_id: str, pooling: str, want_overlay: bool):
|
| 51 |
t0 = time.time()
|
| 52 |
processor, model = _load(model_id)
|
| 53 |
+
|
| 54 |
+
# safer move to cuda for BatchFeature
|
| 55 |
+
inputs = processor(images=image, return_tensors="pt")
|
| 56 |
+
inputs = {k: v.to("cuda") for k, v in inputs.items()}
|
| 57 |
+
|
| 58 |
with torch.cuda.amp.autocast(dtype=torch.float16), torch.inference_mode():
|
| 59 |
out = model(**inputs)
|
| 60 |
|