Commit: e16e634
Parent: ee03864
Fix: preload CLIP model during Docker build
Files changed:
- Dockerfile (+3, -0)
- vit_captioning/generate.py (+2, -1)
- vit_captioning/models/encoder.py (+2, -1)
Dockerfile
CHANGED
@@ -10,6 +10,9 @@ RUN apt-get update && apt-get install -y wget
 WORKDIR /app
 COPY . .
 
+RUN mkdir -p /models/clip && \
+    python3 -c "from transformers import CLIPModel; CLIPModel.from_pretrained('openai/clip-vit-base-patch32').save_pretrained('/models/clip')"
+
 RUN mkdir -p vit_captioning/artifacts && \
     wget https://huggingface.co/datasets/ClemSummer/clip-checkpoints/resolve/main/CLIPEncoder_40epochs_unfreeze12.pth \
     -O vit_captioning/artifacts/CLIPEncoder_40epochs_unfreeze12.pth
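Note: CLIPModel.save_pretrained writes only the model weights and config.json, while generate.py below also loads a CLIPProcessor from /models/clip, which additionally needs the preprocessor and tokenizer files. A minimal sketch of a build-time preload that covers both follows; the standalone script name preload_clip.py is hypothetical and not part of this commit, which uses the inline python3 -c form instead.

# preload_clip.py -- hypothetical standalone equivalent of the inline python3 -c above.
# Downloads openai/clip-vit-base-patch32 once at image build time and saves it under
# /models/clip, so the running Space needs neither Hub access nor a writable cache dir.
from transformers import CLIPModel, CLIPProcessor

CLIPModel.from_pretrained("openai/clip-vit-base-patch32").save_pretrained("/models/clip")

# Also save the processor (image preprocessing + tokenizer config): generate.py loads
# CLIPProcessor from the same directory, and saving the model alone does not write
# preprocessor_config.json or the tokenizer files.
CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32").save_pretrained("/models/clip")

In the Dockerfile this script would be COPY'd into the image and run with RUN python3 preload_clip.py in place of the one-liner.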
vit_captioning/generate.py
CHANGED
@@ -40,7 +40,8 @@ class CaptionGenerator:
             self.encoder_dim = 512
             #self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
             #HF needs all model downloads to a special read-write cache dir
-            self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32", cache_dir="/tmp")
+            #self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32", cache_dir="/tmp")
+            self.processor = CLIPProcessor.from_pretrained("/models/clip")
         else:
             raise ValueError("Unknown model type")
 
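For context, a sketch of how the preloaded processor is consumed at runtime, assuming the processor files exist at /models/clip (see the note above). local_files_only=True is an optional hardening, not part of this commit, that makes the load fail fast instead of falling back to the Hub.

from PIL import Image
from transformers import CLIPProcessor

# Load the processor baked into the image at build time; no network access and no
# writable HF cache (the old cache_dir="/tmp" workaround) is required.
processor = CLIPProcessor.from_pretrained("/models/clip", local_files_only=True)

image = Image.open("example.jpg").convert("RGB")        # hypothetical input image
inputs = processor(images=image, return_tensors="pt")   # {"pixel_values": (1, 3, 224, 224)}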
vit_captioning/models/encoder.py
CHANGED
@@ -35,7 +35,8 @@ class CLIPEncoder(nn.Module):
         super(CLIPEncoder, self).__init__()
         #self.clip = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
         #HF needs all model downloads to a special read-write cache dir
-        self.clip = CLIPModel.from_pretrained("openai/clip-vit-base-patch32", cache_dir="/tmp")
+        #self.clip = CLIPModel.from_pretrained("openai/clip-vit-base-patch32", cache_dir="/tmp")
+        self.clip = CLIPModel.from_pretrained("/models/clip")
 
     def forward(self, pixel_values):
         # ✅ Directly get the pooled image features (already the final representation)
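Putting the encoder change in context, a self-contained sketch of CLIPEncoder as it stands after this commit. Only the from_pretrained path and the pooled-feature comment come from the diff; the surrounding structure and the use of get_image_features are assumptions consistent with that comment and with encoder_dim = 512 in generate.py.

import torch.nn as nn
from transformers import CLIPModel

class CLIPEncoder(nn.Module):
    # Sketch of the encoder after this commit (assumed context, not the full file).
    def __init__(self):
        super(CLIPEncoder, self).__init__()
        # Load the CLIP weights preloaded into the image at /models/clip during build.
        self.clip = CLIPModel.from_pretrained("/models/clip")

    def forward(self, pixel_values):
        # Directly get the pooled image features (already the final representation);
        # for openai/clip-vit-base-patch32 this is a (batch, 512) tensor, matching
        # encoder_dim = 512 in generate.py.
        return self.clip.get_image_features(pixel_values=pixel_values)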