ClemSummer committed on
Commit
86103f1
·
1 Parent(s): bbf7f29

Fix: preload bert tokenizer during Docker build

Browse files
Files changed (2) hide show
  1. Dockerfile +2 -0
  2. vit_captioning/generate.py +2 -1
Dockerfile CHANGED
@@ -20,4 +20,6 @@ RUN pip install -r requirements.txt
20
  RUN mkdir -p /models/clip && \
21
  python3 -c "from transformers import CLIPModel; CLIPModel.from_pretrained('openai/clip-vit-base-patch32').save_pretrained('/models/clip')"
22
 
 
 
23
  CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
 
20
  RUN mkdir -p /models/clip && \
21
  python3 -c "from transformers import CLIPModel; CLIPModel.from_pretrained('openai/clip-vit-base-patch32').save_pretrained('/models/clip')"
22
 
23
+ RUN python3 -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('bert-base-uncased').save_pretrained('/models/bert-tokenizer')"
24
+
25
  CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
vit_captioning/generate.py CHANGED
@@ -26,7 +26,8 @@ class CaptionGenerator:
26
  # Load tokenizer
27
  #self.tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
28
  #HF needs all model downloads to a special read-write cache dir
29
- self.tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased', cache_dir="/tmp")
 
30
 
31
  # Select encoder, processor, output dim
32
  if model_type == "ViTEncoder":
 
26
  # Load tokenizer
27
  #self.tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
28
  #HF needs all model downloads to a special read-write cache dir
29
+ #self.tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased', cache_dir="/tmp")
30
+ self.tokenizer = AutoTokenizer.from_pretrained('/models/bert-tokenizer')
31
 
32
  # Select encoder, processor, output dim
33
  if model_type == "ViTEncoder":