sachin committed on
Commit
0699851
·
1 Parent(s): 57245bd
Files changed (2) hide show
  1. Dockerfile +3 -2
  2. download_models.py +30 -0
Dockerfile CHANGED
@@ -33,8 +33,9 @@ RUN pip install --no-cache-dir -r requirements.txt
33
  # Create a directory for pre-downloaded models
34
  RUN mkdir -p /app/models
35
 
36
- # Pre-download models using a single-line Python script
37
- RUN python -c "from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, AutoProcessor, AutoModel; from transformers import Gemma3ForConditionalGeneration; import os; models = {'llm_model': ('google/gemma-3-4b-it', Gemma3ForConditionalGeneration, AutoProcessor), 'tts_model': ('ai4bharat/IndicF5', AutoModel, None), 'asr_model': ('ai4bharat/indic-conformer-600m-multilingual', AutoModel, None), 'trans_en_indic': ('ai4bharat/indictrans2-en-indic-dist-200M', AutoModelForSeq2SeqLM, AutoTokenizer), 'trans_indic_en': ('ai4bharat/indictrans2-indic-en-dist-200M', AutoModelForSeq2SeqLM, AutoTokenizer), 'trans_indic_indic': ('ai4bharat/indictrans2-indic-indic-dist-320M', AutoModelForSeq2SeqLM, AutoTokenizer)}; for name, (model_name, model_class, processor_class) in models.items(): print(f'Downloading {model_name}...'); model = model_class.from_pretrained(model_name, trust_remote_code=True); model.save_pretrained(f'/app/models/{name}'); if processor_class: processor = processor_class.from_pretrained(model_name, trust_remote_code=True); processor.save_pretrained(f'/app/models/{name}');"
 
38
 
39
  # Copy application code
40
  COPY . .
 
33
  # Create a directory for pre-downloaded models
34
  RUN mkdir -p /app/models
35
 
36
+ # Copy and run the model download script
37
+ COPY download_models.py .
38
+ RUN python download_models.py
39
 
40
  # Copy application code
41
  COPY . .
download_models.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python3
"""Download each model listed in ``models`` and save a copy under ``save_dir``.

Every entry is loaded with ``from_pretrained`` and immediately written back
out with ``save_pretrained`` into ``<save_dir>/<key>``. When an entry names a
processor/tokenizer class, it is loaded and saved into the same directory.
"""
import os

from transformers import (
    AutoModel,
    AutoModelForSeq2SeqLM,
    AutoProcessor,
    AutoTokenizer,
    Gemma3ForConditionalGeneration,
)

# Sub-directory name -> (repo id, model class, processor class or None).
models = {
    'llm_model': ('google/gemma-3-4b-it', Gemma3ForConditionalGeneration, AutoProcessor),
    'tts_model': ('ai4bharat/IndicF5', AutoModel, None),
    'asr_model': ('ai4bharat/indic-conformer-600m-multilingual', AutoModel, None),
    'trans_en_indic': ('ai4bharat/indictrans2-en-indic-dist-200M', AutoModelForSeq2SeqLM, AutoTokenizer),
    'trans_indic_en': ('ai4bharat/indictrans2-indic-en-dist-200M', AutoModelForSeq2SeqLM, AutoTokenizer),
    'trans_indic_indic': ('ai4bharat/indictrans2-indic-indic-dist-320M', AutoModelForSeq2SeqLM, AutoTokenizer),
}

# Root directory that receives one sub-directory per entry above.
save_dir = '/app/models'
os.makedirs(save_dir, exist_ok=True)

for local_name, (repo_id, model_cls, processor_class) in models.items():
    # Model and processor share one output directory, so compute it once.
    target_dir = f'{save_dir}/{local_name}'

    print(f'Downloading {repo_id}...')
    model_cls.from_pretrained(repo_id, trust_remote_code=True).save_pretrained(target_dir)

    # Entries with None in the third slot have no processor/tokenizer to save.
    if processor_class:
        processor_class.from_pretrained(repo_id, trust_remote_code=True).save_pretrained(target_dir)

    print(f'Saved {repo_id} to {target_dir}')