ClemSummer committed on
Commit
b48ecb9
·
1 Parent(s): e497915

fixing qwen model loading from HF dataset

Browse files
Files changed (2) hide show
  1. Dockerfile +11 -5
  2. ppo_logic.py +3 -2
Dockerfile CHANGED
@@ -31,12 +31,18 @@ RUN mkdir -p /models/cbow && \
31
  # RUN mkdir -p /models/qwen && \
32
  # python3 -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('Qwen/Qwen3-0.6B-Base').save_pretrained('/models/qwen')"
33
  # RUN python3 -c "from transformers import AutoModelForCausalLM; AutoModelForCausalLM.from_pretrained('Qwen/Qwen3-0.6B-Base').save_pretrained('/models/qwen')"
34
- RUN mkdir -p /models/qwen && \
35
- python3 -c "from transformers import AutoTokenizer; \
36
- AutoTokenizer.from_pretrained('ClemSummer/qwen-model-cache', trust_remote_code=True).save_pretrained('/models/qwen')"
37
 
38
- RUN python3 -c "from transformers import AutoModelForCausalLM; \
39
- AutoModelForCausalLM.from_pretrained('ClemSummer/qwen-model-cache', trust_remote_code=True).save_pretrained('/models/qwen')"
 
 
 
 
 
 
 
 
 
40
  EXPOSE 7860
41
 
42
  # Install curl if it's not already installed
 
31
  # RUN mkdir -p /models/qwen && \
32
  # python3 -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('Qwen/Qwen3-0.6B-Base').save_pretrained('/models/qwen')"
33
  # RUN python3 -c "from transformers import AutoModelForCausalLM; AutoModelForCausalLM.from_pretrained('Qwen/Qwen3-0.6B-Base').save_pretrained('/models/qwen')"
 
 
 
34
 
35
+ # RUN mkdir -p /models/qwen && \
36
+ # python3 -c "from transformers import AutoTokenizer; \
37
+ # AutoTokenizer.from_pretrained('ClemSummer/qwen-model-cache', trust_remote_code=True).save_pretrained('/models/qwen')"
38
+ # RUN python3 -c "from transformers import AutoModelForCausalLM; \
39
+ # AutoModelForCausalLM.from_pretrained('ClemSummer/qwen-model-cache', trust_remote_code=True).save_pretrained('/models/qwen')"
40
+
41
+ # Install git & git-lfs, then clone your dataset repo into /models/qwen
42
+ RUN apt-get update && apt-get install -y git git-lfs && \
43
+ git lfs install && \
44
+ git clone https://huggingface.co/datasets/ClemSummer/qwen-model-cache /models/qwen
45
+
46
  EXPOSE 7860
47
 
48
  # Install curl if it's not already installed
ppo_logic.py CHANGED
@@ -19,13 +19,14 @@ MAX_NEW_TOKENS = 256
19
  # Load tokenizer and model
20
  # -------------------------------
21
  print("πŸ”„ Loading tokenizer and model...")
22
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
23
  tokenizer.pad_token = tokenizer.eos_token
24
 
25
  base_model = AutoModelForCausalLM.from_pretrained(
26
  MODEL_NAME,
27
  torch_dtype=torch.float16,
28
- device_map="auto"
 
29
  )
30
 
31
  model = PeftModel.from_pretrained(base_model, CHECKPOINT_DIR)
 
19
  # Load tokenizer and model
20
  # -------------------------------
21
  print("πŸ”„ Loading tokenizer and model...")
22
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
23
  tokenizer.pad_token = tokenizer.eos_token
24
 
25
  base_model = AutoModelForCausalLM.from_pretrained(
26
  MODEL_NAME,
27
  torch_dtype=torch.float16,
28
+ device_map="auto",
29
+ trust_remote_code=True
30
  )
31
 
32
  model = PeftModel.from_pretrained(base_model, CHECKPOINT_DIR)