ClemSummer committed on
Commit
e497915
·
1 Parent(s): d32cb1d

moved both cbow and qwen to cache HF dataset

Browse files
Files changed (4) hide show
  1. .dockerignore +3 -1
  2. .gitignore +1 -0
  3. Dockerfile +11 -3
  4. cbow_logic.py +1 -1
.dockerignore CHANGED
@@ -31,4 +31,6 @@ models/
31
 
32
  # Docker or Space-specific
33
  docker-compose.yaml
34
- Caddyfile
 
 
 
31
 
32
  # Docker or Space-specific
33
  docker-compose.yaml
34
+ Caddyfile
35
+ qwen_model/
36
+ models/
.gitignore CHANGED
@@ -2,3 +2,4 @@ __pycache__/
2
  *.png
3
  **/artifacts/
4
  models/
 
 
2
  *.png
3
  **/artifacts/
4
  models/
5
+ qwen_model/
Dockerfile CHANGED
@@ -22,13 +22,21 @@ RUN mkdir -p /models/clip && \
22
  RUN python3 -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('bert-base-uncased').save_pretrained('/models/bert-tokenizer')"
23
  RUN python3 -c "from transformers import CLIPProcessor; CLIPProcessor.from_pretrained('openai/clip-vit-base-patch32').save_pretrained('/models/clip')"
24
 
 
 
25
  RUN mkdir -p /models/cbow && \
26
- python3 -c "import gensim.downloader as api; model = api.load('glove-twitter-200'); model.save('/models/cbow_model.kv')"
 
27
 
 
 
 
28
  RUN mkdir -p /models/qwen && \
29
- python3 -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('Qwen/Qwen3-0.6B-Base').save_pretrained('/models/qwen')"
30
- RUN python3 -c "from transformers import AutoModelForCausalLM; AutoModelForCausalLM.from_pretrained('Qwen/Qwen3-0.6B-Base').save_pretrained('/models/qwen')"
31
 
 
 
32
  EXPOSE 7860
33
 
34
  # Install curl if it's not already installed
 
22
  RUN python3 -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('bert-base-uncased').save_pretrained('/models/bert-tokenizer')"
23
  RUN python3 -c "from transformers import CLIPProcessor; CLIPProcessor.from_pretrained('openai/clip-vit-base-patch32').save_pretrained('/models/clip')"
24
 
25
+ # RUN mkdir -p /models/cbow && \
26
+ # python3 -c "import gensim.downloader as api; model = api.load('glove-twitter-200'); model.save('/models/cbow_model.kv')"
27
  RUN mkdir -p /models/cbow && \
28
+ wget https://huggingface.co/datasets/ClemSummer/cbow-model-cache/resolve/main/cbow_model.kv -O /models/cbow/cbow_model.kv && \
29
+ wget https://huggingface.co/datasets/ClemSummer/cbow-model-cache/resolve/main/cbow_model.kv.vectors.npy -O /models/cbow/cbow_model.kv.vectors.npy
30
 
31
+ # RUN mkdir -p /models/qwen && \
32
+ # python3 -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('Qwen/Qwen3-0.6B-Base').save_pretrained('/models/qwen')"
33
+ # RUN python3 -c "from transformers import AutoModelForCausalLM; AutoModelForCausalLM.from_pretrained('Qwen/Qwen3-0.6B-Base').save_pretrained('/models/qwen')"
34
  RUN mkdir -p /models/qwen && \
35
+ python3 -c "from transformers import AutoTokenizer; \
36
+ AutoTokenizer.from_pretrained('ClemSummer/qwen-model-cache', trust_remote_code=True).save_pretrained('/models/qwen')"
37
 
38
+ RUN python3 -c "from transformers import AutoModelForCausalLM; \
39
+ AutoModelForCausalLM.from_pretrained('ClemSummer/qwen-model-cache', trust_remote_code=True).save_pretrained('/models/qwen')"
40
  EXPOSE 7860
41
 
42
  # Install curl if it's not already installed
cbow_logic.py CHANGED
@@ -7,7 +7,7 @@ import shlex
7
 
8
 
9
  class MeaningCalculator:
10
- def __init__(self, model_path: str = "/models/cbow_model.kv"):
11
  if not os.path.exists(model_path):
12
  raise FileNotFoundError(f"Model not found at: {model_path}")
13
  self.model = gensim.models.KeyedVectors.load(model_path, mmap='r')
 
7
 
8
 
9
  class MeaningCalculator:
10
+ def __init__(self, model_path: str = "/models/cbow/cbow_model.kv"):
11
  if not os.path.exists(model_path):
12
  raise FileNotFoundError(f"Model not found at: {model_path}")
13
  self.model = gensim.models.KeyedVectors.load(model_path, mmap='r')