Wisdom Chen committed on
Commit
fadbe71
·
unverified ·
1 Parent(s): 5bd193a

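Update model.py: force CPU device in initialize_models, replace 4-bit NF4 quantization with 8-bit + fp32 CPU offload, drop torch_dtype=float16, add low_cpu_mem_usage=True, and keep the previous implementation as a commented-out block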

Browse files
Files changed (1) hide show
  1. model.py +66 -8
model.py CHANGED
@@ -47,10 +47,68 @@ embeddings_df: Optional[pd.DataFrame] = None
47
  text_faiss: Optional[object] = None
48
  image_faiss: Optional[object] = None
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  def initialize_models() -> bool:
51
  global clip_model, clip_preprocess, clip_tokenizer, llm_tokenizer, llm_model, device
52
 
53
  try:
 
 
54
  print(f"Initializing models on device: {device}")
55
 
56
  # Initialize CLIP model with error handling
@@ -65,14 +123,14 @@ def initialize_models() -> bool:
65
  except Exception as e:
66
  raise RuntimeError(f"Failed to initialize CLIP model: {str(e)}")
67
 
68
- # Initialize LLM with optimized settings
69
  try:
70
  model_name = "mistralai/Mistral-7B-v0.1"
 
 
71
  quantization_config = BitsAndBytesConfig(
72
- load_in_4bit=True,
73
- bnb_4bit_compute_dtype=torch.float16,
74
- bnb_4bit_use_double_quant=True,
75
- bnb_4bit_quant_type="nf4"
76
  )
77
 
78
  # Get token from Streamlit secrets
@@ -82,7 +140,7 @@ def initialize_models() -> bool:
82
  model_name,
83
  padding_side="left",
84
  truncation_side="left",
85
- token=hf_token # Add token here
86
  )
87
  llm_tokenizer.pad_token = llm_tokenizer.eos_token
88
 
@@ -90,8 +148,8 @@ def initialize_models() -> bool:
90
  model_name,
91
  quantization_config=quantization_config,
92
  device_map="auto",
93
- torch_dtype=torch.float16,
94
- token=hf_token # Add token here
95
  )
96
  llm_model.eval()
97
  print("LLM initialized successfully")
 
47
  text_faiss: Optional[object] = None
48
  image_faiss: Optional[object] = None
49
 
50
+ # def initialize_models() -> bool:
51
+ # global clip_model, clip_preprocess, clip_tokenizer, llm_tokenizer, llm_model, device
52
+
53
+ # try:
54
+ # print(f"Initializing models on device: {device}")
55
+
56
+ # # Initialize CLIP model with error handling
57
+ # try:
58
+ # clip_model, _, clip_preprocess = open_clip.create_model_and_transforms(
59
+ # 'hf-hub:Marqo/marqo-fashionCLIP'
60
+ # )
61
+ # clip_model = clip_model.to(device)
62
+ # clip_model.eval()
63
+ # clip_tokenizer = open_clip.get_tokenizer('hf-hub:Marqo/marqo-fashionCLIP')
64
+ # print("CLIP model initialized successfully")
65
+ # except Exception as e:
66
+ # raise RuntimeError(f"Failed to initialize CLIP model: {str(e)}")
67
+
68
+ # # Initialize LLM with optimized settings
69
+ # try:
70
+ # model_name = "mistralai/Mistral-7B-v0.1"
71
+ # quantization_config = BitsAndBytesConfig(
72
+ # load_in_4bit=True,
73
+ # bnb_4bit_compute_dtype=torch.float16,
74
+ # bnb_4bit_use_double_quant=True,
75
+ # bnb_4bit_quant_type="nf4"
76
+ # )
77
+
78
+ # # Get token from Streamlit secrets
79
+ # hf_token = st.secrets["HUGGINGFACE_TOKEN"]
80
+
81
+ # llm_tokenizer = AutoTokenizer.from_pretrained(
82
+ # model_name,
83
+ # padding_side="left",
84
+ # truncation_side="left",
85
+ # token=hf_token # Add token here
86
+ # )
87
+ # llm_tokenizer.pad_token = llm_tokenizer.eos_token
88
+
89
+ # llm_model = AutoModelForCausalLM.from_pretrained(
90
+ # model_name,
91
+ # quantization_config=quantization_config,
92
+ # device_map="auto",
93
+ # torch_dtype=torch.float16,
94
+ # token=hf_token # Add token here
95
+ # )
96
+ # llm_model.eval()
97
+ # print("LLM initialized successfully")
98
+ # except Exception as e:
99
+ # raise RuntimeError(f"Failed to initialize LLM: {str(e)}")
100
+
101
+ # return True
102
+
103
+ # except Exception as e:
104
+ # raise RuntimeError(f"Model initialization failed: {str(e)}")
105
+
106
  def initialize_models() -> bool:
107
  global clip_model, clip_preprocess, clip_tokenizer, llm_tokenizer, llm_model, device
108
 
109
  try:
110
+ # Force CPU device
111
+ device = "cpu"
112
  print(f"Initializing models on device: {device}")
113
 
114
  # Initialize CLIP model with error handling
 
123
  except Exception as e:
124
  raise RuntimeError(f"Failed to initialize CLIP model: {str(e)}")
125
 
126
+ # Initialize LLM with CPU-compatible settings
127
  try:
128
  model_name = "mistralai/Mistral-7B-v0.1"
129
+
130
+ # CPU-compatible configuration
131
  quantization_config = BitsAndBytesConfig(
132
+ load_in_8bit=True, # Use 8-bit instead of 4-bit
133
+ llm_int8_enable_fp32_cpu_offload=True # Enable CPU offloading
 
 
134
  )
135
 
136
  # Get token from Streamlit secrets
 
140
  model_name,
141
  padding_side="left",
142
  truncation_side="left",
143
+ token=hf_token
144
  )
145
  llm_tokenizer.pad_token = llm_tokenizer.eos_token
146
 
 
148
  model_name,
149
  quantization_config=quantization_config,
150
  device_map="auto",
151
+ low_cpu_mem_usage=True, # Enable low memory usage
152
+ token=hf_token
153
  )
154
  llm_model.eval()
155
  print("LLM initialized successfully")