Spaces:

chen196473
/

amazon-multimodal-product-assistant

Build error

Wisdom Chen committed on Dec 11, 2024

Commit

4784493

unverified ·

1 Parent(s): 3beffd8

Update model.py

Files changed (1) hide show

model.py CHANGED Viewed

@@ -78,25 +78,32 @@ def initialize_models() -> bool:
         except Exception as e:
             raise RuntimeError(f"Failed to initialize CLIP model: {str(e)}")
-        # Initialize LLM with CPU settings
         try:
             model_name = "mistralai/Mistral-7B-v0.1"
-            # Initialize tokenizer
             llm_tokenizer = AutoTokenizer.from_pretrained(
                 model_name,
-                use_auth_token=hf_token,
                 trust_remote_code=True
             )
             llm_tokenizer.pad_token = llm_tokenizer.eos_token
-            # Initialize model for CPU
             llm_model = AutoModelForCausalLM.from_pretrained(
                 model_name,
-                use_auth_token=hf_token,
-                torch_dtype=torch.float32,  # Use float32 for CPU
-                trust_remote_code=True,
-                low_cpu_mem_usage=True
             )
             llm_model.eval()
             print("LLM initialized successfully")

         except Exception as e:
             raise RuntimeError(f"Failed to initialize CLIP model: {str(e)}")
+        # Initialize LLM with optimized settings
         try:
             model_name = "mistralai/Mistral-7B-v0.1"
+            quantization_config = BitsAndBytesConfig(
+                load_in_4bit=True,
+                bnb_4bit_compute_dtype=torch.float16,
+                bnb_4bit_use_double_quant=True,
+                bnb_4bit_quant_type="nf4"
+            )
+            # Initialize tokenizer with specific version requirements
             llm_tokenizer = AutoTokenizer.from_pretrained(
                 model_name,
+                use_auth_token=hf_token,  # Changed from token to use_auth_token
                 trust_remote_code=True
             )
             llm_tokenizer.pad_token = llm_tokenizer.eos_token
             llm_model = AutoModelForCausalLM.from_pretrained(
                 model_name,
+                use_auth_token=hf_token,  # Changed from token to use_auth_token
+                quantization_config=quantization_config,
+                device_map='cpu',  # Force CPU usage
+                torch_dtype=torch.float16,
+                low_cpu_mem_usage=True,
+                trust_remote_code=True
             )
             llm_model.eval()
             print("LLM initialized successfully")