Spaces:

Ketengan-Diffusion-Lab
/

Dolphin-Inference

Build error

Ketengan-Diffusion-Lab committed on Sep 14, 2024

Commit

e1a9191

verified ·

1 Parent(s): 4bd91e6

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ import transformers
 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 from PIL import Image
 import warnings
 # disable some warnings
 transformers.logging.set_verbosity_error()
@@ -15,6 +16,7 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f"Using device: {device}")
 model_name = 'cognitivecomputations/dolphin-vision-72b'
 # Configure 8-bit quantization
 quantization_config = BitsAndBytesConfig(
@@ -23,18 +25,32 @@ quantization_config = BitsAndBytesConfig(
     llm_int8_has_fp16_weight=False
 )
-# create model and load it to the specified device with 8-bit quantization
 model = AutoModelForCausalLM.from_pretrained(
-    model_name,
     quantization_config=quantization_config,
-    device_map="auto",  # This will automatically use the GPU if available
     trust_remote_code=True
 )
-tokenizer = AutoTokenizer.from_pretrained(
-    model_name,
-    trust_remote_code=True
-)
 def inference(prompt, image):
     messages = [

 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 from PIL import Image
 import warnings
+import os
 # disable some warnings
 transformers.logging.set_verbosity_error()
 print(f"Using device: {device}")
 model_name = 'cognitivecomputations/dolphin-vision-72b'
+model_path = '/data/dolphin-vision-72b'
 # Configure 8-bit quantization
 quantization_config = BitsAndBytesConfig(
     llm_int8_has_fp16_weight=False
 )
+# Check if the model is already downloaded
+if not os.path.exists(model_path):
+    print(f"Downloading model to {model_path}")
+    # create model and save it to the specified path
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        quantization_config=quantization_config,
+        device_map="auto",
+        trust_remote_code=True
+    )
+    model.save_pretrained(model_path)
+    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+    tokenizer.save_pretrained(model_path)
+else:
+    print(f"Loading model from {model_path}")
+# Load the model from the saved path
 model = AutoModelForCausalLM.from_pretrained(
+    model_path,
     quantization_config=quantization_config,
+    device_map="auto",
     trust_remote_code=True
 )
+tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 def inference(prompt, image):
     messages = [