Ketengan-Diffusion-Lab committed (verified)
Commit e1a9191 · 1 Parent(s): 4bd91e6

Update app.py

Files changed (1):
  1. app.py +23 -7
app.py CHANGED
@@ -4,6 +4,7 @@ import transformers
 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 from PIL import Image
 import warnings
+import os
 
 # disable some warnings
 transformers.logging.set_verbosity_error()
@@ -15,6 +16,7 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f"Using device: {device}")
 
 model_name = 'cognitivecomputations/dolphin-vision-72b'
+model_path = '/data/dolphin-vision-72b'
 
 # Configure 8-bit quantization
 quantization_config = BitsAndBytesConfig(
@@ -23,18 +25,32 @@ quantization_config = BitsAndBytesConfig(
     llm_int8_has_fp16_weight=False
 )
 
-# create model and load it to the specified device with 8-bit quantization
+# Check if the model is already downloaded
+if not os.path.exists(model_path):
+    print(f"Downloading model to {model_path}")
+    # create model and save it to the specified path
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        quantization_config=quantization_config,
+        device_map="auto",
+        trust_remote_code=True
+    )
+    model.save_pretrained(model_path)
+
+    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+    tokenizer.save_pretrained(model_path)
+else:
+    print(f"Loading model from {model_path}")
+
+# Load the model from the saved path
 model = AutoModelForCausalLM.from_pretrained(
-    model_name,
+    model_path,
     quantization_config=quantization_config,
-    device_map="auto", # This will automatically use the GPU if available
+    device_map="auto",
     trust_remote_code=True
 )
 
-tokenizer = AutoTokenizer.from_pretrained(
-    model_name,
-    trust_remote_code=True
-)
+tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
 def inference(prompt, image):
     messages = [
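
Only the tail of the 8-bit quantization block is visible in the hunks above. For reference, a minimal sketch of what the full config most likely looks like; the load_in_8bit flag is an assumption, since only llm_int8_has_fp16_weight=False appears in the diff:

from transformers import BitsAndBytesConfig

# Sketch only: load_in_8bit=True is assumed, not shown in the hunk.
quantization_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_has_fp16_weight=False
)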
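
The diff cuts off at the start of inference(), so its body is not shown here. As a rough, non-authoritative sketch of how a prompt and PIL image are typically fed to this model family (following the pattern from the upstream dolphin-vision model card), assuming the model's remote code exposes process_images and an images= argument to generate:

import torch

def inference(prompt, image):
    # Single-turn chat message with an <image> placeholder (assumed template)
    messages = [{"role": "user", "content": f"<image>\n{prompt}"}]
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    # Splice the image placeholder token id (-200, per the upstream model card) between text chunks
    text_chunks = [tokenizer(chunk).input_ids for chunk in text.split("<image>")]
    input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0).to(model.device)

    # Preprocess the PIL image with the remote-code helper (assumption)
    image_tensor = model.process_images([image], model.config).to(dtype=model.dtype, device=model.device)

    # Generate and decode only the newly produced tokens
    output_ids = model.generate(input_ids, images=image_tensor, max_new_tokens=1024, use_cache=True)[0]
    return tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True)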