Ketengan-Diffusion-Lab committed
Commit c65567a · verified · 1 Parent(s): e1a9191

Update app.py

Files changed (1): app.py (+7 -23)
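In short: the commit drops the manual download-and-cache logic (import os, the model_path constant, and the os.path.exists branch with its save_pretrained calls) and instead loads the model and tokenizer directly from the Hub by name, keeping the same 8-bit quantization config.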
app.py CHANGED
@@ -4,7 +4,6 @@ import transformers
 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 from PIL import Image
 import warnings
-import os
 
 # disable some warnings
 transformers.logging.set_verbosity_error()
@@ -16,7 +15,6 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f"Using device: {device}")
 
 model_name = 'cognitivecomputations/dolphin-vision-72b'
-model_path = '/data/dolphin-vision-72b'
 
 # Configure 8-bit quantization
 quantization_config = BitsAndBytesConfig(
@@ -25,32 +23,18 @@ quantization_config = BitsAndBytesConfig(
     llm_int8_has_fp16_weight=False
 )
 
-# Check if the model is already downloaded
-if not os.path.exists(model_path):
-    print(f"Downloading model to {model_path}")
-    # create model and save it to the specified path
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        quantization_config=quantization_config,
-        device_map="auto",
-        trust_remote_code=True
-    )
-    model.save_pretrained(model_path)
-
-    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-    tokenizer.save_pretrained(model_path)
-else:
-    print(f"Loading model from {model_path}")
-
-# Load the model from the saved path
+# create model and load it to the specified device with 8-bit quantization
 model = AutoModelForCausalLM.from_pretrained(
-    model_path,
+    model_name,
     quantization_config=quantization_config,
-    device_map="auto",
+    device_map="auto",  # This will automatically use the GPU if available
     trust_remote_code=True
 )
 
-tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained(
+    model_name,
+    trust_remote_code=True
+)
 
 def inference(prompt, image):
     messages = [
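A note on why the deleted caching branch was redundant: from_pretrained already stores downloads in the local Hugging Face Hub cache and reuses them on later runs, so the explicit save_pretrained round-trip to /data duplicated that work. Below is a minimal sketch of the resulting loading path; load_in_8bit=True is inferred from the "# Configure 8-bit quantization" comment (the diff truncates that block), and cache_dir="/data/hf-cache" is a hypothetical path for pinning the cache to a persistent volume, not something this commit sets.

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = 'cognitivecomputations/dolphin-vision-72b'

# 8-bit quantization; llm_int8_has_fp16_weight comes from the diff,
# load_in_8bit=True is assumed from the surrounding comment
quantization_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_has_fp16_weight=False
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map="auto",           # place weights on the GPU when one is available
    trust_remote_code=True,
    cache_dir="/data/hf-cache",  # hypothetical; defaults to ~/.cache/huggingface
)
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
    cache_dir="/data/hf-cache",  # hypothetical, same cache as above
)

Alternatively, setting the HF_HOME environment variable relocates the cache without touching the code at all.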