5to9 committed on
Commit
65118fd
·
1 Parent(s): 0e52052

0.24 Load in 8bit

Browse files
Files changed (1) hide show
  1. app.py +3 -22
app.py CHANGED
@@ -9,7 +9,6 @@ import os
9
  import traceback
10
 
11
  from threading import Thread
12
- import subprocess
13
 
14
  # Status: Breaks during generation
15
 
@@ -56,22 +55,6 @@ def apply_chat_template(messages, add_generation_prompt=False):
56
  return pharia_template
57
 
58
 
59
- def check_gpu_status():
60
- try:
61
- # Run the nvidia-smi command and capture the output
62
- result = subprocess.run(['nvidia-smi'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
63
-
64
- # Log the output of nvidia-smi
65
- logging.info("NVIDIA-SMI Output:\n%s", result.stdout)
66
-
67
- # If there's any error in the subprocess, log it as well
68
- if result.stderr:
69
- logging.error("NVIDIA-SMI Error Output:\n%s", result.stderr)
70
-
71
- except Exception as e:
72
- logging.error(f"Error running nvidia-smi: {e}")
73
-
74
-
75
  def load_model_a(model_id):
76
  global tokenizer_a, model_a, model_id_a
77
  try:
@@ -80,13 +63,12 @@ def load_model_a(model_id):
80
  model_a = AutoModelForCausalLM.from_pretrained(
81
  model_id,
82
  torch_dtype=torch.float16,
83
- device_map="auto", # {"": "cuda:0"} returns "must be Stateless GPU environment"
 
84
  trust_remote_code=True,
85
  )
86
  except Exception as e:
87
  logging.error(f'{SPACER} Error: {e}, Traceback {traceback.format_exc()}')
88
- finally:
89
- check_gpu_status()
90
  return gr.update(label=model_id)
91
 
92
 
@@ -99,12 +81,11 @@ def load_model_b(model_id):
99
  model_id,
100
  torch_dtype=torch.float16,
101
  device_map="auto",
 
102
  trust_remote_code=True,
103
  )
104
  except Exception as e:
105
  logging.error(f'{SPACER} Error: {e}, Traceback {traceback.format_exc()}')
106
- finally:
107
- check_gpu_status()
108
  return gr.update(label=model_id)
109
 
110
  @spaces.GPU()
 
9
  import traceback
10
 
11
  from threading import Thread
 
12
 
13
  # Status: Breaks during generation
14
 
 
55
  return pharia_template
56
 
57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  def load_model_a(model_id):
59
  global tokenizer_a, model_a, model_id_a
60
  try:
 
63
  model_a = AutoModelForCausalLM.from_pretrained(
64
  model_id,
65
  torch_dtype=torch.float16,
66
+ device_map="auto",
67
+ load_in_8bit=True,
68
  trust_remote_code=True,
69
  )
70
  except Exception as e:
71
  logging.error(f'{SPACER} Error: {e}, Traceback {traceback.format_exc()}')
 
 
72
  return gr.update(label=model_id)
73
 
74
 
 
81
  model_id,
82
  torch_dtype=torch.float16,
83
  device_map="auto",
84
+ load_in_8bit=True,
85
  trust_remote_code=True,
86
  )
87
  except Exception as e:
88
  logging.error(f'{SPACER} Error: {e}, Traceback {traceback.format_exc()}')
 
 
89
  return gr.update(label=model_id)
90
 
91
  @spaces.GPU()