kwabs22 committed
Commit 1c670bf · Parent: ba8ad86

CUDA location is probably zero issue

Files changed (1): app.py (+11 −3)
app.py CHANGED
@@ -4,13 +4,21 @@ import torch
 import spaces
 
 # Load model and tokenizer
-tokenizer = AutoTokenizer.from_pretrained("ISTA-DASLab/Meta-Llama-3.1-70B-AQLM-PV-2Bit-1x16")
-model = AutoModelForCausalLM.from_pretrained("ISTA-DASLab/Meta-Llama-3.1-70B-AQLM-PV-2Bit-1x16", torch_dtype=torch.float16)
-model = model.to('cuda') # Move the model to GPU if available
+
+tokenizer = None
+model = None
+
+def loadmodel():
+    global tokenizer, model
+    tokenizer = AutoTokenizer.from_pretrained("ISTA-DASLab/Meta-Llama-3.1-70B-AQLM-PV-2Bit-1x16")
+    model = AutoModelForCausalLM.from_pretrained("ISTA-DASLab/Meta-Llama-3.1-70B-AQLM-PV-2Bit-1x16", torch_dtype=torch.float16, device_map='auto')
+    #model = model.to('cuda') # Move the model to GPU if available
+    pass
 
 # Define a function for generating text from a prompt
 @spaces.GPU
 def generate_text(prompt):
+    global tokenizer, model
     inputs = tokenizer(prompt, return_tensors="pt").to('cuda') # Tokenize input and move to GPU
     outputs = model.generate(inputs.input_ids, max_length=100) # Generate output text
     return tokenizer.decode(outputs[0], skip_special_tokens=True) # Decode and return the text
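One thing the diff leaves open: loadmodel() is defined but never called, so tokenizer and model are still None when generate_text() first runs. Below is a minimal sketch of how this deferred-loading pattern is usually completed on a ZeroGPU Space. The lazy-load guard, the MODEL_ID constant, and the Gradio wiring (gr.Interface, demo.launch()) are illustrative assumptions, not part of this commit.

# Hypothetical completion of the commit's pattern; the loadmodel() call,
# lazy-load guard, and Gradio wiring are assumptions added for illustration.
import torch
import spaces
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_ID = "ISTA-DASLab/Meta-Llama-3.1-70B-AQLM-PV-2Bit-1x16"

tokenizer = None
model = None

def loadmodel():
    global tokenizer, model
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    # device_map="auto" hands weight placement to accelerate instead of an
    # explicit .to('cuda') at import time.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID, torch_dtype=torch.float16, device_map="auto"
    )

@spaces.GPU
def generate_text(prompt):
    global tokenizer, model
    if model is None:  # load on first request; the commit itself never calls loadmodel()
        loadmodel()
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")  # tokenize and move to GPU
    outputs = model.generate(inputs.input_ids, max_length=100)  # generate output text
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

demo = gr.Interface(fn=generate_text, inputs="text", outputs="text")
demo.launch()

The likely motivation for the change itself: on ZeroGPU, a GPU is only attached inside functions decorated with @spaces.GPU, so moving the model with .to('cuda') at import time is the natural suspect, and device_map='auto' plus deferred loading sidesteps it.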