Spaces:

KwabsHug
/

TestCompressedModelzero

Sleeping

kwabs22 committed on Oct 2, 2024

Commit

1c670bf

1 Parent(s): ba8ad86

CUDA device placement is probably a ZeroGPU issue

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,13 +4,21 @@ import torch
 import spaces
 # Load model and tokenizer
-tokenizer = AutoTokenizer.from_pretrained("ISTA-DASLab/Meta-Llama-3.1-70B-AQLM-PV-2Bit-1x16")
-model = AutoModelForCausalLM.from_pretrained("ISTA-DASLab/Meta-Llama-3.1-70B-AQLM-PV-2Bit-1x16", torch_dtype=torch.float16)
-model = model.to('cuda')  # Move the model to 'cuda' unconditionally (there is no CPU fallback here)
 # Define a function for generating text from a prompt
 # NOTE(review): presumably the Hugging Face ZeroGPU decorator that attaches a GPU for the call — confirm against the `spaces` docs
 @spaces.GPU
 def generate_text(prompt):
     """Return the text the model generates for ``prompt``."""
     inputs = tokenizer(prompt, return_tensors="pt").to('cuda')  # Tokenize the prompt into PyTorch tensors and move them to 'cuda'
     outputs = model.generate(inputs.input_ids, max_length=100)  # Generate from the input ids, bounded by max_length=100
     return tokenizer.decode(outputs[0], skip_special_tokens=True)  # Decode the first output sequence, skipping special tokens

 import spaces
 # Load model and tokenizer
+# Placeholders rebound by loadmodel(); both remain None until it has been called.
+tokenizer = None
+model = None
+def loadmodel():
+    global tokenizer, model
+    tokenizer = AutoTokenizer.from_pretrained("ISTA-DASLab/Meta-Llama-3.1-70B-AQLM-PV-2Bit-1x16")
+    model = AutoModelForCausalLM.from_pretrained("ISTA-DASLab/Meta-Llama-3.1-70B-AQLM-PV-2Bit-1x16", torch_dtype=torch.float16, device_map= 'auto')
+    #model = model.to('cuda')  # Move the model to GPU if available
+    pass
 # Define a function for generating text from a prompt
 @spaces.GPU
 def generate_text(prompt):
+    global tokenizer, model
     inputs = tokenizer(prompt, return_tensors="pt").to('cuda')  # Tokenize input and move to GPU
     outputs = model.generate(inputs.input_ids, max_length=100)  # Generate output text
     return tokenizer.decode(outputs[0], skip_special_tokens=True)  # Decode and return the text