kwabs22 committed
Commit e03ccf8 · 1 Parent(s): a7e3cb8

CUDA location is probably zero issue

Files changed (1):
  1. app.py +11 -15
app.py CHANGED
@@ -3,34 +3,30 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 import spaces
 
-# Load model and tokenizer
-
 tokenizer = None
 model = None
 
 def loadmodel():
-    global tokenizer, model
     tokenizer = AutoTokenizer.from_pretrained("ISTA-DASLab/Meta-Llama-3.1-70B-AQLM-PV-2Bit-1x16")
-    model = AutoModelForCausalLM.from_pretrained("ISTA-DASLab/Meta-Llama-3.1-70B-AQLM-PV-2Bit-1x16", torch_dtype='auto', device_map= 'auto') #torch_dtype=torch.float16
-    #model = model.to('cuda') # Move the model to GPU if available
-    pass
+    model = AutoModelForCausalLM.from_pretrained("ISTA-DASLab/Meta-Llama-3.1-70B-AQLM-PV-2Bit-1x16", torch_dtype='auto', device_map='auto')
+    return tokenizer, model
 
-# Define a function for generating text from a prompt
 @spaces.GPU
 def generate_text(prompt):
     global tokenizer, model
-    inputs = tokenizer(prompt, return_tensors="pt").to('cuda') # Tokenize input and move to GPU
-    outputs = model.generate(inputs.input_ids, max_length=100) # Generate output text
-    return tokenizer.decode(outputs[0], skip_special_tokens=True) # Decode and return the text
+    if tokenizer is None or model is None:
+        tokenizer, model = loadmodel()
+
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    outputs = model.generate(inputs.input_ids, max_length=100)
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-# Create Gradio Interface
 interface = gr.Interface(
-    fn=generate_text, # Function that handles text generation
-    inputs="text", # Input is a text box
-    outputs="text", # Output is a text box
+    fn=generate_text,
+    inputs="text",
+    outputs="text",
     title="Meta-Llama-3.1-70B Text Generation",
     description="Enter a prompt and generate text using Meta-Llama-3.1-70B.",
 )
 
-# Launch the Gradio app
 interface.launch()
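
In short, the commit stops hard-coding `.to('cuda')`: the model is now loaded lazily inside the `@spaces.GPU`-decorated handler, and the tokenized inputs are moved to `model.device`, i.e. to whatever device `device_map='auto'` actually placed the model on. Below is a minimal, self-contained sketch of that pattern. It swaps in the small "gpt2" checkpoint (an assumption for illustration, not the model used in this Space) and omits the Gradio and `spaces` pieces so it can run on a machine without a GPU.

# Minimal sketch of the lazy-load + device-agnostic pattern from this commit.
# Assumptions: a small placeholder checkpoint ("gpt2") instead of the 70B AQLM model,
# and no Gradio/ZeroGPU wrapper. device_map="auto" requires the `accelerate` package.
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = None
model = None

def loadmodel(model_id="gpt2"):
    tok = AutoTokenizer.from_pretrained(model_id)
    mdl = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map="auto")
    return tok, mdl

def generate_text(prompt):
    global tokenizer, model
    if tokenizer is None or model is None:
        tokenizer, model = loadmodel()
    # Send inputs to wherever the model was placed (GPU if available, else CPU),
    # rather than hard-coding 'cuda'.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(inputs.input_ids, max_length=50)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

if __name__ == "__main__":
    print(generate_text("Hello, world"))

On a CPU-only machine this sketch still runs unchanged, since nothing references 'cuda' directly; the device is taken from the loaded model.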