kwabs22 committed
Commit e95ad42 · 1 Parent(s): e03ccf8

Testing Suggested Code

Files changed (1)
app.py +58 -11
app.py CHANGED
@@ -1,22 +1,69 @@
  import gradio as gr
  from transformers import AutoTokenizer, AutoModelForCausalLM
  import torch
- import spaces

- tokenizer = None
- model = None

- def loadmodel():
-     tokenizer = AutoTokenizer.from_pretrained("ISTA-DASLab/Meta-Llama-3.1-70B-AQLM-PV-2Bit-1x16")
-     model = AutoModelForCausalLM.from_pretrained("ISTA-DASLab/Meta-Llama-3.1-70B-AQLM-PV-2Bit-1x16", torch_dtype='auto', device_map='auto')
-     return tokenizer, model

  @spaces.GPU
  def generate_text(prompt):
-     global tokenizer, model
-     if tokenizer is None or model is None:
-         tokenizer, model = loadmodel()
-
      inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
      outputs = model.generate(inputs.input_ids, max_length=100)
      return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
+ # import gradio as gr
+ # from transformers import AutoTokenizer, AutoModelForCausalLM
+ # import torch
+ # import spaces
+
+ # tokenizer = None
+ # model = None
+
+ # def loadmodel():
+ #     tokenizer = AutoTokenizer.from_pretrained("ISTA-DASLab/Meta-Llama-3.1-70B-AQLM-PV-2Bit-1x16")
+ #     model = AutoModelForCausalLM.from_pretrained("ISTA-DASLab/Meta-Llama-3.1-70B-AQLM-PV-2Bit-1x16", torch_dtype='auto', device_map='auto')
+ #     return tokenizer, model
+
+ # @spaces.GPU
+ # def generate_text(prompt):
+ #     global tokenizer, model
+ #     if tokenizer is None or model is None:
+ #         tokenizer, model = loadmodel()
+
+ #     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+ #     outputs = model.generate(inputs.input_ids, max_length=100)
+ #     return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+ # interface = gr.Interface(
+ #     fn=generate_text,
+ #     inputs="text",
+ #     outputs="text",
+ #     title="Meta-Llama-3.1-70B Text Generation",
+ #     description="Enter a prompt and generate text using Meta-Llama-3.1-70B.",
+ # )
+
+ # interface.launch()
+
+ import spaces
  import gradio as gr
  from transformers import AutoTokenizer, AutoModelForCausalLM
  import torch
+ import subprocess
+ import os
+
+ def install_cuda_toolkit():
+     # CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run"
+     CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/12.2.0/local_installers/cuda_12.2.0_535.54.03_linux.run"
+     CUDA_TOOLKIT_FILE = "/tmp/%s" % os.path.basename(CUDA_TOOLKIT_URL)
+     subprocess.call(["wget", "-q", CUDA_TOOLKIT_URL, "-O", CUDA_TOOLKIT_FILE])
+     subprocess.call(["chmod", "+x", CUDA_TOOLKIT_FILE])
+     subprocess.call([CUDA_TOOLKIT_FILE, "--silent", "--toolkit"])

+     os.environ["CUDA_HOME"] = "/usr/local/cuda"
+     os.environ["PATH"] = "%s/bin:%s" % (os.environ["CUDA_HOME"], os.environ["PATH"])
+     os.environ["LD_LIBRARY_PATH"] = "%s/lib:%s" % (
+         os.environ["CUDA_HOME"],
+         "" if "LD_LIBRARY_PATH" not in os.environ else os.environ["LD_LIBRARY_PATH"],
+     )
+     # Fix: arch_list[-1] += '+PTX'; IndexError: list index out of range
+     os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"

+ install_cuda_toolkit()
+
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ tokenizer = AutoTokenizer.from_pretrained("ISTA-DASLab/Meta-Llama-3.1-70B-AQLM-PV-2Bit-1x16")
+ model = AutoModelForCausalLM.from_pretrained("ISTA-DASLab/Meta-Llama-3.1-70B-AQLM-PV-2Bit-1x16", torch_dtype='auto', device_map='auto').to(device)

  @spaces.GPU
  def generate_text(prompt):
      inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
      outputs = model.generate(inputs.input_ids, max_length=100)
      return tokenizer.decode(outputs[0], skip_special_tokens=True)
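
install_cuda_toolkit() downloads the CUDA 12.2 runfile, installs the toolkit silently under /usr/local/cuda, and points CUDA_HOME, PATH, and LD_LIBRARY_PATH at it. A small sanity check that could be run right after the install_cuda_toolkit() call (an optional addition, not part of this commit) would be to confirm the compiler is actually reachable:

# Optional sanity check; assumes the runfile placed nvcc under CUDA_HOME/bin as usual.
subprocess.call(["%s/bin/nvcc" % os.environ["CUDA_HOME"], "--version"])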
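
On the generation call itself, model.generate(inputs.input_ids, max_length=100) counts the prompt tokens toward the 100-token budget and drops the attention mask the tokenizer already produced. A variant sketch of generate_text (an assumption about intent, not what the commit does) that caps only newly generated tokens and forwards the full tokenizer output:

# Variant sketch, not part of this commit: max_new_tokens and **inputs are editor assumptions.
@spaces.GPU
def generate_text(prompt):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # **inputs passes both input_ids and attention_mask; max_new_tokens limits generated tokens only.
    outputs = model.generate(**inputs, max_new_tokens=100)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)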
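
Finally, within the lines shown the gr.Interface / interface.launch() wiring survives only in the commented-out block at the top of the new file, so nothing in this hunk exposes generate_text as a Gradio app. A minimal sketch of how that wiring could be restored, reusing the names and strings from the commented-out block (again, not part of this commit):

# Hypothetical re-wiring of the commented-out interface; mirrors lines 24-32 of the new file.
interface = gr.Interface(
    fn=generate_text,  # the @spaces.GPU-decorated function defined above
    inputs="text",
    outputs="text",
    title="Meta-Llama-3.1-70B Text Generation",
    description="Enter a prompt and generate text using Meta-Llama-3.1-70B.",
)
interface.launch()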