Spaces:
Runtime error
Runtime error
Commit
·
f1c2135
1
Parent(s):
78d91fc
Update app.py
Browse files
app.py
CHANGED
@@ -1,12 +1,20 @@
|
|
1 |
from peft import PeftModel
from transformers import LLaMATokenizer, LLaMAForCausalLM, GenerationConfig

# Tokenizer and 8-bit base weights both come from the same LLaMA-7B checkpoint.
tokenizer = LLaMATokenizer.from_pretrained("decapoda-research/llama-7b-hf")

base_model = LLaMAForCausalLM.from_pretrained(
    "decapoda-research/llama-7b-hf",
    device_map="auto",
    load_in_8bit=True,
)

# Wrap the quantized base model with the Alpaca LoRA adapter weights.
model = PeftModel.from_pretrained(base_model, "tloen/alpaca-lora-7b")
|
11 |
|
12 |
def generate_prompt(instruction, input=None):
|
|
|
1 |
from peft import PeftModel
from transformers import LLaMATokenizer, LLaMAForCausalLM, GenerationConfig
import torch

# Per-device memory cap used when sharding the model across GPUs.
# NOTE: this must be assigned BEFORE the dict comprehension below — the
# original code built the dict first, referencing a not-yet-defined
# `max_memory` (NameError at import time), then overwrote the dict with
# the bare string '40GB', which is not the {device: limit} mapping that
# `from_pretrained(max_memory=...)` expects.
per_gpu_limit = '40GB'

n_gpus = torch.cuda.device_count()
# transformers expects max_memory as a mapping of device index -> limit.
max_memory = {i: per_gpu_limit for i in range(n_gpus)}

print(f'Max memory : {max_memory}')

tokenizer = LLaMATokenizer.from_pretrained("decapoda-research/llama-7b-hf")

model = LLaMAForCausalLM.from_pretrained(
    "decapoda-research/llama-7b-hf",
    load_in_8bit=True,
    device_map="auto",
    max_memory=max_memory,
)

# Apply the Alpaca LoRA adapter on top of the 8-bit base model.
model = PeftModel.from_pretrained(model, "tloen/alpaca-lora-7b")
|
19 |
|
20 |
def generate_prompt(instruction, input=None):
|