Update README.md
Browse files
README.md
CHANGED
|
@@ -48,11 +48,7 @@ You can run the model on a GPU using the following code.
|
|
| 48 |
import torch
|
| 49 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 50 |
import time
|
| 51 |
-
import warnings
|
| 52 |
-
warnings.filterwarnings("ignore")
|
| 53 |
torch.random.manual_seed(0)
|
| 54 |
-
import json
|
| 55 |
-
|
| 56 |
|
| 57 |
model = AutoModelForCausalLM.from_pretrained(
|
| 58 |
"NexaAIDev/Octopus-v4",
|
|
@@ -62,8 +58,6 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
| 62 |
)
|
| 63 |
tokenizer = AutoTokenizer.from_pretrained("NexaAIDev/octopus-v4-finetuned-v1")
|
| 64 |
|
| 65 |
-
|
| 66 |
-
|
| 67 |
question = "Tell me the result of derivative of x^3 when x is 2?"
|
| 68 |
|
| 69 |
inputs = f"<|system|>You are a router. Below is the query from the users, please call the correct function and generate the parameters to call the function.<|end|><|user|>{question}<|end|><|assistant|>"
|
|
@@ -71,7 +65,6 @@ inputs = f"<|system|>You are a router. Below is the query from the users, please
|
|
| 71 |
print(inputs)
|
| 72 |
print('\n============= Below is the response ==============\n')
|
| 73 |
|
| 74 |
-
|
| 75 |
# Consider stopping early once the <nexa_end> token is generated to speed up generation
|
| 76 |
input_ids = tokenizer(inputs, return_tensors="pt")['input_ids'].to(model.device)
|
| 77 |
|
|
@@ -83,7 +76,6 @@ for i in range(200):
|
|
| 83 |
next_token = model(input_ids).logits[:, -1].argmax(-1)
|
| 84 |
generated_token_ids.append(next_token.item())
|
| 85 |
|
| 86 |
-
print(next_token.item())
|
| 87 |
input_ids = torch.cat([input_ids, next_token.unsqueeze(1)], dim=-1)
|
| 88 |
|
| 89 |
# 32041 is the token id of <nexa_end>
|
|
|
|
| 48 |
import torch
|
| 49 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 50 |
import time
|
|
|
|
|
|
|
| 51 |
torch.random.manual_seed(0)
|
|
|
|
|
|
|
| 52 |
|
| 53 |
model = AutoModelForCausalLM.from_pretrained(
|
| 54 |
"NexaAIDev/Octopus-v4",
|
|
|
|
| 58 |
)
|
| 59 |
tokenizer = AutoTokenizer.from_pretrained("NexaAIDev/octopus-v4-finetuned-v1")
|
| 60 |
|
|
|
|
|
|
|
| 61 |
question = "Tell me the result of derivative of x^3 when x is 2?"
|
| 62 |
|
| 63 |
inputs = f"<|system|>You are a router. Below is the query from the users, please call the correct function and generate the parameters to call the function.<|end|><|user|>{question}<|end|><|assistant|>"
|
|
|
|
| 65 |
print(inputs)
|
| 66 |
print('\n============= Below is the response ==============\n')
|
| 67 |
|
|
|
|
| 68 |
# Consider stopping early once the <nexa_end> token is generated to speed up generation
|
| 69 |
input_ids = tokenizer(inputs, return_tensors="pt")['input_ids'].to(model.device)
|
| 70 |
|
|
|
|
| 76 |
next_token = model(input_ids).logits[:, -1].argmax(-1)
|
| 77 |
generated_token_ids.append(next_token.item())
|
| 78 |
|
|
|
|
| 79 |
input_ids = torch.cat([input_ids, next_token.unsqueeze(1)], dim=-1)
|
| 80 |
|
| 81 |
# 32041 is the token id of <nexa_end>
|