Update README.md
README.md CHANGED
```diff
@@ -48,11 +48,7 @@ You can run the model on a GPU using the following code.
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import time
-import warnings
-warnings.filterwarnings("ignore")
 torch.random.manual_seed(0)
-import json
-
 
 model = AutoModelForCausalLM.from_pretrained(
     "NexaAIDev/Octopus-v4",
@@ -62,8 +58,6 @@ model = AutoModelForCausalLM.from_pretrained(
 )
 tokenizer = AutoTokenizer.from_pretrained("NexaAIDev/octopus-v4-finetuned-v1")
 
-
-
 question = "Tell me the result of derivative of x^3 when x is 2?"
 
 inputs = f"<|system|>You are a router. Below is the query from the users, please call the correct function and generate the parameters to call the function.<|end|><|user|>{question}<|end|><|assistant|>"
@@ -71,7 +65,6 @@ inputs = f"<|system|>You are a router. Below is the query from the users, please
 print(inputs)
 print('\n============= Below is the response ==============\n')
 
-
 # You should consider to use early stopping with <nexa_end> token to accelerate
 input_ids = tokenizer(inputs, return_tensors="pt")['input_ids'].to(model.device)
 
@@ -83,7 +76,6 @@ for i in range(200):
     next_token = model(input_ids).logits[:, -1].argmax(-1)
     generated_token_ids.append(next_token.item())
 
-    print(next_token.item())
     input_ids = torch.cat([input_ids, next_token.unsqueeze(1)], dim=-1)
 
     # 32041 is the token id of <nexa_end>
```