Update usage example
README.md CHANGED
````diff
@@ -88,7 +88,7 @@ widget:
 ## Usage Example
 
 ```python
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer, pipeline
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
 import torch
 
 model_path = "Felladrin/TinyMistral-248M-Chat-v3"
@@ -96,8 +96,6 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 tokenizer = AutoTokenizer.from_pretrained(model_path)
 model = AutoModelForCausalLM.from_pretrained(model_path).to(device)
 streamer = TextStreamer(tokenizer)
-generate = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
-
 messages = [
     {
         "role": "system",
@@ -116,14 +114,11 @@ messages = [
         "content": "What are some potential applications for quantum computing?",
     },
 ]
-
 prompt = tokenizer.apply_chat_template(
     messages, tokenize=False, add_generation_prompt=True
 )
-
 inputs = tokenizer(prompt, return_tensors="pt").to(device)
-
-outputs = model.generate(
+model.generate(
     inputs.input_ids,
     attention_mask=inputs.attention_mask,
     max_length=tokenizer.model_max_length,
````
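In short, the commit drops the unused `pipeline("text-generation", ...)` wrapper and its import, since the example already generates through `model.generate` with a `TextStreamer`, and it removes the now-unneeded `outputs =` assignment. For reference, here is the updated example reassembled into one runnable script. Treat it as a sketch: the system prompt and any turns elided between hunks are placeholders, and the diff ends inside the `model.generate(...)` call, so the trailing arguments (`streamer=streamer`, `do_sample=True`) are assumptions rather than lines from the card.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

model_path = "Felladrin/TinyMistral-248M-Chat-v3"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the tokenizer and model, and set up token-by-token streaming to stdout.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path).to(device)
streamer = TextStreamer(tokenizer)

# The diff shows only the first and last messages; the system prompt and any
# intermediate turns are placeholders here, not the card's actual text.
messages = [
    {
        "role": "system",
        "content": "You are a helpful assistant.",  # placeholder
    },
    {
        "role": "user",
        "content": "What are some potential applications for quantum computing?",
    },
]

# Render the chat into the model's prompt format and append the generation prompt.
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

inputs = tokenizer(prompt, return_tensors="pt").to(device)

model.generate(
    inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_length=tokenizer.model_max_length,
    # The hunk ends at max_length; the arguments below are assumed.
    streamer=streamer,
    do_sample=True,
)
```

With `streamer=streamer`, tokens are printed as they are generated, which is why the return value of `generate` no longer needs to be captured in `outputs`.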