Update usage example
README.md CHANGED
````diff
@@ -88,7 +88,7 @@ widget:
 ## Usage Example
 
 ```python
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer, pipeline
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
 import torch
 
 model_path = "Felladrin/TinyMistral-248M-Chat-v3"
@@ -96,8 +96,6 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 tokenizer = AutoTokenizer.from_pretrained(model_path)
 model = AutoModelForCausalLM.from_pretrained(model_path).to(device)
 streamer = TextStreamer(tokenizer)
-generate = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
-
 messages = [
     {
         "role": "system",
@@ -116,14 +114,11 @@ messages = [
         "content": "What are some potential applications for quantum computing?",
     },
 ]
-
 prompt = tokenizer.apply_chat_template(
     messages, tokenize=False, add_generation_prompt=True
 )
-
 inputs = tokenizer(prompt, return_tensors="pt").to(device)
-
-outputs = model.generate(
+model.generate(
     inputs.input_ids,
     attention_mask=inputs.attention_mask,
     max_length=tokenizer.model_max_length,
````
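In short, the commit drops the unused `pipeline("text-generation", ...)` wrapper and its import, since the example already generates through `model.generate` with a `TextStreamer`, and it removes the now-unneeded `outputs =` assignment. For reference, here is the updated example reassembled into one runnable script. Treat it as a sketch: the system prompt and any turns elided between hunks are placeholders, and the diff ends inside the `model.generate(...)` call, so the trailing arguments (`streamer=streamer`, `do_sample=True`) are assumptions rather than lines from the card.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

model_path = "Felladrin/TinyMistral-248M-Chat-v3"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the tokenizer and model, and set up token-by-token streaming to stdout.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path).to(device)
streamer = TextStreamer(tokenizer)

# The diff shows only the first and last messages; the system prompt and any
# intermediate turns are placeholders here, not the card's actual text.
messages = [
    {
        "role": "system",
        "content": "You are a helpful assistant.",  # placeholder
    },
    {
        "role": "user",
        "content": "What are some potential applications for quantum computing?",
    },
]

# Render the chat into the model's prompt format and append the generation prompt.
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

inputs = tokenizer(prompt, return_tensors="pt").to(device)

model.generate(
    inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_length=tokenizer.model_max_length,
    # The hunk ends at max_length; the arguments below are assumed.
    streamer=streamer,
    do_sample=True,
)
```

With `streamer=streamer`, tokens are printed as they are generated, which is why the return value of `generate` no longer needs to be captured in `outputs`.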