---
library_name: transformers
tags:
- trl
- sft
license: apache-2.0
datasets:
- gokaygokay/prompt-enhancement-75k
language:
- en
base_model:
- HuggingFaceTB/SmolLM2-135M-Instruct
pipeline_tag: text-generation
---
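
Example of enhancing a short prompt with `gokaygokay/SmolLM2-135M-Instruct-Prompt-Enhance` using `transformers`; the tokenizer is loaded from the base `HuggingFaceTB/SmolLM2-135M-Instruct` checkpoint: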

```python
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"

model_id = "gokaygokay/SmolLM2-135M-Instruct-Prompt-Enhance"
tokenizer_id = "HuggingFaceTB/SmolLM2-135M-Instruct"

# Load the fine-tuned model and the base model's tokenizer
tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
model = AutoModelForCausalLM.from_pretrained(model_id).to(device)


def generate_response(model, tokenizer, instruction, device="cpu"):
    """Generate a response from the model based on an instruction."""
    messages = [{"role": "user", "content": instruction}]
    input_text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
    outputs = model.generate(
        inputs, max_new_tokens=256, repetition_penalty=1.2
    )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response


def print_response(response):
    """Print only the assistant part of the model's response."""
    print("Model response:")
    print(response.split("assistant\n")[-1])
    print("-" * 100)


prompt = "cat"

response = generate_response(model, tokenizer, prompt, device)
print_response(response)
```
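
Alternatively, here is a minimal sketch using the high-level `pipeline` API (the model's `pipeline_tag` is `text-generation`). Generation parameters mirror the example above; passing chat-style message lists directly to the pipeline assumes a recent `transformers` release:

```python
from transformers import pipeline
import torch

# Minimal sketch (assumption: a transformers version that accepts
# chat-style message lists as pipeline input).
pipe = pipeline(
    "text-generation",
    model="gokaygokay/SmolLM2-135M-Instruct-Prompt-Enhance",
    tokenizer="HuggingFaceTB/SmolLM2-135M-Instruct",
    device=0 if torch.cuda.is_available() else -1,
)

messages = [{"role": "user", "content": "cat"}]
result = pipe(messages, max_new_tokens=256, repetition_penalty=1.2)

# The pipeline returns the full chat; the last message is the model's reply.
print(result[0]["generated_text"][-1]["content"])
```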