|
---
license: apache-2.0
language:
- en
base_model:
- microsoft/Phi-3.5-mini-instruct
pipeline_tag: text-generation
library_name: transformers
---
|
|
|
# Pico V1 |
|
|
|
Pico v1 is a work-in-progress model. Based on Phi 3.5 Mini, it has been fine-tuned for automatic chain-of-thought (CoT) reasoning and self-reflection.
|
|
|
When generating an output, Pico produces three sections: a reasoning section, a self-reflection section, and an output section.
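
The exact delimiters between the three sections are not specified here, so the following is only a hypothetical illustration of the shape of a reply; the section headings are placeholders, not guaranteed tokens:

```text
Reasoning:
17 * 24 = 17 * 20 + 17 * 4 = 340 + 68 = 408

Self-reflection:
Checking: 408 / 24 = 17, so the multiplication is consistent.

Output:
17 * 24 = 408
```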
|
|
|
Pico v1 struggles with tasks that are not question-oriented (small talk, roleplay, etc.).
|
|
|
Here is an example of how to use it:
|
|
|
```python
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Phi 3.5 chat template (Jinja), shown for reference; build_prompt() below
# applies the same format manually.
phi3_template = (
    "{{ bos_token }}"
    "{% for message in messages %}"
    "{{ '<|' + message['role'] + '|>\\n' + message['content'] + '<|end|>\\n' }}"
    "{% endfor %}"
    "{% if add_generation_prompt %}"
    "{{ '<|assistant|>\\n' }}"
    "{% endif %}"
)
phi3_template_eos_token = "<|end|>"


def build_prompt(messages, bos_token="<|start|>", add_generation_prompt=True):
    """Build a prompt using the Phi 3.5 chat template."""
    prompt = bos_token
    for message in messages:
        prompt += f"<|{message['role']}|>\n{message['content']}\n<|end|>\n"
    if add_generation_prompt:
        prompt += "<|assistant|>\n"
    return prompt


def chat_with_model():
    # Load the model and tokenizer
    model_name = "LucidityAI/Pico-v1-3b"
    print("Loading model and tokenizer...")

    # Enforce GPU usage
    if not torch.cuda.is_available():
        raise RuntimeError(
            "CUDA is not available. Please ensure your GPU and CUDA "
            "environment are configured correctly."
        )

    device = torch.device("cuda")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name, torch_dtype=torch.float16
    ).to(device)
    print("Model and tokenizer loaded successfully.")

    # Chat loop
    print("Start chatting with the model! Type 'exit' to quit.")
    conversation = []
    while True:
        user_input = input("You: ")
        if user_input.lower() == "exit":
            print("Goodbye!")
            break

        # Append the user's message to the conversation
        conversation.append({"role": "user", "content": user_input})

        # Build the input prompt using the Phi 3.5 template
        prompt = build_prompt(conversation, bos_token=tokenizer.bos_token or "<|start|>")

        # Tokenize the input prompt
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024).to(device)

        # Generate a response
        outputs = model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=512,
            num_return_sequences=1,
            do_sample=True,
            temperature=0.5,
            pad_token_id=tokenizer.eos_token_id,
        )

        # Decode only the newly generated tokens (everything after the prompt)
        generated_tokens = outputs[0][inputs.input_ids.shape[1]:]
        assistant_reply = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
        print(f"Model: {assistant_reply}")

        # Append the assistant's reply to the conversation
        conversation.append({"role": "assistant", "content": assistant_reply})


if __name__ == "__main__":
    chat_with_model()
```
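
The `build_prompt()` helper reproduces the Phi 3.5 chat format by hand. If the tokenizer shipped with this checkpoint includes a chat template (an assumption, depending on how the repository is configured), `tokenizer.apply_chat_template` can build the same prompt string in one call; a minimal sketch:

```python
from transformers import AutoTokenizer

# Assumes the Pico tokenizer inherits Phi 3.5's chat template; if it does not,
# fall back to the build_prompt() helper shown above.
tokenizer = AutoTokenizer.from_pretrained("LucidityAI/Pico-v1-3b")

conversation = [{"role": "user", "content": "What is 17 * 24?"}]

# tokenize=False returns the formatted prompt string; add_generation_prompt
# appends the trailing assistant turn so the model starts its reply there.
prompt = tokenizer.apply_chat_template(
    conversation, tokenize=False, add_generation_prompt=True
)
print(prompt)
```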