| """Module containing the AlpacaQAPromptTokenizingStrategy class""" | |
| from typing import Tuple | |
| from axolotl.prompt_tokenizers import ( | |
| AlpacaPromptTokenizingStrategy, | |
| InstructionPromptTokenizingStrategy, | |
| ) | |
| from axolotl.prompters import AlpacaPrompter, PromptStyle | |
def load(tokenizer, cfg):
    """Build the default Alpaca tokenizing strategy.

    Creates an ``AlpacaPrompter`` with ``PromptStyle.CHAT.value`` and wraps
    it in an ``AlpacaPromptTokenizingStrategy``.

    Args:
        tokenizer: Forwarded unchanged to the strategy constructor.
        cfg: Configuration object; its ``train_on_inputs`` and
            ``sequence_len`` attributes are read and forwarded.

    Returns:
        The constructed ``AlpacaPromptTokenizingStrategy``.
    """
    chat_prompter = AlpacaPrompter(PromptStyle.CHAT.value)
    return AlpacaPromptTokenizingStrategy(
        chat_prompter, tokenizer, cfg.train_on_inputs, cfg.sequence_len
    )
class AlpacaConcisePrompter(AlpacaPrompter):
    """
    AlpacaPrompter variant whose system prompts ask for concise responses
    """

    # NOTE(review): presumably AlpacaPrompter reads these two class attributes
    # when assembling a prompt -- confirm against the base class.

    # System text for tasks that come with an accompanying input.
    system_prompt = (
        "Below is an instruction that describes a task, "
        "paired with an input that provides further context. "
        "Write a response that concisely and appropriately completes the request."
        "\n\n"
    )

    # System text for tasks that have an instruction only.
    system_no_input_prompt = (
        "Below is an instruction that describes a task. "
        "Write a response that appropriately and concisely completes the request."
        "\n\n"
    )
class AlpacaQAPromptTokenizingStrategy(InstructionPromptTokenizingStrategy):
    """
    Tokenizing strategy for AlpacaQA records keyed by "question" / "answer"
    """

    def parse_instruction_fields(self, prompt) -> Tuple[str, str, str]:
        """Extract the fields of one AlpacaQA record.

        Args:
            prompt: Mapping that must provide the ``"question"`` and
                ``"answer"`` keys.

        Returns:
            ``(question, "", answer)``; the middle element is always the
            empty string. Presumably the slots are
            (instruction, input, response) as expected by the base class --
            confirm against ``InstructionPromptTokenizingStrategy``.
        """
        question = prompt["question"]
        answer = prompt["answer"]
        return question, "", answer
class CamelAIPromptTokenizingStrategy(InstructionPromptTokenizingStrategy):
    """
    Tokenizing strategy for CamelAI records keyed by "message_1" / "message_2"
    """

    def parse_instruction_fields(self, prompt) -> Tuple[str, str, str]:
        """Extract the fields of one CamelAI record.

        Args:
            prompt: Mapping that must provide the ``"message_1"`` and
                ``"message_2"`` keys.

        Returns:
            ``(message_1, "", message_2)``; the middle element is always the
            empty string. Presumably the slots are
            (instruction, input, response) as expected by the base class --
            confirm against ``InstructionPromptTokenizingStrategy``.
        """
        first_message = prompt["message_1"]
        second_message = prompt["message_2"]
        return first_message, "", second_message
def load_concise(tokenizer, cfg):
    """Build an Alpaca tokenizing strategy that uses the concise prompter.

    Creates an ``AlpacaConcisePrompter`` with ``PromptStyle.CHAT.value`` and
    wraps it in an ``AlpacaPromptTokenizingStrategy``.

    Args:
        tokenizer: Forwarded unchanged to the strategy constructor.
        cfg: Configuration object; its ``train_on_inputs`` and
            ``sequence_len`` attributes are read and forwarded.

    Returns:
        The constructed ``AlpacaPromptTokenizingStrategy``.
    """
    concise_prompter = AlpacaConcisePrompter(PromptStyle.CHAT.value)
    return AlpacaPromptTokenizingStrategy(
        concise_prompter, tokenizer, cfg.train_on_inputs, cfg.sequence_len
    )
def load_qa(tokenizer, cfg):
    """Build the AlpacaQA tokenizing strategy.

    Creates an ``AlpacaPrompter`` with ``PromptStyle.CHAT.value`` and wraps
    it in an ``AlpacaQAPromptTokenizingStrategy``.

    Args:
        tokenizer: Forwarded unchanged to the strategy constructor.
        cfg: Configuration object; its ``train_on_inputs`` and
            ``sequence_len`` attributes are read and forwarded.

    Returns:
        The constructed ``AlpacaQAPromptTokenizingStrategy``.
    """
    chat_prompter = AlpacaPrompter(PromptStyle.CHAT.value)
    return AlpacaQAPromptTokenizingStrategy(
        chat_prompter, tokenizer, cfg.train_on_inputs, cfg.sequence_len
    )
def load_camel_ai(tokenizer, cfg):
    """Build the CamelAI tokenizing strategy.

    Creates an ``AlpacaPrompter`` with ``PromptStyle.CHAT.value`` and wraps
    it in a ``CamelAIPromptTokenizingStrategy``.

    Args:
        tokenizer: Forwarded unchanged to the strategy constructor.
        cfg: Configuration object; its ``train_on_inputs`` and
            ``sequence_len`` attributes are read and forwarded.

    Returns:
        The constructed ``CamelAIPromptTokenizingStrategy``.
    """
    chat_prompter = AlpacaPrompter(PromptStyle.CHAT.value)
    return CamelAIPromptTokenizingStrategy(
        chat_prompter, tokenizer, cfg.train_on_inputs, cfg.sequence_len
    )