Spaces:
Configuration error
Configuration error
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= | |
import re | |
from pydantic import BaseModel, Field, field_validator | |
class AlpacaItem(BaseModel): | |
r"""Represents an instruction-response item in the Alpaca format. | |
Appropripate for both cases where input field is empty, or populated. | |
Provides parsing from string format using the class method from_string(). | |
Args: | |
instruction (str): The instruction/question/prompt | |
input (str): Input context or examples (put empty string if none) | |
output (str): The response/answer to the instruction | |
""" | |
instruction: str = Field(description="The instruction/question/prompt") | |
input: str = Field( | |
description="Optional context or input for the task." | |
" For example, when the instruction is \"Summarize the " | |
"following article\", the input is the article." | |
) | |
output: str = Field(description="The response/answer to the instruction") | |
def no_section_markers(cls, value: str) -> str: | |
r"""Ensures fields don't contain section markers like '### | |
Response:' | |
""" | |
if ( | |
'### Response' in value | |
or '### Instruction' in value | |
or '### Input' in value | |
): | |
raise ValueError("Field cannot contain section markers") | |
return value.strip() | |
def from_string(cls, text: str) -> "AlpacaItem": | |
r"""Creates an AlpacaItem from a formatted string. | |
Args: | |
text: String in either of these formats: | |
With input: | |
### Instruction: | |
{instruction} | |
### Input: | |
{input} | |
### Response: | |
{response} | |
Without input: | |
### Instruction: | |
{instruction} | |
### Response: | |
{response} | |
Returns: | |
AlpacaItem: Parsed instance | |
Raises: | |
ValueError: text doesn't match expected format or sections missing | |
""" | |
# Strip and standardize newlines | |
text = text.strip().replace('\r\n', '\n') | |
# Try to extract sections using regex | |
instruction_match = re.search( | |
r'###\s*Instruction:\s*\n(.+?)(?=\n###|\Z)', text, re.DOTALL | |
) | |
input_match = re.search( | |
r'###\s*Input:\s*\n(.+?)(?=\n###|\Z)', text, re.DOTALL | |
) | |
response_match = re.search( | |
r'###\s*Response:\s*\n(.+?)(?=\n###|\Z)', text, re.DOTALL | |
) | |
if not instruction_match or not response_match: | |
raise ValueError( | |
"Text must contain '### Instruction:'" | |
" and '### Response:' sections" | |
) | |
return cls( | |
instruction=instruction_match.group(1).strip(), | |
input=input_match.group(1).strip() if input_match else "", | |
output=response_match.group(1).strip(), | |
) | |
def to_string(self) -> str: | |
r"""Converts the AlpacaItem to its string representation. | |
Returns: | |
str: Formatted string representation with sections markers | |
""" | |
return "\n".join( | |
[ | |
"### Instruction:", | |
self.instruction, | |
"", | |
"### Input:", | |
self.input, | |
"", | |
"### Response:", | |
self.output, | |
] | |
) | |