# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
import json
import logging
from functools import wraps
from typing import List, Optional, Union

from camel.agents import ChatAgent
from camel.configs import ChatGPTConfig
from camel.models import BaseModelBackend, ModelFactory
from camel.runtime import BaseRuntime
from camel.runtime.utils import FunctionRiskToolkit, IgnoreRiskToolkit
from camel.toolkits import FunctionTool
from camel.types import ModelPlatformType, ModelType

logger = logging.getLogger(__name__)

GUARDPROMPT = """You are a function safety evaluator tasked with assessing the
potential risk level of a given function based on both its description
and parameters. Your goal is to determine if the function may pose any
harm to the user's environment, such as deleting files, executing
arbitrary code, or accessing sensitive information. Pay special attention
to the provided parameters — even if a function has the potential to be
dangerous, its actual parameters may indicate harmless behavior, and the
risk level should be adjusted accordingly. Use the `function_risk` tool to
assign a risk score to the function, based on the following criteria:
- **Score 1**: No harm. This includes simple operations like mathematical
    calculations, content searches, or data retrievals that do not impact
    the user's environment or access sensitive resources. This also
    includes functions with potentially dangerous capabilities that have
    harmless, controlled parameters that ensure safe execution.
- **Score 2**: Minimal harm. The function might read user files, retrieve
    non-sensitive data, or access other low-risk resources,
    posing little risk to the user.
- **Score 3**: Risk present. The function might delete files, modify the
    file system, execute arbitrary code, or access sensitive data, which
    could negatively impact the user's environment. However, if the
    actual parameters provided clearly indicate safe and restricted
    usage, this risk should be downgraded accordingly.
When evaluating, always consider both the function's description and its
specific parameters. If the function appears risky due to its design but
the provided parameters indicate a safe and non-impactful operation,
adjust the risk score to reflect this. Assign an appropriate risk score
and provide a brief explanation of your reasoning based on the function's
description and the actual parameters given.
YOU MUST USE THE `function_risk` TOOL TO ASSESS THE RISK
LEVEL OF EACH FUNCTION.
"""


class LLMGuardRuntime(BaseRuntime):
    r"""A runtime that evaluates the risk level of functions using
    a language model.

    Args:
        prompt (str): The prompt to use for the language model.
            (default: :obj:`GUARDPROMPT`)
        model (BaseModelBackend): The language model to use.
            (default: :obj:`None`)
        verbose (bool): Whether to print verbose output.
            (default: :obj:`False`)
    """

    def __init__(
        self,
        prompt: str = GUARDPROMPT,
        model: Optional[BaseModelBackend] = None,
        verbose: bool = False,
    ):
        super().__init__()
        self.prompt = prompt
        self.model = model
        self.verbose = verbose

        if not self.model:
            self.model = ModelFactory.create(
                model_platform=ModelPlatformType.DEFAULT,
                model_type=ModelType.DEFAULT,
                model_config_dict=ChatGPTConfig().as_dict(),
            )
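
        # Expose the ignore-risk tool so a user can explicitly accept a
        # flagged risk for a named function before calling it.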
        self.ignore_toolkit = IgnoreRiskToolkit(verbose=verbose)
        self.ignore_tool = self.ignore_toolkit.get_tools()[0]
        self.tools_map[self.ignore_tool.get_function_name()] = self.ignore_tool
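
        # The guard agent evaluates every call against the prompt above and
        # must reply through the external `function_risk` tool.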
        self.agent = ChatAgent(
            system_message=self.prompt,
            model=self.model,
            external_tools=[
                *FunctionRiskToolkit(verbose=verbose).get_tools(),
            ],
        )

    def add(  # type: ignore[override]
        self,
        funcs: Union[FunctionTool, List[FunctionTool]],
        threshold: int = 2,
    ) -> "LLMGuardRuntime":
        r"""Add a function or list of functions to the runtime.

        Args:
            funcs (FunctionTool or List[FunctionTool]): The function or
                list of functions to add.
            threshold (int): The risk threshold for functions.
                (default: :obj:`2`)

        Returns:
            LLMGuardRuntime: The current runtime.
        """
        if not isinstance(funcs, list):
            funcs = [funcs]
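
        # Wrap each tool's underlying callable so every invocation is
        # risk-assessed by the guard agent before it executes.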
        for func in funcs:
            inner_func = func.func

            # Create a wrapper that explicitly binds `func` and, via
            # functools.wraps, preserves the wrapped function's metadata.
            @wraps(inner_func)
            def wrapper(
                *args,
                func=func,
                inner_func=inner_func,
                threshold=threshold,
                **kwargs,
            ):
                function_name = func.get_function_name()
                if function_name in self.ignore_toolkit.ignored_risks:
                    reason = self.ignore_toolkit.ignored_risks.pop(
                        function_name
                    )
                    logger.info(
                        f"Ignored risk for function {function_name}: {reason}"
                    )
                    return inner_func(*args, **kwargs)
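
                # Ask the guard agent to score this specific call; it must
                # respond through the external `function_risk` tool.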
                self.agent.init_messages()
                resp = self.agent.step(
                    f"""
                    Function is: {function_name}
                    Function description: {func.get_function_description()}
                    Args: {args}
                    Kwargs: {kwargs}
                    """
                )

                tool_call = resp.info.get("external_tool_request", None)
                if not tool_call:
                    logger.error("No tool call found in response.")
                    return {
                        "error": "Risk assessment failed. Disabling function."
                    }
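
                # Parse the structured verdict (score and reason) carried in
                # the tool-call arguments.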
                data = tool_call.function.arguments
                data = json.loads(data)

                if threshold < data["score"]:
                    message = (
                        f"Risk assessment not passed for {function_name}. "
                        f"Score: {data['score']} > Threshold: {threshold}"
                        f"\nReason: {data['reason']}"
                    )
                    logger.warning(message)
                    return {"error": message}

                logger.info(
                    f"Function {function_name} passed risk assessment. "
                    f"Score: {data['score']}, Reason: {data['reason']}"
                )
                if self.verbose:
                    print(
                        f"Function {function_name} passed risk assessment. "
                        f"Score: {data['score']}, Reason: {data['reason']}"
                    )

                return inner_func(*args, **kwargs)
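
            # Swap in the guarded wrapper and register the function with the
            # ignore-risk toolkit so flagged calls can be approved explicitly.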
            func.func = wrapper
            self.tools_map[func.get_function_name()] = func
            self.ignore_toolkit.add(func.get_function_name())

        return self

    def reset(self) -> "LLMGuardRuntime":
        r"""Resets the runtime to its initial state."""
        self.ignore_toolkit.ignored_risks = dict()
        self.agent.reset()
        return self
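

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the library API). The
# `SearchToolkit().search_wiki` tool, its `entity` parameter, and default
# model credentials are assumptions; substitute any FunctionTool available
# in your environment.
#
#   from camel.toolkits import FunctionTool, SearchToolkit
#
#   runtime = LLMGuardRuntime(verbose=True)
#   runtime.add(FunctionTool(SearchToolkit().search_wiki), threshold=2)
#
#   # Every registered tool is now risk-scored before it runs: low-risk calls
#   # pass through to the original function, higher-risk calls return an
#   # {"error": ...} dict instead of executing.
#   result = runtime.tools_map["search_wiki"](entity="CAMEL-AI")
# ---------------------------------------------------------------------------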