Spaces:
Configuration error
Configuration error
File size: 4,040 Bytes
1d6bdab |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
from dotenv import load_dotenv
load_dotenv()
import os
from loguru import logger
from camel.models import ModelFactory
from camel.toolkits import (
AudioAnalysisToolkit,
CodeExecutionToolkit,
DocumentProcessingToolkit,
ExcelToolkit,
ImageAnalysisToolkit,
SearchToolkit,
VideoAnalysisToolkit,
WebToolkit,
)
from camel.types import ModelPlatformType, ModelType
from camel.configs import ChatGPTConfig
from utils import GAIABenchmark
# Configuration
LEVEL = 1
SAVE_RESULT = True
test_idx = [0]
def main():
"""Main function to run the GAIA benchmark."""
# Create cache directory
cache_dir = "tmp/"
os.makedirs(cache_dir, exist_ok=True)
# Create models for different components
models = {
"user": ModelFactory.create(
model_platform=ModelPlatformType.OPENAI,
model_type=ModelType.GPT_4O,
model_config_dict=ChatGPTConfig(temperature=0, top_p=1).as_dict(),
),
"assistant": ModelFactory.create(
model_platform=ModelPlatformType.OPENAI,
model_type=ModelType.GPT_4O,
model_config_dict=ChatGPTConfig(temperature=0, top_p=1).as_dict(),
),
"web": ModelFactory.create(
model_platform=ModelPlatformType.OPENAI,
model_type=ModelType.GPT_4O,
model_config_dict=ChatGPTConfig(temperature=0, top_p=1).as_dict(),
),
"planning": ModelFactory.create(
model_platform=ModelPlatformType.OPENAI,
model_type=ModelType.GPT_4O,
model_config_dict=ChatGPTConfig(temperature=0, top_p=1).as_dict(),
),
"video": ModelFactory.create(
model_platform=ModelPlatformType.OPENAI,
model_type=ModelType.GPT_4O,
model_config_dict=ChatGPTConfig(temperature=0, top_p=1).as_dict(),
),
"image": ModelFactory.create(
model_platform=ModelPlatformType.OPENAI,
model_type=ModelType.GPT_4O,
model_config_dict=ChatGPTConfig(temperature=0, top_p=1).as_dict(),
),
"search": ModelFactory.create(
model_platform=ModelPlatformType.OPENAI,
model_type=ModelType.GPT_4O,
model_config_dict=ChatGPTConfig(temperature=0, top_p=1).as_dict(),
),
}
# Configure toolkits
tools = [
*WebToolkit(
headless=False, # Set to True for headless mode (e.g., on remote servers)
web_agent_model=models["web"],
planning_agent_model=models["planning"],
).get_tools(),
*DocumentProcessingToolkit().get_tools(),
*VideoAnalysisToolkit(model=models["video"]).get_tools(), # This requires OpenAI Key
*AudioAnalysisToolkit().get_tools(), # This requires OpenAI Key
*CodeExecutionToolkit(sandbox="subprocess", verbose=True).get_tools(),
*ImageAnalysisToolkit(model=models["image"]).get_tools(),
*SearchToolkit(model=models["search"]).get_tools(),
*ExcelToolkit().get_tools(),
]
# Configure agent roles and parameters
user_agent_kwargs = {"model": models["user"]}
assistant_agent_kwargs = {"model": models["assistant"], "tools": tools}
# Initialize benchmark
benchmark = GAIABenchmark(
data_dir="data/gaia",
save_to=f"results/result.json"
)
# Print benchmark information
print(f"Number of validation examples: {len(benchmark.valid)}")
print(f"Number of test examples: {len(benchmark.test)}")
# Run benchmark
result = benchmark.run(
on="valid",
level=LEVEL,
idx=test_idx,
save_result=SAVE_RESULT,
user_role_name="user",
user_agent_kwargs=user_agent_kwargs,
assistant_role_name="assistant",
assistant_agent_kwargs=assistant_agent_kwargs,
)
# Output results
logger.success(f"Correct: {result['correct']}, Total: {result['total']}")
logger.success(f"Accuracy: {result['accuracy']}")
if __name__ == "__main__":
main()
|