callanwu committed on
Commit
0b51b5a
·
1 Parent(s): 41100e9

add deep search benchmark

Browse files
Files changed (1) hide show
  1. deepsearch_result.jsonl +1 -0
deepsearch_result.jsonl CHANGED
@@ -7,6 +7,7 @@
7
  {"org": "Alibaba","link": "https://github.com/Alibaba-NLP/WebAgent","method": "WebShaper", "model": "qwq-32b", "overall": 0.497}
8
  {"org": "Alibaba","link": "https://github.com/Alibaba-NLP/WebAgent","method": "WebShaper", "model": "qwen2.5-72b-instruct", "overall": 0.522}
9
  {"org": "Tencent","link": "https://github.com/TencentCloudADP/youtu-agent","method": "Youtu-agent", "model": "deepseek-v3.1", "overall": 0.7147}
 
10
  {"org": "Miromind","link": "https://github.com/MiroMindAI/MiroThinker","method": "MiroThinker-SFT-v0.1", "model": "qwen3-8b", "overall": 0.413}
11
  {"org": "Miromind","link": "https://github.com/MiroMindAI/MiroThinker","method": "MiroThinker-DPO-v0.1", "model": "qwen3-8b", "overall": 0.457}
12
  {"org": "Miromind","link": "https://github.com/MiroMindAI/MiroThinker","method": "MiroThinker-SFT-v0.1", "model": "qwen3-32b", "overall": 0.457}
 
7
  {"org": "Alibaba","link": "https://github.com/Alibaba-NLP/WebAgent","method": "WebShaper", "model": "qwq-32b", "overall": 0.497}
8
  {"org": "Alibaba","link": "https://github.com/Alibaba-NLP/WebAgent","method": "WebShaper", "model": "qwen2.5-72b-instruct", "overall": 0.522}
9
  {"org": "Tencent","link": "https://github.com/TencentCloudADP/youtu-agent","method": "Youtu-agent", "model": "deepseek-v3.1", "overall": 0.7147}
10
+ {"org": "Tencent","link": "https://github.com/TencentCloudADP/youtu-agent","method": "Youtu-agent", "model": "deepseek-v3-0324", "overall": 0.6071}
11
  {"org": "Miromind","link": "https://github.com/MiroMindAI/MiroThinker","method": "MiroThinker-SFT-v0.1", "model": "qwen3-8b", "overall": 0.413}
12
  {"org": "Miromind","link": "https://github.com/MiroMindAI/MiroThinker","method": "MiroThinker-DPO-v0.1", "model": "qwen3-8b", "overall": 0.457}
13
  {"org": "Miromind","link": "https://github.com/MiroMindAI/MiroThinker","method": "MiroThinker-SFT-v0.1", "model": "qwen3-32b", "overall": 0.457}