Spaces:
Running
Running
add deep search benchmark
Browse files- deepsearch_result.jsonl +6 -6
deepsearch_result.jsonl
CHANGED
@@ -1,11 +1,11 @@
|
|
1 |
{"org": "RUC","link": "https://github.com/RUC-NLPIR/WebThinker","method": "WebThinker-Base", "model": "qwq-32B", "overall": 0.419}
|
2 |
{"org": "RUC","link": "https://github.com/RUC-NLPIR/WebThinker","method": "WebThinker-RL", "model": "qwq-32B", "overall": 0.465}
|
3 |
-
{"org": "Alibaba","link": "https://github.com/Alibaba-NLP/WebAgent
|
4 |
-
{"org": "Alibaba","link": "https://github.com/Alibaba-NLP/WebAgent
|
5 |
-
{"org": "Alibaba","link": "https://github.com/Alibaba-NLP/WebAgent
|
6 |
-
{"org": "Alibaba","link": "https://github.com/Alibaba-NLP/WebAgent
|
7 |
-
{"org": "Alibaba","link": "https://github.com/Alibaba-NLP/WebAgent
|
8 |
-
{"org": "Alibaba","link": "https://github.com/Alibaba-NLP/WebAgent
|
9 |
{"org": "Tencent","link": "https://github.com/TencentCloudADP/youtu-agent","method": "Youtu-agent", "model": "deepseek-v3.1", "overall": 0.7147}
|
10 |
{"org": "Miromind","link": "https://github.com/MiroMindAI/MiroThinker","method": "MiroThinker-SFT-v0.1", "model": "qwen3-8b", "overall": 0.413}
|
11 |
{"org": "Miromind","link": "https://github.com/MiroMindAI/MiroThinker","method": "MiroThinker-DPO-v0.1", "model": "qwen3-8b", "overall": 0.457}
|
|
|
1 |
{"org": "RUC","link": "https://github.com/RUC-NLPIR/WebThinker","method": "WebThinker-Base", "model": "qwq-32B", "overall": 0.419}
|
2 |
{"org": "RUC","link": "https://github.com/RUC-NLPIR/WebThinker","method": "WebThinker-RL", "model": "qwq-32B", "overall": 0.465}
|
3 |
+
{"org": "Alibaba","link": "https://github.com/Alibaba-NLP/WebAgent","method": "WebDancer", "model": "qwen2.5-7b-instruct", "overall": 0.36}
|
4 |
+
{"org": "Alibaba","link": "https://github.com/Alibaba-NLP/WebAgent","method": "WebDancer", "model": "qwen2.5-32b-instruct", "overall": 0.384}
|
5 |
+
{"org": "Alibaba","link": "https://github.com/Alibaba-NLP/WebAgent","method": "WebDancer", "model": "qwq-32b", "overall": 0.479}
|
6 |
+
{"org": "Alibaba","link": "https://github.com/Alibaba-NLP/WebAgent","method": "WebShaper", "model": "qwen2.5-32b-instruct", "overall": 0.514}
|
7 |
+
{"org": "Alibaba","link": "https://github.com/Alibaba-NLP/WebAgent","method": "WebShaper", "model": "qwq-32b", "overall": 0.497}
|
8 |
+
{"org": "Alibaba","link": "https://github.com/Alibaba-NLP/WebAgent","method": "WebShaper", "model": "qwen2.5-72b-instruct", "overall": 0.522}
|
9 |
{"org": "Tencent","link": "https://github.com/TencentCloudADP/youtu-agent","method": "Youtu-agent", "model": "deepseek-v3.1", "overall": 0.7147}
|
10 |
{"org": "Miromind","link": "https://github.com/MiroMindAI/MiroThinker","method": "MiroThinker-SFT-v0.1", "model": "qwen3-8b", "overall": 0.413}
|
11 |
{"org": "Miromind","link": "https://github.com/MiroMindAI/MiroThinker","method": "MiroThinker-DPO-v0.1", "model": "qwen3-8b", "overall": 0.457}
|