Update README.md
Browse files
README.md
CHANGED
@@ -108,11 +108,8 @@ bash scripts/spatialvla_4b_finetune/finetune_lora.sh
|
|
108 |
```
|
109 |
|
110 |
## Evaluation
|
|
|
111 |
|
112 |
-
<details>
|
113 |
-
<summary>
|
114 |
-
SimplerEnv evaluation on Google Robot tasks.
|
115 |
-
</summary>
|
116 |
<table border="1" class="dataframe">
|
117 |
<thead>
|
118 |
<tr style="text-align: center;">
|
@@ -278,137 +275,131 @@ bash scripts/spatialvla_4b_finetune/finetune_lora.sh
|
|
278 |
</tbody>
|
279 |
</table>
|
280 |
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
<
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
<
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
</
|
298 |
-
<
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
<
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
<
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
<
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
<
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
<
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
<
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
|
378 |
-
|
379 |
-
|
380 |
-
|
381 |
-
|
382 |
-
<
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
-
|
390 |
-
|
391 |
-
|
392 |
-
|
393 |
-
|
394 |
-
<
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
|
399 |
-
|
400 |
-
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
</tbody>
|
407 |
-
</table>
|
408 |
-
</details>
|
409 |
-
|
410 |
-
<details>
|
411 |
-
<summary>LIBERO Simulation Benchmark Results.</summary>
|
412 |
<table border="1" class="dataframe">
|
413 |
<thead>
|
414 |
<tr style="text-align: center;">
|
@@ -501,22 +492,20 @@ bash scripts/spatialvla_4b_finetune/finetune_lora.sh
|
|
501 |
</tbody>
|
502 |
</table>
|
503 |
|
504 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
505 |
|
506 |
-
|
507 |
-
<summary>Zero-shot Robot Control Evaluation on WidowX Robot.</summary>
|
508 |
-
<img src="https://cdn-uploads.huggingface.co/production/uploads/6535045a910b844786a6642f/SUPyXwcdfnWranO04tulL.png" alt="perform">
|
509 |
-
</details>
|
510 |
|
511 |
-
<details>
|
512 |
-
<summary>Spatial Understanding Capability Evaluation.</summary>
|
513 |
-
<img src="https://cdn-uploads.huggingface.co/production/uploads/6535045a910b844786a6642f/g-EfM-6M7iM9IYryUTwLA.png" alt="perform">
|
514 |
-
</details>
|
515 |
|
516 |
-
<details>
|
517 |
-
<summary>Adapting to New Robot Setups on Franka Robot.</summary>
|
518 |
-
<img src="https://cdn-uploads.huggingface.co/production/uploads/6535045a910b844786a6642f/4Z_vjQvsDGUcHCwmBCtRa.png" alt="perform">
|
519 |
-
</details>
|
520 |
|
521 |
|
522 |
## Citation
|
|
|
108 |
```
|
109 |
|
110 |
## Evaluation
|
111 |
+
- SimplerEnv evaluation on Google Robot tasks.
|
112 |
|
|
|
|
|
|
|
|
|
113 |
<table border="1" class="dataframe">
|
114 |
<thead>
|
115 |
<tr style="text-align: center;">
|
|
|
275 |
</tbody>
|
276 |
</table>
|
277 |
|
278 |
+
- SimplerEnv evaluation on WidowX Robot tasks.
|
279 |
+
|
280 |
+
<table border="1" class="dataframe">
|
281 |
+
<thead>
|
282 |
+
<tr style="text-align: center;">
|
283 |
+
<th rowspan="2">Model</th>
|
284 |
+
<th colspan="2">Put Spoon on Towel</th>
|
285 |
+
<th colspan="2">Put Carrot on Plate</th>
|
286 |
+
<th colspan="2">Stack Green Block on Yellow Block</th>
|
287 |
+
<th colspan="2">Put Eggplant in Yellow Basket</th>
|
288 |
+
<th rowspan="2">#Overall Average</th>
|
289 |
+
</tr>
|
290 |
+
<tr style="text-align: center;">
|
291 |
+
<th>Grasp Spoon</th>
|
292 |
+
<th>Success</th>
|
293 |
+
<th>Grasp Carrot</th>
|
294 |
+
<th>Success</th>
|
295 |
+
<th>Grasp Green Block</th>
|
296 |
+
<th>Success</th>
|
297 |
+
<th>Grasp Eggplant</th>
|
298 |
+
<th>Success</th>
|
299 |
+
</tr>
|
300 |
+
</thead>
|
301 |
+
<tbody>
|
302 |
+
<tr>
|
303 |
+
<td>RT-1-X</td>
|
304 |
+
<td>16.7%</td>
|
305 |
+
<td>0.0%</td>
|
306 |
+
<td>20.8%</td>
|
307 |
+
<td>4.2%</td>
|
308 |
+
<td>8.3%</td>
|
309 |
+
<td>0.0%</td>
|
310 |
+
<td>0.0%</td>
|
311 |
+
<td>0.0%</td>
|
312 |
+
<td>1.1%</td>
|
313 |
+
</tr>
|
314 |
+
<tr>
|
315 |
+
<td>Octo-Base</td>
|
316 |
+
<td>34.7%</td>
|
317 |
+
<td>12.5%</td>
|
318 |
+
<td>52.8%</td>
|
319 |
+
<td>8.3%</td>
|
320 |
+
<td>31.9%</td>
|
321 |
+
<td>0.0%</td>
|
322 |
+
<td>66.7%</td>
|
323 |
+
<td>43.1%</td>
|
324 |
+
<td>16.0%</td>
|
325 |
+
</tr>
|
326 |
+
<tr>
|
327 |
+
<td>Octo-Small</td>
|
328 |
+
<td>77.8%</td>
|
329 |
+
<td>47.2%</td>
|
330 |
+
<td>27.8%</td>
|
331 |
+
<td>9.7%</td>
|
332 |
+
<td>40.3%</td>
|
333 |
+
<td>4.2%</td>
|
334 |
+
<td>87.5%</td>
|
335 |
+
<td>56.9%</td>
|
336 |
+
<td>30.0%</td>
|
337 |
+
</tr>
|
338 |
+
<tr>
|
339 |
+
<td>OpenVLA</td>
|
340 |
+
<td>4.1%</td>
|
341 |
+
<td>0.0%</td>
|
342 |
+
<td>33.3%</td>
|
343 |
+
<td>0.0%</td>
|
344 |
+
<td>12.5%</td>
|
345 |
+
<td>0.0%</td>
|
346 |
+
<td>8.3%</td>
|
347 |
+
<td>4.1%</td>
|
348 |
+
<td>1.0%</td>
|
349 |
+
</tr>
|
350 |
+
<tr>
|
351 |
+
<td>RoboVLM (zero-shot)</td>
|
352 |
+
<td>37.5%</td>
|
353 |
+
<td>20.8%</td>
|
354 |
+
<td>33.3%</td>
|
355 |
+
<td>25.0%</td>
|
356 |
+
<td>8.3%</td>
|
357 |
+
<td>8.3%</td>
|
358 |
+
<td>0.0%</td>
|
359 |
+
<td>0.0%</td>
|
360 |
+
<td>13.5%</td>
|
361 |
+
</tr>
|
362 |
+
<tr>
|
363 |
+
<td>RoboVLM (fine-tuning)</td>
|
364 |
+
<td>54.2%</td>
|
365 |
+
<td>29.2%</td>
|
366 |
+
<td>25.0%</td>
|
367 |
+
<td>25.0%</td>
|
368 |
+
<td>45.8%</td>
|
369 |
+
<td>12.5%</td>
|
370 |
+
<td>58.3%</td>
|
371 |
+
<td>58.3%</td>
|
372 |
+
<td>31.3%</td>
|
373 |
+
</tr>
|
374 |
+
<tr>
|
375 |
+
<td>SpatialVLA (zero-shot)</td>
|
376 |
+
<td><b>25.0%</b></td>
|
377 |
+
<td><b>20.8%</b></td>
|
378 |
+
<td><b>41.7%</b></td>
|
379 |
+
<td>20.8%</td>
|
380 |
+
<td><b>58.3%</b></td>
|
381 |
+
<td>25.0%</td>
|
382 |
+
<td><b>79.2%</b></td>
|
383 |
+
<td>70.8%</td>
|
384 |
+
<td><b>34.4%</b></td>
|
385 |
+
</tr>
|
386 |
+
<tr>
|
387 |
+
<td>SpatialVLA (fine-tuning)</td>
|
388 |
+
<td><b>20.8%</b></td>
|
389 |
+
<td>16.7%</td>
|
390 |
+
<td>29.2%</td>
|
391 |
+
<td>25.0%</td>
|
392 |
+
<td><b>62.5%</b></td>
|
393 |
+
<td>29.2%</td>
|
394 |
+
<td><b>100.0%</b></td>
|
395 |
+
<td><b>100.0%</b></td>
|
396 |
+
<td><b>42.7%</b></td>
|
397 |
+
</tr>
|
398 |
+
</tbody>
|
399 |
+
</table>
|
400 |
+
|
401 |
+
- LIBERO Simulation Benchmark Results.
|
402 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
403 |
<table border="1" class="dataframe">
|
404 |
<thead>
|
405 |
<tr style="text-align: center;">
|
|
|
492 |
</tbody>
|
493 |
</table>
|
494 |
|
495 |
+
- Zero-shot Robot Control Evaluation on WidowX Robot.
|
496 |
+
|
497 |
+
<img src="https://cdn-uploads.huggingface.co/production/uploads/6535045a910b844786a6642f/SUPyXwcdfnWranO04tulL.png" alt="perform">
|
498 |
+
|
499 |
+
|
500 |
+
- Spatial Understanding Capability Evaluation.
|
501 |
+
|
502 |
+
<img src="https://cdn-uploads.huggingface.co/production/uploads/6535045a910b844786a6642f/g-EfM-6M7iM9IYryUTwLA.png" alt="perform">
|
503 |
+
|
504 |
|
505 |
+
- Adapting to New Robot Setups on Franka Robot.
|
|
|
|
|
|
|
506 |
|
507 |
+
<img src="https://cdn-uploads.huggingface.co/production/uploads/6535045a910b844786a6642f/4Z_vjQvsDGUcHCwmBCtRa.png" alt="perform">
|
|
|
|
|
|
|
508 |
|
|
|
|
|
|
|
|
|
509 |
|
510 |
|
511 |
## Citation
|