IPEC-COMMUNITY
/

spatialvla-4b-224-pt

@@ -108,11 +108,8 @@ bash scripts/spatialvla_4b_finetune/finetune_lora.sh
 ```
 ## Evaluation
-<details>
-  <summary>
-  SimplerEnv evaluation on Google Robot tasks.
-  </summary>
 <table border="1" class="dataframe">
   <thead>
     <tr style="text-align: center;">
@@ -278,137 +275,131 @@ bash scripts/spatialvla_4b_finetune/finetune_lora.sh
   </tbody>
 </table>
-</details>
-<details>
-  <summary>
-  SimplerEnv evaluation on WidowX Robot tasks.
-  </summary>
-  <table border="1" class="dataframe">
-    <thead>
-      <tr style="text-align: center;">
-        <th rowspan="2">Model</th>
-        <th colspan="2">Put Spoon on Towel</th>
-        <th colspan="2">Put Carrot on Plate</th>
-        <th colspan="2">Stack Green Block on Yellow Block</th>
-        <th colspan="2">Put Eggplant in Yellow Basket</th>
-        <th rowspan="2">#Overall Average</th>
-      </tr>
-      <tr style="text-align: center;">
-        <th>Grasp Spoon</th>
-        <th>Success</th>
-        <th>Grasp Carrot</th>
-        <th>Success</th>
-        <th>Grasp Green Block</th>
-        <th>Success</th>
-        <th>Grasp Eggplant</th>
-        <th>Success</th>
-      </tr>
-    </thead>
-    <tbody>
-      <tr>
-        <td>RT-1-X</td>
-        <td>16.7%</td>
-        <td>0.0%</td>
-        <td>20.8%</td>
-        <td>4.2%</td>
-        <td>8.3%</td>
-        <td>0.0%</td>
-        <td>0.0%</td>
-        <td>0.0%</td>
-        <td>1.1%</td>
-      </tr>
-      <tr>
-        <td>Octo-Base</td>
-        <td>34.7%</td>
-        <td>12.5%</td>
-        <td>52.8%</td>
-        <td>8.3%</td>
-        <td>31.9%</td>
-        <td>0.0%</td>
-        <td>66.7%</td>
-        <td>43.1%</td>
-        <td>16.0%</td>
-      </tr>
-      <tr>
-        <td>Octo-Small</td>
-        <td>77.8%</td>
-        <td>47.2%</td>
-        <td>27.8%</td>
-        <td>9.7%</td>
-        <td>40.3%</td>
-        <td>4.2%</td>
-        <td>87.5%</td>
-        <td>56.9%</td>
-        <td>30.0%</td>
-      </tr>
-      <tr>
-        <td>OpenVLA</td>
-        <td>4.1%</td>
-        <td>0.0%</td>
-        <td>33.3%</td>
-        <td>0.0%</td>
-        <td>12.5%</td>
-        <td>0.0%</td>
-        <td>8.3%</td>
-        <td>4.1%</td>
-        <td>1.0%</td>
-      </tr>
-      <tr>
-        <td>RoboVLM (zero-shot)</td>
-        <td>37.5%</td>
-        <td>20.8%</td>
-        <td>33.3%</td>
-        <td>25.0%</td>
-        <td>8.3%</td>
-        <td>8.3%</td>
-        <td>0.0%</td>
-        <td>0.0%</td>
-        <td>13.5%</td>
-      </tr>
-      <tr>
-        <td>RoboVLM (fine-tuning)</td>
-        <td>54.2%</td>
-        <td>29.2%</td>
-        <td>25.0%</td>
-        <td>25.0%</td>
-        <td>45.8%</td>
-        <td>12.5%</td>
-        <td>58.3%</td>
-        <td>58.3%</td>
-        <td>31.3%</td>
-      </tr>
-      <tr>
-        <td>SpatialVLA (zero-shot)</td>
-        <td><b>25.0%</b></td>
-        <td><b>20.8%</b></td>
-        <td><b>41.7%</b></td>
-        <td>20.8%</td>
-        <td><b>58.3%</b></td>
-        <td>25.0%</td>
-        <td><b>79.2%</b></td>
-        <td>70.8%</td>
-        <td><b>34.4%</b></td>
-      </tr>
-      <tr>
-        <td>SpatialVLA (fine-tuning)</td>
-        <td><b>20.8%</b></td>
-        <td>16.7%</td>
-        <td>29.2%</td>
-        <td>25.0%</td>
-        <td><b>62.5%</b></td>
-        <td>29.2%</td>
-        <td><b>100.0%</b></td>
-        <td><b>100.0%</b></td>
-        <td><b>42.7%</b></td>
-      </tr>
-    </tbody>
-  </table>
-</details>
-<details>
-  <summary>LIBERO Simulation Benchmark Results.</summary>
 <table border="1" class="dataframe">
   <thead>
     <tr style="text-align: center;">
@@ -501,22 +492,20 @@ bash scripts/spatialvla_4b_finetune/finetune_lora.sh
   </tbody>
 </table>
-</details>
-<details>
-  <summary>Zero-shot Robot Control Evaluation on WidowX Robot.</summary>
-  <img src="https://cdn-uploads.huggingface.co/production/uploads/6535045a910b844786a6642f/SUPyXwcdfnWranO04tulL.png" alt="perform">
-</details>
-<details>
-  <summary>Spatial Understanding Capability Evaluation..</summary>
-  <img src="https://cdn-uploads.huggingface.co/production/uploads/6535045a910b844786a6642f/g-EfM-6M7iM9IYryUTwLA.png" alt="perform">
-</details>
-<details>
-  <summary>Adapting to New Robot Setups on Franka Robot.</summary>
-  <img src="https://cdn-uploads.huggingface.co/production/uploads/6535045a910b844786a6642f/4Z_vjQvsDGUcHCwmBCtRa.png" alt="perform">
-</details>
 ## Citation

 ```
 ## Evaluation
+- SimplerEnv evaluation on Google Robot tasks.
 <table border="1" class="dataframe">
   <thead>
     <tr style="text-align: center;">
   </tbody>
 </table>
+- SimplerEnv evaluation on WidowX Robot tasks.
+<table border="1" class="dataframe">
+  <thead>
+    <tr style="text-align: center;">
+      <th rowspan="2">Model</th>
+      <th colspan="2">Put Spoon on Towel</th>
+      <th colspan="2">Put Carrot on Plate</th>
+      <th colspan="2">Stack Green Block on Yellow Block</th>
+      <th colspan="2">Put Eggplant in Yellow Basket</th>
+      <th rowspan="2">#Overall Average</th>
+    </tr>
+    <tr style="text-align: center;">
+      <th>Grasp Spoon</th>
+      <th>Success</th>
+      <th>Grasp Carrot</th>
+      <th>Success</th>
+      <th>Grasp Green Block</th>
+      <th>Success</th>
+      <th>Grasp Eggplant</th>
+      <th>Success</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>RT-1-X</td>
+      <td>16.7%</td>
+      <td>0.0%</td>
+      <td>20.8%</td>
+      <td>4.2%</td>
+      <td>8.3%</td>
+      <td>0.0%</td>
+      <td>0.0%</td>
+      <td>0.0%</td>
+      <td>1.1%</td>
+    </tr>
+    <tr>
+      <td>Octo-Base</td>
+      <td>34.7%</td>
+      <td>12.5%</td>
+      <td>52.8%</td>
+      <td>8.3%</td>
+      <td>31.9%</td>
+      <td>0.0%</td>
+      <td>66.7%</td>
+      <td>43.1%</td>
+      <td>16.0%</td>
+    </tr>
+    <tr>
+      <td>Octo-Small</td>
+      <td>77.8%</td>
+      <td>47.2%</td>
+      <td>27.8%</td>
+      <td>9.7%</td>
+      <td>40.3%</td>
+      <td>4.2%</td>
+      <td>87.5%</td>
+      <td>56.9%</td>
+      <td>30.0%</td>
+    </tr>
+    <tr>
+      <td>OpenVLA</td>
+      <td>4.1%</td>
+      <td>0.0%</td>
+      <td>33.3%</td>
+      <td>0.0%</td>
+      <td>12.5%</td>
+      <td>0.0%</td>
+      <td>8.3%</td>
+      <td>4.1%</td>
+      <td>1.0%</td>
+    </tr>
+    <tr>
+      <td>RoboVLM (zero-shot)</td>
+      <td>37.5%</td>
+      <td>20.8%</td>
+      <td>33.3%</td>
+      <td>25.0%</td>
+      <td>8.3%</td>
+      <td>8.3%</td>
+      <td>0.0%</td>
+      <td>0.0%</td>
+      <td>13.5%</td>
+    </tr>
+    <tr>
+      <td>RoboVLM (fine-tuning)</td>
+      <td>54.2%</td>
+      <td>29.2%</td>
+      <td>25.0%</td>
+      <td>25.0%</td>
+      <td>45.8%</td>
+      <td>12.5%</td>
+      <td>58.3%</td>
+      <td>58.3%</td>
+      <td>31.3%</td>
+    </tr>
+    <tr>
+      <td>SpatialVLA (zero-shot)</td>
+      <td><b>25.0%</b></td>
+      <td><b>20.8%</b></td>
+      <td><b>41.7%</b></td>
+      <td>20.8%</td>
+      <td><b>58.3%</b></td>
+      <td>25.0%</td>
+      <td><b>79.2%</b></td>
+      <td>70.8%</td>
+      <td><b>34.4%</b></td>
+    </tr>
+    <tr>
+      <td>SpatialVLA (fine-tuning)</td>
+      <td><b>20.8%</b></td>
+      <td>16.7%</td>
+      <td>29.2%</td>
+      <td>25.0%</td>
+      <td><b>62.5%</b></td>
+      <td>29.2%</td>
+      <td><b>100.0%</b></td>
+      <td><b>100.0%</b></td>
+      <td><b>42.7%</b></td>
+    </tr>
+  </tbody>
+</table>
+- LIBERO Simulation Benchmark Results.
 <table border="1" class="dataframe">
   <thead>
     <tr style="text-align: center;">
   </tbody>
 </table>
+- Zero-shot Robot Control Evaluation on WidowX Robot.
+<img src="https://cdn-uploads.huggingface.co/production/uploads/6535045a910b844786a6642f/SUPyXwcdfnWranO04tulL.png" alt="Zero-shot robot control evaluation results on WidowX Robot">
+- Spatial Understanding Capability Evaluation.
+<img src="https://cdn-uploads.huggingface.co/production/uploads/6535045a910b844786a6642f/g-EfM-6M7iM9IYryUTwLA.png" alt="Spatial understanding capability evaluation results">
+- Adapting to New Robot Setups on Franka Robot.
+<img src="https://cdn-uploads.huggingface.co/production/uploads/6535045a910b844786a6642f/4Z_vjQvsDGUcHCwmBCtRa.png" alt="Results of adapting to new robot setups on Franka Robot">
 ## Citation