Jack Monas
committed on
Commit
·
e584ed3
1
Parent(s):
64a5116
remove eval
Browse files
app.py
CHANGED
@@ -87,13 +87,13 @@ def scoring_section():
|
|
87 |
|
88 |
# Intro text
|
89 |
st.write(
|
90 |
-
"Scores combine points from
|
91 |
)
|
92 |
|
93 |
# Points Breakdown in a table
|
94 |
st.markdown("### Points Breakdown")
|
95 |
# Create three columns for a more interesting layout
|
96 |
-
col1, col2
|
97 |
|
98 |
with col1:
|
99 |
st.markdown('<h3 style="margin-left:15px;">Compression</h3>', unsafe_allow_html=True)
|
@@ -115,29 +115,19 @@ def scoring_section():
|
|
115 |
"""
|
116 |
)
|
117 |
|
118 |
-
with col3:
|
119 |
-
st.markdown('<h3 style="margin-left:15px;">Evaluation</h3>', unsafe_allow_html=True)
|
120 |
-
st.markdown(
|
121 |
-
"""
|
122 |
-
- **1st Place**: 20 points
|
123 |
-
- **2nd Place**: 14 points
|
124 |
-
- **3rd Place**: 10 points
|
125 |
-
"""
|
126 |
-
)
|
127 |
# Tie-Breakers in an expander for a cleaner layout
|
128 |
with st.expander("Tie-Breakers"):
|
129 |
st.write(
|
130 |
"The overall winner will be the team with the highest total points. "
|
131 |
"In the event of a tie, the following tie-breakers will be applied in order:\n\n"
|
132 |
-
"1. Highest
|
133 |
-
"2. Highest
|
134 |
-
"3. Highest Compression Challenge score\n\n"
|
135 |
)
|
136 |
|
137 |
# Overall Leaderboard Section
|
138 |
st.write(
|
139 |
-
"The leaderboard, which shows the total points across
|
140 |
-
"Additionally,
|
141 |
"respective Hugging Face submission servers."
|
142 |
)
|
143 |
|
@@ -150,7 +140,7 @@ def main():
|
|
150 |
st.title("1X World Model Challenge")
|
151 |
st.markdown("## Welcome")
|
152 |
st.write(
|
153 |
-
"Welcome to the 1X World Model Challenge. This platform hosts
|
154 |
)
|
155 |
st.write(
|
156 |
"In partnership with Open Drive Lab, we are launching this challenge as part of the [Autonomous Grand Challenge 2025](https://opendrivelab.com/challenge2025/), held in conjunction with the CVPR 2025 (confirmed) and ICCV 2025 (tentative) workshops."
|
@@ -187,11 +177,7 @@ def main():
|
|
187 |
st.write(
|
188 |
"In the Sampling Challenge, your task is to predict a future video frame two seconds in the future given a short clip of robot interactions. The goal is to produce a coherent and plausible continuation of the video, which accurately reflects the dynamics of the scene. Your submission will be judged on how closely it matches the actual frame."
|
189 |
)
|
190 |
-
|
191 |
-
st.markdown("#### Evaluation Challenge")
|
192 |
-
st.write(
|
193 |
-
"The Evaluation Challenge tackles the ultimate question: Can you predict a robot's performance in the real world without physically deploying it? In this challenge, you will be provided with many different policies for a specific task. The objective is to rank these policies according to their expected real-world performance. This ranking will be compared with the actual ranking of the policies."
|
194 |
-
)
|
195 |
|
196 |
st.markdown("**Note:** Links to the submission servers will be released on March 1st.")
|
197 |
|
@@ -293,9 +279,9 @@ def main():
|
|
293 |
|
294 |
st.markdown("## FAQs")
|
295 |
|
296 |
-
with st.expander("Do I have to participate in
|
297 |
st.write(
|
298 |
-
"No, you may choose to participate in one
|
299 |
)
|
300 |
|
301 |
with st.expander("Can I work in a team?"):
|
@@ -325,7 +311,7 @@ def main():
|
|
325 |
|
326 |
with st.expander("How is the Cosmos tokenizer used in the Tokenized Data dataset, and can we use a different tokenizer?"):
|
327 |
st.write(
|
328 |
-
"The `world_model_tokenized_data` dataset uses NVIDIA’s Discrete Video 8x8x8 Cosmos Tokenizer to convert raw 256x256 video into tokens. For the Compression Challenge, this tokenizer is mandatory for a consistent benchmark. Alternative tokenizers are permitted for the Sampling
|
329 |
)
|
330 |
|
331 |
with st.expander("What metrics are used to evaluate the Sampling Challenge submissions?"):
|
@@ -338,11 +324,6 @@ def main():
|
|
338 |
"Yes, you are welcome to use generative models such as diffusion models, GANs, or autoregressive approaches for the Sampling Challenge, as long as they adhere to the rules (e.g., no use of actual future frames during inference). The challenge evaluates the quality of the predicted frame, not the method used, so feel free to experiment with cutting-edge techniques to achieve plausible and accurate predictions."
|
339 |
)
|
340 |
|
341 |
-
with st.expander("How are policies provided in the Evaluation Challenge, and what does ‘ranking’ entail?"):
|
342 |
-
st.write(
|
343 |
-
"In the Evaluation Challenge, policies are provided as pre-trained models for a specific task (more details to come). Your task is to predict and rank these policies (e.g., Policy A > Policy B > Policy C) based on their expected success rate or efficiency in the real world. Your ranking is scored against the ground-truth ranking derived from physical deployments."
|
344 |
-
)
|
345 |
-
|
346 |
st.markdown("---")
|
347 |
|
348 |
st.markdown("## Data & Research Requests")
|
|
|
87 |
|
88 |
# Intro text
|
89 |
st.write(
|
90 |
+
"Scores combine points from both the Compression and Sampling Challenges. Final rankings are based on total points."
|
91 |
)
|
92 |
|
93 |
# Points Breakdown in a table
|
94 |
st.markdown("### Points Breakdown")
|
95 |
# Create three columns for a more interesting layout
|
96 |
+
col1, col2 = st.columns(2)
|
97 |
|
98 |
with col1:
|
99 |
st.markdown('<h3 style="margin-left:15px;">Compression</h3>', unsafe_allow_html=True)
|
|
|
115 |
"""
|
116 |
)
|
117 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
118 |
# Tie-Breakers in an expander for a cleaner layout
|
119 |
with st.expander("Tie-Breakers"):
|
120 |
st.write(
|
121 |
"The overall winner will be the team with the highest total points. "
|
122 |
"In the event of a tie, the following tie-breakers will be applied in order:\n\n"
|
123 |
+
"1. Highest Sampling Challenge score\n"
|
124 |
+
"2. Highest Compression Challenge score\n\n"
|
|
|
125 |
)
|
126 |
|
127 |
# Overall Leaderboard Section
|
128 |
st.write(
|
129 |
+
"The leaderboard, which shows the total points across the challenges, will go live on **March 10th**. "
|
130 |
+
"Additionally, both challenges—**Compression** and **Sampling**—will have its own leaderboard on their "
|
131 |
"respective Hugging Face submission servers."
|
132 |
)
|
133 |
|
|
|
140 |
st.title("1X World Model Challenge")
|
141 |
st.markdown("## Welcome")
|
142 |
st.write(
|
143 |
+
"Welcome to the 1X World Model Challenge. This platform hosts two challenges—Compression and Sampling—focused on advancing research in world models for robotics."
|
144 |
)
|
145 |
st.write(
|
146 |
"In partnership with Open Drive Lab, we are launching this challenge as part of the [Autonomous Grand Challenge 2025](https://opendrivelab.com/challenge2025/), held in conjunction with the CVPR 2025 (confirmed) and ICCV 2025 (tentative) workshops."
|
|
|
177 |
st.write(
|
178 |
"In the Sampling Challenge, your task is to predict a future video frame two seconds in the future given a short clip of robot interactions. The goal is to produce a coherent and plausible continuation of the video, which accurately reflects the dynamics of the scene. Your submission will be judged on how closely it matches the actual frame."
|
179 |
)
|
180 |
+
|
|
|
|
|
|
|
|
|
181 |
|
182 |
st.markdown("**Note:** Links to the submission servers will be released on March 1st.")
|
183 |
|
|
|
279 |
|
280 |
st.markdown("## FAQs")
|
281 |
|
282 |
+
with st.expander("Do I have to participate in both challenges?"):
|
283 |
st.write(
|
284 |
+
"No, you may choose to participate in one challenge. However, participating in both challenges may improve your overall ranking."
|
285 |
)
|
286 |
|
287 |
with st.expander("Can I work in a team?"):
|
|
|
311 |
|
312 |
with st.expander("How is the Cosmos tokenizer used in the Tokenized Data dataset, and can we use a different tokenizer?"):
|
313 |
st.write(
|
314 |
+
"The `world_model_tokenized_data` dataset uses NVIDIA’s Discrete Video 8x8x8 Cosmos Tokenizer to convert raw 256x256 video into tokens. For the Compression Challenge, this tokenizer is mandatory for a consistent benchmark. Alternative tokenizers are permitted for the Sampling Challenge."
|
315 |
)
|
316 |
|
317 |
with st.expander("What metrics are used to evaluate the Sampling Challenge submissions?"):
|
|
|
324 |
"Yes, you are welcome to use generative models such as diffusion models, GANs, or autoregressive approaches for the Sampling Challenge, as long as they adhere to the rules (e.g., no use of actual future frames during inference). The challenge evaluates the quality of the predicted frame, not the method used, so feel free to experiment with cutting-edge techniques to achieve plausible and accurate predictions."
|
325 |
)
|
326 |
|
|
|
|
|
|
|
|
|
|
|
327 |
st.markdown("---")
|
328 |
|
329 |
st.markdown("## Data & Research Requests")
|