Update index.html
Browse files- index.html +137 -419
index.html
CHANGED
@@ -1,435 +1,153 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
<
|
30 |
-
|
31 |
-
<
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
<
|
41 |
-
|
42 |
-
|
43 |
-
<a href="https://jonbarron.info" target="_blank">Jonathan T. Barron</a><sup>2</sup>,
|
44 |
-
</span>
|
45 |
-
<span class="author-block">
|
46 |
-
<a href="http://sofienbouaziz.com" target="_blank">Sofien Bouaziz</a><sup>2</sup>,
|
47 |
-
</span>
|
48 |
-
<span class="author-block">
|
49 |
-
<a href="https://www.danbgoldman.com" target="_blank">Dan B Goldman</a><sup>2</sup>,
|
50 |
-
</span>
|
51 |
-
<span class="author-block">
|
52 |
-
<a href="https://homes.cs.washington.edu/~seitz/" target="_blank">Steven M. Seitz</a><sup>1,2</sup>,
|
53 |
-
</span>
|
54 |
-
<span class="author-block">
|
55 |
-
<a href="http://www.ricardomartinbrualla.com" target="_blank">Ricardo Martin-Brualla</a><sup>2</sup>
|
56 |
-
</span>
|
57 |
-
</div>
|
58 |
-
|
59 |
-
<div class="is-size-5 publication-authors">
|
60 |
-
<span class="author-block"><sup>1</sup>University of Washington,</span>
|
61 |
-
<span class="author-block"><sup>2</sup>Google Research</span>
|
62 |
</div>
|
63 |
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
</a>
|
75 |
-
</span>
|
76 |
-
<span class="link-block">
|
77 |
-
<a href="https://arxiv.org/abs/2011.12948" target="_blank"
|
78 |
-
class="external-link button is-normal is-rounded is-dark">
|
79 |
-
<span class="icon">
|
80 |
-
<i class="ai ai-arxiv"></i>
|
81 |
-
</span>
|
82 |
-
<span>arXiv</span>
|
83 |
-
</a>
|
84 |
-
</span>
|
85 |
-
<!-- Video Link. -->
|
86 |
-
<span class="link-block">
|
87 |
-
<a href="https://www.youtube.com/watch?v=MrKrnHhk8IA" target="_blank"
|
88 |
-
class="external-link button is-normal is-rounded is-dark">
|
89 |
-
<span class="icon">
|
90 |
-
<i class="fab fa-youtube"></i>
|
91 |
-
</span>
|
92 |
-
<span>Video</span>
|
93 |
-
</a>
|
94 |
-
</span>
|
95 |
-
<!-- Code Link. -->
|
96 |
-
<span class="link-block">
|
97 |
-
<a href="https://github.com/google/nerfies" target="_blank"
|
98 |
-
class="external-link button is-normal is-rounded is-dark">
|
99 |
-
<span class="icon">
|
100 |
-
<i class="fab fa-github"></i>
|
101 |
-
</span>
|
102 |
-
<span>Code</span>
|
103 |
-
</a>
|
104 |
-
</span>
|
105 |
-
<!-- Dataset Link. -->
|
106 |
-
<span class="link-block">
|
107 |
-
<a href="https://github.com/google/nerfies/releases/tag/0.1" target="_blank"
|
108 |
-
class="external-link button is-normal is-rounded is-dark">
|
109 |
-
<span class="icon">
|
110 |
-
<i class="far fa-images"></i>
|
111 |
-
</span>
|
112 |
-
<span>Data</span>
|
113 |
-
</a>
|
114 |
-
</div>
|
115 |
-
|
116 |
</div>
|
117 |
</div>
|
118 |
-
</
|
119 |
-
</div>
|
120 |
-
</div>
|
121 |
-
</section>
|
122 |
-
|
123 |
-
<section class="hero teaser">
|
124 |
-
<div class="container is-max-desktop">
|
125 |
-
<div class="hero-body">
|
126 |
-
<video id="teaser" autoplay muted loop playsinline height="100%">
|
127 |
-
<source src="./static/videos/teaser.mp4"
|
128 |
-
type="video/mp4">
|
129 |
-
</video>
|
130 |
-
<h2 class="subtitle has-text-centered">
|
131 |
-
<span class="dnerf">Nerfies</span> turns selfie videos from your phone into
|
132 |
-
free-viewpoint
|
133 |
-
portraits.
|
134 |
-
</h2>
|
135 |
-
</div>
|
136 |
-
</div>
|
137 |
-
</section>
|
138 |
-
|
139 |
-
|
140 |
-
<section class="hero is-light is-small">
|
141 |
-
<div class="hero-body">
|
142 |
-
<div class="container">
|
143 |
-
<div id="results-carousel" class="carousel results-carousel">
|
144 |
-
<div class="item item-steve">
|
145 |
-
<video poster="" id="steve" autoplay controls muted loop playsinline height="100%">
|
146 |
-
<source src="./static/videos/steve.mp4"
|
147 |
-
type="video/mp4">
|
148 |
-
</video>
|
149 |
-
</div>
|
150 |
-
<div class="item item-chair-tp">
|
151 |
-
<video poster="" id="chair-tp" autoplay controls muted loop playsinline height="100%">
|
152 |
-
<source src="./static/videos/chair-tp.mp4"
|
153 |
-
type="video/mp4">
|
154 |
-
</video>
|
155 |
-
</div>
|
156 |
-
<div class="item item-shiba">
|
157 |
-
<video poster="" id="shiba" autoplay controls muted loop playsinline height="100%">
|
158 |
-
<source src="./static/videos/shiba.mp4"
|
159 |
-
type="video/mp4">
|
160 |
-
</video>
|
161 |
-
</div>
|
162 |
-
<div class="item item-fullbody">
|
163 |
-
<video poster="" id="fullbody" autoplay controls muted loop playsinline height="100%">
|
164 |
-
<source src="./static/videos/fullbody.mp4"
|
165 |
-
type="video/mp4">
|
166 |
-
</video>
|
167 |
-
</div>
|
168 |
-
<div class="item item-blueshirt">
|
169 |
-
<video poster="" id="blueshirt" autoplay controls muted loop playsinline height="100%">
|
170 |
-
<source src="./static/videos/blueshirt.mp4"
|
171 |
-
type="video/mp4">
|
172 |
-
</video>
|
173 |
-
</div>
|
174 |
-
<div class="item item-mask">
|
175 |
-
<video poster="" id="mask" autoplay controls muted loop playsinline height="100%">
|
176 |
-
<source src="./static/videos/mask.mp4"
|
177 |
-
type="video/mp4">
|
178 |
-
</video>
|
179 |
-
</div>
|
180 |
-
<div class="item item-coffee">
|
181 |
-
<video poster="" id="coffee" autoplay controls muted loop playsinline height="100%">
|
182 |
-
<source src="./static/videos/coffee.mp4"
|
183 |
-
type="video/mp4">
|
184 |
-
</video>
|
185 |
-
</div>
|
186 |
-
<div class="item item-toby">
|
187 |
-
<video poster="" id="toby" autoplay controls muted loop playsinline height="100%">
|
188 |
-
<source src="./static/videos/toby2.mp4"
|
189 |
-
type="video/mp4">
|
190 |
-
</video>
|
191 |
-
</div>
|
192 |
-
</div>
|
193 |
-
</div>
|
194 |
-
</div>
|
195 |
-
</section>
|
196 |
-
|
197 |
-
|
198 |
-
<section class="section">
|
199 |
-
<div class="container is-max-desktop">
|
200 |
-
<!-- Abstract. -->
|
201 |
-
<div class="columns is-centered has-text-centered">
|
202 |
-
<div class="column is-four-fifths">
|
203 |
-
<h2 class="title is-3">Abstract</h2>
|
204 |
-
<div class="content has-text-justified">
|
205 |
-
<p>
|
206 |
-
We present the first method capable of photorealistically reconstructing a non-rigidly
|
207 |
-
deforming scene using photos/videos captured casually from mobile phones.
|
208 |
-
</p>
|
209 |
-
<p>
|
210 |
-
Our approach augments neural radiance fields
|
211 |
-
(NeRF) by optimizing an
|
212 |
-
additional continuous volumetric deformation field that warps each observed point into a
|
213 |
-
canonical 5D NeRF.
|
214 |
-
We observe that these NeRF-like deformation fields are prone to local minima, and
|
215 |
-
propose a coarse-to-fine optimization method for coordinate-based models that allows for
|
216 |
-
more robust optimization.
|
217 |
-
By adapting principles from geometry processing and physical simulation to NeRF-like
|
218 |
-
models, we propose an elastic regularization of the deformation field that further
|
219 |
-
improves robustness.
|
220 |
-
</p>
|
221 |
-
<p>
|
222 |
-
We show that <span class="dnerf">Nerfies</span> can turn casually captured selfie
|
223 |
-
photos/videos into deformable NeRF
|
224 |
-
models that allow for photorealistic renderings of the subject from arbitrary
|
225 |
-
viewpoints, which we dub <i>"nerfies"</i>. We evaluate our method by collecting data
|
226 |
-
using a
|
227 |
-
rig with two mobile phones that take time-synchronized photos, yielding train/validation
|
228 |
-
images of the same pose at different viewpoints. We show that our method faithfully
|
229 |
-
reconstructs non-rigidly deforming scenes and reproduces unseen views with high
|
230 |
-
fidelity.
|
231 |
-
</p>
|
232 |
-
</div>
|
233 |
-
</div>
|
234 |
-
</div>
|
235 |
-
<!--/ Abstract. -->
|
236 |
-
|
237 |
-
<!-- Paper video. -->
|
238 |
-
<div class="columns is-centered has-text-centered">
|
239 |
-
<div class="column is-four-fifths">
|
240 |
-
<h2 class="title is-3">Video</h2>
|
241 |
-
<div class="publication-video">
|
242 |
-
<iframe src="https://www.youtube.com/embed/MrKrnHhk8IA?rel=0&showinfo=0"
|
243 |
-
frameborder="0" allow="autoplay; encrypted-media" allowfullscreen></iframe>
|
244 |
-
</div>
|
245 |
-
</div>
|
246 |
-
</div>
|
247 |
-
<!--/ Paper video. -->
|
248 |
-
</div>
|
249 |
-
</section>
|
250 |
-
|
251 |
-
|
252 |
-
<section class="section">
|
253 |
-
<div class="container is-max-desktop">
|
254 |
-
|
255 |
-
<div class="columns is-centered">
|
256 |
|
257 |
-
|
258 |
-
<
|
259 |
-
|
260 |
-
|
261 |
-
<
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
<
|
267 |
-
|
268 |
-
|
269 |
-
</div>
|
270 |
-
</div>
|
271 |
-
<!--/ Visual Effects. -->
|
272 |
-
|
273 |
-
<!-- Matting. -->
|
274 |
-
<div class="column">
|
275 |
-
<h2 class="title is-3">Matting</h2>
|
276 |
-
<div class="columns is-centered">
|
277 |
-
<div class="column content">
|
278 |
<p>
|
279 |
-
|
280 |
-
samples that fall outside of a bounding box during rendering.
|
281 |
</p>
|
282 |
-
<video id="matting-video" controls playsinline height="100%">
|
283 |
-
<source src="./static/videos/matting.mp4"
|
284 |
-
type="video/mp4">
|
285 |
-
</video>
|
286 |
</div>
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
</p>
|
306 |
-
</div>
|
307 |
-
<div class="columns is-vcentered interpolation-panel">
|
308 |
-
<div class="column is-3 has-text-centered">
|
309 |
-
<img src="./static/images/interpolate_start.jpg"
|
310 |
-
class="interpolation-image"
|
311 |
-
alt="Interpolate start reference image."/>
|
312 |
-
<p>Start Frame</p>
|
313 |
</div>
|
314 |
-
|
315 |
-
|
316 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
317 |
</div>
|
318 |
-
<input class="slider is-fullwidth is-large is-info"
|
319 |
-
id="interpolation-slider"
|
320 |
-
step="1" min="0" max="100" value="0" type="range">
|
321 |
</div>
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
327 |
</div>
|
328 |
-
</
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
-
muted
|
344 |
-
preload
|
345 |
-
playsinline
|
346 |
-
width="75%">
|
347 |
-
<source src="./static/videos/replay.mp4"
|
348 |
-
type="video/mp4">
|
349 |
-
</video>
|
350 |
-
</div>
|
351 |
-
<!--/ Re-rendering. -->
|
352 |
-
|
353 |
-
</div>
|
354 |
-
</div>
|
355 |
-
<!--/ Animation. -->
|
356 |
-
|
357 |
-
|
358 |
-
<!-- Concurrent Work. -->
|
359 |
-
<div class="columns is-centered">
|
360 |
-
<div class="column is-full-width">
|
361 |
-
<h2 class="title is-3">Related Links</h2>
|
362 |
-
|
363 |
-
<div class="content has-text-justified">
|
364 |
-
<p>
|
365 |
-
There's a lot of excellent work that was introduced around the same time as ours.
|
366 |
-
</p>
|
367 |
-
<p>
|
368 |
-
<a href="https://arxiv.org/abs/2104.09125" target="_blank">Progressive Encoding for Neural Optimization</a> introduces an idea similar to our windowed position encoding for coarse-to-fine optimization.
|
369 |
-
</p>
|
370 |
-
<p>
|
371 |
-
<a href="https://www.albertpumarola.com/research/D-NeRF/index.html" target="_blank">D-NeRF</a> and <a href="https://gvv.mpi-inf.mpg.de/projects/nonrigid_nerf/" target="_blank">NR-NeRF</a>
|
372 |
-
both use deformation fields to model non-rigid scenes.
|
373 |
-
</p>
|
374 |
-
<p>
|
375 |
-
Some works model videos with a NeRF by directly modulating the density, such as <a href="https://video-nerf.github.io/" target="_blank">Video-NeRF</a>, <a href="https://www.cs.cornell.edu/~zl548/NSFF/" target="_blank">NSFF</a>, and <a href="https://neural-3d-video.github.io/" target="_blank">DyNeRF</a>.
|
376 |
-
</p>
|
377 |
-
<p>
|
378 |
-
There are probably many more by the time you are reading this. Check out <a href="https://dellaert.github.io/NeRF/" target="_blank">Frank Dellaert's survey on recent NeRF papers</a>, and <a href="https://github.com/yenchenlin/awesome-NeRF" target="_blank">Yen-Chen Lin's curated list of NeRF papers</a>.
|
379 |
-
</p>
|
380 |
-
</div>
|
381 |
-
</div>
|
382 |
-
</div>
|
383 |
-
<!--/ Concurrent Work. -->
|
384 |
-
|
385 |
-
</div>
|
386 |
-
</section>
|
387 |
-
|
388 |
-
|
389 |
-
<section class="section" id="BibTeX">
|
390 |
-
<div class="container is-max-desktop content">
|
391 |
-
<h2 class="title">BibTeX</h2>
|
392 |
-
<pre><code>@article{park2021nerfies,
|
393 |
-
author = {Park, Keunhong and Sinha, Utkarsh and Barron, Jonathan T. and Bouaziz, Sofien and Goldman, Dan B and Seitz, Steven M. and Martin-Brualla, Ricardo},
|
394 |
-
title = {Nerfies: Deformable Neural Radiance Fields},
|
395 |
-
journal = {ICCV},
|
396 |
-
year = {2021},
|
397 |
-
}</code></pre>
|
398 |
-
</div>
|
399 |
-
</section>
|
400 |
-
|
401 |
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
href="./static/videos/nerfies_paper.pdf">
|
407 |
-
<i class="fas fa-file-pdf"></i>
|
408 |
-
</a>
|
409 |
-
<a class="icon-link" href="https://github.com/keunhong" target="_blank" class="external-link" disabled>
|
410 |
-
<i class="fab fa-github"></i>
|
411 |
-
</a>
|
412 |
-
</div>
|
413 |
-
<div class="columns is-centered">
|
414 |
-
<div class="column is-8">
|
415 |
-
<div class="content">
|
416 |
-
<p>
|
417 |
-
This website is licensed under a <a rel="license" target="_blank"
|
418 |
-
href="http://creativecommons.org/licenses/by-sa/4.0/">Creative
|
419 |
-
Commons Attribution-ShareAlike 4.0 International License</a>.
|
420 |
-
</p>
|
421 |
-
<p>
|
422 |
-
This means you are free to borrow the <a target="_blank"
|
423 |
-
href="https://github.com/nerfies/nerfies.github.io">source code</a> of this website,
|
424 |
-
we just ask that you link back to this page in the footer.
|
425 |
-
Please remember to remove the analytics code included in the header of the website which
|
426 |
-
you do not want on your website.
|
427 |
-
</p>
|
428 |
</div>
|
429 |
-
</
|
430 |
</div>
|
431 |
-
|
432 |
-
|
433 |
|
434 |
-
|
435 |
-
</html>
|
|
|
1 |
+
import React from 'react';
|
2 |
+
|
3 |
+
const TechReport = () => {
|
4 |
+
return (
|
5 |
+
<div className="min-h-screen bg-white">
|
6 |
+
{/* Header/Hero Section */}
|
7 |
+
<section className="py-16 bg-gray-50">
|
8 |
+
<div className="container mx-auto px-4 max-w-4xl">
|
9 |
+
<h1 className="text-4xl font-bold text-center mb-8">
|
10 |
+
Atla Selene Mini:<br/>A General Purpose Evaluation Model
|
11 |
+
</h1>
|
12 |
+
|
13 |
+
{/* Authors */}
|
14 |
+
<div className="text-center mb-8">
|
15 |
+
<p className="mb-4">
|
16 |
+
<span>Andrei Alexandru<sup>1</sup></span> •
|
17 |
+
<span> Antonia Calvi<sup>1</sup></span> •
|
18 |
+
<span> Henry Broomfield<sup>1</sup></span> •
|
19 |
+
<span> Jackson Golden<sup>1</sup></span> •
|
20 |
+
<span> Kyle Dai<sup>1</sup></span>
|
21 |
+
</p>
|
22 |
+
<p className="mb-4">
|
23 |
+
<span className="font-semibold">Mathias Leys<sup>1</sup></span> •
|
24 |
+
<span className="font-semibold"> Maurice Burger<sup>1</sup></span> •
|
25 |
+
<span className="font-semibold"> Max Bartolo<sup>2,3</sup></span> •
|
26 |
+
<span className="font-semibold"> Roman Engeler<sup>1</sup></span>
|
27 |
+
</p>
|
28 |
+
<p className="mb-4">
|
29 |
+
<span className="font-semibold">Sashank Pisupati<sup>1</sup></span> •
|
30 |
+
<span className="font-semibold"> Toby Drane<sup>1</sup></span> •
|
31 |
+
<span className="font-semibold"> Young Sun Park<sup>1</sup></span>
|
32 |
+
</p>
|
33 |
+
|
34 |
+
<p className="text-sm">
|
35 |
+
<span><sup>1</sup>atla</span> •
|
36 |
+
<span> <sup>2</sup>University College London</span> •
|
37 |
+
<span> <sup>3</sup>Cohere</span>
|
38 |
+
</p>
|
39 |
+
|
40 |
+
<a href="https://atla-ai.com" className="text-blue-600 hover:underline">
|
41 |
+
atla-ai.com
|
42 |
+
</a>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
</div>
|
44 |
|
45 |
+
{/* Links */}
|
46 |
+
<div className="flex justify-center gap-4">
|
47 |
+
<a href="https://hf.co/AtlaAI/Selene-1-Mini-Llama-3.1-8B"
|
48 |
+
className="px-4 py-2 bg-gray-900 text-white rounded-full hover:bg-gray-800 transition">
|
49 |
+
HuggingFace
|
50 |
+
</a>
|
51 |
+
<a href="https://ollama.com/atla/selene-mini"
|
52 |
+
className="px-4 py-2 bg-gray-900 text-white rounded-full hover:bg-gray-800 transition">
|
53 |
+
Ollama
|
54 |
+
</a>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
</div>
|
56 |
</div>
|
57 |
+
</section>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
|
59 |
+
{/* Main Content */}
|
60 |
+
<main className="container mx-auto px-4 max-w-4xl py-12">
|
61 |
+
{/* Abstract */}
|
62 |
+
<section className="mb-16">
|
63 |
+
<h2 className="text-2xl font-bold mb-4">Abstract</h2>
|
64 |
+
<div className="prose max-w-none">
|
65 |
+
<p className="mb-4">
|
66 |
+
We introduce Atla Selene Mini, a state-of-the-art small language model-as-a-judge (SLMJ). Selene Mini is a general-purpose evaluator that outperforms the best SLMJs and GPT-4o-mini on overall performance across 11 out-of-distribution benchmarks, spanning absolute scoring, classification, and pairwise preference tasks. It is the highest-scoring 8B generative model on RewardBench, surpassing strong baselines like GPT-4o and specialized judges.
|
67 |
+
</p>
|
68 |
+
<p className="mb-4">
|
69 |
+
To achieve this, we develop a principled data curation strategy that augments public datasets with synthetically generated critiques and ensures high quality through filtering and dataset ablations. We train our model on a combined direct preference optimization (DPO) and supervised fine-tuning (SFT) loss, and produce a highly promptable evaluator that excels in real-world scenarios.
|
70 |
+
</p>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
<p>
|
72 |
+
Selene Mini shows dramatically improved zero-shot agreement with human expert evaluations on financial and medical industry datasets. It is also robust to variations in prompt format. Preliminary results indicate that Selene Mini is the top-ranking evaluator in a live, community-driven Judge Arena. We release the model weights on HuggingFace and Ollama to encourage widespread community adoption.
|
|
|
73 |
</p>
|
|
|
|
|
|
|
|
|
74 |
</div>
|
75 |
+
</section>
|
76 |
+
|
77 |
+
{/* Introduction */}
|
78 |
+
<section className="mb-16">
|
79 |
+
<h2 className="text-2xl font-bold mb-4">Introduction</h2>
|
80 |
+
<div className="prose max-w-none">
|
81 |
+
<p className="mb-4">
|
82 |
+
Automated evaluation of large language models (LLMs) is an increasingly pertinent task as LLMs demonstrate their value across a growing array of real-world use cases. Reliable evaluation is critical to ensure that LLMs are aligned with human objectives, i.e. that these models do what they are intended to do.
|
83 |
+
</p>
|
84 |
+
<p className="mb-4">
|
85 |
+
Human evaluation is time-consuming and expensive, and scales poorly with volume and complexity – hence the need for scalable, automated techniques. As generative models have become more capable, the field has addressed this need by using LLMs themselves to evaluate other LLMs' responses, producing judgments and natural language critiques without humans in the loop – an approach also known as "LLM-as-a-judge" (LLMJ).
|
86 |
+
</p>
|
87 |
+
<div className="my-8">
|
88 |
+
<img src="/api/placeholder/800/400" alt="Figure 1: Performance comparison" className="w-full rounded-lg shadow-lg"/>
|
89 |
+
<p className="text-sm text-gray-600 mt-2">
|
90 |
+
Figure 1: Atla Selene Mini outperforms current state-of-the-art SLMJs: a) Overall task-average performance, comparing Atla Selene Mini (black) with the best and most widely used SLMJs. b) Breakdown of performance by task type and benchmark.
|
91 |
+
</p>
|
92 |
+
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
</div>
|
94 |
+
</section>
|
95 |
+
|
96 |
+
{/* Methods */}
|
97 |
+
<section className="mb-16">
|
98 |
+
<h2 className="text-2xl font-bold mb-4">Methods</h2>
|
99 |
+
<div className="prose max-w-none">
|
100 |
+
<p className="mb-4">
|
101 |
+
Selene Mini is optimized for fast inference, high performance, and promptability. It is a general-purpose evaluator, and is trained to respond with both critiques and judgments in order to deliver actionable insights.
|
102 |
+
</p>
|
103 |
+
<div className="my-8">
|
104 |
+
<img src="/api/placeholder/800/400" alt="Figure 2: Data curation strategy" className="w-full rounded-lg shadow-lg"/>
|
105 |
+
<p className="text-sm text-gray-600 mt-2">
|
106 |
+
Figure 2: Data curation strategy: The process of transforming a candidate dataset (left) into the final training mix (right). Yellow boxes indicate filtering steps, purple represents synthetic generation of chosen and rejected pairs for preference optimization.
|
107 |
+
</p>
|
108 |
</div>
|
|
|
|
|
|
|
109 |
</div>
|
110 |
+
</section>
|
111 |
+
|
112 |
+
{/* Results */}
|
113 |
+
<section className="mb-16">
|
114 |
+
<h2 className="text-2xl font-bold mb-4">Results</h2>
|
115 |
+
<div className="prose max-w-none">
|
116 |
+
<h3 className="text-xl font-semibold mb-3">Benchmark Performance</h3>
|
117 |
+
<p className="mb-4">
|
118 |
+
We assess the performance of Selene Mini on 11 out-of-distribution benchmarks, spanning three different types of evaluation tasks: absolute scoring, classification, and pairwise preference.
|
119 |
+
</p>
|
120 |
+
<div className="my-8">
|
121 |
+
<img src="/api/placeholder/800/400" alt="Figure 3: Real-world evaluation" className="w-full rounded-lg shadow-lg"/>
|
122 |
+
<p className="text-sm text-gray-600 mt-2">
|
123 |
+
Figure 3: Real-world evaluation: a) Performance on domain-specific industry benchmarks. b) Performance on RewardBench with different prompt formats. c) Performance measured by Elo scores in Judge Arena.
|
124 |
+
</p>
|
125 |
+
</div>
|
126 |
</div>
|
127 |
+
</section>
|
128 |
+
|
129 |
+
{/* Discussion */}
|
130 |
+
<section className="mb-16">
|
131 |
+
<h2 className="text-2xl font-bold mb-4">Discussion</h2>
|
132 |
+
<div className="prose max-w-none">
|
133 |
+
<p className="mb-4">
|
134 |
+
In this work, we introduce Atla Selene Mini, demonstrating that effective general-purpose evaluation can be achieved in smaller model architectures through principled data curation and a hybrid training objective (DPO + SFT).
|
135 |
+
</p>
|
136 |
+
<p className="mb-4">
|
137 |
+
Looking ahead, we anticipate two emerging frontiers that will shape the future of AI evaluation. First is the rise of agent-based systems that combine language models with external tools and APIs, creating more powerful and versatile AI systems. Second is the increasing use of inference-time compute – systems that perform additional reasoning steps during inference to generate higher-quality outputs.
|
138 |
+
</p>
|
139 |
+
</div>
|
140 |
+
</section>
|
141 |
+
</main>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
142 |
|
143 |
+
{/* Footer */}
|
144 |
+
<footer className="bg-gray-50 py-8">
|
145 |
+
<div className="container mx-auto px-4 max-w-4xl text-center text-sm text-gray-600">
|
146 |
+
<p>© 2025 Atla AI</p>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
147 |
</div>
|
148 |
+
</footer>
|
149 |
</div>
|
150 |
+
);
|
151 |
+
};
|
152 |
|
153 |
+
export default TechReport;
|
|