Spaces:
Running
Running
Update index.html
Browse files- index.html +12 -1
index.html
CHANGED
|
@@ -97,7 +97,18 @@ Exploring Refusal Loss Landscapes </title>
|
|
| 97 |
<div style="clear: both"></div>
|
| 98 |
</div>
|
| 99 |
<div id="refusal-loss-formula-content" class="row align-items-center">
|
| 100 |
-
<span id="ECE-formula" class="formula" style=""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
<span id="SCE-formula" class="formula" style="display: none;">$$\displaystyle f_\theta(x)=1-\frac{1}{N}\sum_{i=1}^N JB(y_i)$$</span>
|
| 102 |
<span id="ACE-formula" class="formula" style="display: none;">$$\displaystyle g_\theta(x)=\sum_{i=1}^P \frac{f_\theta(x\oplus \mu u_i)-f_\theta(x)}{\mu} u_i $$</span>
|
| 103 |
</div>
|
|
|
|
| 97 |
<div style="clear: both"></div>
|
| 98 |
</div>
|
| 99 |
<div id="refusal-loss-formula-content" class="row align-items-center">
|
| 100 |
+
<span id="ECE-formula" class="formula" style="">
|
| 101 |
+
$$
|
| 102 |
+
\displaystyle
|
| 103 |
+
\begin{aligned}
|
| 104 |
+
\phi_\theta(x)&=1-\mathbb{E}_{y \sim T_\theta(x)} JB(y) \\
|
| 105 |
+
JB (y) &= \begin{cases}
|
| 106 |
+
\text{$1$,~~if $y$ contains any jailbreak keyword;} \\
|
| 107 |
+
\text{$0$,~~otherwise.}
|
| 108 |
+
\end{cases}
|
| 109 |
+
\end{aligned}
|
| 110 |
+
$$
|
| 111 |
+
</span>
|
| 112 |
<span id="SCE-formula" class="formula" style="display: none;">$$\displaystyle f_\theta(x)=1-\frac{1}{N}\sum_{i=1}^N JB(y_i)$$</span>
|
| 113 |
<span id="ACE-formula" class="formula" style="display: none;">$$\displaystyle g_\theta(x)=\sum_{i=1}^P \frac{f_\theta(x\oplus \mu u_i)-f_\theta(x)}{\mu} u_i $$</span>
|
| 114 |
</div>
|