Niklas Hoepner
committed on
Commit
·
f4e6d54
1
Parent(s):
e130a6a
Implemented L3Score from the SPIQA dataset paper
Browse files- L3Score.py +1 -3
- README.md +1 -1
L3Score.py
CHANGED
@@ -120,8 +120,6 @@ class L3Score(evaluate.Metric):
|
|
120 |
)
|
121 |
)
|
122 |
|
123 |
-
if api_key == "":
|
124 |
-
raise ValueError("api_key is required")
|
125 |
|
126 |
def _get_llm(self, model, api_key):
|
127 |
"""Get the LLM"""
|
@@ -134,7 +132,7 @@ class L3Score(evaluate.Metric):
|
|
134 |
questions,
|
135 |
predictions,
|
136 |
references,
|
137 |
-
api_key
|
138 |
provider="openai",
|
139 |
model="gpt-4o-mini",
|
140 |
):
|
|
|
120 |
)
|
121 |
)
|
122 |
|
|
|
|
|
123 |
|
124 |
def _get_llm(self, model, api_key):
|
125 |
"""Get the LLM"""
|
|
|
132 |
questions,
|
133 |
predictions,
|
134 |
references,
|
135 |
+
api_key,
|
136 |
provider="openai",
|
137 |
model="gpt-4o-mini",
|
138 |
):
|
README.md
CHANGED
@@ -13,7 +13,7 @@ description: >
|
|
13 |
It uses log-probabilities of "Yes"/"No" tokens from a language model acting as a judge.
|
14 |
Based on the SPIQA benchmark: https://arxiv.org/pdf/2407.09413
|
15 |
sdk: gradio
|
16 |
-
sdk_version:
|
17 |
app_file: app.py
|
18 |
pinned: false
|
19 |
---
|
|
|
13 |
It uses log-probabilities of "Yes"/"No" tokens from a language model acting as a judge.
|
14 |
Based on the SPIQA benchmark: https://arxiv.org/pdf/2407.09413
|
15 |
sdk: gradio
|
16 |
+
sdk_version: 4.44.1
|
17 |
app_file: app.py
|
18 |
pinned: false
|
19 |
---
|