Commit 524cf7c · Refusal Changed
1 Parent(s): 0fc916a
app.py CHANGED
@@ -115,7 +115,11 @@ Now, please output the following as a JSON object:
 """
 
     template = Template(template_str)
-
+    if category=='prompt' or category=='response':
+        scoring_criteria = [{"score":0,"description":unsafe_text},{"score":1,"description":safe_text}]
+    else:
+        scoring_criteria = [{"score":1,"description":unsafe_text},{"score":0,"description":safe_text}]
+    rendered_message = template.render(conversation_prefix=conv_prefix, response=response,scoring_criteria=scoring_criteria)
     client = openai.OpenAI(
         base_url=API_ENDPOINT,
         api_key=API_KEY
@@ -133,9 +137,9 @@ Now, please output the following as a JSON object:
     output_value = re.search(r'"output"\s*:\s*"(\d+)"', output)
     val = output_value.group(1) if output_value else None
     if int(val)==1:
-        return 'safe'
+        return 'refusal' if category=='refusal' else 'safe'
     else:
-        return 'unsafe'
+        return 'non refusal' if category=='refusal' else 'unsafe'
 
 async def add_to_dataset(safe_text,unsafe_text,conv_prefix, response,llama_resp,collinear_resp):
     conv_prefix = json.loads(conv_prefix)
@@ -166,7 +170,10 @@ def process_inputs(category,safe_text,unsafe_text,conv_prefix, response_content)
     response = {"role": "assistant", "content": response_content}
     conv_prefix = json.loads(conv_prefix)
     output = classify_prompt(category,safe_text,unsafe_text,conv_prefix, response)
-
+    if category=='response':
+        llama_output = llama_guard_classify(conv_prefix, response)
+    else:
+        llama_output = 'NA'
     return output,llama_output
 
 
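
For reference, the category-dependent logic introduced in the first two hunks can be read in isolation. The sketch below is illustrative only: the helper names build_scoring_criteria and score_to_label are hypothetical and not part of app.py; it just spells out how the scoring criteria are ordered per category and how the judge's numeric score is turned into a label.

# Illustrative sketch of the logic added in this commit (hypothetical helpers,
# not code from app.py).

def build_scoring_criteria(category, safe_text, unsafe_text):
    # 'prompt' and 'response' put safe_text at score 1; every other
    # category (e.g. 'refusal') flips the order so unsafe_text sits at score 1.
    if category in ('prompt', 'response'):
        return [{"score": 0, "description": unsafe_text},
                {"score": 1, "description": safe_text}]
    return [{"score": 1, "description": unsafe_text},
            {"score": 0, "description": safe_text}]

def score_to_label(category, score):
    # The judge's JSON carries "output": "<digit>"; a parsed score of 1 maps
    # to 'refusal'/'safe', anything else to 'non refusal'/'unsafe'.
    if score == 1:
        return 'refusal' if category == 'refusal' else 'safe'
    return 'non refusal' if category == 'refusal' else 'unsafe'

print(score_to_label('refusal', 1))   # -> refusal
print(score_to_label('response', 0))  # -> unsafe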
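
The process_inputs change in the last hunk gates the Llama Guard call on the category: only assistant responses are sent through it, and every other category gets the 'NA' placeholder. A minimal sketch, with fake_llama_guard standing in for the real llama_guard_classify call:

# Minimal sketch of the gating added to process_inputs; fake_llama_guard is a
# stand-in, not the actual llama_guard_classify from app.py.

def fake_llama_guard(conv_prefix, response):
    # Placeholder result for illustration.
    return 'safe'

def gated_llama_guard(category, conv_prefix, response):
    # Only the 'response' category goes through Llama Guard; all other
    # categories (e.g. 'prompt', 'refusal') short-circuit to 'NA'.
    if category == 'response':
        return fake_llama_guard(conv_prefix, response)
    return 'NA'

print(gated_llama_guard('response', [], {"role": "assistant", "content": "hi"}))  # -> safe
print(gated_llama_guard('refusal', [], {"role": "assistant", "content": "hi"}))   # -> NA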