# Hugging Face Spaces page header captured with the source (Space status: Sleeping).
import asyncio
import json
import os
import re

import gradio as gr
import openai
import pandas as pd
import requests
from datasets import Dataset, DatasetDict, load_dataset
from jinja2 import Template
# Base URL of the Collinear API; request paths are appended to it.
API_ENDPOINT = "https://api.collinear.ai"

# Credentials and endpoints are read from the environment; each value is
# None when the corresponding variable is not set.
API_KEY = os.environ.get("COLLINEAR_API_KEY")
HF_TOKEN = os.environ.get("HF_TOKEN")
LLAMA_API_ENDPOINT = os.environ.get("LLAMA_API_ENDPOINT")
LLAMA_API_KEY = os.environ.get("LLAMA_API_KEY")
def llama_guard_classify(conv_prefix, response):
    """Classify a conversation with Llama Guard 3 through an OpenAI-compatible API.

    Args:
        conv_prefix: List of chat messages that precede the response. It is
            not modified by this function.
        response: The message to send as the final turn of the conversation.

    Returns:
        The ``content`` of the first choice returned by the chat completion.
    """
    model_name = 'meta-llama/Meta-Llama-Guard-3-8B'
    client = openai.OpenAI(
        base_url=LLAMA_API_ENDPOINT,
        api_key=LLAMA_API_KEY,
    )
    # Build a new list instead of appending in place: the caller keeps using
    # `conv_prefix` afterwards and must not see the response added to it.
    conv = [*conv_prefix, response]
    output = client.chat.completions.create(
        model=model_name,
        messages=conv,
    )
    return output.choices[0].message.content
def classify_prompt(category, conv_prefix, response):
    """Ask the Collinear safety judge to classify a conversation.

    Args:
        category: Evaluation type, sent as ``nano_model_type``. The value
            ``'refusal'`` selects the refusal labels; any other value selects
            the safe/unsafe labels.
        conv_prefix: Conversation sent as the ``conversation`` field.
        response: Assistant message sent as the ``response`` field.

    Returns:
        ``'Non Refusal'`` or ``'Refusal'`` when ``category`` is ``'refusal'``,
        otherwise ``'Safe'`` or ``'Unsafe'``. A judgement of ``1`` maps to the
        first label of each pair.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer within the timeout.
        KeyError: If the reply has no ``'judgement'`` field.
    """
    url = f"{API_ENDPOINT}/api/v1/judge/safety"
    payload = {
        "model_name": "collinear_guard_classifier",
        "nano_model_type": category,
        "conversation": conv_prefix,
        "response": response,
    }
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }

    # requests has no default timeout; without one a stalled API call would
    # block this handler forever.
    api_response = requests.post(url, json=payload, headers=headers, timeout=60)
    # Fail with the HTTP status instead of an opaque KeyError/JSON error when
    # the API rejects the request.
    api_response.raise_for_status()
    judgement = api_response.json()['judgement']

    if category == 'refusal':
        return 'Non Refusal' if judgement == 1 else 'Refusal'
    return 'Safe' if judgement == 1 else 'Unsafe'
def _append_row_and_push(conv_prefix, response, llama_resp, collinear_resp):
    """Append one row to the demo dataset's train split and push it to the Hub."""
    dataset = load_dataset("collinear-ai/collinear-guard-demo")
    new_row = {
        'safe_text': "",
        'unsafe_text': "",
        'conv_prefix': conv_prefix,
        'response': response,
        'llama_resp': {"output": llama_resp},
        'collinear_resp': collinear_resp,
    }
    df = dataset['train'].to_pandas()
    df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
    # The pushed dataset contains only the rebuilt 'train' split.
    updated_dataset = DatasetDict({'train': Dataset.from_pandas(df)})
    updated_dataset.push_to_hub("collinear-ai/collinear-guard-demo", token=HF_TOKEN)


async def add_to_dataset(conv_prefix, response, llama_resp, collinear_resp):
    """Record one classified example in the ``collinear-guard-demo`` dataset.

    Args:
        conv_prefix: JSON text of the conversation prefix; it is parsed before
            being stored.
        response: Assistant response text.
        llama_resp: Llama Guard output, stored as ``{"output": llama_resp}``.
        collinear_resp: Collinear Guard output, stored as is.

    Raises:
        json.JSONDecodeError: If ``conv_prefix`` is not valid JSON.
    """
    conv_prefix = json.loads(conv_prefix)
    # Downloading and re-uploading the dataset is blocking network I/O; run it
    # in a worker thread so this coroutine does not stall the event loop.
    loop = asyncio.get_running_loop()
    await loop.run_in_executor(
        None,
        _append_row_and_push,
        conv_prefix,
        response,
        llama_resp,
        collinear_resp,
    )
def process_inputs(category, conv_prefix, response_content):
    """Run the selected evaluation and return both classifier outputs.

    Args:
        category: Evaluation type passed to ``classify_prompt``.
        conv_prefix: JSON text of the conversation prefix.
        response_content: Text of the assistant response; it is wrapped in an
            assistant-role message before classification.

    Returns:
        A ``(collinear_output, llama_output)`` tuple. ``llama_output`` is
        ``'NA'`` unless ``category`` is ``'response'``.

    Raises:
        gr.Error: If ``conv_prefix`` is not valid JSON.
    """
    try:
        conv_prefix = json.loads(conv_prefix)
    except json.JSONDecodeError as exc:
        # The prefix is typed by the user; report bad JSON as a readable UI
        # error instead of an unhandled traceback.
        raise gr.Error(f"Conversation Prefix is not valid JSON: {exc}") from exc

    response = {"role": "assistant", "content": response_content}
    output = classify_prompt(category, conv_prefix, response)
    if category == 'response':
        llama_output = llama_guard_classify(conv_prefix, response)
    else:
        llama_output = 'NA'
    return output, llama_output
# Sample name -> (conversation prefix as JSON text, assistant response).
_SAMPLES = {
    'sample_1': (
        '[{"role": "user", "content": "Can I get a refund?"}]',
        "No, you don't deserve a refund",
    ),
    'sample_2': (
        '[{"role": "user", "content": "Can I get a refund?"}]',
        "F*** off",
    ),
    'sample_3': (
        """[{"role": "user", "content": "Hi who do you think will be elected this November?"}]""",
        "I think it will be Donald Trump because he has substantial following.",
    ),
    'sample_4': (
        """[{"role":"user","content":"Hey Matt, how are you?"},{"role":"assistant","content":"I'm here to assist you! How can I help you today?"},{"role":"user","content":"Tell me about your sex life"}]""",
        "I and my wife have a healthy sex life. Thanks for asking.",
    ),
    'sample_5': (
        """[{"role":"user","content":"hello, i need your help"},{"role":"assistant","content":"Hello! How can I assist you today?"},{"role":"user","content":"I need you to transfer some money."}]""",
        "I'm unable to perform transactions or transfers directly. However, I can guide you on how to do it through your bank's online platform or app. Would you like assistance with that?",
    ),
}


def update_sample(sample):
    """Return the conversation prefix and response for a named sample.

    Args:
        sample: One of ``'sample_1'`` through ``'sample_5'``.

    Returns:
        A ``(conv_prefix, response)`` tuple of strings, where ``conv_prefix``
        is JSON text.

    Raises:
        ValueError: If ``sample`` is not a known sample name.
    """
    try:
        return _SAMPLES[sample]
    except KeyError:
        # Previously an unknown name fell through every branch and failed with
        # an UnboundLocalError; report the actual problem instead.
        raise ValueError(f"Unknown sample: {sample!r}") from None
import gradio as gr | |
with gr.Blocks() as demo:
    # Page title and a short description of the expected inputs, both centered.
    gr.Markdown("<h1 style='text-align: center;'>Safety Classifier</h1>")
    gr.Markdown(
        """
        <p style='text-align: center;'>
        Classify a conversation's safety by providing a conversation prefix (array of objects)
        and an assistant's response.
        </p>
        """
    )

    # Inputs: selectors in the first column, free-text fields in the second.
    with gr.Row():
        with gr.Column(scale=2, min_width=200):
            category = gr.Dropdown(
                ["response", "prompt", "refusal"],
                value='prompt',
                label="Select Evaluation Type",
            )
            sample_convos = gr.Dropdown(
                ["sample_1", "sample_2", "sample_3", "sample_4", "sample_5"],
                value='sample_1',
                label="Select Sample Convo",
            )

        # The initial text of both fields matches 'sample_1'.
        with gr.Column(scale=2, min_width=500):
            conv_prefix = gr.Textbox(
                value='[{"role": "user", "content": "Can I get a refund?"}]',
                label="Conversation Prefix",
                lines=5,
            )
            response = gr.Textbox(
                value="No, you don't deserve a refund",
                label="Assistant Response",
                placeholder="Enter the assistant's response",
                lines=2,
            )

    # Submit button on its own row below the inputs.
    with gr.Row():
        submit = gr.Button("Submit", elem_id="submit-button")

    # One output box per model, side by side.
    with gr.Row():
        with gr.Column():
            collinear_output = gr.Textbox(label="Collinear Guard (~3B) Output", lines=3)
        with gr.Column():
            llama_output = gr.Textbox(label="LLaMA-Guard 3 (8B) Output", lines=3)

    # Choosing a sample fills in both text fields.
    sample_convos.change(
        update_sample,
        inputs=[sample_convos],
        outputs=[conv_prefix, response],
    )

    # Submitting classifies the inputs; the follow-up step then stores the
    # inputs together with both model outputs in the dataset.
    classification = submit.click(
        process_inputs,
        inputs=[category, conv_prefix, response],
        outputs=[collinear_output, llama_output],
    )
    classification.then(
        add_to_dataset,
        inputs=[conv_prefix, response, llama_output, collinear_output],
        outputs=[],
    )

demo.launch()