C3PO / app.py
Moritz Stephan
added multi feedback support
da952a9
raw
history blame
3.03 kB
import os
import requests
import threading
from typing import Optional, List, Tuple
import gradio as gr
ENDPOINT_URL = "https://austrian-code-wizard--metarlaif-web.modal.run"
def get_feedback_options() -> List[Tuple[str, str]]:
    """Fetch the selectable feedback adapters from the backend.

    Posts the API key (read from the ``C3PO_API_KEY`` environment variable)
    to the ``/list_adapters`` endpoint and converts every entry under the
    ``"adapters"`` key of the JSON reply into a pair.

    Returns:
        A list of ``(feedback_name, feedback_id)`` tuples, in the order the
        backend returned them.
    """
    payload = {"C3PO_API_KEY": os.environ.get("C3PO_API_KEY")}
    reply = requests.post(f"{ENDPOINT_URL}/list_adapters", json=payload)
    adapters = reply.json()["adapters"]
    return [(entry["feedback_name"], entry["feedback_id"]) for entry in adapters]
def get_completion(_, prompt: str, adapters: Optional[list[str]], method: str) -> str:
    """Request a completion for ``prompt`` from the backend.

    Args:
        _: Ignored. Presumably the value of the Markdown header component
            that is listed first among the interface inputs.
        prompt: Text the model should complete.
        adapters: Identifiers of the feedback adapters to apply. Sent as
            ``None`` when ``method`` is ``"baseline"``.
        method: Name of the method to use. The special value ``"baseline"``
            is sent to the backend as ``None``.

    Returns:
        The ``"response"`` field of the backend's JSON reply.
    """
    # "baseline" means: no adapters and no method are forwarded.
    is_baseline = method == "baseline"
    payload = {
        "C3PO_API_KEY": os.environ.get("C3PO_API_KEY"),
        "prompt": prompt,
        "adapters": None if is_baseline else adapters,
        "method": None if is_baseline else method,
    }
    reply = requests.post(f"{ENDPOINT_URL}/completion", json=payload)
    return reply.json()["response"]
def warmup(*args):
    """Send a best-effort warm-up request to the backend without blocking.

    The request is posted to the ``/warmup`` endpoint from a daemon thread,
    so the caller returns immediately and the reply is never inspected.

    Args:
        *args: Accepted and ignored, so the function can be used as a
            callback regardless of what positional values it is handed.
    """
    payload = {"C3PO_API_KEY": os.environ.get("C3PO_API_KEY")}
    # Warm up the server but don't wait for the response.
    # The trailing comma matters: ``args`` must be a tuple. A bare
    # parenthesised string would be unpacked character by character and
    # ``requests.post`` would fail with a TypeError inside the thread.
    threading.Thread(
        target=requests.post,
        args=(f"{ENDPOINT_URL}/warmup",),
        kwargs={"json": payload},
        daemon=True,
    ).start()
# Fetch the adapter list once at import time; it populates the dropdown below.
dropdown_options = get_feedback_options()

demo = gr.Interface(
    get_completion,
    [
        # The Markdown header is listed as the first input component, which is
        # why get_completion takes a throwaway first parameter.
        gr.Markdown(
            """
# C3PO Demo
This is a demo of Contextualized Critiques with Constrained Preference Optimization (C3PO). See the project website [here](https://austrian-code-wizard.github.io/c3po-website/), repo [here](https://github.com/austrian-code-wizard/c3po), and the paper [here](<insert link>).
Selecting a feedback in the dropdown and enabling the "Use Feedback Adapter" checkbox will add the respective adapter to the model. The model will then use the feedback to generate the completion.
### Warning
The model is not hosted on Huggingface but on a 3rd party service. If this HF space has not been used recently, the model container might need to spin up if it's not currently running. This might take up to a minute on the first request.
"""
        ),
        gr.Textbox(placeholder="Enter a prompt...", label="Prompt"),
        gr.Dropdown(
            choices=dropdown_options,
            label="Feedback",
            info="Will add the adapter for the respective feedback to the model.",
            # A multiselect dropdown carries a list of values, so the default
            # is a one-element list. Fall back to no selection when the
            # backend returned no adapters instead of raising IndexError.
            value=[dropdown_options[0][1]] if dropdown_options else None,
            multiselect=True,
            max_choices=3,
        ),
        gr.Radio(
            # (label shown to the user, value passed to get_completion)
            choices=[
                ("C3PO", "c3po"),
                ("DPO", "dpo_after_sft"),
                ("SCD + Negatives", "sft_negatives"),
                ("SCD", "sft"),
                ("Baseline", "baseline"),
            ],
            value="c3po",
            label="Select which method to use. 'Baseline' is the Mistral-instruct-v0.2 model without any adapter.",
        ),
    ],
    "text",
    concurrency_limit=8,
)

# Only start the server when this file is executed as a script.
if __name__ == "__main__":
    # Enable Gradio's request queue (max_size=32) before launching.
    demo.queue(max_size=32)
    demo.launch()