Spaces:

francismurray
/

llm-compare

Running

File size: 5,462 Bytes

aac64a6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b6ed54a
 
 
 
aac64a6

import os
import gradio as gr
from dotenv import load_dotenv
from huggingface_hub import InferenceClient

# Load environment variables first so the token lookup below can see them.
load_dotenv()
HF_TOKEN = os.environ.get("HF_TOKEN")

# Fail at import time when the token is missing or empty.
if HF_TOKEN is None or HF_TOKEN == "":
    raise ValueError("Please set HF_TOKEN environment variable")

# Model identifiers offered in both dropdowns; the UI uses the first two
# entries as the initial selections.
AVAILABLE_MODELS = [
    "HuggingFaceH4/zephyr-7b-beta",
    "NousResearch/Hermes-3-Llama-3.1-8B",
    "mistralai/Mistral-Nemo-Base-2407",
    "meta-llama/Llama-2-70b-hf",
    "aaditya/Llama3-OpenBioLLM-8B",
]

# Single shared client, authenticated with the token read above.
inference_client = InferenceClient(token=HF_TOKEN)

def get_model_response(prompt, model_name, temperature_value, do_sample, *, max_new_tokens=100):
    """Get response from a Hugging Face model.

    Args:
        prompt: Text sent to the model.
        model_name: Model identifier, forwarded as ``model`` to the client.
        temperature_value: Sampling temperature. It is forwarded only when
            ``do_sample`` is true and the value is greater than 0; ``None``
            is treated as "not set" instead of producing an error.
        do_sample: Whether sampling is requested; forwarded as-is.
        max_new_tokens: Value forwarded as ``max_new_tokens``. Defaults to
            100, the limit this function previously hard-coded.

    Returns:
        Whatever ``inference_client.text_generation`` returns on success.
        If any ``Exception`` is raised along the way, the string
        ``"Error: <message>"`` is returned instead, so callers can display
        the result without their own error handling.
    """
    try:
        # Build kwargs dynamically so optional settings can be left out.
        generation_args = {
            "prompt": prompt,
            "model": model_name,
            "max_new_tokens": max_new_tokens,
            "do_sample": do_sample,
            "return_full_text": False,
        }

        # Only include temperature if sampling is enabled and a positive
        # value was actually supplied.
        if do_sample and temperature_value is not None and temperature_value > 0:
            generation_args["temperature"] = temperature_value

        return inference_client.text_generation(**generation_args)

    except Exception as e:
        # UI boundary: report the failure as text rather than raising.
        return f"Error: {e}"

def compare_models(prompt, model1, model2, temp1, temp2, do_sample1, do_sample2):
    """Send one prompt to two models and package both replies for display.

    Returns a 3-tuple: the message list for the first model, the message
    list for the second model, and a ``gr.update(interactive=True)``.
    Each message list holds a user entry with the prompt followed by an
    assistant entry with the reply. When the prompt is blank after
    stripping, no model is called and both replies are the text
    "Please enter a prompt".
    """
    def as_chat(reply):
        # Fresh list per call so the two outputs never share objects.
        return [
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": reply},
        ]

    if prompt.strip():
        # Query the models one after the other, first model first.
        first_reply = get_model_response(prompt, model1, temp1, do_sample1)
        second_reply = get_model_response(prompt, model2, temp2, do_sample2)
    else:
        first_reply = second_reply = "Please enter a prompt"

    first_chat = as_chat(first_reply)
    second_chat = as_chat(second_reply)
    return first_chat, second_chat, gr.update(interactive=True)


def update_slider_state(enabled):
    """Build the updates that keep a temperature slider in step with its sampling checkbox.

    Returns a list of two ``gr.update`` results:
      1. sets ``interactive`` to ``enabled``;
      2. sets ``elem_classes`` to ``[]`` when enabled or
         ``["disabled-slider"]`` otherwise, and ``value`` to ``None`` when
         enabled or ``0`` otherwise.
    """
    if enabled:
        css_classes, slider_value = [], None
    else:
        css_classes, slider_value = ["disabled-slider"], 0

    interactivity = gr.update(interactive=enabled)
    appearance = gr.update(elem_classes=css_classes, value=slider_value)
    return [interactivity, appearance]

# Create the Gradio interface: a shared prompt box and submit button on top,
# then two side-by-side columns, each holding a model dropdown, a sampling
# checkbox, a temperature slider and a chatbot pane for that model's output.
# The CSS rule styles sliders carrying the "disabled-slider" class.
with gr.Blocks(css="""
    .disabled-slider { opacity: 0.5; pointer-events: none; }
""") as demo:
    gr.Markdown("# LLM Comparison Tool")
    gr.Markdown("Compare outputs from different Hugging Face models side by side.")

    # Prompt shared by both models.
    with gr.Row():
        prompt = gr.Textbox(
            label="Enter your prompt",
            placeholder="Type your prompt here...",
            lines=3
        )

    with gr.Row():
        submit_btn = gr.Button("Generate Responses")

    with gr.Row():
        # Left column: controls and output for model 1.
        with gr.Column():
            model1_dropdown = gr.Dropdown(
                choices=AVAILABLE_MODELS,
                value=AVAILABLE_MODELS[0],
                label="Select Model 1"
            )
            do_sample1 = gr.Checkbox(
                label="Enable sampling (random outputs)",
                value=False
            )
            # Starts non-interactive and styled as disabled, matching the
            # unchecked sampling checkbox above.
            temp1 = gr.Slider(
                label="Temperature (Higher = more creative, lower = more predictable)",
                minimum=0,
                maximum=1,
                step=0.1,
                value=0.0,
                interactive=False,
                elem_classes=["disabled-slider"]
            )
            chatbot1 = gr.Chatbot(
                label="Model 1 Output",
                show_label=True,
                height=300,
                type="messages"
            )


        # Right column: same controls for model 2, defaulting to the second
        # entry of AVAILABLE_MODELS.
        with gr.Column():
            model2_dropdown = gr.Dropdown(
                choices=AVAILABLE_MODELS,
                value=AVAILABLE_MODELS[1],
                label="Select Model 2"
            )
            do_sample2 = gr.Checkbox(
                label="Enable sampling (random outputs)",
                value=False
            )
            # Starts non-interactive and styled as disabled, matching the
            # unchecked sampling checkbox above.
            temp2 = gr.Slider(
                label="Temperature (Higher = more creative, lower = more predictable)",
                minimum=0,
                maximum=1,
                step=0.1,
                value=0.0,
                interactive=False,
                elem_classes=["disabled-slider"]
            )
            chatbot2 = gr.Chatbot(
                label="Model 2 Output",
                show_label=True,
                height=300,
                type="messages"
            )

    def start_loading():
        """Return an update that makes the receiving component non-interactive."""
        return gr.update(interactive=False)

    # Handle form submission in two chained steps: start_loading first
    # disables the submit button, then compare_models fills both chatbots and
    # its third return value (an interactive=True update) re-enables the button.
    submit_btn.click(
        fn=start_loading,
        inputs=None,
        outputs=submit_btn,
        queue=False
    ).then(
        fn=compare_models,
        inputs=[prompt, model1_dropdown, model2_dropdown, temp1, temp2, do_sample1, do_sample2],
        outputs=[chatbot1, chatbot2, submit_btn]
    )



    # update_slider_state returns a list of two updates, so each slider is
    # listed twice in outputs to receive both of them.
    # NOTE(review): presumably Gradio applies both updates to the same
    # component; confirm against the installed Gradio version.
    do_sample1.change(
        fn=update_slider_state,
        inputs=[do_sample1],
        outputs=[temp1, temp1]
    )

    do_sample2.change(
        fn=update_slider_state,
        inputs=[do_sample2],
        outputs=[temp2, temp2]
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
    # Alternative launch call kept for reference: demo.launch(share=True)