# Hugging Face Space: hackergeek98/ttherapist (status when captured: Sleeping)
# File size: 3,338 bytes. Commit hashes shown in the page's blame gutter: beb48b8, 455f8af, 42a13ed.

import gradio as gr
from huggingface_hub import InferenceClient
from transformers import AutoModelForCausalLM, AutoTokenizer

# Client for the hosted Zephyr-7B chat model; used by respond_with_zephyr
# for streamed chat completions.
zephyr_client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

# Load the fine-tuned causal LM and its tokenizer from the Hugging Face Hub.
# Both are loaded once at import time and reused by respond_with_gpt2.
MODEL_NAME = "hackergeek98/therapist01"  # Hub repo id of the fine-tuned model
gpt2_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
gpt2_model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)

# Running transcript ("User: ...\nTherapist: ...\n") used as the GPT-2 prompt.
# NOTE(review): this is a single module-level string, so every caller of
# respond_with_gpt2 in this process appends to the same transcript.
conversation_history = ""

# Function to generate responses using Zephyr-7B
def respond_with_zephyr(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a reply from the Zephyr-7B inference client.

    Builds a chat transcript from the system message, the prior
    (user, assistant) turns in ``history`` and the new ``message``, then
    requests a streamed completion.

    Args:
        message: The new user message.
        history: Prior turns as (user_text, assistant_text) pairs; empty
            entries in a pair are skipped.
        system_message: Content of the leading system message.
        max_tokens: Forwarded to ``chat_completion`` as ``max_tokens``.
        temperature: Forwarded sampling temperature.
        top_p: Forwarded nucleus-sampling threshold.

    Yields:
        The accumulated response text after each chunk that carried content.
    """
    messages = [{"role": "system", "content": system_message}]

    for turn in history:
        if turn[0]:
            messages.append({"role": "user", "content": turn[0]})
        if turn[1]:
            messages.append({"role": "assistant", "content": turn[1]})

    messages.append({"role": "user", "content": message})

    response = ""

    # The loop variable is deliberately NOT called `message`, so the
    # function parameter is not shadowed while streaming.
    for chunk in zephyr_client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        if not chunk.choices:
            continue

        token = chunk.choices[0].delta.content
        # Streamed chunks (typically the final one) may carry no content;
        # concatenating None to a str would raise TypeError.
        if not token:
            continue

        response += token
        yield response

# Function to generate responses using GPT-2
def respond_with_gpt2(user_input):
    """Generate a reply from the fine-tuned GPT-2 model.

    Appends the user's turn to the module-level ``conversation_history``,
    prompts the model with the most recent part of that transcript, and
    appends the model's reply to the transcript.

    Args:
        user_input: The new user message.

    Returns:
        Only the newly generated text (the prompt is not echoed back).
    """
    global conversation_history

    # Same total length cap the original generate() call used
    # (presumably the model's context size -- confirm against its config).
    context_window = 1024
    # Tokens kept free for the reply so generation always has room.
    reply_budget = 256
    prompt_budget = context_window - reply_budget

    # Update conversation history with user input
    conversation_history += f"User: {user_input}\n"

    # Tokenize the whole transcript, then keep only the most recent tokens.
    # Slicing from the end ensures the newest user turn is never the part
    # that gets dropped when the transcript grows long.
    inputs = gpt2_tokenizer(conversation_history, return_tensors="pt")
    input_ids = inputs["input_ids"][:, -prompt_budget:]
    attention_mask = inputs["attention_mask"][:, -prompt_budget:]
    prompt_length = input_ids.shape[-1]

    # Generate a response from the model
    outputs = gpt2_model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_length=context_window,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        # Explicit pad id for models that define no pad token.
        pad_token_id=gpt2_tokenizer.eos_token_id,
    )

    # generate() returns prompt + continuation; decode only the continuation
    # so the reply (and the stored history) does not duplicate the prompt.
    generated_ids = outputs[0][prompt_length:]
    response = gpt2_tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    # Update conversation history with the model's response
    conversation_history += f"Therapist: {response}\n"

    # Return the therapist's response
    return response

# Function to handle the model selection and response generation
def respond(message, history, model_choice, system_message, max_tokens, temperature, top_p):
    """Dispatch a chat message to the selected model and yield its reply.

    This is a generator function so the chat UI can stream: the Zephyr branch
    yields progressively longer partial responses, while the other branches
    yield a single complete string.

    Args:
        message: The new user message.
        history: Prior chat turns, forwarded to the Zephyr backend.
        model_choice: "Zephyr-7B" or "GPT-2 Therapist".
        system_message: System prompt (used by the Zephyr backend only).
        max_tokens: Generation limit (used by the Zephyr backend only).
        temperature: Sampling temperature (used by the Zephyr backend only).
        top_p: Nucleus-sampling threshold (used by the Zephyr backend only).

    Yields:
        Response text; "Invalid model selection." for an unknown choice.
    """
    if model_choice == "Zephyr-7B":
        # Delegate to the streaming generator instead of returning the
        # generator object itself, which the UI could not render.
        yield from respond_with_zephyr(message, history, system_message, max_tokens, temperature, top_p)
    elif model_choice == "GPT-2 Therapist":
        yield respond_with_gpt2(message)
    else:
        yield "Invalid model selection."

# Create Gradio interface
# Extra controls rendered with the chat box. Their values are passed to
# `respond` after (message, history), in this order: model choice, system
# message, max tokens, temperature, top-p.
additional_controls = [
    gr.Dropdown(
        choices=["Zephyr-7B", "GPT-2 Therapist"],
        label="Model",
        value="Zephyr-7B",
    ),
    gr.Textbox(
        value="You are a friendly Chatbot.",
        label="System message",
    ),
    gr.Slider(
        minimum=1,
        maximum=2048,
        value=512,
        step=1,
        label="Max new tokens",
    ),
    gr.Slider(
        minimum=0.1,
        maximum=4.0,
        value=0.7,
        step=0.1,
        label="Temperature",
    ),
    gr.Slider(
        minimum=0.1,
        maximum=1.0,
        value=0.95,
        step=0.05,
        label="Top-p (nucleus sampling)",
    ),
]

demo = gr.ChatInterface(
    respond,
    additional_inputs=additional_controls,
    title="Multi-Model Chat Interface",
    description="Choose between Zephyr-7B and a fine-tuned GPT-2 model to chat with.",
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()