import gradio as gr
import random
import time
from transformers import AutoModelForCausalLM, AutoTokenizer

# Checkpoint identifier shared by the tokenizer and the model below.
model_name = "lmsys/vicuna-7b-v1.3"

# Both objects are created once at module import so that every request
# handled by the UI callback reuses them instead of reloading.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

with gr.Blocks() as demo:
    gr.Markdown("# LLM Evaluator With Linguistic Scrutiny")

    # One tab per linguistic task. Each tab shows three chatbot panels side by
    # side; only the vicuna-7b panel is connected to a model in this file. The
    # llama-7b and gpt-3.5 panels are created but have no event handlers here.
    with gr.Tab("POS"):
        with gr.Row():
            # `live` is not a gr.Chatbot constructor option, so it is not passed.
            vicuna_chatbot = gr.Chatbot(label="vicuna-7b")
            llama_chatbot = gr.Chatbot(label="llama-7b")
            gpt_chatbot = gr.Chatbot(label="gpt-3.5")
        with gr.Row():
            prompt = gr.Textbox(show_label=False, placeholder="Enter prompt")
            send_button_POS = gr.Button("Send", scale=0)
        clear_pos = gr.ClearButton([prompt, vicuna_chatbot])
    with gr.Tab("Chunk"):
        with gr.Row():
            vicuna_chatbot_chunk = gr.Chatbot(label="vicuna-7b")
            llama_chatbot_chunk = gr.Chatbot(label="llama-7b")
            gpt_chatbot_chunk = gr.Chatbot(label="gpt-3.5")
        with gr.Row():
            prompt_chunk = gr.Textbox(show_label=False, placeholder="Enter prompt")
            send_button_Chunk = gr.Button("Send", scale=0)
        clear_chunk = gr.ClearButton([prompt_chunk, vicuna_chatbot_chunk])

    def respond(message, chat_history, chatbot=None):
        """Generate a Vicuna reply to ``message`` and append it to the history.

        Args:
            message: Text the user typed into the prompt textbox.
            chat_history: Current value of the chatbot panel, a list of
                ``(user_message, bot_message)`` pairs; ``None`` is treated as
                an empty history.
            chatbot: Unused. Kept (now optional) so existing three-argument
                callers keep working.

        Returns:
            A ``(textbox_value, history)`` pair: an empty string that clears
            the prompt textbox, and a new history list including this turn.
        """
        # Work on a copy so the caller's list is never mutated in place.
        history = list(chat_history or [])

        # Nothing to answer: leave the conversation untouched.
        if not message or not message.strip():
            return "", history

        input_ids = tokenizer.encode(message, return_tensors="pt")
        # max_new_tokens bounds only the reply; max_length would also count the
        # prompt tokens, leaving no room for an answer on longer prompts.
        output = model.generate(
            input_ids,
            max_new_tokens=50,
            num_beams=5,
            no_repeat_ngram_size=2,
        )
        # The generated sequence starts with the prompt tokens; drop them so
        # the reply does not echo the user's own message.
        reply_ids = output[0][input_ids.shape[-1]:]
        bot_message = tokenizer.decode(reply_ids, skip_special_tokens=True).strip()

        history.append((message, bot_message))
        return "", history

    # Each handler needs `outputs`, otherwise the values returned by `respond`
    # are discarded and the UI never changes. Pressing Enter in the textbox
    # and clicking "Send" trigger the same callback in both tabs.
    pos_io = [prompt, vicuna_chatbot]
    prompt.submit(respond, pos_io, pos_io)
    send_button_POS.click(respond, pos_io, pos_io)

    chunk_io = [prompt_chunk, vicuna_chatbot_chunk]
    prompt_chunk.submit(respond, chunk_io, chunk_io)
    send_button_Chunk.click(respond, chunk_io, chunk_io)

demo.launch()