"""Gradio demo: "LLM Evaluator With Linguistic Scrutiny".

Builds a two-tab UI ("POS" and "Chunk"). Each tab shows three chat panes
(vicuna-7b, llama-7b, gpt-3.5), a prompt textbox, a Send button and a Clear
button. Only the vicuna-7b pane of each tab is connected to a model; the
llama-7b and gpt-3.5 panes are display-only placeholders in this file.
"""

import random  # currently unused; kept so nothing relying on it breaks
import time  # currently unused; kept so nothing relying on it breaks

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load Vicuna 7B model and tokenizer
model_name = "lmsys/vicuna-7b-v1.3"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Upper bound on the number of tokens generated per reply (prompt excluded).
MAX_NEW_TOKENS = 50


def respond(message, chat_history, chatbot=None):
    """Generate a Vicuna reply and append the exchange to the chat history.

    Args:
        message: Text the user typed into the prompt box.
        chat_history: Current chatbot value, a list of
            ``(user_message, bot_message)`` pairs; ``None`` is treated as
            an empty history.
        chatbot: Unused. Retained (now optional) only so existing callers
            that pass a third argument keep working.

    Returns:
        A ``(textbox_value, chat_history)`` pair: an empty string that
        clears the prompt box, and the updated history.
    """
    history = list(chat_history or [])

    # Nothing to send: leave the conversation untouched.
    if not message or not message.strip():
        return "", history

    input_ids = tokenizer.encode(message, return_tensors="pt")
    # max_new_tokens bounds only the reply; max_length would also count the
    # prompt tokens and leave no room for an answer on long prompts.
    output = model.generate(
        input_ids,
        max_new_tokens=MAX_NEW_TOKENS,
        num_beams=5,
        no_repeat_ngram_size=2,
    )
    # A causal LM returns prompt + continuation; keep only the continuation
    # so the bot does not echo the user's message back.
    reply_ids = output[0][input_ids.shape[-1]:]
    bot_message = tokenizer.decode(reply_ids, skip_special_tokens=True).strip()

    history.append((message, bot_message))
    return "", history


with gr.Blocks() as demo:
    gr.Markdown("# LLM Evaluator With Linguistic Scrutiny")

    with gr.Tab("POS"):
        with gr.Row():
            vicuna_chatbot = gr.Chatbot(label="vicuna-7b")
            llama_chatbot = gr.Chatbot(label="llama-7b")
            gpt_chatbot = gr.Chatbot(label="gpt-3.5")
        with gr.Row():
            prompt = gr.Textbox(show_label=False, placeholder="Enter prompt")
            send_button_POS = gr.Button("Send", scale=0)
        clear_pos = gr.ClearButton([prompt, vicuna_chatbot])

    with gr.Tab("Chunk"):
        with gr.Row():
            vicuna_chatbot_chunk = gr.Chatbot(label="vicuna-7b")
            llama_chatbot_chunk = gr.Chatbot(label="llama-7b")
            gpt_chatbot_chunk = gr.Chatbot(label="gpt-3.5")
        with gr.Row():
            prompt_chunk = gr.Textbox(show_label=False, placeholder="Enter prompt")
            send_button_Chunk = gr.Button("Send", scale=0)
        clear_chunk = gr.ClearButton([prompt_chunk, vicuna_chatbot_chunk])

    # Each tab: pressing Enter in the textbox or clicking Send runs respond()
    # and writes its two return values back to the textbox and the chatbot.
    prompt.submit(respond, [prompt, vicuna_chatbot], [prompt, vicuna_chatbot])
    send_button_POS.click(respond, [prompt, vicuna_chatbot], [prompt, vicuna_chatbot])
    prompt_chunk.submit(
        respond,
        [prompt_chunk, vicuna_chatbot_chunk],
        [prompt_chunk, vicuna_chatbot_chunk],
    )
    send_button_Chunk.click(
        respond,
        [prompt_chunk, vicuna_chatbot_chunk],
        [prompt_chunk, vicuna_chatbot_chunk],
    )

if __name__ == "__main__":
    demo.launch()