import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the models and tokenizers
# NOTE: "gpt-3.5-turbo-0613" is an OpenAI API model, not a Hugging Face Hub
# checkpoint, so this from_pretrained call will fail as written; GPT-3.5
# responses have to be fetched through the OpenAI API instead (see the
# commented sketch at the end of this file).
gpt35_model = AutoModelForCausalLM.from_pretrained("gpt-3.5-turbo-0613")
gpt35_tokenizer = AutoTokenizer.from_pretrained("gpt-3.5-turbo-0613")

vicuna_model = AutoModelForCausalLM.from_pretrained("lmsys/vicuna-7b-v1.3")
vicuna_tokenizer = AutoTokenizer.from_pretrained("lmsys/vicuna-7b-v1.3")

llama_model = AutoModelForCausalLM.from_pretrained("./llama/hf/7B")
llama_tokenizer = AutoTokenizer.from_pretrained("./llama/hf/7B")
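
# A minimal sketch, assuming torch and accelerate are available (not part of
# the original script): the 7B checkpoints above can be loaded in half
# precision with automatic device placement to reduce memory use, e.g.
#
#   import torch
#   vicuna_model = AutoModelForCausalLM.from_pretrained(
#       "lmsys/vicuna-7b-v1.3", torch_dtype=torch.float16, device_map="auto"
#   )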

# Define the function for generating responses
def generate_response(model, tokenizer, prompt):
    inputs = tokenizer(prompt, return_tensors="pt")
    # max_new_tokens bounds the generated continuation itself (max_length would also count prompt tokens)
    outputs = model.generate(**inputs, max_new_tokens=100, pad_token_id=tokenizer.eos_token_id)
    # Skip special tokens so the decoded text does not include <s>/</s> markers
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# Define the Gradio interface
def chatbot_interface(prompt):
    gpt35_response = generate_response(gpt35_model, gpt35_tokenizer, prompt)
    vicuna_response = generate_response(vicuna_model, vicuna_tokenizer, prompt)
    llama_response = generate_response(llama_model, llama_tokenizer, prompt)

    return {"GPT-3.5": gpt35_response, "Vicuna-7B": vicuna_response, "Llama-7B": llama_response}

iface = gr.Interface(fn=chatbot_interface,
                     inputs="text",
                     # "panel" is not a Gradio component; the returned dict renders as JSON
                     outputs="json",
                     title="Chatbot with Three Models")

iface.launch()
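
# --- Hedged sketch (not part of the original script) -------------------------
# Since "gpt-3.5-turbo-0613" is only served through the OpenAI API, its
# responses could be fetched with something like the snippet below. This
# assumes the pre-1.0 `openai` package is installed and OPENAI_API_KEY is set;
# the helper name `generate_gpt35_response` is illustrative only.
#
#   import openai
#
#   def generate_gpt35_response(prompt):
#       completion = openai.ChatCompletion.create(
#           model="gpt-3.5-turbo-0613",
#           messages=[{"role": "user", "content": prompt}],
#           max_tokens=100,
#       )
#       return completion["choices"][0]["message"]["content"]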