import gradio as gr
import torch
import spaces
import bitsandbytes as bnb  # not called directly; ensures 4-bit support is installed
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
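# Assumed environment: a Hugging Face Space with GPU access (`spaces` package),
# plus gradio, torch, transformers, accelerate, and bitsandbytes installed.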

# Define the model name
model_name = "CreitinGameplays/ConvAI-9b"

# Quantization configuration with bitsandbytes settings
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
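# NF4 with nested ("double") quantization compresses the quantization constants
# as well as the weights, which should let the 9B model fit on a single GPU
# while compute runs in bfloat16.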

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config, low_cpu_mem_usage=True)
# bitsandbytes places the quantized weights on the GPU at load time, so an
# explicit model.to(device) is unnecessary (and unsupported for 4-bit models)

# Initialize chat history
chat_history = []
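# Caveat: a module-level list is shared by every visitor to the Space; for
# per-user conversations, gr.State would be the idiomatic Gradio mechanism.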

@spaces.GPU(duration=120)
def generate_text(user_prompt, top_p, top_k, temperature):
    """Generates text using the ConvAI model from Hugging Face Transformers and maintains conversation history."""
    # System introduction
    system = "You are a helpful AI language model called ChatGPT, your goal is helping users with their questions."

    # Append user prompt to chat history
    chat_history.append(f"User: {user_prompt}")

    # Construct the full prompt: system introduction followed by the running chat history
    prompt = f"{system} </s> {' '.join(chat_history)} </s>"

    # Encode the entire prompt into tokens
    prompt_encoded = tokenizer.encode(prompt, return_tensors="pt").to(model.device)

    # Generate text; max_length caps the combined prompt + response at 1550 tokens
    output = model.generate(
        input_ids=prompt_encoded,
        max_length=1550,
        num_beams=1,
        num_return_sequences=1,
        do_sample=True,
        top_k=top_k,
        top_p=top_p,
        temperature=temperature,
        repetition_penalty=1.2
    )

    # Decode the generated token sequence back to text
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

    # Extract the assistant's response: keep only the text generated beyond the prompt
    prompt_text = tokenizer.decode(prompt_encoded[0], skip_special_tokens=True)
    assistant_response = generated_text[len(prompt_text):].strip()
    chat_history.append(f"Assistant: {assistant_response}")

    return "\n".join(chat_history)

def reset_history():
    global chat_history
    chat_history = []
    return "Chat history reset."

# Define the Gradio interface
interface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(label="Text Prompt", value="What's an AI?"),
        gr.Slider(0, 1, value=0.9, label="Top-p"),
        gr.Slider(1, 100, value=50, step=1, label="Top-k"),
        gr.Slider(0.01, 1, value=0.2, label="Temperature")
    ],
    outputs="text",
    description="Interact with ConvAI (Loaded with Hugging Face Transformers)",
    live=True  # live mode re-runs generation on every input change, which is costly for an LLM
)

# gr.Interface has no add_component API; expose the reset function as a second
# interface instead and combine the two in a tabbed layout
reset_interface = gr.Interface(fn=reset_history, inputs=[], outputs="text")

# Launch the Gradio interfaces
gr.TabbedInterface([interface, reset_interface], ["Chat", "Reset Chat History"]).launch()
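# Note: outside a ZeroGPU Space the `spaces` decorator should be a no-op, so the
# app can also be run locally (assuming a CUDA GPU for bitsandbytes): python app.py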