Spaces:

CreitinGameplays
/

ConvAIChat

Runtime error

App Files Files Community

ConvAIChat / app.py

CreitinGameplays

Update app.py

0eb1946 verified over 1 year ago

raw

history blame

2.91 kB

	import gradio as gr
	import torch
	import spaces
	import bitsandbytes as bnb
	from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

	# Hugging Face Hub id of the checkpoint this app serves
	model_name = "CreitinGameplays/ConvAI-9b"

	# bitsandbytes quantization settings: 4-bit NF4 weights with double
	# quantization, computing in bfloat16
	bnb_config = BitsAndBytesConfig(
	    load_in_4bit=True,
	    bnb_4bit_quant_type="nf4",
	    bnb_4bit_use_double_quant=True,
	    bnb_4bit_compute_dtype=torch.bfloat16,
	)

	# Tokenizer and quantized model are both loaded once, at import time
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	model = AutoModelForCausalLM.from_pretrained(
	    model_name,
	    quantization_config=bnb_config,
	    low_cpu_mem_usage=True,
	)

	# Device that prompt tensors are moved to: CUDA when torch reports it as
	# available, otherwise the CPU
	if torch.cuda.is_available():
	    device = torch.device("cuda")
	else:
	    device = torch.device("cpu")
	# The model itself is deliberately not moved with model.to(device).
	# NOTE(review): presumably because the quantized weights are already placed
	# on a device while loading -- confirm before re-enabling that call.

	# Conversation transcript kept as a module-level list of strings.
	# generate_text() appends "User: ..." and "Assistant: ..." entries to it and
	# reset_history() replaces it with a fresh empty list. There is no per-caller
	# key, so every call to generate_text() reads and extends this same list.
	chat_history = []

	@spaces.GPU(duration=120)
	def generate_text(user_prompt, top_p, top_k, temperature):
	    """Generate the assistant's next reply and return the whole conversation.

	    The prompt sent to the model is the system introduction followed by every
	    turn stored in the module-level ``chat_history`` plus the new user turn.
	    Both the user turn and the reply are appended to ``chat_history`` only
	    after generation succeeds, so a failed call leaves the history unchanged.

	    Args:
	        user_prompt: Text typed by the user for this turn.
	        top_p: Nucleus-sampling probability mass passed to ``model.generate``.
	        top_k: Number of highest-probability tokens kept when sampling; it is
	            coerced to ``int`` because UI sliders may deliver a float.
	        temperature: Sampling temperature passed to ``model.generate``.

	    Returns:
	        The conversation so far, one turn per line.
	    """
	    # A blank submission carries nothing to answer: skip generation and just
	    # show the current transcript.
	    if not user_prompt or not user_prompt.strip():
	        return "\n".join(chat_history)

	    # System introduction
	    system = "You are a helpful AI language model called ChatGPT, your goal is helping users with their questions."

	    # Build the new turn locally; it is committed to the shared history only
	    # once a reply has been produced.
	    user_turn = f"User: {user_prompt}"

	    # Construct the full prompt: system introduction, then all turns so far
	    prompt = f"{system} </s> {' '.join(chat_history + [user_turn])} </s>"

	    # Encode the entire prompt into tokens
	    prompt_encoded = tokenizer.encode(prompt, return_tensors="pt").to(device)

	    # Sample a continuation. max_length caps prompt + generated tokens at
	    # 1550 in total, so a long history leaves less room for the reply.
	    output = model.generate(
	        input_ids=prompt_encoded,
	        max_length=1550,
	        num_beams=1,
	        num_return_sequences=1,
	        do_sample=True,
	        top_k=int(top_k),
	        top_p=top_p,
	        temperature=temperature,
	        repetition_penalty=1.2,
	    )

	    # The returned sequence starts with the prompt tokens. Decode only what
	    # comes after them; splitting the decoded text on "User:" would put the
	    # user's own latest message into the assistant's turn.
	    new_tokens = output[0][prompt_encoded.shape[-1]:]
	    assistant_response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

	    # Commit both sides of the exchange together
	    chat_history.extend([user_turn, f"Assistant: {assistant_response}"])

	    return "\n".join(chat_history)

	def reset_history():
	    """Discard the stored conversation and return a confirmation message.

	    The module-level ``chat_history`` name is rebound to a new empty list;
	    the previous list object is not modified.
	    """
	    global chat_history
	    confirmation = "Chat history reset."
	    chat_history = list()
	    return confirmation

	# Define the Gradio interface.
	# The layout is built with gr.Blocks so that a working reset button can sit
	# next to the inputs: gr.Interface has no add_component() method, and a
	# gr.Button takes its caption (not a callback) as its value, so callbacks are
	# attached with .click() instead. Generation runs only when "Generate" is
	# pressed; running it on every input change would append a new pair of turns
	# to the shared history each time a slider or the text box changed.
	with gr.Blocks() as interface:
	    gr.Markdown("Interact with ConvAI (Loaded with Hugging Face Transformers)")

	    prompt_box = gr.Textbox(label="Text Prompt", value="What's an AI?")
	    top_p_slider = gr.Slider(0, 1, value=0.9, label="Top-p")
	    top_k_slider = gr.Slider(1, 100, value=50, step=1, label="Top-k")
	    temperature_slider = gr.Slider(0.01, 1, value=0.2, label="Temperature")

	    # Read-only box that shows whatever the callbacks return
	    conversation_box = gr.Textbox(label="Conversation", lines=12, interactive=False)

	    with gr.Row():
	        generate_button = gr.Button("Generate")
	        reset_button = gr.Button("Reset Chat History")

	    # Input order matches generate_text(user_prompt, top_p, top_k, temperature)
	    generate_button.click(
	        fn=generate_text,
	        inputs=[prompt_box, top_p_slider, top_k_slider, temperature_slider],
	        outputs=conversation_box,
	    )

	    # reset_history() takes no arguments; its confirmation string is displayed
	    reset_button.click(fn=reset_history, inputs=None, outputs=conversation_box)

	# Launch the Gradio interface
	interface.launch()