Spaces:

CreitinGameplays
/

ConvAIChat

Runtime error

App Files Files Community

ConvAIChat / app.py

CreitinGameplays

Update app.py

6250663 verified about 1 year ago

raw

history blame

2.49 kB

	import gradio as gr
	import torch
	import spaces
	import bitsandbytes as bnb
	from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

	# Define the model name
	model_name = "CreitinGameplays/ConvAI-9b"

	# Quantization configuration with bitsandbytes settings
	bnb_config = BitsAndBytesConfig(
	load_in_4bit=True,
	bnb_4bit_use_double_quant=True,
	bnb_4bit_quant_type="nf4",
	bnb_4bit_compute_dtype=torch.bfloat16
	)

	# Load tokenizer and model
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config, low_cpu_mem_usage=True)
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	#model.to(device)

	@spaces.GPU(duration=120)
	def generate_text(user_prompt):
	"""Generates text using the ConvAI model from Hugging Face Transformers and removes the user prompt."""
	# Construct the full prompt with system introduction, user prompt, and assistant role

	system = "You are a helpful AI language model called ChatGPT, your goal is helping users with their questions."

	prompt = f"<\|system\|> {system} </s> <\|user\|> {user_prompt} </s>"

	# Encode the entire prompt into tokens
	prompt_encoded = tokenizer.encode(prompt, return_tensors="pt").to(device)

	# Generate text with the complete prompt and limit the maximum length to 256 tokens
	output = model.generate(
	input_ids=prompt_encoded,
	max_length=1550,
	num_beams=1,
	num_return_sequences=1,
	do_sample=True,
	top_k=50,
	top_p=0.9,
	temperature=0.2,
	repetition_penalty=1.2
	)

	# Decode the generated token sequence back to text
	generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

	# Extract the assistant's response
	assistant_response = generated_text.split("<\|user\|>")[-1]
	assistant_response = assistant_response.replace(f"{user_prompt}", "").strip()
	assistant_response = assistant_response.replace(system, "").strip()
	assistant_response = assistant_response.replace("<\|system\|>", "").strip()
	assistant_response = assistant_response.replace("<\|assistant\|>", "").strip()

	return assistant_response

	# Define the Gradio interface
	interface = gr.Interface(
	fn=generate_text,
	inputs=[
	gr.Textbox(label="Text Prompt", value="What's an AI?"),
	],
	outputs="text",
	description="Interact with ConvAI (Loaded with Hugging Face Transformers)",
	)


	# Launch the Gradio interface
	interface.launch()