CreitinGameplays committed
Commit f03eadd · verified · 1 Parent(s): 5f7436b

Update app.py

Files changed (1)
  app.py +3 -1
app.py CHANGED
@@ -8,6 +8,8 @@ model_name = "CreitinGameplays/bloom-3b-conversational"
 # Load tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(model_name)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
 
 def generate_text(user_prompt):
     """Generates text using the BLOOM model from Hugging Face Transformers and removes the user prompt."""
@@ -15,7 +17,7 @@ def generate_text(user_prompt):
     prompt = f"<|system|> You are a helpful AI assistant. </s> <|prompter|> {user_prompt} </s> <|assistant|>"
 
     # Encode the entire prompt into tokens
-    prompt_encoded = tokenizer(prompt, return_tensors="pt").input_ids
+    prompt_encoded = tokenizer.encode(prompt, return_tensors="pt").to(device)
 
     # Generate text with the complete prompt and limit the maximum length to 256 tokens
     output = model.generate(
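
For reference, a minimal sketch of how the patched app.py plausibly runs end to end. The torch import, the max_length=256 value (taken from the in-file comment), the remaining generate() arguments, and the final prompt-stripping step are assumptions; the diff context cuts off before the generate() call completes.

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "CreitinGameplays/bloom-3b-conversational"

# Load tokenizer and model, then move the model to GPU when one is available
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

def generate_text(user_prompt):
    """Generates text using the BLOOM model and removes the user prompt."""
    prompt = f"<|system|> You are a helpful AI assistant. </s> <|prompter|> {user_prompt} </s> <|assistant|>"

    # Encode the prompt and place the ids on the same device as the model;
    # this is the point of the patch: a CUDA model cannot consume CPU tensors.
    prompt_encoded = tokenizer.encode(prompt, return_tensors="pt").to(device)

    # Generate with the complete prompt; max_length=256 follows the comment
    # in the diff, anything past the open parenthesis is assumed.
    output = model.generate(prompt_encoded, max_length=256)

    # Slice off the prompt tokens so only the assistant reply is decoded
    # (one plausible reading of "removes the user prompt" in the docstring).
    reply_ids = output[0][prompt_encoded.shape[-1]:]
    return tokenizer.decode(reply_ids, skip_special_tokens=True)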