Padding
app.py CHANGED
@@ -3,6 +3,7 @@ import os
 import spaces
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 from threading import Thread
+import torch
 
 # Set an environment variable
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
@@ -43,6 +44,10 @@ h1 {
 tokenizer = AutoTokenizer.from_pretrained("mistralai/Ministral-8B-Instruct-2410")
 model = AutoModelForCausalLM.from_pretrained("mistralai/Ministral-8B-Instruct-2410", device_map="auto")
 
+# Ensure we have a pad token
+if tokenizer.pad_token_id is None:
+    tokenizer.pad_token_id = tokenizer.eos_token_id
+
 terminators = [
     tokenizer.eos_token_id,
     tokenizer.convert_tokens_to_ids("<|eot_id|>")
@@ -88,17 +93,23 @@ def chat_mistral(message: str,
     first_message = f"{formatted_prompt}{message}" if formatted_prompt else message
     conversation.append({"role": "user", "content": first_message})
 
-
-
+    # Tokenize with padding and attention mask
+    input_data = tokenizer.apply_chat_template(conversation, return_tensors="pt", padding=True, truncation=True)
+    input_ids = input_data.to(model.device)
+
+    attention_mask = input_ids.ne(tokenizer.pad_token_id).to(dtype=torch.long, device=model.device)
+
     streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
 
     generate_kwargs = dict(
         input_ids=input_ids,
+        attention_mask=attention_mask,  # Fixes the warning
         streamer=streamer,
         max_new_tokens=max_new_tokens,
         do_sample=True,
         temperature=temperature,
         top_p=top_p,
+        pad_token_id=tokenizer.pad_token_id,  # Explicitly set
         eos_token_id=terminators,
     )
 
@@ -139,10 +150,13 @@ with gr.Blocks(fill_height=True, css=css) as demo:
         gr.Slider(minimum=128, maximum=4096, step=1, value=4096, label="Max new tokens", render=False),
     ],
     examples=[
-        ['
+        ['How to setup a human base on Mars? Give short answer.'],
+        ['Explain theory of relativity to me like I’m 8 years old.'],
+        ['What is 9,000 * 9,000?'],
+        ['Write a pun-filled happy birthday message to my friend Alex.'],
+        ['Justify why a penguin might make a good king of the jungle.']
     ],
-    cache_examples=False
-    type='messages',
+    cache_examples=False
     )
 
 if __name__ == "__main__":
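For context, here is a minimal standalone sketch of the pattern this commit applies: fall back to the EOS id when the tokenizer has no pad token, build an attention mask from the non-pad positions, and pass both pad_token_id and attention_mask to generate(). The checkpoint name and mask construction are taken from the diff; the prompt and generation settings are illustrative assumptions, and streaming is omitted to keep the sketch short.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("mistralai/Ministral-8B-Instruct-2410")
model = AutoModelForCausalLM.from_pretrained("mistralai/Ministral-8B-Instruct-2410", device_map="auto")

# Many decoder-only tokenizers ship without a pad token; reusing the EOS id
# is the common fallback, and it is what the commit does.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

conversation = [{"role": "user", "content": "What is 9,000 * 9,000?"}]  # sample prompt, not from the app
input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(model.device)

# Mark every non-pad position as attended. For a single unpadded prompt this
# is all ones; passing it explicitly avoids the transformers warning about a
# missing attention mask when the pad and EOS ids coincide.
attention_mask = input_ids.ne(tokenizer.pad_token_id).to(dtype=torch.long, device=model.device)

output = model.generate(
    input_ids=input_ids,
    attention_mask=attention_mask,
    pad_token_id=tokenizer.pad_token_id,  # explicit, so generate() does not have to guess
    max_new_tokens=64,
)
# Decode only the newly generated tokens, skipping the prompt.
print(tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True))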