Spaces:

akhaliq
/

MobileLLM-R1-950M

Running on Zero

App Files Files Community

akhaliq HF Staff committed on Sep 12

Commit

e26eb4c

verified ·

1 Parent(s): 1df30c2

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -51

app.py CHANGED Viewed

@@ -8,70 +8,47 @@ model_id = "facebook/MobileLLM-R1-950M"
 pipe = pipeline(
     "text-generation",
     model=model_id,
-    torch_dtype="auto",
     device_map="auto",
 )
 @spaces.GPU(duration=120)
 def respond(message, history):
-    # Build messages list from history
-    messages = []
-    # Add system message based on content type detection
-    if any(kw in message.lower() for kw in ["python", "def ", "function"]):
-        messages.append({
-            "role": "system",
-            "content": (
-                "\nYou are a helpful and harmless assistant. You should think step-by-step before responding to the instruction below.\n\n"
-                "Please use python programming language only.\n"
-                "You must use ```python for just the final solution code block with the following format:\n"
-                "```python\n# Your code here\n```\n"
-            )
-        })
-    elif any(kw in message.lower() for kw in ["c++", "cpp", "#include", "cout"]):
-        messages.append({
-            "role": "system",
-            "content": (
-                "\nYou are a helpful and harmless assistant. You should think step-by-step before responding to the instruction below.\n\n"
-                "Please use c++ programming language only.\n"
-                "You must use ```cpp for just the final solution code block with the following format:\n"
-                "```cpp\n// Your code here\n```\n"
-            )
-        })
-    elif any(kw in message.lower() for kw in ["compute", "calculate", "math", "+", "-", "*", "/"]):
-        messages.append({
-            "role": "system",
-            "content": "Please reason step by step, and put your final answer within \\boxed{}."
-        })
-    else:
-        messages.append({
-            "role": "system",
-            "content": "You are a helpful AI assistant."
-        })
-    # Add conversation history
     for user_msg, assistant_msg in history:
         if user_msg:
-            messages.append({"role": "user", "content": user_msg})
         if assistant_msg:
-            messages.append({"role": "assistant", "content": assistant_msg})
     # Add current message
-    messages.append({"role": "user", "content": message})
-    # Generate response
-    outputs = pipe(
-        messages,
-        max_new_tokens=8192,
-        temperature=0.7,
-        do_sample=True,
-    )
-    # Extract and stream the generated text
-    full_response = outputs[0]["generated_text"][-1]["content"]
     response_text = ""
-    for char in full_response:
-        response_text += char
         yield response_text
 # Create the chat interface

 pipe = pipeline(
     "text-generation",
     model=model_id,
+    torch_dtype=torch.float16,
     device_map="auto",
 )
 @spaces.GPU(duration=120)
 def respond(message, history):
+    # Build prompt from history
+    prompt = ""
     for user_msg, assistant_msg in history:
         if user_msg:
+            prompt += f"User: {user_msg}\n"
         if assistant_msg:
+            prompt += f"Assistant: {assistant_msg}\n"
     # Add current message
+    prompt += f"User: {message}\nAssistant: "
+    # Generate response with streaming
+    streamer = pipe.tokenizer.decode
+    # Generate tokens
+    inputs = pipe.tokenizer(prompt, return_tensors="pt").to(pipe.model.device)
+    with torch.no_grad():
+        outputs = pipe.model.generate(
+            **inputs,
+            max_new_tokens=10000,
+            temperature=0.7,
+            do_sample=True,
+            pad_token_id=pipe.tokenizer.eos_token_id,
+        )
+    # Decode the generated tokens, skipping the input tokens
+    generated_tokens = outputs[0][inputs['input_ids'].shape[-1]:]
+    # Stream the output token by token
     response_text = ""
+    for i in range(len(generated_tokens)):
+        token = generated_tokens[i:i+1]
+        token_text = pipe.tokenizer.decode(token, skip_special_tokens=True)
+        response_text += token_text
         yield response_text
 # Create the chat interface