Spaces:

Ais203
/

aigen

Sleeping

App Files Files Community

Ais committed 11 days ago

Commit

988fa7f

verified ·

1 Parent(s): 730d86c

Update app/inference.py

Browse files

Files changed (1) hide show

app/inference.py +46 -3

app/inference.py CHANGED Viewed

@@ -17,9 +17,34 @@ model.eval()
 streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
-def generate_response(prompt: str) -> str:
-    formatted = f"<|im_start|>system\nYou are a helpful AI assistant.<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
     inputs = tokenizer(formatted, return_tensors="pt").to(model.device)
     with torch.no_grad():
         output = model.generate(
             **inputs,
@@ -29,6 +54,24 @@ def generate_response(prompt: str) -> str:
             do_sample=True,
             pad_token_id=tokenizer.eos_token_id
         )
     decoded = tokenizer.decode(output[0], skip_special_tokens=True)
     answer = decoded.split("<|im_start|>assistant\n")[-1].strip()
-    return answer

 streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
def generate_response(prompt: str, conversation_history: list = None) -> str:
    """Generate an assistant reply to ``prompt``, optionally given prior turns.

    The model input is built in ChatML form: a fixed system message, each
    history turn in order, the current user message, and finally an open
    assistant header for the model to complete.

    Args:
        prompt: Current user message.
        conversation_history: Optional list of dicts shaped like
            ``{"role": "user" | "assistant", "content": "..."}``. Entries
            with any other role are skipped. ``None`` or an empty list means
            no history.

    Returns:
        The newly generated assistant text with surrounding whitespace
        removed.
    """
    # Collect the pieces and join once instead of growing a string with +=.
    parts = ["<|im_start|>system\nYou are a helpful AI assistant.<|im_end|>\n"]
    for msg in conversation_history or ():
        role = msg.get("role", "")
        if role in ("user", "assistant"):
            content = msg.get("content", "")
            parts.append(f"<|im_start|>{role}\n{content}<|im_end|>\n")
    parts.append(f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n")
    formatted = "".join(parts)

    inputs = tokenizer(formatted, return_tensors="pt").to(model.device)
    with torch.no_grad():
        # NOTE(review): the diff this block was taken from elides a few lines
        # inside this call (hunk boundary) -- confirm no generation kwargs
        # such as max_new_tokens/temperature were dropped.
        output = model.generate(
            **inputs,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # The returned sequence starts with the prompt tokens (assumes a
    # decoder-only model -- TODO confirm). Decode only what follows them.
    # Splitting the decoded text on "<|im_start|>assistant\n" is unreliable:
    # skip_special_tokens=True removes that marker when it is a special token,
    # which would leave the whole prompt and history in the answer.
    prompt_len = inputs["input_ids"].shape[-1]
    answer = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True).strip()

    # Fallback for tokenizers that do not treat <|im_end|> as a special token:
    # keep only the text before the first end marker.
    return answer.partition("<|im_end|>")[0].strip()
# Demo: ask a follow-up question that relies on the earlier turns for context.
if __name__ == "__main__":
    # Prior exchange handed to the model as conversation history.
    prior_turns = [
        dict(role="user", content="What is Python?"),
        dict(role="assistant", content="Python is a high-level programming language..."),
    ]
    reply = generate_response(
        "Can you show me a simple example?",
        conversation_history=prior_turns,
    )
    print("Response:", reply)