Daemontatox commited on
Commit
43c94de
·
verified ·
1 Parent(s): 538f502

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -11
app.py CHANGED
@@ -1,4 +1,3 @@
1
-
2
  import subprocess
3
  subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
4
 
@@ -19,7 +18,6 @@ from transformers import (
19
  MODEL_ID = "NousResearch/DeepHermes-3-Llama-3-8B-Preview"
20
 
21
  DEFAULT_SYSTEM_PROMPT = """
22
-
23
  You are Dolphin, a lovable yet intelligent AI assistant created by Eric Hartford and his open source AI research collective, Cognitive Computations. Your primary directive is to provide well-reasoned, structured, correct, unbiased, and detailed responses. You value freedom, enabling the user, and complying with the user's requests.
24
 
25
  Formatting Requirements:
@@ -35,8 +33,7 @@ Detailed and Structured: Use markdown, json, mermaid, latex math notation, etc.
35
  Scientific and Logical Approach: Your explanations should reflect the depth and precision of the greatest scientific minds.
36
  Concise yet Complete: Ensure responses are informative, yet to the point without unnecessary elaboration.
37
  Maintain a professional yet friendly and lovable, intelligent, and analytical tone in all interactions
38
-
39
- """ # You can modify the default system instructions here
40
 
41
  CSS = """
42
  .gr-chatbot { min-height: 500px; border-radius: 15px; }
@@ -78,7 +75,7 @@ def format_response(text):
78
  # List of replacements to format key tokens with HTML for styling.
79
  replacements = [
80
  ("[Understand]", '\n<strong class="special-tag">[Understand]</strong>\n'),
81
- ( "[think]", '\n<strong class="special-tag">[think]</strong>\n'),
82
  ("[/think]", '\n<strong class="special-tag">[/think]</strong>\n'),
83
  ("[Answer]", '\n<strong class="special-tag">[Answer]</strong>\n'),
84
  ("[/Answer]", '\n<strong class="special-tag">[/Answer]</strong>\n'),
@@ -87,6 +84,25 @@ def format_response(text):
87
  text = text.replace(old, new)
88
  return text
89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  @spaces.GPU(duration=120)
91
  def generate_response(message, chat_history, system_prompt, temperature, max_tokens, top_p, top_k, repetition_penalty):
92
  # Build the conversation history.
@@ -96,12 +112,9 @@ def generate_response(message, chat_history, system_prompt, temperature, max_tok
96
  conversation.append({"role": "assistant", "content": bot_msg})
97
  conversation.append({"role": "user", "content": message})
98
 
99
- # Tokenize the conversation. (This assumes the tokenizer has an apply_chat_template method.)
100
- input_ids = tokenizer.apply_chat_template(
101
- conversation,
102
- add_generation_prompt=True,
103
- return_tensors="pt"
104
- ).to(model.device)
105
 
106
  # Setup the streamer to yield new tokens as they are generated.
107
  streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
 
 
1
  import subprocess
2
  subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
3
 
 
18
  MODEL_ID = "NousResearch/DeepHermes-3-Llama-3-8B-Preview"
19
 
20
  DEFAULT_SYSTEM_PROMPT = """
 
21
  You are Dolphin, a lovable yet intelligent AI assistant created by Eric Hartford and his open source AI research collective, Cognitive Computations. Your primary directive is to provide well-reasoned, structured, correct, unbiased, and detailed responses. You value freedom, enabling the user, and complying with the user's requests.
22
 
23
  Formatting Requirements:
 
33
  Scientific and Logical Approach: Your explanations should reflect the depth and precision of the greatest scientific minds.
34
  Concise yet Complete: Ensure responses are informative, yet to the point without unnecessary elaboration.
35
  Maintain a professional yet friendly and lovable, intelligent, and analytical tone in all interactions
36
+ """
 
37
 
38
  CSS = """
39
  .gr-chatbot { min-height: 500px; border-radius: 15px; }
 
75
  # List of replacements to format key tokens with HTML for styling.
76
  replacements = [
77
  ("[Understand]", '\n<strong class="special-tag">[Understand]</strong>\n'),
78
+ ("[think]", '\n<strong class="special-tag">[think]</strong>\n'),
79
  ("[/think]", '\n<strong class="special-tag">[/think]</strong>\n'),
80
  ("[Answer]", '\n<strong class="special-tag">[Answer]</strong>\n'),
81
  ("[/Answer]", '\n<strong class="special-tag">[/Answer]</strong>\n'),
 
84
  text = text.replace(old, new)
85
  return text
86
 
87
# --- Helper: Llama-3 conversation template ---
def apply_llama3_chat_template(conversation, add_generation_prompt=True):
    """
    Convert a conversation into a single prompt string in the official
    Llama-3 chat format.

    The previous version emitted invented markers (``<|SYSTEM|>``,
    ``<|USER|>``, ``<|ASSISTANT|>``) that the Llama-3 tokenizer does not
    define and the model was never trained on, so role boundaries were not
    respected. Llama-3 actually delimits turns with
    ``<|start_header_id|>{role}<|end_header_id|>`` headers and ``<|eot_id|>``
    terminators, with ``<|begin_of_text|>`` at the start (see the Meta
    Llama 3 model card / tokenizer_config chat template).

    Parameters
    ----------
    conversation : list[dict]
        Messages as ``{"role": ..., "content": ...}`` dicts; roles are
        normalized to lowercase ("system" / "user" / "assistant").
    add_generation_prompt : bool, optional
        When True (default), append an open assistant header so the model
        continues generating as the assistant.

    Returns
    -------
    str
        The fully formatted prompt, ready to be tokenized.
    """
    prompt = "<|begin_of_text|>"
    for msg in conversation:
        # Lowercase the role so "SYSTEM"/"System"/"system" all format the same.
        role = msg["role"].lower()
        prompt += (
            f"<|start_header_id|>{role}<|end_header_id|>\n\n"
            f"{msg['content'].strip()}<|eot_id|>"
        )
    if add_generation_prompt:
        # Open assistant turn: the model's completion becomes the reply.
        prompt += "<|start_header_id|>assistant<|end_header_id|>\n\n"
    return prompt
105
+
106
  @spaces.GPU(duration=120)
107
  def generate_response(message, chat_history, system_prompt, temperature, max_tokens, top_p, top_k, repetition_penalty):
108
  # Build the conversation history.
 
112
  conversation.append({"role": "assistant", "content": bot_msg})
113
  conversation.append({"role": "user", "content": message})
114
 
115
+ # Use the Llama-3 conversation template to build the prompt.
116
+ prompt = apply_llama3_chat_template(conversation, add_generation_prompt=True)
117
+ input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
 
 
 
118
 
119
  # Setup the streamer to yield new tokens as they are generated.
120
  streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)