pabloce committed (verified)
Commit 0e76b02 · Parent: 11987d3

Update app.py

Files changed (1)
  1. app.py +40 -6
app.py CHANGED
@@ -7,8 +7,21 @@ from huggingface_hub import hf_hub_download
 subprocess.run('pip install llama-cpp-python==0.2.75 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124', shell=True)
 subprocess.run('pip install llama-cpp-agent==0.2.10', shell=True)
 
-hf_hub_download(repo_id="bartowski/Meta-Llama-3-70B-Instruct-GGUF", filename="Meta-Llama-3-70B-Instruct-Q3_K_M.gguf", local_dir = "./models")
-hf_hub_download(repo_id="bartowski/Llama-3-8B-Synthia-v3.5-GGUF", filename="Llama-3-8B-Synthia-v3.5-f16.gguf", local_dir = "./models")
+hf_hub_download(
+    repo_id="bartowski/Meta-Llama-3-70B-Instruct-GGUF",
+    filename="Meta-Llama-3-70B-Instruct-Q3_K_M.gguf",
+    local_dir = "./models"
+)
+hf_hub_download(
+    repo_id="bartowski/Llama-3-8B-Synthia-v3.5-GGUF",
+    filename="Llama-3-8B-Synthia-v3.5-f16.gguf",
+    local_dir = "./models"
+)
+hf_hub_download(
+    repo_id="bartowski/Mistral-7B-Instruct-v0.3-GGUF",
+    filename="Mistral-7B-Instruct-v0.3-f32.gguf",
+    local_dir = "./models"
+)
 
 css = """
 .message-row {
@@ -28,6 +41,14 @@ css = """
 }
 """
 
+def get_messages_formatter_type(model_name):
+    if "Llama" in model_name:
+        return MessagesFormatterType.LLAMA_3
+    elif "Mistral" in model_name:
+        return MessagesFormatterType.MISTRAL
+    else:
+        raise ValueError(f"Unsupported model: {model_name}")
+
 @spaces.GPU(duration=120)
 def respond(
     message,
@@ -47,6 +68,8 @@ def respond(
     from llama_cpp_agent.chat_history import BasicChatHistory
     from llama_cpp_agent.chat_history.messages import Roles
 
+    chat_template = get_messages_formatter_type(model)
+
     llm = Llama(
         model_path=f"models/{model}",
         flash_attn=True,
@@ -60,7 +83,7 @@ def respond(
     agent = LlamaCppAgent(
         provider,
         system_prompt=f"{system_message}",
-        predefined_messages_formatter_type=MessagesFormatterType.LLAMA_3,
+        predefined_messages_formatter_type=chat_template,
         debug_output=True
     )
 
@@ -83,11 +106,16 @@ def respond(
             'role': Roles.assistant,
             'content': msn[1]
         }
-
         messages.add_message(user)
         messages.add_message(assistant)
 
-    stream = agent.get_chat_response(message, llm_sampling_settings=settings, chat_history=messages, returns_streaming_generator=True, print_output=False)
+    stream = agent.get_chat_response(
+        message,
+        llm_sampling_settings=settings,
+        chat_history=messages,
+        returns_streaming_generator=True,
+        print_output=False
+    )
 
     outputs = ""
     for output in stream:
@@ -121,7 +149,13 @@ demo = gr.ChatInterface(
             step=0.1,
             label="Repetition penalty",
         ),
-        gr.Dropdown(['Meta-Llama-3-70B-Instruct-Q3_K_M.gguf', 'Llama-3-8B-Synthia-v3.5-f16.gguf'], value="Meta-Llama-3-70B-Instruct-Q3_K_M.gguf", label="Model"),
+        gr.Dropdown([
+                'Meta-Llama-3-70B-Instruct-Q3_K_M.gguf',
+                'Llama-3-8B-Synthia-v3.5-f16.gguf'
+            ],
+            value="Meta-Llama-3-70B-Instruct-Q3_K_M.gguf",
+            label="Model"
+        ),
     ],
     theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet", neutral_hue="gray",font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set(
        body_background_fill_dark="#16141c",
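
For readers of this diff, here is a minimal standalone sketch (not part of the commit) of the filename-to-chat-template routing that respond() now performs via chat_template = get_messages_formatter_type(model). The top-level import of MessagesFormatterType from llama_cpp_agent is an assumption based on llama-cpp-agent 0.2.x; everything else mirrors the helper added above.

# Sketch only: exercise the new filename -> chat-template routing on the
# dropdown entries and the newly downloaded Mistral GGUF.
from llama_cpp_agent import MessagesFormatterType  # assumed top-level export in llama-cpp-agent 0.2.10

def get_messages_formatter_type(model_name):
    # Same logic as the helper added in this commit: pick a template from the filename.
    if "Llama" in model_name:
        return MessagesFormatterType.LLAMA_3
    elif "Mistral" in model_name:
        return MessagesFormatterType.MISTRAL
    else:
        raise ValueError(f"Unsupported model: {model_name}")

print(get_messages_formatter_type("Meta-Llama-3-70B-Instruct-Q3_K_M.gguf"))  # LLAMA_3
print(get_messages_formatter_type("Llama-3-8B-Synthia-v3.5-f16.gguf"))       # LLAMA_3
print(get_messages_formatter_type("Mistral-7B-Instruct-v0.3-f32.gguf"))      # MISTRAL

Because the match is a plain substring test on the filename, any model later added to the gr.Dropdown list needs a name containing "Llama" or "Mistral" (or a new branch in the helper), otherwise respond() raises ValueError before the model is loaded.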