pabloce committed · verified
Commit cf6a52f · Parent(s): 5f15dd3

Update app.py

Files changed (1)
  1. app.py  +18 -9
app.py CHANGED
@@ -28,20 +28,29 @@ def respond(
     top_p,
 ):
     from llama_cpp import Llama
+    from llama_cpp_agent import LlamaCppAgent
+    from llama_cpp_agent import MessagesFormatterType
+    from llama_cpp_agent.providers import LlamaCppPythonProvider
+
     llm = Llama(
         model_path="models/mistral-7b-instruct-v0.2.Q6_K.gguf",
         n_gpu_layers=33,
     )
-    stream = llm.create_chat_completion(
-        messages = [
-            {"role": "system", "content": f"{system_message}"},
-            {
-                "role": "user",
-                "content": f"{message}"
-            }
-        ],
-        stream=True,
+    provider = LlamaCppPythonProvider(llm)
+
+    agent = LlamaCppAgent(
+        provider,
+        system_prompt="You are a helpful assistant.",
+        predefined_messages_formatter_type=MessagesFormatterType.MISTRAL,
+        debug_output=True
     )
+
+    settings = provider.get_provider_default_settings()
+    settings.max_tokens = 2000
+    settings.stream = True
+
+    stream = agent.get_chat_response(message, llm_sampling_settings=settings, returns_streaming_generator=True)
+
     outputs = ""
     for output in stream:
         print(output)
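
For context, here is a minimal sketch of how the updated respond() would plug into the rest of a Gradio Space. Only the hunk above is part of this commit: the gr.ChatInterface wiring, the respond(message, history, ...) parameter order, and the accumulate-and-yield streaming loop are assumptions about the surrounding app.py, while the llama_cpp and llama_cpp_agent calls are taken from the diff itself.

import gradio as gr
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent
from llama_cpp_agent import MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider


def respond(message, history, system_message, max_tokens, temperature, top_p):
    # Body as of this commit; note it hardcodes the system prompt and
    # max_tokens rather than using the UI values passed in above.
    llm = Llama(
        model_path="models/mistral-7b-instruct-v0.2.Q6_K.gguf",
        n_gpu_layers=33,
    )
    provider = LlamaCppPythonProvider(llm)

    agent = LlamaCppAgent(
        provider,
        system_prompt="You are a helpful assistant.",
        predefined_messages_formatter_type=MessagesFormatterType.MISTRAL,
        debug_output=True,
    )

    settings = provider.get_provider_default_settings()
    settings.max_tokens = 2000
    settings.stream = True

    # With returns_streaming_generator=True the call returns a generator
    # of text chunks instead of blocking on the full completion.
    stream = agent.get_chat_response(
        message, llm_sampling_settings=settings, returns_streaming_generator=True
    )

    # Assumed consumption pattern: accumulate chunks and yield the growing
    # reply so gr.ChatInterface renders the response as it streams in.
    outputs = ""
    for output in stream:
        outputs += output
        yield outputs


# Assumed UI wiring; the commit only touches the body of respond().
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=4096, value=2000, label="Max tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, label="Top-p"),
    ],
)

if __name__ == "__main__":
    demo.launch()

One design note: constructing Llama(...) inside respond() reloads the model weights on every request, so a likely follow-up is hoisting the model (and agent) construction to module scope so it happens once per process.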