pabloce committed on
Commit 8e6bf26 · verified · 1 Parent(s): f3a35f4

Update app.py

Files changed (1)
  1. app.py +40 -22
app.py CHANGED
@@ -27,32 +27,50 @@ def respond(
     top_p,
 ):
     from llama_cpp import Llama
-    from llama_cpp_agent import LlamaCppAgent
-    from llama_cpp_agent import MessagesFormatterType
-    from llama_cpp_agent.providers import LlamaCppPythonProvider
-
-    llama_model = Llama(r"models/mistral-7b-instruct-v0.2.Q6_K.gguf", n_batch=1024, n_threads=0, n_gpu_layers=33, n_ctx=8192, verbose=False)
-
-    provider = LlamaCppPythonProvider(llama_model)
-
-    agent = LlamaCppAgent(
-        provider,
-        system_prompt=f"{system_message}",
-        predefined_messages_formatter_type=MessagesFormatterType.MISTRAL,
-        debug_output=True
-    )
-
-    settings = provider.get_provider_default_settings()
-    settings.stream = True
-    settings.max_tokens = max_tokens
-    settings.temperature = temperature
-    settings.top_p = top_p
-    partial_message = ""
-    for new_token in agent.get_chat_response(message, llm_sampling_settings=settings, returns_streaming_generator=True):
-        partial_message += new_token
-        if '<|im_end|>' in partial_message:
-            break
-        yield partial_message
+    llm = Llama(
+        model_path="models/mistral-7b-instruct-v0.2.Q6_K.gguf",
+        chat_format="mistral"
+    )
+    stream = llm.create_chat_completion(
+        messages = [
+            {"role": "system", "content": f"{system_message}"},
+            {
+                "role": "user",
+                "content": f"{message}"
+            }
+        ],
+        stream=True,
+    )
+    for output in stream:
+        yield json.dumps(output, indent=2)
+    # from llama_cpp import Llama
+    # from llama_cpp_agent import LlamaCppAgent
+    # from llama_cpp_agent import MessagesFormatterType
+    # from llama_cpp_agent.providers import LlamaCppPythonProvider
+
+    # llama_model = Llama(r"models/mistral-7b-instruct-v0.2.Q6_K.gguf", n_batch=1024, n_threads=0, n_gpu_layers=33, n_ctx=8192, verbose=False)
+
+    # provider = LlamaCppPythonProvider(llama_model)
+
+    # agent = LlamaCppAgent(
+    #     provider,
+    #     system_prompt=f"{system_message}",
+    #     predefined_messages_formatter_type=MessagesFormatterType.MISTRAL,
+    #     debug_output=True
+    # )
+
+    # settings = provider.get_provider_default_settings()
+    # settings.stream = True
+    # settings.max_tokens = max_tokens
+    # settings.temperature = temperature
+    # settings.top_p = top_p
+    # partial_message = ""
+    # for new_token in agent.get_chat_response(message, llm_sampling_settings=settings, returns_streaming_generator=True):
+    #     partial_message += new_token
+    #     if '<|im_end|>' in partial_message:
+    #         break
+    #     yield partial_message
+
     # stop_tokens = ["</s>", "[INST]", "[INST] ", "<s>", "[/INST]", "[/INST] "]
     # chat_template = '<s>[INST] ' + system_message
     # # for human, assistant in history:
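Note on the new handler: as committed it yields each raw streaming chunk via json.dumps (so app.py must import json elsewhere), rather than accumulating readable text. Below is a minimal sketch, not part of this commit, of how the same create_chat_completion stream could be turned into a growing plain-text reply; it assumes the OpenAI-style chunk layout (choices[0]["delta"] with an optional "content" key) that llama-cpp-python uses for streamed chat completions, and the respond_text name and model path are illustrative only.

    # Sketch only (assumed usage, not from the commit): accumulate streamed
    # chat-completion chunks into plain text instead of dumping raw JSON.
    from llama_cpp import Llama

    llm = Llama(
        model_path="models/mistral-7b-instruct-v0.2.Q6_K.gguf",  # same model file as in the diff
        chat_format="mistral",
    )

    def respond_text(message, system_message):
        stream = llm.create_chat_completion(
            messages=[
                {"role": "system", "content": system_message},
                {"role": "user", "content": message},
            ],
            stream=True,
        )
        partial_message = ""
        for chunk in stream:
            delta = chunk["choices"][0]["delta"]
            if "content" in delta:          # first/last chunks may carry only role or finish info
                partial_message += delta["content"]
                yield partial_message       # yield the growing string, as a Gradio handler expects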