pabloce committed (verified)
Commit 246d0fd · Parent: 3a779df

Update app.py

Files changed (1)
  1. app.py +0 -65
app.py CHANGED
@@ -60,71 +60,6 @@ def respond(
     for output in stream:
         outputs += output
         yield outputs
-    # from llama_cpp import Llama
-    # from llama_cpp_agent import LlamaCppAgent
-    # from llama_cpp_agent import MessagesFormatterType
-    # from llama_cpp_agent.providers import LlamaCppPythonProvider
-
-    # llama_model = Llama(r"models/mistral-7b-instruct-v0.2.Q6_K.gguf", n_batch=1024, n_threads=0, n_gpu_layers=33, n_ctx=8192, verbose=False)
-
-    # provider = LlamaCppPythonProvider(llama_model)
-
-    # agent = LlamaCppAgent(
-    #     provider,
-    #     system_prompt=f"{system_message}",
-    #     predefined_messages_formatter_type=MessagesFormatterType.MISTRAL,
-    #     debug_output=True
-    # )
-
-    # settings = provider.get_provider_default_settings()
-    # settings.stream = True
-    # settings.max_tokens = max_tokens
-    # settings.temperature = temperature
-    # settings.top_p = top_p
-    # partial_message = ""
-    # for new_token in agent.get_chat_response(message, llm_sampling_settings=settings, returns_streaming_generator=True):
-    #     partial_message += new_token
-    #     if '<|im_end|>' in partial_message:
-    #         break
-    #     yield partial_message
-
-    # stop_tokens = ["</s>", "[INST]", "[INST] ", "<s>", "[/INST]", "[/INST] "]
-    # chat_template = '<s>[INST] ' + system_message
-    # # for human, assistant in history:
-    # #     chat_template += human + ' [/INST] ' + assistant + '</s>[INST]'
-    # chat_template += ' ' + message + ' [/INST]'
-
-    # print(chat_template)
-
-    # llm = LlamaCPP(
-    #     model_path="models/mistral-7b-instruct-v0.2.Q6_K.gguf",
-    #     temperature=temperature,
-    #     max_new_tokens=max_tokens,
-    #     context_window=2048,
-    #     generate_kwargs={
-    #         "top_k": 50,
-    #         "top_p": top_p,
-    #         "repeat_penalty": 1.3
-    #     },
-    #     model_kwargs={
-    #         "n_threads": 0,
-    #         "n_gpu_layers": 33
-    #     },
-    #     messages_to_prompt=messages_to_prompt,
-    #     completion_to_prompt=completion_to_prompt,
-    #     verbose=True,
-    # )
-    # # response = ""
-    # # for chunk in llm.stream_complete(message):
-    # #     print(chunk.delta, end="", flush=True)
-    # #     response += str(chunk.delta)
-    # #     yield response
-    # outputs = []
-    # for chunk in llm.stream_complete(message):
-    #     outputs.append(chunk.delta)
-    #     if chunk.delta in stop_tokens:
-    #         break
-    # yield "".join(outputs)
 
 demo = gr.ChatInterface(
     respond,
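
The context lines kept by this hunk are only the tail of respond(): a loop that accumulates tokens from a streaming generator and yields the running text to the Gradio ChatInterface. A minimal sketch of how such a generator is presumably produced with llama-cpp-agent, reconstructed from the commented-out setup removed above (the model path, MISTRAL formatter, and sampling settings are taken from those comments; the function signature is an assumption matching a typical gr.ChatInterface callback):

# Sketch only: reconstructed from the removed comments, not the file's actual contents.
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider


def respond(message, history, system_message, max_tokens, temperature, top_p):
    # Model path and loading options come from the removed comments.
    llama_model = Llama(
        "models/mistral-7b-instruct-v0.2.Q6_K.gguf",
        n_batch=1024, n_threads=0, n_gpu_layers=33, n_ctx=8192, verbose=False,
    )
    provider = LlamaCppPythonProvider(llama_model)
    agent = LlamaCppAgent(
        provider,
        system_prompt=system_message,
        predefined_messages_formatter_type=MessagesFormatterType.MISTRAL,
    )

    # Sampling settings mirror the values passed into respond().
    settings = provider.get_provider_default_settings()
    settings.stream = True
    settings.max_tokens = max_tokens
    settings.temperature = temperature
    settings.top_p = top_p

    # Streaming generator consumed by the loop kept in the hunk above.
    stream = agent.get_chat_response(
        message, llm_sampling_settings=settings, returns_streaming_generator=True
    )

    outputs = ""
    for output in stream:
        outputs += output
        yield outputs

The removed block also carried a second, alternative path (a hand-built Mistral [INST] prompt fed to LlamaIndex's LlamaCPP wrapper); it was already commented out and is dropped entirely by this commit.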