sergey21000 committed
Commit eee722e · verified · 1 Parent(s): 65f8509

Update app.py

Files changed (1): app.py +272 -399
app.py CHANGED
@@ -1,399 +1,272 @@
- from typing import List, Optional
-
- import gradio as gr
- from langchain_core.vectorstores import VectorStore
-
- from config import (
-     LLM_MODEL_REPOS,
-     EMBED_MODEL_REPOS,
-     SUBTITLES_LANGUAGES,
-     GENERATE_KWARGS,
- )
-
- from utils import (
-     load_llm_model,
-     load_embed_model,
-     load_documents_and_create_db,
-     user_message_to_chatbot,
-     update_user_message_with_context,
-     get_llm_response,
-     get_gguf_model_names,
-     add_new_model_repo,
-     clear_llm_folder,
-     clear_embed_folder,
-     get_memory_usage,
- )
-
-
- # ============ INTERFACE COMPONENT INITIALIZATION FUNCS ============
-
- def get_rag_settings(rag_mode: bool, render: bool = True):
-     k = gr.Radio(
-         choices=[1, 2, 3, 4, 5, 'all'],
-         value=2,
-         label='Number of relevant documents for search',
-         visible=rag_mode,
-         render=render,
-     )
-     score_threshold = gr.Slider(
-         minimum=0,
-         maximum=1,
-         value=0.5,
-         step=0.05,
-         label='relevance_scores_threshold',
-         visible=rag_mode,
-         render=render,
-     )
-     return k, score_threshold
-
-
- def get_user_message_with_context(text: str, rag_mode: bool) -> gr.component:
-     num_lines = len(text.split('\n'))
-     max_lines = 10
-     num_lines = max_lines if num_lines > max_lines else num_lines
-     return gr.Textbox(
-         text,
-         visible=rag_mode,
-         interactive=False,
-         label='User Message With Context',
-         lines=num_lines,
-     )
-
-
- def get_system_prompt_component(interactive: bool) -> gr.Textbox:
-     value = '' if interactive else 'System prompt is not supported by this model'
-     return gr.Textbox(value=value, label='System prompt', interactive=interactive)
-
-
- def get_generate_args(do_sample: bool) -> List[gr.component]:
-     generate_args = [
-         gr.Slider(minimum=0.1, maximum=3, value=GENERATE_KWARGS['temperature'], step=0.1, label='temperature', visible=do_sample),
-         gr.Slider(minimum=0.1, maximum=1, value=GENERATE_KWARGS['top_p'], step=0.01, label='top_p', visible=do_sample),
-         gr.Slider(minimum=1, maximum=50, value=GENERATE_KWARGS['top_k'], step=1, label='top_k', visible=do_sample),
-         gr.Slider(minimum=1, maximum=5, value=GENERATE_KWARGS['repeat_penalty'], step=0.1, label='repeat_penalty', visible=do_sample),
-     ]
-     return generate_args
-
-
- def get_rag_mode_component(db: Optional[VectorStore]) -> gr.Checkbox:
-     value = visible = db is not None
-     return gr.Checkbox(value=value, label='RAG Mode', scale=1, visible=visible)
-
-
- # ================ LOADING AND INITIALIZING MODELS ========================
-
- start_llm_model, start_support_system_role, load_log = load_llm_model(LLM_MODEL_REPOS[0], 'gemma-2-2b-it-Q8_0.gguf')
- start_embed_model, load_log = load_embed_model(EMBED_MODEL_REPOS[0])
-
-
-
- # ================== APPLICATION WEB INTERFACE ============================
-
- theme = gr.themes.Base(primary_hue='green', secondary_hue='yellow', neutral_hue='zinc').set(
-     loader_color='rgb(0, 255, 0)',
-     slider_color='rgb(0, 200, 0)',
-     body_text_color_dark='rgb(0, 200, 0)',
-     button_secondary_background_fill_dark='green',
- )
- css = '''.gradio-container {width: 60% !important}'''
-
- with gr.Blocks(theme=theme, css=css) as interface:
-
-     # ==================== GRADIO STATES ===============================
-
-     documents = gr.State([])
-     db = gr.State(None)
-     user_message_with_context = gr.State('')
-     support_system_role = gr.State(start_support_system_role)
-     llm_model_repos = gr.State(LLM_MODEL_REPOS)
-     embed_model_repos = gr.State(EMBED_MODEL_REPOS)
-     llm_model = gr.State(start_llm_model)
-     embed_model = gr.State(start_embed_model)
-
-
-
-     # ==================== BOT PAGE =================================
-
-     with gr.Tab(label='Chatbot'):
-         with gr.Row():
-             with gr.Column(scale=3):
-                 chatbot = gr.Chatbot(
-                     show_copy_button=True,
-                     bubble_full_width=False,
-                     height=480,
-                 )
-                 user_message = gr.Textbox(label='User')
-
-                 with gr.Row():
-                     user_message_btn = gr.Button('Send')
-                     stop_btn = gr.Button('Stop')
-                     clear_btn = gr.Button('Clear')
-
-             # ------------- GENERATION PARAMETERS -------------------
-
-             with gr.Column(scale=1, min_width=80):
-                 with gr.Group():
-                     gr.Markdown('History size')
-                     history_len = gr.Slider(
-                         minimum=0,
-                         maximum=5,
-                         value=0,
-                         step=1,
-                         info='Number of previous messages taken into account in history',
-                         label='history_len',
-                         show_label=False,
-                     )
-
-                 with gr.Group():
-                     gr.Markdown('Generation parameters')
-                     do_sample = gr.Checkbox(
-                         value=False,
-                         label='do_sample',
-                         info='Activate random sampling',
-                     )
-                     generate_args = get_generate_args(do_sample.value)
-                     do_sample.change(
-                         fn=get_generate_args,
-                         inputs=do_sample,
-                         outputs=generate_args,
-                         show_progress=False,
-                     )
-
-                 rag_mode = get_rag_mode_component(db=db.value)
-                 k, score_threshold = get_rag_settings(rag_mode=rag_mode.value, render=False)
-                 rag_mode.change(
-                     fn=get_rag_settings,
-                     inputs=[rag_mode],
-                     outputs=[k, score_threshold],
-                 )
-                 with gr.Row():
-                     k.render()
-                     score_threshold.render()
-
-         # ---------------- SYSTEM PROMPT AND USER MESSAGE -----------
-
-         with gr.Accordion('Prompt', open=True):
-             system_prompt = get_system_prompt_component(interactive=support_system_role.value)
-             user_message_with_context = get_user_message_with_context(text='', rag_mode=rag_mode.value)
-
-         # ---------------- SEND, CLEAR AND STOP BUTTONS ------------
-
-         generate_event = gr.on(
-             triggers=[user_message.submit, user_message_btn.click],
-             fn=user_message_to_chatbot,
-             inputs=[user_message, chatbot],
-             outputs=[user_message, chatbot],
-             queue=False,
-         ).then(
-             fn=update_user_message_with_context,
-             inputs=[chatbot, rag_mode, db, k, score_threshold],
-             outputs=[user_message_with_context],
-         ).then(
-             fn=get_user_message_with_context,
-             inputs=[user_message_with_context, rag_mode],
-             outputs=[user_message_with_context],
-         ).then(
-             fn=get_llm_response,
-             inputs=[chatbot, llm_model, user_message_with_context, rag_mode, system_prompt,
-                     support_system_role, history_len, do_sample, *generate_args],
-             outputs=[chatbot],
-         )
-
-         stop_btn.click(
-             fn=None,
-             inputs=None,
-             outputs=None,
-             cancels=generate_event,
-             queue=False,
-         )
-
-         clear_btn.click(
-             fn=lambda: (None, ''),
-             inputs=None,
-             outputs=[chatbot, user_message_with_context],
-             queue=False,
-         )
-
-
-
-     # ================= FILE DOWNLOAD PAGE =========================
-
-     with gr.Tab(label='Load documents'):
-         with gr.Row(variant='compact'):
-             upload_files = gr.File(file_count='multiple', label='Loading text files')
-             web_links = gr.Textbox(lines=6, label='Links to Web sites or YouTube')
-
-         with gr.Row(variant='compact'):
-             chunk_size = gr.Slider(50, 2000, value=500, step=50, label='Chunk size')
-             chunk_overlap = gr.Slider(0, 200, value=20, step=10, label='Chunk overlap')
-
-         subtitles_lang = gr.Radio(
-             SUBTITLES_LANGUAGES,
-             value=SUBTITLES_LANGUAGES[0],
-             label='YouTube subtitle language',
-         )
-
-         load_documents_btn = gr.Button(value='Upload documents and initialize database')
-         load_docs_log = gr.Textbox(label='Status of loading and splitting documents', interactive=False)
-
-         load_documents_btn.click(
-             fn=load_documents_and_create_db,
-             inputs=[upload_files, web_links, subtitles_lang, chunk_size, chunk_overlap, embed_model],
-             outputs=[documents, db, load_docs_log],
-         ).success(
-             fn=get_rag_mode_component,
-             inputs=[db],
-             outputs=[rag_mode],
-         )
-
-         gr.HTML("""<h3 style='text-align: center'>
-         <a href="https://github.com/sergey21000/chatbot-rag" target='_blank'>GitHub Repository</a></h3>
-         """)
-
-
-
-     # ================= VIEW PAGE FOR ALL DOCUMENTS =================
-
-     with gr.Tab(label='View documents'):
-         view_documents_btn = gr.Button(value='Show downloaded text chunks')
-         view_documents_textbox = gr.Textbox(
-             lines=1,
-             placeholder='To view chunks, load documents in the Load documents tab',
-             label='Uploaded chunks',
-         )
-         sep = '=' * 20
-         view_documents_btn.click(
-             lambda documents: f'\n{sep}\n\n'.join([doc.page_content for doc in documents]),
-             inputs=[documents],
-             outputs=[view_documents_textbox],
-         )
-
-
-     # ============== GGUF MODELS DOWNLOAD PAGE =====================
-
-     with gr.Tab('Load LLM model'):
-         new_llm_model_repo = gr.Textbox(
-             value='',
-             label='Add repository',
-             placeholder='Link to repository of HF models in GGUF format',
-         )
-         new_llm_model_repo_btn = gr.Button('Add repository')
-         curr_llm_model_repo = gr.Dropdown(
-             choices=LLM_MODEL_REPOS,
-             value=None,
-             label='HF Model Repository',
-         )
-         curr_llm_model_path = gr.Dropdown(
-             choices=[],
-             value=None,
-             label='GGUF model file',
-         )
-         load_llm_model_btn = gr.Button('Loading and initializing model')
-         load_llm_model_log = gr.Textbox(
-             value=f'Model {LLM_MODEL_REPOS[0]} loaded at application startup',
-             label='Model loading status',
-             lines=6,
-         )
-
-         with gr.Group():
-             gr.Markdown('Free up disk space by deleting all models except the currently selected one')
-             clear_llm_folder_btn = gr.Button('Clear folder')
-
-         new_llm_model_repo_btn.click(
-             fn=add_new_model_repo,
-             inputs=[new_llm_model_repo, llm_model_repos],
-             outputs=[curr_llm_model_repo, load_llm_model_log],
-         ).success(
-             fn=lambda: '',
-             inputs=None,
-             outputs=[new_llm_model_repo],
-         )
-
-         curr_llm_model_repo.change(
-             fn=get_gguf_model_names,
-             inputs=[curr_llm_model_repo],
-             outputs=[curr_llm_model_path],
-         )
-
-         load_llm_model_btn.click(
-             fn=load_llm_model,
-             inputs=[curr_llm_model_repo, curr_llm_model_path],
-             outputs=[llm_model, support_system_role, load_llm_model_log],
-         ).success(
-             fn=lambda log: log + get_memory_usage(),
-             inputs=[load_llm_model_log],
-             outputs=[load_llm_model_log],
-         ).then(
-             fn=get_system_prompt_component,
-             inputs=[support_system_role],
-             outputs=[system_prompt],
-         )
-
-         clear_llm_folder_btn.click(
-             fn=clear_llm_folder,
-             inputs=[curr_llm_model_path],
-             outputs=None,
-         ).success(
-             fn=lambda model_path: f'Models other than {model_path} removed',
-             inputs=[curr_llm_model_path],
-             outputs=None,
-         )
-
-
-     # ============== EMBEDDING MODELS DOWNLOAD PAGE =============
-
-     with gr.Tab('Load embed model'):
-         new_embed_model_repo = gr.Textbox(
-             value='',
-             label='Add repository',
-             placeholder='Link to HF model repository',
-         )
-         new_embed_model_repo_btn = gr.Button('Add repository')
-         curr_embed_model_repo = gr.Dropdown(
-             choices=EMBED_MODEL_REPOS,
-             value=None,
-             label='HF model repository',
-         )
-
-         load_embed_model_btn = gr.Button('Loading and initializing model')
-         load_embed_model_log = gr.Textbox(
-             value=f'Model {EMBED_MODEL_REPOS[0]} loaded at application startup',
-             label='Model loading status',
-             lines=7,
-         )
-         with gr.Group():
-             gr.Markdown('Free up disk space by deleting all models except the currently selected one')
-             clear_embed_folder_btn = gr.Button('Clear folder')
-
-         new_embed_model_repo_btn.click(
-             fn=add_new_model_repo,
-             inputs=[new_embed_model_repo, embed_model_repos],
-             outputs=[curr_embed_model_repo, load_embed_model_log],
-         ).success(
-             fn=lambda: '',
-             inputs=None,
-             outputs=new_embed_model_repo,
-         )
-
-         load_embed_model_btn.click(
-             fn=load_embed_model,
-             inputs=[curr_embed_model_repo],
-             outputs=[embed_model, load_embed_model_log],
-         ).success(
-             fn=lambda log: log + get_memory_usage(),
-             inputs=[load_embed_model_log],
-             outputs=[load_embed_model_log],
-         )
-
-         clear_embed_folder_btn.click(
-             fn=clear_embed_folder,
-             inputs=[curr_embed_model_repo],
-             outputs=None,
-         ).success(
-             fn=lambda model_repo: f'Models other than {model_repo} removed',
-             inputs=[curr_embed_model_repo],
-             outputs=None,
-         )
-
-
- interface.launch(server_name='0.0.0.0', server_port=7860)  # debug=True
 
+ from pathlib import Path
+ from shutil import rmtree
+ from typing import Union, List, Dict, Tuple, Optional
+ from tqdm import tqdm
+
+ import requests
+ import gradio as gr
+ from llama_cpp import Llama
+
+
+ # ================== ANNOTATIONS ========================
+
+ CHAT_HISTORY = List[Optional[Dict[str, Optional[str]]]]
+ MODEL_DICT = Dict[str, Llama]
+
+
+ # ================== FUNCS =============================
+
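+ # Streams the GGUF file to disk in 4 KB chunks, reporting progress both to the console (tqdm) and the Gradio UI.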
+ def download_file(file_url: str, file_path: Union[str, Path]) -> None:
+     response = requests.get(file_url, stream=True)
+     if response.status_code != 200:
+         raise Exception(f'File is not available for download at: {file_url}')
+     total_size = int(response.headers.get('content-length', 0))
+     progress_tqdm = tqdm(desc='Loading GGUF file', total=total_size, unit='iB', unit_scale=True)
+     progress_gradio = gr.Progress()
+     completed_size = 0
+     with open(file_path, 'wb') as file:
+         for data in response.iter_content(chunk_size=4096):
+             size = file.write(data)
+             progress_tqdm.update(size)
+             completed_size += size
+             desc = f'Loading GGUF file, {completed_size/1024**3:.3f}/{total_size/1024**3:.3f} GB'
+             progress_gradio(completed_size/total_size, desc=desc)
+
+
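+ # Downloads the GGUF file (unless already cached in MODELS_PATH) and initializes a llama.cpp model.
+ # Whether the chat template supports a system role is inferred from the model metadata.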
+ def download_gguf_and_init_model(gguf_url: str, model_dict: MODEL_DICT) -> Tuple[MODEL_DICT, bool, str]:
+     log = ''
+     support_system_role = False  # default returned on early exit, before a model is initialized
+     if not gguf_url.endswith('.gguf'):
+         log += 'The link must be a direct link to the GGUF file\n'
+         return model_dict, support_system_role, log
+
+     gguf_filename = gguf_url.rsplit('/')[-1]
+     model_path = MODELS_PATH / gguf_filename
+     progress = gr.Progress()
+
+     if not model_path.is_file():
+         progress(0.3, desc='Step 1/2: Loading GGUF model file')
+         try:
+             download_file(gguf_url, model_path)
+             log += f'Model file {gguf_filename} successfully loaded\n'
+         except Exception as ex:
+             log += f'Error loading model from link {gguf_url}, error code:\n{ex}\n'
+             curr_model = model_dict.get('model')
+             if curr_model is None:
+                 log += 'Model is missing from dictionary "model_dict"\n'
+                 return model_dict, support_system_role, log
+             curr_model_filename = Path(curr_model.model_path).name
+             log += f'Current initialized model: {curr_model_filename}\n'
+             return model_dict, support_system_role, log
+     else:
+         log += f'Model file {gguf_filename} is already downloaded, initializing model...\n'
+
+     progress(0.7, desc='Step 2/2: Model initialization')
+     model = Llama(model_path=str(model_path), n_gpu_layers=-1, verbose=True)
+     model_dict = {'model': model}
+     support_system_role = 'System role not supported' not in model.metadata['tokenizer.chat_template']
+     log += f'Model {gguf_filename} initialized\n'
+     return model_dict, support_system_role, log
+
+
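+ # Appends the user's message to the chat history in the Gradio 'messages' format and clears the input box.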
+ def user_message_to_chatbot(user_message: str, chatbot: CHAT_HISTORY) -> Tuple[str, CHAT_HISTORY]:
+     if user_message:
+         chatbot.append({'role': 'user', 'metadata': {'title': None}, 'content': user_message})
+     return '', chatbot
+
+
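+ # Generator that streams the assistant's reply token by token, yielding the updated history to the Chatbot component.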
+ def bot_response_to_chatbot(
+     chatbot: CHAT_HISTORY,
+     model_dict: MODEL_DICT,
+     system_prompt: str,
+     support_system_role: bool,
+     history_len: int,
+     do_sample: bool,
+     *generate_args,
+ ):
+
+     model = model_dict.get('model')
+     if model is None:
+         gr.Info('Model not initialized')
+         yield chatbot
+         return
+
+     if len(chatbot) == 0 or chatbot[-1]['role'] == 'assistant':
+         yield chatbot
+         return
+
+     messages = []
+     if support_system_role and system_prompt:
+         messages.append({'role': 'system', 'metadata': {'title': None}, 'content': system_prompt})
+
+     # history_len counts user/assistant pairs, so twice as many raw messages are taken
+     if history_len != 0:
+         messages.extend(chatbot[:-1][-(history_len*2):])
+
+     messages.append(chatbot[-1])
+
+     # the sliders are created in the same order as GENERATE_KWARGS, so a positional zip maps them back to kwargs
+     gen_kwargs = dict(zip(GENERATE_KWARGS.keys(), generate_args))
+     gen_kwargs['top_k'] = int(gen_kwargs['top_k'])
+     if not do_sample:
+         # neutralize the sampling parameters to get greedy decoding
+         gen_kwargs['top_p'] = 0.0
+         gen_kwargs['top_k'] = 1
+         gen_kwargs['repeat_penalty'] = 1.0
+
+     stream_response = model.create_chat_completion(
+         messages=messages,
+         stream=True,
+         **gen_kwargs,
+     )
+
+     chatbot.append({'role': 'assistant', 'metadata': {'title': None}, 'content': ''})
+     for chunk in stream_response:
+         token = chunk['choices'][0]['delta'].get('content')
+         if token is not None:
+             chatbot[-1]['content'] += token
+             yield chatbot
+
+
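+ # Helpers that rebuild UI components so event handlers can swap their properties (interactivity, visibility).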
+ def get_system_prompt_component(interactive: bool) -> gr.Textbox:
+     value = '' if interactive else 'System prompt is not supported by this model'
+     return gr.Textbox(value=value, label='System prompt', interactive=interactive)
+
+
+ def get_generate_args(do_sample: bool) -> List[gr.component]:
+     visible = do_sample
+     generate_args = [
+         gr.Slider(label='temperature', value=GENERATE_KWARGS['temperature'], minimum=0.1, maximum=3, step=0.1, visible=visible),
+         gr.Slider(label='top_p', value=GENERATE_KWARGS['top_p'], minimum=0.1, maximum=1, step=0.1, visible=visible),
+         gr.Slider(label='top_k', value=GENERATE_KWARGS['top_k'], minimum=1, maximum=50, step=5, visible=visible),
+         gr.Slider(label='repeat_penalty', value=GENERATE_KWARGS['repeat_penalty'], minimum=1, maximum=5, step=0.1, visible=visible),
+     ]
+     return generate_args
+
+
+ # ================== VARIABLES =============================
+
+ MODELS_PATH = Path('models')
+ MODELS_PATH.mkdir(exist_ok=True)
+ DEFAULT_GGUF_URL = 'https://huggingface.co/bartowski/gemma-2-2b-it-GGUF/resolve/main/gemma-2-2b-it-Q8_0.gguf'
+
+ start_model_dict, start_support_system_role, start_load_log = download_gguf_and_init_model(
+     gguf_url=DEFAULT_GGUF_URL, model_dict={},
+ )
+
+ GENERATE_KWARGS = dict(
+     temperature=0.2,
+     top_p=0.95,
+     top_k=40,
+     repeat_penalty=1.0,
+ )
+
+ theme = gr.themes.Base(primary_hue='green', secondary_hue='yellow', neutral_hue='zinc').set(
+     loader_color='rgb(0, 255, 0)',
+     slider_color='rgb(0, 200, 0)',
+     body_text_color_dark='rgb(0, 200, 0)',
+     button_secondary_background_fill_dark='green',
+ )
+ css = '''.gradio-container {width: 60% !important}'''
+
+
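+ # The Llama instance lives in a dict held by gr.State, so event handlers always
+ # see the model most recently loaded from the 'Load model' tab.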
+ # ================== INTERFACE =============================
+
+ with gr.Blocks(theme=theme, css=css) as interface:
+     model_dict = gr.State(start_model_dict)
+     support_system_role = gr.State(start_support_system_role)
+
+     # ================= CHAT BOT PAGE ======================
+     with gr.Tab('Chatbot'):
+         with gr.Row():
+             with gr.Column(scale=3):
+                 chatbot = gr.Chatbot(
+                     type='messages',  # new in gradio 5+
+                     show_copy_button=True,
+                     bubble_full_width=False,
+                     height=480,
+                 )
+                 user_message = gr.Textbox(label='User')
+
+                 with gr.Row():
+                     user_message_btn = gr.Button('Send')
+                     stop_btn = gr.Button('Stop')
+                     clear_btn = gr.Button('Clear')
+
+                 system_prompt = get_system_prompt_component(interactive=support_system_role.value)
+
+             with gr.Column(scale=1, min_width=80):
+                 with gr.Group():
+                     gr.Markdown('Length of message history')
+                     history_len = gr.Slider(
+                         minimum=0,
+                         maximum=10,
+                         value=0,
+                         step=1,
+                         info='Number of previous messages taken into account in history',
+                         label='history_len',
+                         show_label=False,
+                     )
+
+                 with gr.Group():
+                     gr.Markdown('Generation parameters')
+                     do_sample = gr.Checkbox(
+                         value=False,
+                         label='do_sample',
+                         info='Activate random sampling',
+                     )
+                     generate_args = get_generate_args(do_sample.value)
+                     do_sample.change(
+                         fn=get_generate_args,
+                         inputs=do_sample,
+                         outputs=generate_args,
+                         show_progress=False,
+                     )
+
+         generate_event = gr.on(
+             triggers=[user_message.submit, user_message_btn.click],
+             fn=user_message_to_chatbot,
+             inputs=[user_message, chatbot],
+             outputs=[user_message, chatbot],
+         ).then(
+             fn=bot_response_to_chatbot,
+             inputs=[chatbot, model_dict, system_prompt, support_system_role, history_len, do_sample, *generate_args],
+             outputs=[chatbot],
+         )
+         stop_btn.click(
+             fn=None,
+             inputs=None,
+             outputs=None,
+             cancels=generate_event,
+         )
+         clear_btn.click(
+             fn=lambda: None,
+             inputs=None,
+             outputs=[chatbot],
+         )
+
+     # ================= LOAD MODELS PAGE ======================
+     with gr.Tab('Load model'):
+         gguf_url = gr.Textbox(
+             value='',
+             label='Link to GGUF',
+             placeholder='URL link to the model in GGUF format',
+         )
+         load_model_btn = gr.Button('Download GGUF and initialize the model')
+         load_log = gr.Textbox(
+             value=start_load_log,
+             label='Model loading status',
+             lines=3,
+         )
+
+         load_model_btn.click(
+             fn=download_gguf_and_init_model,
+             inputs=[gguf_url, model_dict],
+             outputs=[model_dict, support_system_role, load_log],
+         ).success(
+             fn=get_system_prompt_component,
+             inputs=[support_system_role],
+             outputs=[system_prompt],
+         )
+
+     gr.HTML("""<h3 style='text-align: center'>
+     <a href="https://github.com/sergey21000/gradio-llamacpp-chatbot" target='_blank'>GitHub Repository</a></h3>
+     """)
+
+ interface.launch(server_name='0.0.0.0', server_port=7860)