Commit 09d4719
Parent(s): 7246b62
Update with h2oGPT hash 27616ac37a45f19994b9d1893953791e1644b3f1

Files changed:
- app.py +107 -89
- client_test.py +56 -28
- finetune.py +7 -1
app.py
CHANGED
@@ -83,6 +83,10 @@ def main(
# set to True to load --base_model after client logs in,
# to be able to free GPU memory when model is swapped
login_mode_if_model0: bool = False,
+block_gradio_exit: bool = True,
+concurrency_count: int = 1,
+api_open: bool = False,  # don't let API skip queue
+allow_api: bool = True,

sanitize_user_prompt: bool = True,
sanitize_bot_response: bool = True,

@@ -116,6 +120,12 @@ def main(
# must override share if in spaces
share = False
save_dir = os.getenv('SAVE_DIR', save_dir)
+score_model = os.getenv('SCORE_MODEL', score_model)
+if score_model == 'None':
+score_model = ''
+concurrency_count = int(os.getenv('CONCURRENCY_COUNT', concurrency_count))
+api_open = bool(int(os.getenv('API_OPEN', api_open)))
+allow_api = bool(int(os.getenv('ALLOW_API', allow_api)))

# get defaults
model_lower = base_model.lower()

@@ -166,7 +176,7 @@ def main(
assert data[i]['conversations'][turn_start + 1]['from'] == 'gpt'
output = data[i]['conversations'][turn_start + 1]['value']
examplenew = example1.copy()
-assert not chat, "No gradio must use chat=False, uses nochat
+assert not chat, "No gradio must use chat=False, uses nochat instruct"
examplenew[eval_func_param_names.index('instruction_nochat')] = instruction
examplenew[eval_func_param_names.index('iinput_nochat')] = ''  # no input
examplenew[eval_func_param_names.index('context')] = ''  # no context

@@ -528,6 +538,7 @@ def get_score_model(**kwargs):

def go_gradio(**kwargs):
# get default model
+allow_api = kwargs['allow_api']
all_kwargs = kwargs.copy()
all_kwargs.update(locals())
if kwargs.get('base_model') and not kwargs['login_mode_if_model0']:

@@ -726,12 +737,12 @@ body.dark{background:linear-gradient(#0d0d0d,#333333);}"""
placeholder=kwargs['placeholder_input'])
submit_nochat = gr.Button("Submit")
flag_btn_nochat = gr.Button("Flag")
-if kwargs['
+if not kwargs['auto_score']:
+with gr.Column(visible=kwargs['score_model']):
+score_btn_nochat = gr.Button("Score last prompt & response")
+score_text_nochat = gr.Textbox("Response Score: NA", show_label=False)
+else:
+with gr.Column(visible=kwargs['score_model']):
score_text_nochat = gr.Textbox("Response Score: NA", show_label=False)
col_chat = gr.Column(visible=kwargs['chat'])
with col_chat:

@@ -751,19 +762,19 @@ body.dark{background:linear-gradient(#0d0d0d,#333333);}"""
with gr.Row():
clear = gr.Button("New Conversation")
flag_btn = gr.Button("Flag")
-if kwargs['
-with gr.
+if not kwargs['auto_score']:  # FIXME: For checkbox model2
+with gr.Column(visible=kwargs['score_model']):
+with gr.Row():
+score_btn = gr.Button("Score last prompt & response").style(
+full_width=False, size='sm')
+score_text = gr.Textbox("Response Score: NA", show_label=False)
+score_res2 = gr.Row(visible=False)
+with score_res2:
+score_btn2 = gr.Button("Score last prompt & response 2").style(
+full_width=False, size='sm')
+score_text2 = gr.Textbox("Response Score2: NA", show_label=False)
+else:
+with gr.Column(visible=kwargs['score_model']):
score_text = gr.Textbox("Response Score: NA", show_label=False)
score_text2 = gr.Textbox("Response Score2: NA", show_label=False, visible=False)
retry = gr.Button("Regenerate")

@@ -942,7 +953,6 @@ body.dark{background:linear-gradient(#0d0d0d,#333333);}"""
fun = partial(evaluate,
**kwargs_evaluate)
fun2 = partial(evaluate,
-model_state2,
**kwargs_evaluate)

dark_mode_btn = gr.Button("Dark Mode", variant="primary").style(

@@ -953,7 +963,7 @@ body.dark{background:linear-gradient(#0d0d0d,#333333);}"""
None,
None,
_js=dark_js,
-api_name="dark",
+api_name="dark" if allow_api else None,
)

# Control chat and non-chat blocks, which can be independently used by chat checkbox swap

@@ -966,7 +976,7 @@ body.dark{background:linear-gradient(#0d0d0d,#333333);}"""
def context_fun(x):
return gr.Textbox.update(visible=not x)

-chat.select(col_nochat_fun, chat, col_nochat, api_name="chat_checkbox") \
+chat.select(col_nochat_fun, chat, col_nochat, api_name="chat_checkbox" if allow_api else None) \
.then(col_chat_fun, chat, col_chat) \
.then(context_fun, chat, context)

@@ -1042,25 +1052,31 @@ body.dark{background:linear-gradient(#0d0d0d,#333333);}"""
os.environ['TOKENIZERS_PARALLELISM'] = 'true'
return 'Response Score: {:.1%}'.format(score)

+def noop_score_last_response(*args, **kwargs):
+return "Response Score: Disabled"
if kwargs['score_model']:
+score_fun = score_last_response
+else:
+score_fun = noop_score_last_response
+
+score_args = dict(fn=score_fun,
+inputs=inputs_list + [text_output],
+outputs=[score_text],
+)
+score_args2 = dict(fn=partial(score_fun, model2=True),
+inputs=inputs_list + [text_output2],
+outputs=[score_text2],
+)

+score_args_nochat = dict(fn=partial(score_fun, nochat=True),
+inputs=inputs_list + [text_output_nochat],
+outputs=[score_text_nochat],
+)
+if not kwargs['auto_score']:
+score_event = score_btn.click(**score_args, queue=stream_output, api_name='score' if allow_api else None) \
+.then(**score_args2, queue=stream_output, api_name='score2' if allow_api else None)
+score_event_nochat = score_btn_nochat.click(**score_args_nochat, queue=stream_output,
+api_name='score_nochat' if allow_api else None)

def user(*args, undo=False, sanitize_user_prompt=True, model2=False):
"""

@@ -1208,64 +1224,64 @@ body.dark{background:linear-gradient(#0d0d0d,#333333);}"""
if kwargs['auto_score']:
# in case 2nd model, consume instruction first, so can clear quickly
# bot doesn't consume instruction itself, just history from user, so why works
-submit_event = instruction.submit(**user_args, queue=stream_output, api_name='instruction') \
-.then(**user_args2, queue=stream_output, api_name='instruction2') \
+submit_event = instruction.submit(**user_args, queue=stream_output, api_name='instruction' if allow_api else None) \
+.then(**user_args2, queue=stream_output, api_name='instruction2' if allow_api else None) \
.then(clear_instruct, None, instruction) \
-.then(**bot_args, api_name='instruction_bot') \
-.then(**score_args, api_name='instruction_bot_score') \
-.then(**bot_args2, api_name='instruction_bot2') \
-.then(**score_args2, api_name='instruction_bot_score2') \
+.then(**bot_args, api_name='instruction_bot' if allow_api else None) \
+.then(**score_args, api_name='instruction_bot_score' if allow_api else None) \
+.then(**bot_args2, api_name='instruction_bot2' if allow_api else None) \
+.then(**score_args2, api_name='instruction_bot_score2' if allow_api else None) \
.then(clear_torch_cache)
-submit_event2 = submit.click(**user_args, queue=stream_output, api_name='submit') \
-.then(**user_args2, queue=stream_output, api_name='submit2') \
-.then(**bot_args, api_name='submit_bot') \
+submit_event2 = submit.click(**user_args, queue=stream_output, api_name='submit' if allow_api else None) \
+.then(**user_args2, queue=stream_output, api_name='submit2' if allow_api else None) \
+.then(**bot_args, api_name='submit_bot' if allow_api else None) \
.then(clear_instruct, None, instruction) \
-.then(**score_args, api_name='submit_bot_score') \
-.then(**bot_args2, api_name='submit_bot2') \
-.then(**score_args2, api_name='submit_bot_score2') \
+.then(**score_args, api_name='submit_bot_score' if allow_api else None) \
+.then(**bot_args2, api_name='submit_bot2' if allow_api else None) \
+.then(**score_args2, api_name='submit_bot_score2' if allow_api else None) \
.then(clear_torch_cache)
-submit_event3 = retry.click(**user_args, queue=stream_output, api_name='retry') \
-.then(**user_args2, queue=stream_output, api_name='retry2') \
+submit_event3 = retry.click(**user_args, queue=stream_output, api_name='retry' if allow_api else None) \
+.then(**user_args2, queue=stream_output, api_name='retry2' if allow_api else None) \
.then(clear_instruct, None, instruction) \
-.then(**retry_bot_args, api_name='retry_bot') \
-.then(**score_args, api_name='retry_bot_score') \
-.then(**retry_bot_args2, api_name='retry_bot2') \
-.then(**score_args2, api_name='retry_bot_score2') \
+.then(**retry_bot_args, api_name='retry_bot' if allow_api else None) \
+.then(**score_args, api_name='retry_bot_score' if allow_api else None) \
+.then(**retry_bot_args2, api_name='retry_bot2' if allow_api else None) \
+.then(**score_args2, api_name='retry_bot_score2' if allow_api else None) \
.then(clear_torch_cache)
-submit_event4 = undo.click(**undo_user_args, queue=stream_output, api_name='undo') \
-.then(**score_args, api_name='undo_score') \
-.then(**undo_user_args2, queue=stream_output, api_name='undo2') \
-.then(**score_args2, api_name='undo_score2') \
+submit_event4 = undo.click(**undo_user_args, queue=stream_output, api_name='undo' if allow_api else None) \
+.then(**score_args, api_name='undo_score' if allow_api else None) \
+.then(**undo_user_args2, queue=stream_output, api_name='undo2' if allow_api else None) \
+.then(**score_args2, api_name='undo_score2' if allow_api else None) \
.then(clear_instruct, None, instruction)
else:
-submit_event = instruction.submit(**user_args, queue=stream_output, api_name='instruction') \
-.then(**user_args2, queue=stream_output, api_name='instruction2') \
+submit_event = instruction.submit(**user_args, queue=stream_output, api_name='instruction' if allow_api else None) \
+.then(**user_args2, queue=stream_output, api_name='instruction2' if allow_api else None) \
.then(clear_instruct, None, instruction) \
-.then(**bot_args, api_name='instruction_bot') \
-.then(**bot_args2, api_name='instruction_bot2') \
+.then(**bot_args, api_name='instruction_bot' if allow_api else None) \
+.then(**bot_args2, api_name='instruction_bot2' if allow_api else None) \
.then(clear_torch_cache)
-submit_event2 = submit.click(**user_args, queue=stream_output, api_name='submit') \
-.then(**user_args2, queue=stream_output, api_name='submit2') \
+submit_event2 = submit.click(**user_args, queue=stream_output, api_name='submit' if allow_api else None) \
+.then(**user_args2, queue=stream_output, api_name='submit2' if allow_api else None) \
.then(clear_instruct, None, instruction) \
-.then(**bot_args, api_name='submit_bot') \
-.then(**bot_args2, api_name='submit_bot2') \
+.then(**bot_args, api_name='submit_bot' if allow_api else None) \
+.then(**bot_args2, api_name='submit_bot2' if allow_api else None) \
.then(clear_torch_cache)
-submit_event3 = retry.click(**user_args, queue=stream_output, api_name='retry') \
-.then(**user_args2, queue=stream_output, api_name='retry2') \
+submit_event3 = retry.click(**user_args, queue=stream_output, api_name='retry' if allow_api else None) \
+.then(**user_args2, queue=stream_output, api_name='retry2' if allow_api else None) \
.then(clear_instruct, None, instruction) \
-.then(**retry_bot_args, api_name='retry_bot') \
-.then(**retry_bot_args2, api_name='retry_bot2') \
+.then(**retry_bot_args, api_name='retry_bot' if allow_api else None) \
+.then(**retry_bot_args2, api_name='retry_bot2' if allow_api else None) \
.then(clear_torch_cache)
-submit_event4 = undo.click(**undo_user_args, queue=stream_output, api_name='undo') \
-.then(**undo_user_args2, queue=stream_output, api_name='undo2')
+submit_event4 = undo.click(**undo_user_args, queue=stream_output, api_name='undo' if allow_api else None) \
+.then(**undo_user_args2, queue=stream_output, api_name='undo2' if allow_api else None)

# does both models
-clear.click(lambda: None, None, text_output, queue=False, api_name='clear') \
-.then(lambda: None, None, text_output2, queue=False, api_name='clear2')
+clear.click(lambda: None, None, text_output, queue=False, api_name='clear' if allow_api else None) \
+.then(lambda: None, None, text_output2, queue=False, api_name='clear2' if allow_api else None)
# FIXME: compare
submit_event_nochat = submit_nochat.click(fun, inputs=[model_state] + inputs_list,
-outputs=text_output_nochat, api_name='submit_nochat') \
-.then(**score_args_nochat, api_name='instruction_bot_score_nochat') \
+outputs=text_output_nochat, api_name='submit_nochat' if allow_api else None) \
+.then(**score_args_nochat, api_name='instruction_bot_score_nochat' if allow_api else None) \
.then(clear_torch_cache)

def load_model(model_name, lora_weights, model_state_old, prompt_type_old, load_8bit, infer_devices, gpu_id):

@@ -1380,7 +1396,7 @@ body.dark{background:linear-gradient(#0d0d0d,#333333);}"""
inputs=[lora_options_state, new_lora, model_used, lora_used, model_used2, lora_used2],
outputs=[lora_choice, lora_choice2, new_lora, lora_options_state])

-go_btn.click(lambda: gr.update(visible=False), None, go_btn, api_name="go") \
+go_btn.click(lambda: gr.update(visible=False), None, go_btn, api_name="go" if allow_api else None) \
.then(lambda: gr.update(visible=True), None, normal_block) \
.then(**load_model_args).then(**prompt_update_args)

@@ -1393,7 +1409,8 @@ body.dark{background:linear-gradient(#0d0d0d,#333333);}"""
def compare_prompt_fun(x):
return gr.Dropdown.update(visible=x)

-compare_checkbox.select(compare_textbox_fun, compare_checkbox, text_output2,
+compare_checkbox.select(compare_textbox_fun, compare_checkbox, text_output2,
+api_name="compare_checkbox" if allow_api else None) \
.then(compare_column_fun, compare_checkbox, col_model2) \
.then(compare_prompt_fun, compare_checkbox, prompt_type2) \
.then(compare_textbox_fun, compare_checkbox, score_text2)

@@ -1402,28 +1419,29 @@ body.dark{background:linear-gradient(#0d0d0d,#333333);}"""
# callback for logging flagged input/output
callback.setup(inputs_list + [text_output], "flagged_data_points")
flag_btn.click(lambda *args: callback.flag(args), inputs_list + [text_output], None, preprocess=False,
-api_name='flag')
+api_name='flag' if allow_api else None)
flag_btn_nochat.click(lambda *args: callback.flag(args), inputs_list + [text_output], None, preprocess=False,
-api_name='flag_nochat')
+api_name='flag_nochat' if allow_api else None)

def get_system_info():
return gr.Textbox.update(value=system_info_print())

-system_event = system_btn.click(get_system_info, outputs=system_text, api_name='system_info')
+system_event = system_btn.click(get_system_info, outputs=system_text, api_name='system_info' if allow_api else None)

# don't pass text_output, don't want to clear output, just stop it
# FIXME: have to click once to stop output and second time to stop GPUs going
stop_btn.click(lambda: None, None, None,
cancels=[submit_event_nochat, submit_event, submit_event2, submit_event3],
-queue=False, api_name='stop').then(clear_torch_cache)
-demo.load(None,None,None,_js=dark_js)
+queue=False, api_name='stop' if allow_api else None).then(clear_torch_cache)
+demo.load(None, None, None, _js=dark_js)

-demo.queue(concurrency_count=
+demo.queue(concurrency_count=kwargs['concurrency_count'], api_open=kwargs['api_open'])
favicon_path = "h2o-logo.svg"
demo.launch(share=kwargs['share'], server_name="0.0.0.0", show_error=True,
favicon_path=favicon_path, prevent_thread_lock=True)  # , enable_queue=True)
print("Started GUI", flush=True)
+if kwargs['block_gradio_exit']:
+demo.block_thread()


input_args_list = ['model_state']
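The recurring change across app.py is that every Gradio event handler now passes api_name=... if allow_api else None, and queueing/API behavior is driven by the CONCURRENCY_COUNT, API_OPEN, and ALLOW_API environment overrides plus the new block_gradio_exit flag. Below is a minimal standalone sketch (not part of the commit) of that pattern; the echo function and variable names are illustrative only, and it assumes Gradio 3.x semantics for api_name, queue(api_open=...), and block_thread().

import os

import gradio as gr

# env overrides mirror the new main() kwargs in this commit
allow_api = bool(int(os.getenv('ALLOW_API', 1)))
api_open = bool(int(os.getenv('API_OPEN', 0)))
concurrency_count = int(os.getenv('CONCURRENCY_COUNT', 1))


def echo(prompt):
    # stand-in for the real evaluate() function
    return "echo: " + prompt


with gr.Blocks() as demo:
    instruction = gr.Textbox(label="Instruction")
    output = gr.Textbox(label="Output")
    submit = gr.Button("Submit")
    # the endpoint gets a name (and is visible to gradio_client) only when allow_api is set,
    # mirroring the commit's "api_name=... if allow_api else None" pattern
    submit.click(echo, instruction, output,
                 api_name='submit_nochat' if allow_api else None)

demo.queue(concurrency_count=concurrency_count, api_open=api_open)
demo.launch(server_name="0.0.0.0", prevent_thread_lock=True)
# same role as block_gradio_exit=True: keep the process alive after launch
demo.block_thread()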
client_test.py
CHANGED
@@ -13,43 +13,69 @@ Currently, this will force model to be on a single GPU.
Then run this client as:

python client_test.py
+
+
+
+For HF spaces:
+
+HOST="https://h2oai-h2ogpt-chatbot.hf.space" python client_test.py
+
+Result:
+
+Loaded as API: https://h2oai-h2ogpt-chatbot.hf.space ✔
+{'instruction_nochat': 'Who are you?', 'iinput_nochat': '', 'response': 'I am h2oGPT, a large language model developed by LAION.'}
+
+
+For demo:
+
+HOST="https://gpt.h2o.ai" python client_test.py
+
+Result:
+
+Loaded as API: https://gpt.h2o.ai ✔
+{'instruction_nochat': 'Who are you?', 'iinput_nochat': '', 'response': 'I am h2oGPT, a chatbot created by LAION.'}
+
"""

debug = False

import os
os.environ['HF_HUB_DISABLE_TELEMETRY'] = '1'
-# streaming output is supported, loops over and outputs each generation in streaming mode
-# but leave stream_output=False for simple input/output mode
-stream_output = False
-prompt_type = 'human_bot'
-temperature = 0.1
-top_p = 0.75
-top_k = 40
-num_beams = 1
-max_new_tokens = 50
-min_new_tokens = 0
-early_stopping = False
-max_time = 20
-repetition_penalty = 1.0
-num_return_sequences = 1
-do_sample = True
-# only these 2 below used if pass chat=False
-chat = False
-instruction_nochat = "Who are you?"
-iinput_nochat = ''
+
+
+def get_client():
+from gradio_client import Client
+
+client = Client(os.getenv('HOST', "http://localhost:7860"))
+if debug:
+print(client.view_api(all_endpoints=True))
+return client


def test_client_basic():
+instruction = ''  # only for chat=True
+iinput = ''  # only for chat=True
+context = ''
+# streaming output is supported, loops over and outputs each generation in streaming mode
+# but leave stream_output=False for simple input/output mode
+stream_output = False
+prompt_type = 'human_bot'
+temperature = 0.1
+top_p = 0.75
+top_k = 40
+num_beams = 1
+max_new_tokens = 50
+min_new_tokens = 0
+early_stopping = False
+max_time = 20
+repetition_penalty = 1.0
+num_return_sequences = 1
+do_sample = True
+# only these 2 below used if pass chat=False
+chat = False
+instruction_nochat = "Who are you?"
+iinput_nochat = ''
+
args = [instruction,
iinput,
context,

@@ -71,12 +97,14 @@ def test_client_basic():
iinput_nochat,
]
api_name = '/submit_nochat'
+client = get_client()
res = client.predict(
*tuple(args),
api_name=api_name,
)
res_dict = dict(instruction_nochat=instruction_nochat, iinput_nochat=iinput_nochat, response=md_to_text(res))
print(res_dict)
+return res_dict


import markdown  # pip install markdown
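client_test.py now wraps connection setup in get_client() and reads the target from the HOST environment variable, so the same test runs against a local server, the HF Space, or the public demo. A hedged usage sketch (not in the commit) for inspecting which endpoints a running instance still exposes before calling /submit_nochat:

import os

from gradio_client import Client

# HOST defaults to a local h2oGPT instance, as in client_test.py
client = Client(os.getenv('HOST', "http://localhost:7860"))
# lists only endpoints registered with an api_name, i.e. those left enabled by allow_api
print(client.view_api(all_endpoints=True))
# client.predict(*args, api_name='/submit_nochat') then expects the positional
# argument list in the same order test_client_basic() builds it above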
finetune.py
CHANGED
@@ -765,7 +765,13 @@ Current Time: {}

PreInput = None

+if reduced:
+# when making context, want it to appear as-if LLM generated, which starts with space after :
+PreResponse = bot + ' '
+else:
+# normally LLM adds space after this, because was how trained.
+# if add space here, non-unique tokenization will often make LLM produce wrong output
+PreResponse = bot

terminate_response = [start, PreResponse]
elif prompt_type in [3, "3", "dai_faq"]:
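The finetune.py change appends the space after the bot tag only when reduced is true (when building context that should read as already-generated text), and leaves it off for the live prompt so the model emits the space it was trained to produce. A small illustrative sketch of that behavior (the bot tag and names here are hypothetical, not taken from finetune.py):

bot = '<bot>:'  # example bot tag; the real tag depends on prompt_type


def pre_response(reduced: bool) -> str:
    if reduced:
        # context text should look as-if the LLM already generated it,
        # so include the space that follows ':'
        return bot + ' '
    # for the live prompt, let the model generate the space itself to
    # avoid a non-unique tokenization of the prompt ending
    return bot


context_turn = pre_response(reduced=True) + "I am h2oGPT."
live_prompt_end = pre_response(reduced=False)
print(repr(context_turn))     # '<bot>: I am h2oGPT.'
print(repr(live_prompt_end))  # '<bot>:'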