Update app.py

app.py CHANGED
@@ -19,20 +19,30 @@ InferenceClient(models[3]),
 
 VERBOSE=False
 
-def format_prompt(message, history):
+def load_models(inp):
+    if VERBOSE==True:
+        print(type(inp))
+        print(inp)
+        print(models[inp])
+    #client_z.clear()
+    #client_z.append(InferenceClient(models[inp]))
+    return gr.update(label=models[inp])
+
+def format_prompt(message, history, cust_p):
     prompt = ""
     if history:
-        #<start_of_turn>userHow does the brain work?<end_of_turn><start_of_turn>model
         for user_prompt, bot_response in history:
-            prompt += f"{user_prompt}
-
-
-
-    prompt += f"<start_of_turn>user{message}<end_of_turn
+            prompt += f"<start_of_turn>user{user_prompt}<end_of_turn>"
+            prompt += f"<start_of_turn>model{bot_response}<end_of_turn>"
+    if VERBOSE==True:
+        print(prompt)
+    #prompt += f"<start_of_turn>user\n{message}<end_of_turn>\n<start_of_turn>model\n"
+    prompt+=cust_p.replace("USER_INPUT",message)
     return prompt
 
-def chat_inf(system_prompt,prompt,history,memory,client_choice,seed,temp,tokens,top_p,rep_p,chat_mem):
+def chat_inf(system_prompt,prompt,history,memory,client_choice,seed,temp,tokens,top_p,rep_p,chat_mem,cust_p):
     #token max=8192
+    print(client_choice)
     hist_len=0
     client=clients[int(client_choice)-1]
     if not history:
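For reference, a minimal standalone sketch of what the new format_prompt produces, using the default template added to the UI further down; the sample history and message here are made up:

    # Hypothetical usage of the new format_prompt shown above.
    def format_prompt(message, history, cust_p):
        prompt = ""
        if history:
            for user_prompt, bot_response in history:
                prompt += f"<start_of_turn>user{user_prompt}<end_of_turn>"
                prompt += f"<start_of_turn>model{bot_response}<end_of_turn>"
        # USER_INPUT in the user-editable template is replaced with the message
        prompt += cust_p.replace("USER_INPUT", message)
        return prompt

    template = "<start_of_turn>userUSER_INPUT<end_of_turn><start_of_turn>model"
    history = [("Hi", "Hello!")]
    print(format_prompt("How does the brain work?", history, template))
    # <start_of_turn>userHi<end_of_turn><start_of_turn>modelHello!<end_of_turn>
    # <start_of_turn>userHow does the brain work?<end_of_turn><start_of_turn>model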
@@ -58,9 +68,11 @@ def chat_inf(system_prompt,prompt,history,memory,client_choice,seed,temp,tokens,
         do_sample=True,
         seed=seed,
     )
-
-
-
+    if system_prompt:
+        formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", memory[0-chat_mem:],cust_p)
+    else:
+        formatted_prompt = format_prompt(prompt, memory[0-chat_mem:],cust_p)
+    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=True)
     output = ""
    for response in stream:
         output += response.token.text
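The streaming call follows huggingface_hub's InferenceClient API: with stream=True and details=True, text_generation yields one token-detail object per step, and chat_inf accumulates response.token.text so it can yield partial output back to the Chatbot. A reduced sketch with a placeholder model id and made-up prompt, sampling values mirroring the UI defaults below:

    from huggingface_hub import InferenceClient

    # Placeholder model; the app itself uses clients[int(client_choice)-1].
    client = InferenceClient("google/gemma-7b-it")
    stream = client.text_generation(
        "<start_of_turn>userHi<end_of_turn><start_of_turn>model",
        max_new_tokens=64, temperature=0.49, top_p=0.49,
        repetition_penalty=0.99, do_sample=True, seed=1,
        stream=True, details=True,
    )
    output = ""
    for response in stream:   # one token-detail object per generated token
        output += response.token.text
    print(output)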
@@ -68,6 +80,7 @@ def chat_inf(system_prompt,prompt,history,memory,client_choice,seed,temp,tokens,
     history.append((prompt,output))
     memory.append((prompt,output))
     yield history,memory
+
     if VERBOSE==True:
         print("\n######### HIST "+str(in_len))
         print("\n######### TOKENS "+str(tokens))
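One subtlety in the surrounding code: the prompt is built from memory[0-chat_mem:], i.e. the last chat_mem exchanges (0-chat_mem is just -chat_mem). A small worked example; the edge case noted is a fact about Python slicing, not something the diff guards against:

    memory = [("q1","a1"), ("q2","a2"), ("q3","a3"), ("q4","a4"), ("q5","a5")]
    chat_mem = 4
    print(memory[0-chat_mem:])  # last 4 exchanges: ('q2','a2') ... ('q5','a5')
    chat_mem = 0
    print(memory[0-chat_mem:])  # memory[0:] -> the WHOLE list, not an empty one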
@@ -109,14 +122,16 @@ with gr.Blocks() as app:
             stop_btn=gr.Button("Stop")
             clear_btn=gr.Button("Clear")
         client_choice=gr.Dropdown(label="Models",type='index',choices=[c for c in models],value=models[0],interactive=True)
+        with gr.Accordion("Prompt Format",open=False):
+            custom_prompt=gr.Textbox(label="Modify Prompt Format", info="For testing purposes. 'USER_INPUT' is where 'SYSTEM_PROMPT, PROMPT' will be placed", lines=5,value="<start_of_turn>userUSER_INPUT<end_of_turn><start_of_turn>model")
     with gr.Column(scale=1):
         with gr.Group():
             rand = gr.Checkbox(label="Random Seed", value=True)
             seed=gr.Slider(label="Seed", minimum=1, maximum=1111111111111111,step=1, value=rand_val)
             tokens = gr.Slider(label="Max new tokens",value=1600,minimum=0,maximum=8000,step=64,interactive=True, visible=True,info="The maximum number of tokens")
-            temp=gr.Slider(label="Temperature",step=0.01, minimum=0.01, maximum=1.0, value=0.
-            top_p=gr.Slider(label="Top-P",step=0.01, minimum=0.01, maximum=1.0, value=0.
-            rep_p=gr.Slider(label="Repetition Penalty",step=0.
+            temp=gr.Slider(label="Temperature",step=0.01, minimum=0.01, maximum=1.0, value=0.49)
+            top_p=gr.Slider(label="Top-P",step=0.01, minimum=0.01, maximum=1.0, value=0.49)
+            rep_p=gr.Slider(label="Repetition Penalty",step=0.01, minimum=0.1, maximum=2.0, value=0.99)
             chat_mem=gr.Number(label="Chat Memory", info="Number of previous chats to retain",value=4)
         with gr.Accordion(label="Screenshot",open=False):
             with gr.Row():
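These sliders are the knobs behind the generate_kwargs dict that chat_inf unpacks into text_generation; only the do_sample=True, seed=seed tail of that dict is visible in the earlier hunk, so the rest of this sketch is an assumption that it simply mirrors the slider values:

    # Assumed shape of generate_kwargs in chat_inf; only do_sample and seed
    # are confirmed by the visible diff context.
    generate_kwargs = dict(
        temperature=temp,          # slider: 0.01-1.0, default 0.49
        max_new_tokens=tokens,     # slider: 0-8000, default 1600 (model max 8192)
        top_p=top_p,               # slider: 0.01-1.0, default 0.49
        repetition_penalty=rep_p,  # slider: 0.1-2.0, default 0.99
        do_sample=True,
        seed=seed,
    )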
@@ -131,9 +146,15 @@ with gr.Blocks() as app:
             theme=gr.Radio(label="Theme", choices=["light","dark"],value="light")
             chatblock=gr.Dropdown(label="Chatblocks",info="Choose specific blocks of chat",choices=[c for c in range(1,40)],multiselect=True)
 
+
+    client_choice.change(load_models,client_choice,[chat_b])
+    app.load(load_models,client_choice,[chat_b])
+
     im_go=im_btn.click(get_screenshot,[chat_b,im_height,im_width,chatblock,theme,wait_time],img)
-
-
+
+    chat_sub=inp.submit(check_rand,[rand,seed],seed).then(chat_inf,[sys_inp,inp,chat_b,memory,client_choice,seed,temp,tokens,top_p,rep_p,chat_mem,custom_prompt],[chat_b,memory])
+    go=btn.click(check_rand,[rand,seed],seed).then(chat_inf,[sys_inp,inp,chat_b,memory,client_choice,seed,temp,tokens,top_p,rep_p,chat_mem,custom_prompt],[chat_b,memory])
+
     stop_btn.click(None,None,None,cancels=[go,im_go,chat_sub])
     clear_btn.click(clear_fn,None,[inp,sys_inp,chat_b,memory])
 app.queue(default_concurrency_limit=10).launch()
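The new wiring follows the usual Gradio event-chaining pattern: check_rand runs first to resolve the seed, .then() feeds chat_inf, the returned event handles (go, im_go, chat_sub) are kept so stop_btn can cancel any in-flight run, and load_models re-labels the Chatbot whenever the model dropdown changes. A minimal self-contained sketch of the same pattern; the component and function names here are illustrative, not from the app:

    import gradio as gr

    def pick_seed(rand, seed):
        return gr.update(value=42) if rand else seed  # stand-in for check_rand

    def respond(msg, history):
        history = (history or []) + [(msg, f"echo: {msg}")]
        yield history                                 # generator, like chat_inf

    with gr.Blocks() as demo:
        chat = gr.Chatbot()
        rand = gr.Checkbox(label="Random Seed", value=True)
        seed = gr.Slider(1, 100, value=1)
        inp = gr.Textbox()
        stop = gr.Button("Stop")
        ev = inp.submit(pick_seed, [rand, seed], seed).then(respond, [inp, chat], chat)
        stop.click(None, None, None, cancels=[ev])    # cancels in-flight runs
    demo.queue().launch()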