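"""Gradio demo: chat with "Barry", an AI patient simulated by a local GGUF
model served through llama-cpp-python."""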
import gradio as gr
import os
from pathlib import Path
import argparse
from huggingface_hub import snapshot_download

# repo_name = "TheBloke/Mistral-7B-v0.1-GGUF"
# model_file = "mistral-7b-v0.1.Q6_K.gguf"

# repo_name = 'HumanityFTW/so_rude'
# model_file = "mistral-comedy-2.0-ckpt-600.Q6_K.gguf"

repo_name = 'TheBloke/OpenHermes-2.5-Mistral-7B-GGUF'
model_file = "openhermes-2.5-mistral-7b.Q4_K_M.gguf"

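# snapshot_download with allow_patterns pulls just the one GGUF file into the
# current directory rather than mirroring the entire repo.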
print('Fetching model:', repo_name, model_file)
snapshot_download(repo_id=repo_name, local_dir=".", allow_patterns=model_file)
print('Done fetching model.')

DEFAULT_MODEL_PATH = model_file

from llama_cpp import Llama

# llama-cpp-python reads the architecture from the GGUF metadata itself;
# `model_type` is a ctransformers argument and is not used here.
llm = Llama(model_path=DEFAULT_MODEL_PATH)

# Conversation state used by AIPatient below. The original initContext
# (Barry's persona prompt) is not included in this file, so this ChatML
# system stub is only a placeholder.
isFirstRun = True
history = []
context = ""
initContext = "<|im_start|>system\nYou are Barry, a patient being interviewed by a nurse.<|im_end|>\n"


def predict(input, chatbot, max_length, top_p, temperature, history):
    """Stream a completion token-by-token into the chatbot."""
    chatbot.append((input, ""))
    response = ""
    history.append(input)

    for output in llm(input, stream=True, temperature=temperature, top_p=top_p, max_tokens=max_length):
        piece = output['choices'][0]['text']
        response += piece
        chatbot[-1] = (chatbot[-1][0], response)

        yield chatbot, history

    history.append(response)
    yield chatbot, history

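# NOTE: predict, reset_user_input, and reset_state are defined but not wired
# into the Blocks UI below; only AIPatient is connected to the Submit button.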

def reset_user_input():
    return gr.update(value="")


def reset_state():
    return [], []

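# One possible wiring for the streaming helper above (illustrative only; these
# sliders and the State object are not part of the original UI):
#
#   with gr.Blocks() as demo:
#       chatbot = gr.Chatbot()
#       msg = gr.Textbox()
#       history_state = gr.State([])
#       max_length = gr.Slider(32, 2048, value=512, label="max_tokens")
#       top_p = gr.Slider(0.0, 1.0, value=0.95, label="top_p")
#       temperature = gr.Slider(0.0, 2.0, value=0.8, label="temperature")
#       msg.submit(predict,
#                  inputs=[msg, chatbot, max_length, top_p, temperature, history_state],
#                  outputs=[chatbot, history_state])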

def AIPatient(message):
    global isFirstRun, history, context

    if isFirstRun:
        context = initContext
        isFirstRun = False

    # Append the nurse's turn and cue Barry's reply. The prompt uses
    # ChatML-style role markers but omits the closing <|im_end|> tokens.
    context += """
                  <|im_start|>nurse
                  Nurse: """ + message + """
                  <|im_start|>barry
                  Barry:
                  """

    # Retry until the model produces a non-empty completion.
    response = ""
    while len(response) < 1:
        output = llm(context, max_tokens=400, stop=["Nurse:"], echo=False)
        response = output["choices"][0]["text"].strip()

    context += response
    print(context)

    history.append((message, response))
    return history

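# Caveat: `context` grows without bound across turns, so a long session can
# eventually exceed the model's context window.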

with gr.Blocks() as demo:
    gr.Markdown("# AI Patient Chatbot")
    with gr.Group():
        with gr.Tab("Patient Chatbot"):
            chatbot = gr.Chatbot()
            message = gr.Textbox(label="Enter your message to Barry", placeholder="Type here...", lines=2)
            send_message = gr.Button("Submit")
            send_message.click(AIPatient, inputs=[message], outputs=[chatbot])
            save_chatlog = gr.Button("Save Chatlog")
            # save_chatlog.click(SaveChatlog, inputs=[message], outputs=[chatbot])
            # (SaveChatlog is not implemented in this file.)

            # message.submit(AIPatient, inputs=[message], outputs=[chatbot])

demo.launch(debug=True, share=False, inbrowser=True)