import gradio as gr

from db import get_db
from chain import get_chain
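# NOTE (assumption): `db` and `chain` are local project modules not shown here. From the
# call sites below, `get_db` appears to build or load a vector store over the source
# documents, and `get_chain` appears to wrap that store in a retrieval-augmented
# generation (RAG) chain backed by a Hugging Face Hub model and the given prompt template.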

import logging

# Configure logging so the module-level logger.info calls below are actually emitted.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


logger.info('Instantiating vectordb')
# Build the vector store: documents are chunked (size 1000, overlap 200) and embedded
# with a multilingual E5 instruction-tuned embedding model.
vectordb = get_db(
    chunk_size=1000,
    chunk_overlap=200,
    model_name='intfloat/multilingual-e5-large-instruct',
)


logger.info('Instantiating chain')
# Wire the vector store into a RAG chain: Zephyr-7B-beta generates the answer over
# context retrieved with maximal marginal relevance (MMR), which fetches 5 candidate
# chunks and keeps the 3 most relevant yet diverse ones.
chain = get_chain(
    vectordb,
    repo_id="HuggingFaceH4/zephyr-7b-beta",
    task="text-generation",
    max_new_tokens=512,
    top_k=30,
    temperature=0.1,
    repetition_penalty=1.03,
    search_type="mmr",
    k=3,
    fetch_k=5,
    template="""Use the following sentences of context to answer the question at the end.
If you don't know the answer (that is, if the answer is not in the context), just say that you don't know; don't try to make up an answer.
Always say "Thanks for asking!" at the end of the answer.

{context}

Question: {question}
Helpful Answer:"""
)
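
# Optional local smoke test (commented out; assumes the chain answers one-off questions
# exactly as in respond() below):
# print(chain.invoke({'question': 'What is this corpus about?'}))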

def respond(
    question,
    _,  # Ignore the message history parameter since we are doing one-off invocations
    system_message,  # The remaining UI inputs are accepted but not used: the chain's
    max_tokens,      # generation settings are fixed at construction time above.
    temperature,
    top_p,
):
    logger.info(f'respond called by Gradio ChatInterface with question={question}')
    return chain.invoke({'question': question})


# gr.ChatInterface calls respond(message, history, *additional_inputs) on each submission.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)


if __name__ == "__main__":
    demo.launch(
        show_error=True,         # surface exceptions in the UI instead of failing silently
        enable_monitoring=True,  # enable Gradio's built-in request monitoring
    )