# Chroma moved between packages across LangChain versions; fall back to the
# older import path if langchain_community is unavailable. (The original
# except branch re-imported the identical module, which was a no-op.)
try:
    from langchain_community.vectorstores import Chroma
except ImportError:
    from langchain.vectorstores import Chroma

from langchain.chains import ConversationChain
from langchain.chains.conversation.memory import ConversationBufferWindowMemory


# Prompt template and Groq chat model.
from langchain_core.prompts import PromptTemplate
from langchain_groq import ChatGroq

# Initialize ChatGroq at temperature 0 with the "llama3-70b-8192" model.
# The Groq API key is read from the GROQ_API_KEY environment variable rather
# than being hard-coded in the source.
llm = ChatGroq(temperature=0, model_name="llama3-70b-8192")

from langchain_community.embeddings import SentenceTransformerEmbeddings

# Embed queries and documents with a small, fast sentence-transformers model.
embeddings = SentenceTransformerEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"trust_remote_code": True},
)
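
# NOTE: `db` is used in get_llama_response below but was never defined in the
# original script. A minimal sketch, assuming the documents were previously
# indexed into a persisted Chroma store; the "./chroma_db" path is an
# assumption, not taken from the original.
db = Chroma(persist_directory="./chroma_db", embedding_function=embeddings)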


# Keep only the last 3 conversational turns in the prompt's {history} slot.
# (`return_only_outputs` is not a ConversationBufferWindowMemory parameter
# and has been dropped.)
memory = ConversationBufferWindowMemory(memory_key="history", k=3)


# Build the conversation chain with its default prompt; get_llama_response
# swaps in a retrieval-augmented prompt on each call. (The original passed
# `prompt=prompt` here, but `prompt` is not defined until inside the handler,
# which raises a NameError.)
chain = ConversationChain(
    llm=llm,
    memory=memory,
    verbose=True,
)


# Generate a response from the Llama model, grounded in retrieved context.
def get_llama_response(message: str, history: list) -> str:
    """
    Generates a conversational response from the Llama model.

    Parameters:
        message (str): User's input message.
        history (list): Past conversation history.

    Returns:
        str: Generated response from the Llama model.
    """
    query_text = message

    # Retrieve the two most relevant chunks; warn if nothing scores above 0.5.
    results = db.similarity_search_with_relevance_scores(query_text, k=2)
    if len(results) == 0 or results[0][1] < 0.5:
        print("Unable to find matching results.")

    # Join the retrieved chunks into a single context block.
    context_text = "\n\n---\n\n".join(doc.page_content for doc, _score in results)

    template = """
    The following is a conversation between a human an AI. Answer  question based only on the conversation.

    Current conversation:
    {history}

    """



    s="""

    \n question: {input}

    \n answer:""".strip()


    prompt = PromptTemplate(input_variables=["history", "input"], template=template+context_text+'\n'+s)

    #print(template)
    chain.prompt=prompt
    res = chain.predict(input=query_text)
    return res
        #return response.strip()



import gradio as gr

# gr.ChatInterface passes (message, history) to its handler, matching
# get_llama_response's signature; the original single-Textbox gr.Interface
# would only pass the message and fail with a missing-argument error.
iface = gr.ChatInterface(fn=get_llama_response)
iface.launch(share=True)