from openai import OpenAI
import os
from dotenv import load_dotenv
# intent_and_rag_system_prompt and system_prompt_for_checking_rag are referenced
# below; assumed to live in prompts alongside the other prompt helpers.
from prompts import (
    give_system_prompt_for_rag,
    system_prompt,
    improved_system_prompt_for_rag,
    intent_and_rag_system_prompt,
    system_prompt_for_checking_rag,
)
from chat_database import get_chat_history
from embeddings import get_query_embeddings
from qdrent import search_embeddings
from pydantic import BaseModel
import asyncio

load_dotenv()

provider = os.getenv("LLM_PROVIDER")
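# Choose an OpenAI-compatible backend based on LLM_PROVIDER:
# "google", "together", or anything else falls back to the default OpenAI endpoint.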

if provider == "google":
    client = OpenAI(
        api_key=os.getenv("GEMINI_API_KEY"),
        base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
    )
elif provider == "together":
    client = OpenAI(
        api_key=os.getenv("TOGETHER_API_KEY"),
        # Together AI's OpenAI-compatible endpoint
        base_url="https://api.together.xyz/v1"
    )
else:
    # Default: assume the standard OpenAI endpoint
    client = OpenAI(
        api_key=os.getenv("OPENAI_API_KEY")
    )

llm_model = os.getenv("LLM_MODEL")


class IntentAndRagResponseFormat(BaseModel):
    intent: bool
    rag: bool


class RagResponseFormat(BaseModel):
    rag: bool


def getResponse(text, session_id):
    """Stream a chat completion built from the session's stored history.

    `text` is not added to the prompt here; the latest user message is
    assumed to already be persisted in the chat history.
    """
    try:
        res = get_chat_history(session_id)
        chat_history = res[0]
        user_prompt = res[1]
        chat_history.insert(
            0, {"role": "system", "content": system_prompt(user_prompt)})
        response = client.chat.completions.create(
            model=llm_model,
            messages=chat_history,
            stream=True
        )
        return response
    except Exception as e:
        print("Error in getResponse:", e)
        return None


async def getResponseAsync(text, session_id):
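    """Run the blocking getResponse call in a thread pool so the event loop is not blocked."""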
    loop = asyncio.get_running_loop()
    response = await loop.run_in_executor(None, getResponse, text, session_id)
    return response


def getResponseWithRAG(text, session_id):
    """Stream a chat completion grounded in document chunks retrieved for `text`."""
    try:
        chat_history = get_chat_history(session_id)[0]
        prompt = []
        query_embeddings = get_query_embeddings(text)
        content = search_embeddings(session_id, query_embeddings)

        paragraphs = ""

        for index, item in enumerate(content, start=1):
            # Use a distinct name so the original query in `text` is not overwritten.
            chunk_text = item.payload["text"]

            # Skip very short chunks that add little context.
            if len(chunk_text) > 100:
                paragraphs += f"Paragraph {index} : {chunk_text}\n\n"

        rag_system_prompt = improved_system_prompt_for_rag(paragraphs)
        prompt.append(
            {"role": "system", "content": rag_system_prompt})
        prompt.append(
            {"role": "user", "content": f"Previous chat : {str(chat_history)}"})
        prompt.append({"role": "user", "content": f"Query : {text}"})
        response = client.chat.completions.create(
            model=llm_model,
            messages=prompt,
            stream=True
        )
        return response
    except Exception as e:
        print("Error in getResponseWithRAG:", e)
        return None


async def getResponseWithRagAsync(text, session_id):
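    """Run the blocking getResponseWithRAG call in a thread pool so the event loop is not blocked."""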
    loop = asyncio.get_running_loop()
    response = await loop.run_in_executor(None, getResponseWithRAG, text, session_id)
    return response


def check_for_user_intent_and_rag(text, session_id):
    """Ask the model to classify the query's intent and whether RAG retrieval is needed,
    returning a parsed IntentAndRagResponseFormat."""
    try:
        recent_history = get_chat_history(session_id)[0]
        response = client.beta.chat.completions.parse(
            model=llm_model,
            messages=[
                {"role": "system", "content": intent_and_rag_system_prompt},
                {"role": "user", "content": f"User Query : {text}"},
                {"role": "user", "content": f"Previous chat : {str(recent_history)}"}
            ],
            response_format=IntentAndRagResponseFormat,
        )
        return response.choices[0].message.parsed
    except Exception as e:
        print("Error in check_for_user_intent_and_rag:", e)
        return None


def check_for_rag(text, session_id):
    """Ask the model whether the query needs RAG retrieval, returning a parsed RagResponseFormat."""
    try:
        chat_history = get_chat_history(session_id)[0]
        recent_history = chat_history[-10:]
        response = client.beta.chat.completions.parse(
            model=llm_model,
            messages=[
                {"role": "system", "content": system_prompt_for_checking_rag},
                {"role": "user", "content": f"User Query : {text}"},
                {"role": "user", "content": f"Previous chat : {str(recent_history)}"}
            ],
            response_format=RagResponseFormat,
        )
        return response.choices[0].message.parsed
    except Exception as e:
        print("Error in check_for_rag:", e)
        return None