from openai import OpenAI
import os
from dotenv import load_dotenv
from prompts import (
    give_system_prompt_for_rag,
    system_prompt,
    improved_system_prompt_for_rag,
    # the two prompts below are assumed to be defined in prompts.py;
    # they are referenced by the structured-output helpers further down
    intent_and_rag_system_prompt,
    system_prompt_for_checking_rag,
)
from chat_database import get_chat_history
from embeddings import get_query_embeddings
from qdrent import search_embeddings
from pydantic import BaseModel
import asyncio
load_dotenv()
provider = os.getenv("LLM_PROVIDER")

if provider == "google":
    # Gemini exposed through its OpenAI-compatible endpoint
    client = OpenAI(
        api_key=os.getenv("GEMINI_API_KEY"),
        base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
    )
elif provider == "together":
    client = OpenAI(
        api_key=os.getenv("TOGETHER_API_KEY"),
        base_url="https://api.together.xyz/v1",  # Together's OpenAI-compatible endpoint (assumed)
    )
else:
    # Fall back to OpenAI itself; read the key from the environment instead of hard-coding it
    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
llm_model = os.getenv("LLM_MODEL")


class IntentAndRagResponseFormat(BaseModel):
    intent: bool
    rag: bool


class RagResponseFormat(BaseModel):
    rag: bool


def getResponse(text, session_id):
    """Stream a chat completion built from the stored history for this session."""
    try:
        chat_history, user_prompt = get_chat_history(session_id)
        # Prepend the system prompt derived from the user's stored prompt
        chat_history.insert(
            0, {"role": "system", "content": system_prompt(user_prompt)})
        response = client.chat.completions.create(
            model=llm_model,
            messages=chat_history,
            stream=True,
        )
        return response
    except Exception as e:
        print("Error in getResponse : ", e)


async def getResponseAsync(text, session_id):
    # Run the blocking completion call in a worker thread so the event loop stays free
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, getResponse, text, session_id)


def getResponseWithRAG(text, session_id):
    """Stream a chat completion grounded in document chunks retrieved for this session."""
    try:
        chat_history = get_chat_history(session_id)[0]
        prompt = []
        query_embeddings = get_query_embeddings(text)
        content = search_embeddings(session_id, query_embeddings)
        # Build a numbered context block from the retrieved chunks, skipping very short ones
        paragraphs = ""
        for index, item in enumerate(content, start=1):
            chunk_text = item.payload["text"]
            if len(chunk_text) > 100:
                paragraphs += f"Paragraph {index} : {chunk_text}\n\n"
        rag_system_prompt = improved_system_prompt_for_rag(paragraphs)
        prompt.append({"role": "system", "content": rag_system_prompt})
        prompt.append(
            {"role": "user", "content": f"Previous chat : {str(chat_history)}"})
        prompt.append({"role": "user", "content": f"Query : {text}"})
        response = client.chat.completions.create(
            model=llm_model,
            messages=prompt,
            stream=True,
        )
        return response
    except Exception as e:
        print("Error in getResponseWithRAG : ", e)


async def getResponseWithRagAsync(text, session_id):
    # Run the blocking RAG call in a worker thread so the event loop stays free
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, getResponseWithRAG, text, session_id)


def check_for_user_intent_and_rag(text, session_id):
    """Classify whether the query needs intent handling and/or RAG, as structured output."""
    try:
        recent_history = get_chat_history(session_id)[0]
        response = client.beta.chat.completions.parse(
            model=llm_model,
            messages=[
                {"role": "system", "content": intent_and_rag_system_prompt},
                {"role": "user", "content": f"User Query : {text}"},
                {"role": "user", "content": f"Previous chat : {str(recent_history)}"},
            ],
            response_format=IntentAndRagResponseFormat,
        )
        return response.choices[0].message.parsed
    except Exception as e:
        print("Error in check_for_user_intent_and_rag : ", e)


def check_for_rag(text, session_id):
    """Classify whether the query should be answered with RAG, as structured output."""
    try:
        chat_history = get_chat_history(session_id)[0]
        recent_history = chat_history[-10:]  # only the last few turns are needed for routing
        response = client.beta.chat.completions.parse(
            model=llm_model,
            messages=[
                {"role": "system", "content": system_prompt_for_checking_rag},
                {"role": "user", "content": f"User Query : {text}"},
                {"role": "user", "content": f"Previous chat : {str(recent_history)}"},
            ],
            response_format=RagResponseFormat,
        )
        return response.choices[0].message.parsed
    except Exception as e:
        print("Error in check_for_rag : ", e)