updated chunk size, number of retrieved docs & use llama3 in stream
app.py CHANGED
```diff
@@ -31,8 +31,8 @@ def tiktoken_len(text):
     return len(tokens)
 
 text_splitter = RecursiveCharacterTextSplitter(
-    chunk_size=
-    chunk_overlap=
+    chunk_size=750,
+    chunk_overlap=350,
     length_function=tiktoken_len,
     separators=["\n\n", "\n", " ", ""]
 )
```
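Because `length_function=tiktoken_len`, `chunk_size` and `chunk_overlap` are measured in tokens rather than characters, so chunks are now up to 750 tokens with a 350-token overlap between neighbours. A minimal sketch of the resulting splitter in isolation; the `cl100k_base` encoding and the import path are assumptions, not shown in this commit:

```python
# Hedged sketch of the new chunking setup; the "cl100k_base" encoding
# and the LangChain import path are assumptions, not part of this commit.
import tiktoken
from langchain.text_splitter import RecursiveCharacterTextSplitter

tokenizer = tiktoken.get_encoding("cl100k_base")

def tiktoken_len(text):
    # Token count, so chunk_size/chunk_overlap below are token budgets.
    return len(tokenizer.encode(text, disallowed_special=()))

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=750,      # new: max tokens per chunk
    chunk_overlap=350,   # new: tokens shared by consecutive chunks
    length_function=tiktoken_len,
    separators=["\n\n", "\n", " ", ""],
)

chunks = text_splitter.split_text(long_document_text)  # any str to index
```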
```diff
@@ -257,7 +257,7 @@ def ask_llm(system, user_input):
     client = Groq(api_key=os.environ["GROQ_KEY"])
     chat_completion = client.chat.completions.create(
         messages=messages,
-        model='mixtral-8x7b-32768',
+        model="llama3-70b-8192",#'mixtral-8x7b-32768',
     )
     return chat_completion.choices[0].message.content
```
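The Groq call now targets Llama 3 70B, keeping the old Mixtral model id as a trailing comment. The hunk itself is a blocking call; since the commit title mentions streaming, here is a hedged sketch of what the streaming variant looks like with the Groq SDK (which follows the OpenAI-style client interface). This exact code is not in the hunk above, only suggested by the title:

```python
# Hedged sketch of streaming with the Groq SDK; not part of the hunk
# above, only implied by "use llama3 in stream" in the commit title.
import os
from groq import Groq

client = Groq(api_key=os.environ["GROQ_KEY"])
stream = client.chat.completions.create(
    messages=[{"role": "user", "content": "Say hello."}],
    model="llama3-70b-8192",
    stream=True,  # yields incremental chunks instead of one response
)
for chunk in stream:
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end="", flush=True)
```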
```diff
@@ -307,7 +307,7 @@ def ask_gpt(query, ui_session_id, history):
         print(f"SESSION: {session_id} database does not exist")
         return f"SESSION: {session_id} database does not exist","",""
 
-    docs = db.similarity_search(query, k=
+    docs = db.similarity_search(query, k=4)
 
     documents = "\n\n*-*-*-*-*-*\n\n".join(f"Content: {doc.page_content}\n" for doc in docs)
     system = f"# Instructions\nTake a deep breath and resonate step by step.\nYou are a helpful standard assistant. Your have only one mission and that consists in answering to the user input based on the **provided documents**. If the answer to the question that is asked by the user isn't contained in the **provided documents**, say so but **don't make up an answer**. I chose you because you can say 'I don't know' so please don't do like the other LLMs and don't define acronyms that aren\'t present in the following **PROVIDED DOCUMENTS** double check if it is present before answering. If some of the information can be useful for the user you can tell him.\nFinish your response by **ONE** follow up question that the provided documents could answer.\n\nThe documents are separated by the string \'*-*-*-*-*-*\'. Do not provide any explanations or details.\n\n# **Provided documents**: {documents}."
```
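Retrieval now pulls the top k=4 chunks, which at chunk_size=750 tokens bounds the retrieved context at roughly 4 × 750 = 3,000 tokens, comfortably inside llama3-70b-8192's 8,192-token window once the instructions are added. A hedged end-to-end sketch follows; the vector store type and embedding model are stand-ins, since the commit only shows the `similarity_search` call and the prompt assembly:

```python
# Hedged sketch: FAISS and the embedding model are assumptions; only
# similarity_search(query, k=4) and the join are from this commit.
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)
db = FAISS.from_texts(chunks, embeddings)  # "chunks" from the splitter sketch

query = "What does the corpus say about X?"
docs = db.similarity_search(query, k=4)  # top-4 chunks by embedding similarity

# Chunks are joined with the same sentinel the system prompt tells the
# model to treat as a document separator.
documents = "\n\n*-*-*-*-*-*\n\n".join(
    f"Content: {doc.page_content}\n" for doc in docs
)
```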