Update app.py
app.py
CHANGED
@@ -184,74 +184,72 @@ def delete_documents(selected_docs):
    return f"Deleted documents: {', '.join(deleted_docs)}", display_documents()

-def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2):
-    print(f"Starting generate_chunked_response with {num_calls}
    full_response = ""
    messages = [{"role": "user", "content": prompt}]

    if model == "@cf/meta/llama-3.1-8b-instruct":
-        # Cloudflare API
        for i in range(num_calls):
            print(f"Starting Cloudflare API call {i+1}")
-            if should_stop:
-                print("Stop clicked, breaking loop")
-                break
            try:
-                        json_data = json.loads(line.decode('utf-8').split('data: ')[1])
-                        chunk = json_data['response']
-                        full_response += chunk
-                    except json.JSONDecodeError:
-                        continue
                print(f"Cloudflare API call {i+1} completed")
            except Exception as e:
                print(f"Error in generating response from Cloudflare: {str(e)}")
    else:
-        #
        client = InferenceClient(model, token=huggingface_token)

        for i in range(num_calls):
            print(f"Starting Hugging Face API call {i+1}")
-            if should_stop:
-                print("Stop clicked, breaking loop")
-                break
            try:
-                for message in client.
                    messages=messages,
                    max_tokens=max_tokens,
                    temperature=temperature,
-                    stream=True,
                ):
-                    if should_stop:
-                        print("Stop clicked during streaming, breaking")
-                        break
                    if message.choices and message.choices[0].delta and message.choices[0].delta.content:
                        chunk = message.choices[0].delta.content
                        full_response += chunk
                print(f"Hugging Face API call {i+1} completed")
            except Exception as e:
                print(f"Error in generating response from Hugging Face: {str(e)}")

    # Clean up the response
    clean_response = re.sub(r'<s>\[INST\].*?\[/INST\]\s*', '', full_response, flags=re.DOTALL)
    clean_response = clean_response.replace("Using the following context:", "").strip()
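Both the old and the new version finish by stripping any echoed instruction wrapper out of the accumulated text with re.sub. A small, self-contained illustration of that cleanup step, run on an invented response string:

import re

raw = "<s>[INST] Summarize the PDF [/INST] The report covers Q3 revenue."
clean = re.sub(r'<s>\[INST\].*?\[/INST\]\s*', '', raw, flags=re.DOTALL)
clean = clean.replace("Using the following context:", "").strip()
print(clean)  # -> The report covers Q3 revenue.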
@@ -272,7 +270,7 @@ def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temp
    final_response = '\n\n'.join(unique_paragraphs)

    print(f"Final clean response: {final_response[:100]}...")

def duckduckgo_search(query):
    with DDGS() as ddgs:
@@ -465,16 +463,15 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
        yield "No documents available. Please upload PDF documents to answer questions."
        return

-    # New function to get documents by source
-    def get_documents_by_source(database, sources):
-        return [doc for doc in database.docstore._dict.values() if doc.metadata["source"] in sources]
    # Log all documents in the database

    # Get only the selected documents
-    selected_docs_content =
    logging.info(f"Number of selected documents: {len(selected_docs_content)}")

    if not selected_docs_content:
        logging.warning(f"No documents found for the selected sources: {selected_docs}")
@@ -482,11 +479,14 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
        return

    # Perform similarity search on selected documents
-    relevant_docs =

    for doc in relevant_docs:
        logging.info(f"Document source: {doc.metadata['source']}")
@@ -575,7 +575,8 @@ use_web_search = gr.Checkbox(label="Use Web Search", value=True)
custom_placeholder = "Ask a question (Note: You can toggle between Web Search and PDF Chat in Additional Inputs below)"

demo = gr.ChatInterface(
-    respond,
    additional_inputs=[
        gr.Dropdown(choices=MODELS, label="Select Model", value=MODELS[3]),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.2, step=0.1, label="Temperature"),
@@ -184,74 +184,72 @@ def delete_documents(selected_docs):
    return f"Deleted documents: {', '.join(deleted_docs)}", display_documents()

+async def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2):
+    print(f"Starting generate_chunked_response with model: {model}, num_calls: {num_calls}")
    full_response = ""
    messages = [{"role": "user", "content": prompt}]

    if model == "@cf/meta/llama-3.1-8b-instruct":
+        # Cloudflare API logic
+        headers = {
+            "Authorization": f"Bearer {API_TOKEN}",
+            "Content-Type": "application/json"
+        }
+
        for i in range(num_calls):
            print(f"Starting Cloudflare API call {i+1}")
            try:
+                async with httpx.AsyncClient() as client:
+                    async with client.stream(
+                        "POST",
+                        f"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/run/@cf/meta/llama-3.1-8b-instruct",
+                        json={
+                            "messages": messages,
+                            "stream": True,
+                            "max_tokens": max_tokens,
+                            "temperature": temperature
+                        },
+                        headers=headers
+                    ) as response:
+                        async for line in response.aiter_lines():
+                            if line.startswith("data: "):
+                                try:
+                                    json_data = json.loads(line[6:])
+                                    chunk = json_data.get('response', '')
+                                    full_response += chunk
+                                    yield full_response
+                                except json.JSONDecodeError:
+                                    continue
                print(f"Cloudflare API call {i+1} completed")
            except Exception as e:
                print(f"Error in generating response from Cloudflare: {str(e)}")
+                if i == num_calls - 1:  # If this is the last attempt
+                    yield full_response  # Yield whatever response we have so far
    else:
+        # Hugging Face API logic
        client = InferenceClient(model, token=huggingface_token)

        for i in range(num_calls):
            print(f"Starting Hugging Face API call {i+1}")
            try:
+                async for message in client.chat_completion_stream(
                    messages=messages,
                    max_tokens=max_tokens,
                    temperature=temperature,
                ):
                    if message.choices and message.choices[0].delta and message.choices[0].delta.content:
                        chunk = message.choices[0].delta.content
                        full_response += chunk
+                        yield full_response
                print(f"Hugging Face API call {i+1} completed")
            except Exception as e:
                print(f"Error in generating response from Hugging Face: {str(e)}")
+                if i == num_calls - 1:  # If this is the last attempt
+                    yield full_response  # Yield whatever response we have so far

+    if not full_response:
+        yield "I apologize, but I couldn't generate a response at this time. Please try again later."
+
    # Clean up the response
    clean_response = re.sub(r'<s>\[INST\].*?\[/INST\]\s*', '', full_response, flags=re.DOTALL)
    clean_response = clean_response.replace("Using the following context:", "").strip()
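In the updated Cloudflare branch above, the reply is streamed as server-sent events and rebuilt by stripping the "data: " prefix from each line before decoding the JSON payload. A minimal, self-contained sketch of that parsing step, assuming Cloudflare-style event lines and an end-of-stream "[DONE]" marker (the sample lines are invented for illustration):

import json

def parse_sse_lines(lines):
    # Accumulate the 'response' field from "data: {...}" event lines.
    full_response = ""
    for line in lines:
        if not line.startswith("data: "):
            continue
        payload = line[6:]  # drop the "data: " prefix, as the diff does
        if payload.strip() == "[DONE]":  # assumed end-of-stream sentinel
            break
        try:
            full_response += json.loads(payload).get("response", "")
        except json.JSONDecodeError:
            continue  # skip keep-alive or partial lines
    return full_response

sample = ['data: {"response": "Hel"}', 'data: {"response": "lo"}', 'data: [DONE]']
print(parse_sse_lines(sample))  # -> Hello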
@@ -272,7 +270,7 @@ def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temp
    final_response = '\n\n'.join(unique_paragraphs)

    print(f"Final clean response: {final_response[:100]}...")
+    yield final_response

def duckduckgo_search(query):
    with DDGS() as ddgs:
@@ -465,16 +463,15 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
        yield "No documents available. Please upload PDF documents to answer questions."
        return

    # Log all documents in the database
+    all_docs = list(database.docstore._dict.values())
+    logging.info(f"All documents in database: {[doc.metadata['source'] for doc in all_docs]}")
+    logging.info(f"Number of documents in database: {len(all_docs)}")

    # Get only the selected documents
+    selected_docs_content = [doc for doc in all_docs if doc.metadata["source"] in selected_docs]
    logging.info(f"Number of selected documents: {len(selected_docs_content)}")
+    logging.info(f"Selected documents: {[doc.metadata['source'] for doc in selected_docs_content]}")

    if not selected_docs_content:
        logging.warning(f"No documents found for the selected sources: {selected_docs}")
@@ -482,11 +479,14 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
        return

    # Perform similarity search on selected documents
+    relevant_docs = database.similarity_search(query, k=5, filter=lambda doc: doc.metadata["source"] in selected_docs)
+    logging.info(f"Number of relevant documents after similarity search: {len(relevant_docs)}")
+    logging.info(f"Relevant documents: {[doc.metadata['source'] for doc in relevant_docs]}")

+    if not relevant_docs:
+        logging.warning("No relevant documents found after similarity search")
+        yield "No relevant information found in the selected documents. Please try rephrasing your query."
+        return

    for doc in relevant_docs:
        logging.info(f"Document source: {doc.metadata['source']}")
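The similarity search added above restricts hits to the user-selected PDFs by filtering on each document's "source" metadata. A rough, self-contained sketch of that filtering idea in plain Python (the sample hits and file names are invented; the real code delegates this to the vector store's filter argument):

hits = [
    {"text": "chunk from the annual report", "metadata": {"source": "report.pdf"}},
    {"text": "chunk from meeting notes", "metadata": {"source": "notes.pdf"}},
    {"text": "another report chunk", "metadata": {"source": "report.pdf"}},
]
selected_docs = {"report.pdf"}

# Keep only hits whose source is among the selected documents,
# mirroring the lambda passed to similarity_search in the diff.
relevant = [h for h in hits if h["metadata"]["source"] in selected_docs]
print([h["text"] for h in relevant])  # -> ['chunk from the annual report', 'another report chunk']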
@@ -575,7 +575,8 @@ use_web_search = gr.Checkbox(label="Use Web Search", value=True)
custom_placeholder = "Ask a question (Note: You can toggle between Web Search and PDF Chat in Additional Inputs below)"

demo = gr.ChatInterface(
+    fn=respond,
+    async_mode=True,
    additional_inputs=[
        gr.Dropdown(choices=MODELS, label="Select Model", value=MODELS[3]),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.2, step=0.1, label="Temperature"),
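gr.ChatInterface can drive a streaming handler: when fn is a generator (or async generator), each yielded value replaces the partial reply in the UI, which is how the incremental yields above can reach the user. A minimal, self-contained sketch under that assumption (the echo handler and launch call are illustrative only, not part of this commit):

import gradio as gr

def echo_stream(message, history):
    # Illustrative streaming handler: yield progressively longer replies.
    reply = ""
    for ch in f"You said: {message}":
        reply += ch
        yield reply

example_demo = gr.ChatInterface(fn=echo_stream)
example_demo.launch()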