Shreyas094 committed (verified)
Commit e3b0733 · Parent(s): cca553c

Update app.py

Files changed (1):
  1. app.py +56 -36
app.py CHANGED
@@ -352,6 +352,41 @@ def summarize_web_results(query: str, search_results: List[Dict[str, str]], conv
     except Exception as e:
         return f"An error occurred during summarization: {str(e)}"
 
+def get_response_from_excel(query, model, context, num_calls=3, temperature=0.2):
+    logging.info(f"Getting response from Excel using model: {model}")
+
+    messages = [
+        {"role": "system", "content": "You are a highly specialized data analyst with expertise in analyzing and summarizing Excel spreadsheets. Your goal is to provide accurate, detailed, and precise summaries based on the data provided. Focus on identifying key metrics, trends, and significant details relevant to the query. Avoid making assumptions or adding information that is not explicitly supported by the data."},
+        {"role": "user", "content": f"Using the following data extracted from Excel spreadsheets:\n{context}\n\nPlease analyze this data and provide a comprehensive answer to the following question: '{query}'. Include relevant statistics, trends, and insights. If appropriate, suggest visualizations that could help illustrate the findings."}
+    ]
+
+    if model.startswith("duckduckgo/"):
+        # Use DuckDuckGo chat with context
+        return get_response_from_duckduckgo(query, model, context, num_calls, temperature)
+    elif model == "@cf/meta/llama-3.1-8b-instruct":
+        # Use Cloudflare API
+        return get_response_from_cloudflare(prompt="", context=context, query=query, num_calls=num_calls, temperature=temperature, search_type="excel")
+    else:
+        # Use Hugging Face API
+        client = InferenceClient(model, token=huggingface_token)
+
+        response = ""
+        for i in range(num_calls):
+            logging.info(f"API call {i+1}/{num_calls}")
+            for message in client.chat_completion(
+                messages=messages,
+                max_tokens=20000,
+                temperature=temperature,
+                stream=True,
+                top_p=0.8,
+            ):
+                if message.choices and message.choices[0].delta and message.choices[0].delta.content:
+                    chunk = message.choices[0].delta.content
+                    response += chunk
+                    yield response  # Yield partial response
+
+        logging.info("Finished generating response for Excel data")
+
 # Modify the existing respond function to handle both PDF and web search
 def respond(message, history, model, temperature, num_calls, use_web_search, selected_docs):
     logging.info(f"User Query: {message}")
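One subtlety in the new get_response_from_excel helper is worth flagging before the next hunk: because the function body contains yield, Python compiles the whole function as a generator, so the `return get_response_from_duckduckgo(...)` and `return get_response_from_cloudflare(...)` branches merely set the generator's StopIteration value, and a caller that iterates the helper (as respond does in the hunks below) receives no chunks from those two paths. A minimal, self-contained sketch of the behaviour, using hypothetical stand-ins rather than the app's real helpers:

# Hypothetical stand-ins, not the app's helpers: they illustrate why
# `return <generator>` inside a generator drops the stream, while
# `yield from` delegates it to the caller.
def upstream():
    yield "partial"
    yield "partial response"

def broken(delegate):
    if delegate:
        return upstream()  # whole function is a generator: this just ends iteration
    yield "local"

def fixed(delegate):
    if delegate:
        yield from upstream()  # chunks flow through to the caller
        return
    yield "local"

print(list(broken(True)))  # []
print(list(fixed(True)))   # ['partial', 'partial response']

Two smaller points, both visible in the diff itself: the Hugging Face branch accumulates into response and yields the running total rather than each chunk, so consumers always see the full text so far (convenient when a UI re-renders the message on every update), and because the helper's user question arrives as query, the `for message in client.chat_completion(...)` loop no longer shadows a meaningful name, unlike the removed block in respond, where it shadowed the message argument.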
@@ -381,8 +416,8 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
             yield final_summary
         else:
             yield "Unable to generate a response. Please try a different query."
+
     else:
-        # PDF and Office documents search logic
         try:
            embed = get_embeddings()
            pdf_database = None
@@ -413,44 +448,29 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
                yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
                return
 
-           context_str = "\n".join([doc.page_content for doc in relevant_docs])
-           logging.info(f"Total context length: {len(context_str)}")
+           # Separate Excel documents from others
+           excel_docs = [doc for doc in relevant_docs if doc.metadata["source"].lower().endswith(('.xlsx', '.xls'))]
+           other_docs = [doc for doc in relevant_docs if not doc.metadata["source"].lower().endswith(('.xlsx', '.xls'))]
+
+           excel_context = "\n".join([doc.page_content for doc in excel_docs])
+           other_context = "\n".join([doc.page_content for doc in other_docs])
 
-           for doc in relevant_docs:
-               logging.info(f"Document source: {doc.metadata['source']}")
-               logging.info(f"Document content preview: {doc.page_content[:100]}...")  # Log first 100 characters of each document
+           logging.info(f"Excel context length: {len(excel_context)}")
+           logging.info(f"Other context length: {len(other_context)}")
 
-           if model == "@cf/meta/llama-3.1-8b-instruct":
-               logging.info("Using Cloudflare API")
-               # Use Cloudflare API with the retrieved context
-               for response in get_response_from_cloudflare(prompt="", context=context_str, query=message, num_calls=num_calls, temperature=temperature, search_type="document"):
+           # Process Excel documents
+           if excel_docs:
+               for response in get_response_from_excel(message, model, excel_context, num_calls, temperature):
                    yield response
-           else:
-               logging.info("Using Hugging Face API")
-               # Use Hugging Face API
-               messages = [
-                   {"role": "system", "content": "You are a highly specialized assistant with expertise in analyzing and summarizing various types of documents including PDFs, Word documents, and Excel spreadsheets. Your goal is to provide accurate, detailed, and precise summaries based on the context provided. Avoid making assumptions or adding information that is not explicitly supported by the context from the documents."},
-                   {"role": "user", "content": f"Using the following context from the uploaded documents:\n{context_str}\n\nPlease generate a step-by-step reasoning before arriving at a comprehensive and accurate summary addressing the following question: '{message}'. Ensure your response is strictly based on the provided context, highlighting key metrics, trends, and significant details relevant to the query. Avoid any speculative or unverified information."}
-               ]
-
-               client = InferenceClient(model, token=huggingface_token)
-
-               response = ""
-               for i in range(num_calls):
-                   logging.info(f"API call {i+1}/{num_calls}")
-                   for message in client.chat_completion(
-                       messages=messages,
-                       max_tokens=20000,
-                       temperature=temperature,
-                       stream=True,
-                       top_p=0.8,
-                   ):
-                       if message.choices and message.choices[0].delta and message.choices[0].delta.content:
-                           chunk = message.choices[0].delta.content
-                           response += chunk
-                           yield response  # Yield partial response
-
-               logging.info("Finished generating response")
+
+           # Process other documents (PDF, Word)
+           if other_docs:
+               if model == "@cf/meta/llama-3.1-8b-instruct":
+                   for response in get_response_from_cloudflare(prompt="", context=other_context, query=message, num_calls=num_calls, temperature=temperature, search_type="document"):
+                       yield response
+               else:
+                   for response in get_response_from_pdf(message, model, selected_docs, num_calls, temperature):
+                       yield response
 
        except Exception as e:
            logging.error(f"Error with {model}: {str(e)}")
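The reworked branch of respond now partitions the retrieved chunks by source-file extension and streams each group through its own generator. One detail to note: on the non-Cloudflare path the code passes selected_docs to get_response_from_pdf rather than the already-assembled other_context, so other_context is consumed only by the Cloudflare call and the length logging. A self-contained sketch of the partitioning idea; the Document class here is a hypothetical stand-in for the app's LangChain document objects:

# Hypothetical minimal stand-in for the app's LangChain documents, used only
# to demonstrate the extension-based routing introduced in this commit.
from dataclasses import dataclass, field

EXCEL_EXTENSIONS = ('.xlsx', '.xls')

@dataclass
class Document:
    page_content: str
    metadata: dict = field(default_factory=dict)

def partition_by_extension(docs):
    """Split retrieved chunks into Excel-sourced chunks and everything else."""
    excel = [d for d in docs if d.metadata["source"].lower().endswith(EXCEL_EXTENSIONS)]
    other = [d for d in docs if not d.metadata["source"].lower().endswith(EXCEL_EXTENSIONS)]
    return excel, other

docs = [
    Document("q1 revenue table", {"source": "financials.XLSX"}),
    Document("intro paragraph", {"source": "report.pdf"}),
]
excel_docs, other_docs = partition_by_extension(docs)
assert [d.metadata["source"] for d in excel_docs] == ["financials.XLSX"]
assert [d.metadata["source"] for d in other_docs] == ["report.pdf"]

Hoisting the extension tuple into a single constant keeps the two list comprehensions in sync and avoids the duplicated `.endswith(('.xlsx', '.xls'))` literals in the committed version; lowercasing the source first also catches upper-case extensions such as "financials.XLSX".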