juancho72h committed
Commit e5e0026 · verified · 1 Parent(s): 79041c3

Upload app.py

Files changed (1): app.py +26 -5
app.py CHANGED
@@ -6,6 +6,12 @@ import torch
 from dotenv import load_dotenv
 from pinecone import Pinecone
 from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+from fuzzywuzzy import fuzz
+import logging
+import re  # To help with preprocessing
+
+# Set up logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
 # Detect GPU availability and set device
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -42,6 +48,12 @@ embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/msmarc
 # Initialize chat history manually
 chat_history = []
 
+# Helper function to preprocess text (removing unnecessary words)
+def preprocess_text(text):
+    # Convert text to lowercase and remove special characters
+    text = re.sub(r'[^\w\s]', '', text.lower())
+    return text.strip()
+
 # Helper function to recursively flatten any list to a string
 def flatten_to_string(data):
     if isinstance(data, list):
@@ -53,12 +65,15 @@ def flatten_to_string(data):
 # Function to interact with Pinecone and OpenAI GPT-4
 def get_model_response(human_input):
     try:
+        # Preprocess the human input (cleaning up unnecessary words)
+        processed_input = preprocess_text(human_input)
+
         # Embed the query
         query_embedding = torch.tensor(embedding_model.embed_query(human_input)).to(device)
         query_embedding = query_embedding.cpu().numpy().tolist()
 
-        # Query Pinecone index
-        search_results = index.query(vector=query_embedding, top_k=2, include_metadata=True)
+        # Query Pinecone index with top_k=5 to get more potential matches
+        search_results = index.query(vector=query_embedding, top_k=5, include_metadata=True)
 
         context_list, images = [], []
         for ind, result in enumerate(search_results['matches']):
@@ -66,9 +81,15 @@ def get_model_response(human_input):
             image_url = flatten_to_string(result.get('metadata', {}).get('image_path', None))
             figure_desc = flatten_to_string(result.get('metadata', {}).get('figure_description', ''))
 
-            context_list.append(f"Relevant information: {document_content}")
-            if image_url and figure_desc:
-                images.append((figure_desc, image_url))
+            # Preprocess the figure description and match keywords
+            processed_figure_desc = preprocess_text(figure_desc)
+            similarity_score = fuzz.token_set_ratio(processed_input, processed_figure_desc)
+            logging.info(f"Matching '{processed_input}' with '{processed_figure_desc}', similarity score: {similarity_score}")
+
+            if similarity_score >= 80:  # Keep the threshold at 80 for now
+                context_list.append(f"Relevant information: {document_content}")
+                if image_url and figure_desc:
+                    images.append((figure_desc, image_url))
 
         context_string = '\n\n'.join(context_list)
 
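For context on the gating logic this commit adds: a Pinecone match (and its figure image) is kept only when the fuzzy-match score between the cleaned query and the cleaned figure description reaches 80. Below is a minimal standalone sketch of that path, using the same re and fuzzywuzzy calls as the diff; the sample query and figure-description strings are invented for illustration.

import re
from fuzzywuzzy import fuzz

def preprocess_text(text):
    # Same cleanup as the diff: lowercase, strip punctuation, trim whitespace
    return re.sub(r'[^\w\s]', '', text.lower()).strip()

# Hypothetical stand-ins for a user query and a stored figure description
query = "How do I replace the pump impeller?"
figure_desc = "Figure 12: Pump impeller replacement steps."

score = fuzz.token_set_ratio(preprocess_text(query), preprocess_text(figure_desc))
print(score)  # token_set_ratio returns an int in 0..100

# Mirror of the diff's gate: only attach the figure when the score clears 80
if score >= 80:
    print("figure attached to context")
else:
    print("figure skipped")

Note that in the committed code only the fuzzy gate uses processed_input; the embedding call still receives the raw human_input, so preprocessing affects figure selection but not retrieval.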