Spaces:

Tonic
/

YiJina

Build error

App Files Community

Tonic committed on Jul 13, 2024

Commit

a86354d

1 Parent(s): 399e250

learning how to code with the post-introspector

Browse files

Files changed (1) hide show

app.py +9 -10

app.py CHANGED Viewed

@@ -65,18 +65,16 @@ class EmbeddingGenerator:
                 {"role": "user", "content": escaped_input_text}
             ]
         )
-        intention_output = intention_completion.choices[0].message['content']
         # Parse and route the intention
         parsed_task = parse_and_route(intention_output)
-        selected_task = list(parsed_task.keys())[0]
         # Construct the prompt
-        try:
             task_description = tasks[selected_task]
-        except KeyError:
             print(f"Selected task not found: {selected_task}")
-            return f"Error: Task '{selected_task}' not found. Please select a valid task."
         query_prefix = f"Instruct: {task_description}\nQuery: "
         queries = [escaped_input_text]
@@ -89,13 +87,14 @@ class EmbeddingGenerator:
                 {"role": "user", "content": escaped_input_text}
             ]
         )
-        metadata_output = metadata_completion.choices[0].message['content']
         metadata = self.extract_metadata(metadata_output)
         # Get the embeddings
         with torch.no_grad():
             inputs = self.tokenizer(queries, return_tensors='pt', padding=True, truncation=True, max_length=4096).to(self.device)
-            outputs = self.model(**inputs)
             query_embeddings = outputs.last_hidden_state.mean(dim=1)
             # Normalize embeddings
@@ -118,7 +117,7 @@ class MyEmbeddingFunction(EmbeddingFunction):
         self.embedding_generator = embedding_generator
     def __call__(self, input: Documents) -> (Embeddings, list):
-        embeddings_with_metadata = [self.embedding_generator.compute_embeddings(doc) for doc in input]
         embeddings = [item[0] for item in embeddings_with_metadata]
         metadata = [item[1] for item in embeddings_with_metadata]
         embeddings_flattened = [emb for sublist in embeddings for emb in sublist]

                 {"role": "user", "content": escaped_input_text}
             ]
         )
+        intention_output = intention_completion.choices[0].message.content
         # Parse and route the intention
         parsed_task = parse_and_route(intention_output)
+        selected_task = parsed_task
         # Construct the prompt
+        if selected_task in tasks:
             task_description = tasks[selected_task]
+        else:
+            task_description = tasks["DEFAULT"]
             print(f"Selected task not found: {selected_task}")
         query_prefix = f"Instruct: {task_description}\nQuery: "
         queries = [escaped_input_text]
                 {"role": "user", "content": escaped_input_text}
             ]
         )
+        metadata_output = metadata_completion.choices[0].message.content
         metadata = self.extract_metadata(metadata_output)
         # Get the embeddings
         with torch.no_grad():
             inputs = self.tokenizer(queries, return_tensors='pt', padding=True, truncation=True, max_length=4096).to(self.device)
+            outputs = self.model(**inputs)
+            query_embeddings = outputs["sentence_embeddings"].mean(dim=1)
             query_embeddings = outputs.last_hidden_state.mean(dim=1)
             # Normalize embeddings
         self.embedding_generator = embedding_generator
     def __call__(self, input: Documents) -> (Embeddings, list):
+        embeddings_with_metadata = [self.embedding_generator.compute_embeddings(doc.page_content) for doc in input]
         embeddings = [item[0] for item in embeddings_with_metadata]
         metadata = [item[1] for item in embeddings_with_metadata]
         embeddings_flattened = [emb for sublist in embeddings for emb in sublist]