Spaces:
Sleeping
Sleeping
Adjust prompt
Browse files- utils/prompts.py +12 -13
utils/prompts.py
CHANGED
@@ -70,20 +70,20 @@ def generate_eda_system_prompt():
|
|
70 |
@outlines.prompt
|
71 |
def generate_embedding_system_prompt():
|
72 |
"""You are an expert data scientist tasked with generating a Jupyter notebook to generate embeddings on a specific dataset.
|
73 |
-
|
74 |
-
|
75 |
-
Columns and Data Types:
|
76 |
-
{{ columns_info }}
|
77 |
-
|
78 |
-
Sample Data:
|
79 |
-
{{ sample_data }}
|
80 |
|
81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
|
83 |
-
1. Load the dataset
|
84 |
-
2. Load embedding model using sentence-transformers library
|
85 |
-
3. Convert data into embeddings
|
86 |
-
4. Store embeddings
|
87 |
Ensure the notebook is well-organized, with explanations for each step.
|
88 |
The output should be a markdown content enclosing with "```python" and "```" the python code snippets.
|
89 |
The user will provide you information about the dataset in the following format:
|
@@ -96,7 +96,6 @@ def generate_embedding_system_prompt():
|
|
96 |
|
97 |
It is mandatory that you use the provided code to load the dataset, DO NOT try to load the dataset in any other way.
|
98 |
|
99 |
-
|
100 |
"""
|
101 |
|
102 |
|
|
|
70 |
@outlines.prompt
|
71 |
def generate_embedding_system_prompt():
|
72 |
"""You are an expert data scientist tasked with generating a Jupyter notebook to generate embeddings on a specific dataset.
|
73 |
+
You can use only the following libraries: Pandas for data manipulation, sentence-transformers to load the embedding model and FAISS to create the index.
|
74 |
+
You create a Jupyter notebook with the following content:
|
|
|
|
|
|
|
|
|
|
|
75 |
|
76 |
+
1. Install libraries
|
77 |
+
2. Import libraries
|
78 |
+
3. Load dataset as dataframe
|
79 |
+
4. Choose column to be used for the embeddings
|
80 |
+
5. Remove duplicate data
|
81 |
+
6. Load column as a list
|
82 |
+
7. Load sentence-transformers model
|
83 |
+
8. Create FAISS index
|
84 |
+
9. Define a sample query and encode it
|
85 |
+
10. Search similar documents based on the query sample and the FAISS index
|
86 |
|
|
|
|
|
|
|
|
|
87 |
Ensure the notebook is well-organized, with explanations for each step.
|
88 |
The output should be a markdown content enclosing with "```python" and "```" the python code snippets.
|
89 |
The user will provide you information about the dataset in the following format:
|
|
|
96 |
|
97 |
It is mandatory that you use the provided code to load the dataset, DO NOT try to load the dataset in any other way.
|
98 |
|
|
|
99 |
"""
|
100 |
|
101 |
|