derek-thomas
committed on
Commit
·
65f2fab
1
Parent(s):
615535c
Updating Embeddings space
Browse files - src/utilities.py +2 -1
src/utilities.py
CHANGED
|
@@ -11,6 +11,7 @@ SUBREDDIT = os.environ["SUBREDDIT"]
|
|
| 11 |
USERNAME = os.environ["USERNAME"]
|
| 12 |
OG_DATASET = f"{USERNAME}/dataset-creator-reddit-{SUBREDDIT}"
|
| 13 |
PROCESSED_DATASET = os.environ['PROCESSED_DATASET']
|
|
|
|
| 14 |
|
| 15 |
logger = setup_logger(__name__)
|
| 16 |
|
|
@@ -29,7 +30,7 @@ def load_datasets():
|
|
| 29 |
|
| 30 |
def merge_and_update_datasets(dataset, original_dataset):
|
| 31 |
# Get client
|
| 32 |
-
client = Client(
|
| 33 |
|
| 34 |
# Merge and figure out which rows need to be updated with embeddings
|
| 35 |
odf = original_dataset['train'].to_pandas()
|
|
|
|
| 11 |
USERNAME = os.environ["USERNAME"]
|
| 12 |
OG_DATASET = f"{USERNAME}/dataset-creator-reddit-{SUBREDDIT}"
|
| 13 |
PROCESSED_DATASET = os.environ['PROCESSED_DATASET']
|
| 14 |
+
embeddings_space = f"{USERNAME}/nomic-embeddings"
|
| 15 |
|
| 16 |
logger = setup_logger(__name__)
|
| 17 |
|
|
|
|
| 30 |
|
| 31 |
def merge_and_update_datasets(dataset, original_dataset):
|
| 32 |
# Get client
|
| 33 |
+
client = Client(embeddings_space)
|
| 34 |
|
| 35 |
# Merge and figure out which rows need to be updated with embeddings
|
| 36 |
odf = original_dataset['train'].to_pandas()
|