Upload folder using huggingface_hub
Browse files
- rag_fns/setup_load.py +9 -2
rag_fns/setup_load.py
CHANGED
@@ -7,6 +7,8 @@ import numpy as np
|
|
7 |
from dotenv import load_dotenv
|
8 |
from openai import OpenAI
|
9 |
from pyprojroot import here
|
|
|
|
|
10 |
|
11 |
logger = logging.getLogger(__name__)
|
12 |
|
@@ -38,8 +40,13 @@ def import_data() -> tuple[list[str], np.ndarray, dict[str, Any]]:
|
|
38 |
tuple[pd.DataFrame, dict, dict]: A tuple containing the talks dataframe, transcript dictionaries, and full embeddings.
|
39 |
"""
|
40 |
|
41 |
-
|
42 |
-
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
talk_ids = data2load["talk_ids"]
|
45 |
embeds = data2load["embeds"]
|
|
|
7 |
from dotenv import load_dotenv
|
8 |
from openai import OpenAI
|
9 |
from pyprojroot import here
|
10 |
+
import requests
|
11 |
+
import io
|
12 |
|
13 |
logger = logging.getLogger(__name__)
|
14 |
|
|
|
40 |
tuple[pd.DataFrame, dict, dict]: A tuple containing the talks dataframe, transcript dictionaries, and full embeddings.
|
41 |
"""
|
42 |
|
43 |
+
target_file_url = "https://raw.githubusercontent.com/AlanFeder/dcr-multi-frameworks/main/data/interim/embeds_talks_dcr.pkl"
|
44 |
+
|
45 |
+
response = requests.get(target_file_url)
|
46 |
+
response.raise_for_status() # Ensure we got a successful response
|
47 |
+
|
48 |
+
# Load the .pkl file into a Python object
|
49 |
+
data2load = pickle.load(io.BytesIO(response.content))
|
50 |
|
51 |
talk_ids = data2load["talk_ids"]
|
52 |
embeds = data2load["embeds"]
|