AlanFeder committed on
Commit
d97d619
·
verified ·
1 Parent(s): 1c4216d

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. rag_fns/setup_load.py +9 -2
rag_fns/setup_load.py CHANGED
@@ -7,6 +7,8 @@ import numpy as np
7
  from dotenv import load_dotenv
8
  from openai import OpenAI
9
  from pyprojroot import here
 
 
10
 
11
  logger = logging.getLogger(__name__)
12
 
@@ -38,8 +40,13 @@ def import_data() -> tuple[list[str], np.ndarray, dict[str, Any]]:
38
  tuple[pd.DataFrame, dict, dict]: A tuple containing the talks dataframe, transcript dictionaries, and full embeddings.
39
  """
40
 
41
- with open(here() / "data" / "interim" / "embeds_talks_dcr.pkl", "rb") as f:
42
- data2load = pickle.load(f)
 
 
 
 
 
43
 
44
  talk_ids = data2load["talk_ids"]
45
  embeds = data2load["embeds"]
 
7
  from dotenv import load_dotenv
8
  from openai import OpenAI
9
  from pyprojroot import here
10
+ import requests
11
+ import io
12
 
13
  logger = logging.getLogger(__name__)
14
 
 
40
  tuple[pd.DataFrame, dict, dict]: A tuple containing the talks dataframe, transcript dictionaries, and full embeddings.
41
  """
42
 
43
+ target_file_url = "https://raw.githubusercontent.com/AlanFeder/dcr-multi-frameworks/main/data/interim/embeds_talks_dcr.pkl"
44
+
45
+ response = requests.get(target_file_url)
46
+ response.raise_for_status() # Ensure we got a successful response
47
+
48
+ # Load the .pkl file into a Python object
49
+ data2load = pickle.load(io.BytesIO(response.content))
50
 
51
  talk_ids = data2load["talk_ids"]
52
  embeds = data2load["embeds"]