gourisankar85 committed on
Commit
ba71758
·
verified ·
1 Parent(s): 4fe5121

Update config.py

Browse files
Files changed (1) hide show
  1. config.py +35 -35
config.py CHANGED
@@ -1,36 +1,36 @@
1
-
2
- import os
3
-
4
- class ConfigConstants:
5
- # Constants related to datasets and models
6
- DATA_SET_PATH= '/persistent/local_datasets'
7
- DATA_SET_NAMES = ['covidqa', 'cuad', 'techqa','delucionqa', 'emanual', 'expertqa', 'finqa', 'hagrid', 'hotpotqa', 'msmarco', 'pubmedqa', 'tatqa']
8
- EMBEDDING_MODEL_NAME = "sentence-transformers/paraphrase-MiniLM-L3-v2"
9
- RE_RANKER_MODEL_NAME = 'cross-encoder/ms-marco-electra-base'
10
- GENERATION_MODEL_NAME = 'mixtral-8x7b-32768'
11
- VALIDATION_MODEL_NAME = 'llama3-70b-8192'
12
- GENERATION_MODELS = ["llama3-8b-8192", "qwen-2.5-32b", "mixtral-8x7b-32768", "gemma2-9b-it" ]
13
- VALIDATION_MODELS = ["llama3-70b-8192", "deepseek-r1-distill-llama-70b" ]
14
- DEFAULT_CHUNK_SIZE = 1000
15
- CHUNK_OVERLAP = 200
16
-
17
- class AppConfig:
18
- def __init__(self, vector_store, gen_llm, val_llm):
19
- self.vector_store = vector_store
20
- self.gen_llm = gen_llm
21
- self.val_llm = val_llm
22
- self.loaded_datasets = self.detect_loaded_datasets() # Auto-detect loaded datasets
23
-
24
- @staticmethod
25
- def detect_loaded_datasets():
26
- print('Calling detect_loaded_datasets')
27
- """Check which datasets are already stored locally."""
28
- local_path = ConfigConstants.DATA_SET_PATH
29
- if not os.path.exists(local_path):
30
- return set()
31
-
32
- dataset_files = os.listdir(local_path)
33
- loaded_datasets = {
34
- file.replace("_test.pkl", "") for file in dataset_files if file.endswith("_test.pkl")
35
- }
36
  return loaded_datasets
 
1
+
2
+ import os
3
+
4
+ class ConfigConstants:
5
+ # Constants related to datasets and models
6
+ DATA_SET_PATH= '/home/user/'
7
+ DATA_SET_NAMES = ['covidqa', 'cuad', 'techqa','delucionqa', 'emanual', 'expertqa', 'finqa', 'hagrid', 'hotpotqa', 'msmarco', 'pubmedqa', 'tatqa']
8
+ EMBEDDING_MODEL_NAME = "sentence-transformers/paraphrase-MiniLM-L3-v2"
9
+ RE_RANKER_MODEL_NAME = 'cross-encoder/ms-marco-electra-base'
10
+ GENERATION_MODEL_NAME = 'mixtral-8x7b-32768'
11
+ VALIDATION_MODEL_NAME = 'llama3-70b-8192'
12
+ GENERATION_MODELS = ["llama3-8b-8192", "qwen-2.5-32b", "mixtral-8x7b-32768", "gemma2-9b-it" ]
13
+ VALIDATION_MODELS = ["llama3-70b-8192", "deepseek-r1-distill-llama-70b" ]
14
+ DEFAULT_CHUNK_SIZE = 1000
15
+ CHUNK_OVERLAP = 200
16
+
17
+ class AppConfig:
18
+ def __init__(self, vector_store, gen_llm, val_llm):
19
+ self.vector_store = vector_store
20
+ self.gen_llm = gen_llm
21
+ self.val_llm = val_llm
22
+ self.loaded_datasets = self.detect_loaded_datasets() # Auto-detect loaded datasets
23
+
24
+ @staticmethod
25
+ def detect_loaded_datasets():
26
+ print('Calling detect_loaded_datasets')
27
+ """Check which datasets are already stored locally."""
28
+ local_path = ConfigConstants.DATA_SET_PATH
29
+ if not os.path.exists(local_path):
30
+ return set()
31
+
32
+ dataset_files = os.listdir(local_path)
33
+ loaded_datasets = {
34
+ file.replace("_test.pkl", "") for file in dataset_files if file.endswith("_test.pkl")
35
+ }
36
  return loaded_datasets