S1131 committed on
Commit
1d92be9
·
verified ·
1 Parent(s): 5e632d0

Update app_utils.py

Browse files
Files changed (1) hide show
  1. app_utils.py +9 -8
app_utils.py CHANGED
@@ -4,6 +4,10 @@ Financial Chatbot Utilities
4
  Core functionality for RAG-based financial chatbot
5
  """
6
 
 
 
 
 
7
  import os
8
  import re
9
  import nltk
@@ -26,20 +30,17 @@ from sentence_transformers import CrossEncoder
26
  from sklearn.metrics.pairwise import cosine_similarity
27
 
28
  # Initialize NLTK stopwords
29
- nltk.download('stopwords')
30
- stop_words = set(stopwords.words('english'))
31
- # nltk.data.path.append('./nltk_data') # Point to local NLTK data
32
- # stop_words = set(nltk.corpus.stopwords.words('english'))
33
 
34
- # mount
35
- import sys
36
- sys.path.append('/mount/src/gen_ai_dev')
37
 
38
  # Configuration
39
  DATA_PATH = "./Infy financial report/"
40
  DATA_FILES = ["INFY_2022_2023.pdf", "INFY_2023_2024.pdf"]
41
  EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
42
- LLM_MODEL = "gpt2" # Or "distilgpt2" # Or "HuggingFaceH4/zephyr-7b-beta" or "microsoft/phi-2"
43
 
44
  # Environment settings
45
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
4
  Core functionality for RAG-based financial chatbot
5
  """
6
 
7
+ # mount
8
+ import sys
9
+ sys.path.append('/home/user/app')
10
+
11
  import os
12
  import re
13
  import nltk
 
30
  from sklearn.metrics.pairwise import cosine_similarity
31
 
32
  # Initialize NLTK stopwords
33
+ # nltk.download('stopwords')
34
+ # stop_words = set(stopwords.words('english'))
35
+ nltk.data.path.append('./nltk_data') # Point to local NLTK data
36
+ stop_words = set(nltk.corpus.stopwords.words('english'))
37
 
 
 
 
38
 
39
  # Configuration
40
  DATA_PATH = "./Infy financial report/"
41
  DATA_FILES = ["INFY_2022_2023.pdf", "INFY_2023_2024.pdf"]
42
  EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
43
+ LLM_MODEL = "microsoft/phi-1.5" # or "gpt2" # Or "distilgpt2" # Or "HuggingFaceH4/zephyr-7b-beta" or "microsoft/phi-2"
44
 
45
  # Environment settings
46
  os.environ["TOKENIZERS_PARALLELISM"] = "false"