S1131 committed on
Commit
dc05a4f
·
verified ·
1 Parent(s): 188bee1

utils optimised

Browse files
Files changed (1) hide show
  1. utils.py +34 -31
utils.py CHANGED
@@ -11,6 +11,7 @@ from nltk.corpus import stopwords
11
  from collections import deque
12
  from typing import Tuple
13
  import torch
 
14
 
15
  # LangChain components
16
  from langchain_community.document_loaders import PyPDFLoader
@@ -25,10 +26,10 @@ from sentence_transformers import CrossEncoder
25
  from sklearn.metrics.pairwise import cosine_similarity
26
 
27
  # Initialize NLTK stopwords
28
- # nltk.download('stopwords')
29
- # stop_words = set(stopwords.words('english'))
30
- nltk.data.path.append('./nltk_data') # Point to local NLTK data
31
- stop_words = set(nltk.corpus.stopwords.words('english'))
32
 
33
  # mount
34
  import sys
@@ -38,7 +39,7 @@ sys.path.append('/mount/src/gen_ai_dev')
38
  DATA_PATH = "./Infy financial report/"
39
  DATA_FILES = ["INFY_2022_2023.pdf", "INFY_2023_2024.pdf"]
40
  EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
41
- LLM_MODEL = "gpt2" # "HuggingFaceH4/zephyr-7b-beta" # "microsoft/phi-2"
42
 
43
  # Environment settings
44
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
@@ -214,24 +215,26 @@ guard = SafetyGuard()
214
  # LLM Initialization
215
  # ------------------------------
216
  try:
217
- tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL)
218
- model = AutoModelForCausalLM.from_pretrained(
219
- LLM_MODEL,
220
- device_map="cpu",
221
- torch_dtype=torch.float32
222
- )
223
-
224
- generator = pipeline(
225
- "text-generation",
226
- model=model,
227
- tokenizer=tokenizer,
228
- max_new_tokens=400,
229
- do_sample=True,
230
- temperature=0.3,
231
- top_k=30,
232
- top_p=0.9,
233
- repetition_penalty=1.2
234
- )
 
 
235
  except Exception as e:
236
  print(f"Error loading model: {e}")
237
  raise
@@ -268,24 +271,24 @@ Answer:"""
268
  print(f"\n\n[For Debug Only] Prompt: {prompt}\n\n")
269
  response = generator(prompt)[0]['generated_text']
270
  print(f"\n\n[For Debug Only] response: {response}\n\n")
271
-
272
  clean_response = extract_final_response(response)
273
  clean_response = guard.filter_output(clean_response)
274
  print(f"\n\n[For Debug Only] clean_response: {clean_response}\n\n")
275
-
276
  query_embed = embeddings.embed_query(query)
277
  print(f"\n\n[For Debug Only] query_embed: {query_embed}\n\n")
278
-
279
  response_embed = embeddings.embed_query(clean_response)
280
  print(f"\n\n[For Debug Only] response_embed: {response_embed}\n\n")
281
-
282
  confidence = cosine_similarity([query_embed], [response_embed])[0][0]
283
  print(f"\n\n[For Debug Only] confidence: {confidence}\n\n")
284
-
285
- memory.add_interaction(query, clean_response)
286
-
287
  print(f"\n\n[For Debug Only] I'm Done \n\n")
288
  return clean_response, round(confidence, 2)
289
 
290
  except Exception as e:
291
- return f"Error processing request: {e}", 0.0
 
11
  from collections import deque
12
  from typing import Tuple
13
  import torch
14
+ import streamlit as st
15
 
16
  # LangChain components
17
  from langchain_community.document_loaders import PyPDFLoader
 
26
  from sklearn.metrics.pairwise import cosine_similarity
27
 
28
  # Initialize NLTK stopwords
29
+ nltk.download('stopwords')
30
+ stop_words = set(stopwords.words('english'))
31
+ # nltk.data.path.append('./nltk_data') # Point to local NLTK data
32
+ # stop_words = set(nltk.corpus.stopwords.words('english'))
33
 
34
  # mount
35
  import sys
 
39
  DATA_PATH = "./Infy financial report/"
40
  DATA_FILES = ["INFY_2022_2023.pdf", "INFY_2023_2024.pdf"]
41
  EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
42
+ LLM_MODEL = "gpt2" # Or "distilgpt2" # Or "HuggingFaceH4/zephyr-7b-beta" or "microsoft/phi-2"
43
 
44
  # Environment settings
45
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
215
  # LLM Initialization
216
  # ------------------------------
217
  try:
218
+ @st.cache_resource
219
+ def load_generator():
220
+ tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL)
221
+ model = AutoModelForCausalLM.from_pretrained(
222
+ LLM_MODEL,
223
+ device_map="auto",
224
+ torch_dtype=torch.float16,
225
+ load_in_4bit=True
226
+ )
227
+ return pipeline(
228
+ "text-generation",
229
+ model=model,
230
+ tokenizer=tokenizer,
231
+ max_new_tokens=100,
232
+ do_sample=False,
233
+ temperature=0.7,
234
+ top_k=0,
235
+ top_p=1,
236
+ )
237
+ generator = load_generator()
238
  except Exception as e:
239
  print(f"Error loading model: {e}")
240
  raise
 
271
  print(f"\n\n[For Debug Only] Prompt: {prompt}\n\n")
272
  response = generator(prompt)[0]['generated_text']
273
  print(f"\n\n[For Debug Only] response: {response}\n\n")
274
+
275
  clean_response = extract_final_response(response)
276
  clean_response = guard.filter_output(clean_response)
277
  print(f"\n\n[For Debug Only] clean_response: {clean_response}\n\n")
278
+
279
  query_embed = embeddings.embed_query(query)
280
  print(f"\n\n[For Debug Only] query_embed: {query_embed}\n\n")
281
+
282
  response_embed = embeddings.embed_query(clean_response)
283
  print(f"\n\n[For Debug Only] response_embed: {response_embed}\n\n")
284
+
285
  confidence = cosine_similarity([query_embed], [response_embed])[0][0]
286
  print(f"\n\n[For Debug Only] confidence: {confidence}\n\n")
287
+
288
+ memory.add_interaction(query, clean_response)
289
+
290
  print(f"\n\n[For Debug Only] I'm Done \n\n")
291
  return clean_response, round(confidence, 2)
292
 
293
  except Exception as e:
294
+ return f"Error processing request: {e}", 0.0