utils optimised
utils.py
CHANGED
@@ -11,6 +11,7 @@ from nltk.corpus import stopwords
 from collections import deque
 from typing import Tuple
 import torch
+import streamlit as st
 
 # LangChain components
 from langchain_community.document_loaders import PyPDFLoader
@@ -25,10 +26,10 @@ from sentence_transformers import CrossEncoder
 from sklearn.metrics.pairwise import cosine_similarity
 
 # Initialize NLTK stopwords
-
-
-nltk.data.path.append('./nltk_data')  # Point to local NLTK data
-stop_words = set(nltk.corpus.stopwords.words('english'))
+nltk.download('stopwords')
+stop_words = set(stopwords.words('english'))
+# nltk.data.path.append('./nltk_data')  # Point to local NLTK data
+# stop_words = set(nltk.corpus.stopwords.words('english'))
 
 # mount
 import sys
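Note on the stopwords hunk: the commit switches from reading a bundled ./nltk_data directory to calling nltk.download('stopwords') at import time, which hits the network on every cold start. A minimal guarded variant (a sketch, not part of this commit) downloads only when the corpus is actually missing:

import nltk

try:
    nltk.data.find("corpora/stopwords")  # already available, e.g. from a bundled ./nltk_data
except LookupError:
    nltk.download("stopwords", quiet=True)  # fetch once on a fresh container

from nltk.corpus import stopwords
stop_words = set(stopwords.words("english"))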
@@ -38,7 +39,7 @@ sys.path.append('/mount/src/gen_ai_dev')
 DATA_PATH = "./Infy financial report/"
 DATA_FILES = ["INFY_2022_2023.pdf", "INFY_2023_2024.pdf"]
 EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
-LLM_MODEL = "gpt2"  # "HuggingFaceH4/zephyr-7b-beta"
+LLM_MODEL = "gpt2"  # Or "distilgpt2" # Or "HuggingFaceH4/zephyr-7b-beta" or "microsoft/phi-2"
 
 # Environment settings
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
@@ -214,24 +215,26 @@ guard = SafetyGuard()
 # LLM Initialization
 # ------------------------------
 try:
-    tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL)
-    model = AutoModelForCausalLM.from_pretrained(
-        LLM_MODEL,
-        device_map="auto",
-        torch_dtype=torch.float16,
-        load_in_4bit=True
-    )
-    generator = pipeline(
-        "text-generation",
-        model=model,
-        tokenizer=tokenizer,
-        max_new_tokens=100,
-        do_sample=False,
-        temperature=0.7,
-        top_k=0,
-        top_p=1,
-    )
-
+    @st.cache_resource
+    def load_generator():
+        tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL)
+        model = AutoModelForCausalLM.from_pretrained(
+            LLM_MODEL,
+            device_map="auto",
+            torch_dtype=torch.float16,
+            load_in_4bit=True
+        )
+        return pipeline(
+            "text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            max_new_tokens=100,
+            do_sample=False,
+            temperature=0.7,
+            top_k=0,
+            top_p=1,
+        )
+    generator = load_generator()
 except Exception as e:
     print(f"Error loading model: {e}")
     raise
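Note on the loader hunk: wrapping the model setup in a function decorated with @st.cache_resource is the actual optimisation here. Streamlit re-executes the script on every user interaction, and the cache makes the pipeline load once per process and be reused across reruns and sessions. Two caveats are worth flagging. First, load_in_4bit requires the bitsandbytes package and a CUDA GPU, so on a CPU-only Space that call will fail (and recent transformers releases prefer passing a BitsAndBytesConfig over the bare flag). Second, do_sample=False selects greedy decoding, so temperature, top_k, and top_p have no effect and transformers logs a warning about them. A hedged sketch reconciling both points, as an illustration rather than the committed code:

import torch
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

@st.cache_resource  # build the pipeline once per process; reuse across reruns
def load_generator(model_name: str = "gpt2"):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    kwargs = {}
    if torch.cuda.is_available():
        # 4-bit weights need bitsandbytes and a CUDA device; skip them on CPU.
        from transformers import BitsAndBytesConfig
        kwargs["device_map"] = "auto"
        kwargs["quantization_config"] = BitsAndBytesConfig(load_in_4bit=True)
    model = AutoModelForCausalLM.from_pretrained(model_name, **kwargs)
    # Greedy decoding: with do_sample=False, the sampling knobs are omitted.
    return pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=100,
        do_sample=False,
    )

generator = load_generator()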
@@ -268,24 +271,24 @@ Answer:"""
         print(f"\n\n[For Debug Only] Prompt: {prompt}\n\n")
         response = generator(prompt)[0]['generated_text']
         print(f"\n\n[For Debug Only] response: {response}\n\n")
-
+
         clean_response = extract_final_response(response)
         clean_response = guard.filter_output(clean_response)
         print(f"\n\n[For Debug Only] clean_response: {clean_response}\n\n")
-
+
         query_embed = embeddings.embed_query(query)
         print(f"\n\n[For Debug Only] query_embed: {query_embed}\n\n")
-
+
         response_embed = embeddings.embed_query(clean_response)
         print(f"\n\n[For Debug Only] response_embed: {response_embed}\n\n")
-
+
         confidence = cosine_similarity([query_embed], [response_embed])[0][0]
         print(f"\n\n[For Debug Only] confidence: {confidence}\n\n")
-
-        memory.add_interaction(query, clean_response)
-
+
+        memory.add_interaction(query, clean_response)
+
         print(f"\n\n[For Debug Only] I'm Done \n\n")
         return clean_response, round(confidence, 2)
 
     except Exception as e:
-        return f"Error processing request: {e}", 0.0
+        return f"Error processing request: {e}", 0.0
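Note on the answer hunk: most of the -/+ pairs here are whitespace-only churn (the paired lines are textually identical); the substantive logic is the confidence score, computed as the cosine similarity between the query embedding and the answer embedding. That measures how semantically close the answer is to the question, not whether the answer is factually correct. A self-contained illustration with toy vectors (the app itself uses the MiniLM embeddings configured above):

from sklearn.metrics.pairwise import cosine_similarity

# Toy 3-dimensional vectors standing in for
# embeddings.embed_query(query) and embeddings.embed_query(clean_response).
query_embed = [0.10, 0.30, 0.50]
response_embed = [0.12, 0.28, 0.52]

confidence = cosine_similarity([query_embed], [response_embed])[0][0]
print(round(confidence, 2))  # ~1.0 for near-parallel vectors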