Update utils.py
utils.py CHANGED
@@ -12,8 +12,6 @@ from collections import deque
 from typing import Tuple
 import torch
 
-import streamlit as st
-
 # LangChain components
 from langchain_community.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -26,31 +24,21 @@ from rank_bm25 import BM25Okapi
 from sentence_transformers import CrossEncoder
 from sklearn.metrics.pairwise import cosine_similarity
 
-import sys
-
-sys.path.append('/mount/src/gen_ai_dev')
-
-# these three lines swap the stdlib sqlite3 lib with the pysqlite3 package
-import pysqlite3
-import sys
-sys.modules["sqlite3"] = pysqlite3
-
-__import__('pysqlite3')
-import sys
-
-sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
-
 # Initialize NLTK stopwords
 # nltk.download('stopwords')
 # stop_words = set(stopwords.words('english'))
 nltk.data.path.append('./nltk_data')  # Point to local NLTK data
 stop_words = set(nltk.corpus.stopwords.words('english'))
 
+# mount
+import sys
+sys.path.append('/mount/src/gen_ai_dev')
+
 # Configuration
 DATA_PATH = "./Infy financial report/"
 DATA_FILES = ["INFY_2022_2023.pdf", "INFY_2023_2024.pdf"]
 EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
-LLM_MODEL = "
+LLM_MODEL = "microsoft/phi-2"
 
 # Environment settings
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
@@ -92,24 +80,12 @@ def load_and_chunk_documents():
 text_chunks = load_and_chunk_documents()
 embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
 
-
-
-
-
-
-
-    # Initialize embeddings
-    embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
-
-    # Create and return Chroma vector store
-    return Chroma.from_documents(
-        documents=text_chunks,
-        embedding=embeddings,
-        persist_directory="./chroma_db"
-    )
-
-# Initialize vector_db
-vector_db = load_vector_db()
+vector_db = Chroma.from_documents(
+    documents=text_chunks,
+    embedding=embeddings,
+    persist_directory="./chroma_db"
+)
+vector_db.persist()
 
 # BM25 setup
 bm25_corpus = [chunk.page_content for chunk in text_chunks]
@@ -137,8 +113,10 @@ class ConversationMemory:
             [f"Previous Q: {q}\nPrevious A: {r}" for q, r in self.buffer]
         )
 
+
 memory = ConversationMemory(max_size=3)
 
+
 # ------------------------------
 # Hybrid Retrieval System
 # ------------------------------
@@ -211,8 +189,8 @@ class SafetyGuard:
         query_lower = query.lower()
         if any(topic in query_lower for topic in self.blocked_topics):
             return False, "I only discuss financial topics."
-
-
+        if not any(term in query_lower for term in self.financial_terms):
+            return False, "Please ask financial questions."
         return True, ""
 
     def filter_output(self, response: str) -> str:
@@ -236,37 +214,24 @@ guard = SafetyGuard()
 # LLM Initialization
 # ------------------------------
 try:
-
-
-
-
-
-
-            device_map="auto",
-            torch_dtype=torch.bfloat16,
-            load_in_4bit=True
-        )
-    else:
-        model = AutoModelForCausalLM.from_pretrained(
-            LLM_MODEL,
-            device_map="cpu",
-            torch_dtype=torch.float32
-        )
-    return pipeline(
-        "text-generation",
-        model=model,
-        tokenizer=tokenizer,
-        max_new_tokens=400,
-        do_sample=True,
-        temperature=0.3,
-        top_k=30,
-        top_p=0.9,
-        repetition_penalty=1.2
-    )
-
+    tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL)
+    model = AutoModelForCausalLM.from_pretrained(
+        LLM_MODEL,
+        device_map="cpu",
+        torch_dtype=torch.float32
+    )
 
-
-
+    generator = pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=tokenizer,
+        max_new_tokens=400,
+        do_sample=True,
+        temperature=0.3,
+        top_k=30,
+        top_p=0.9,
+        repetition_penalty=1.2
+    )
 except Exception as e:
     print(f"Error loading model: {e}")
     raise
@@ -285,15 +250,13 @@ def extract_final_response(full_response: str) -> str:
 
 def generate_answer(query: str) -> Tuple[str, float]:
     try:
-        # Input validation
         is_valid, msg = guard.validate_input(query)
         if not is_valid:
             return msg, 0.0
 
-        # Retrieve context
         context = hybrid_retrieval(query)
+        vector_db.persist()
 
-        # Generate response
         prompt = f"""<|im_start|>system
 You are a financial analyst. Provide a brief answer using the context.
 Context: {context}<|im_end|>
@@ -302,19 +265,19 @@ Context: {context}<|im_end|>
 <|im_start|>assistant
 Answer:"""
 
+        print(f"\n\n[For Debug Only] Prompt: {prompt}\n\n")
+
         response = generator(prompt)[0]['generated_text']
         clean_response = extract_final_response(response)
         clean_response = guard.filter_output(clean_response)
 
-        # Calculate confidence
         query_embed = embeddings.embed_query(query)
        	response_embed = embeddings.embed_query(clean_response)
         confidence = cosine_similarity([query_embed], [response_embed])[0][0]
 
-        # Update memory
        	memory.add_interaction(query, clean_response)
 
         return clean_response, round(confidence, 2)
 
     except Exception as e:
-        return f"Error processing request: {e}", 0.0
+        return f"Error processing request: {e}", 0.0
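For reference, a minimal driver sketch for the module after this change. The script name and the sample question are hypothetical; it assumes utils.py and the PDFs under DATA_PATH are available locally, and it relies only on generate_answer(query) -> Tuple[str, float] as defined above. Note that importing utils triggers the module-level document chunking, Chroma build, and model loading, so the first call is slow.

# run_query.py -- hypothetical usage example, not part of this commit
from utils import generate_answer

# Illustrative question against the Infosys annual-report corpus.
answer, confidence = generate_answer("What was Infosys' total revenue in FY 2023?")
print(f"Answer: {answer}")
print(f"Confidence: {confidence}")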