B committed · Commit 43add07 · verified · Parent: 7aba694

other files

.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ Infy[[:space:]]financial[[:space:]]report/INFY_2022_2023.pdf filter=lfs diff=lfs merge=lfs -text
+ Infy[[:space:]]financial[[:space:]]report/INFY_2023_2024.pdf filter=lfs diff=lfs merge=lfs -text
Infy financial report/INFY_2022_2023.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:33cd6264b51e3979680d245eb917015058aff9652c3c1d9ee1b46a938272e858
+ size 13894776
Infy financial report/INFY_2023_2024.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b0a9bb9e802aff5f09733b8c78c88e9878732ac46e0fb29754c6da87ad47326a
+ size 11441269
README.md CHANGED
@@ -1,12 +1,41 @@
- ---
- title: Streamlit
- emoji:
- colorFrom: purple
- colorTo: purple
- sdk: streamlit
- sdk_version: 1.43.2
- app_file: app.py
- pinned: false
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ Financial Chatbot for Infosys Financial Reports
+ ------------------------------------------------
+ - A Retrieval-Augmented Generation (RAG) chatbot that answers questions about Infosys financial statements from the last two fiscal years (2022-2024).
+ - The chatbot combines open-source models with hybrid retrieval to produce accurate, concise answers.
+
+ Project Structure
+ ------------------
+ - The project is organized as follows:
+ ```
+ Financial-Chatbot/
+ ├── app.py                   # Streamlit application interface
+ ├── chroma_db/               # Chroma vector database storage
+ ├── Infy financial report/  # Folder containing Infosys financial PDFs
+ │   ├── INFY_2022_2023.pdf
+ │   └── INFY_2023_2024.pdf
+ ├── requirements.txt         # Python dependencies
+ ├── utils.py                 # Core functionality and RAG implementation
+ └── README.md                # This file
+ ```
+
+ Installation
+ --------------
+ Python version: ```Python 3.10.xx```
+
+ Install the dependencies: ```pip install -r requirements.txt```
+
+ Place PDFs:
+ ------------
+ - Ensure the Infosys financial reports (INFY_2022_2023.pdf and INFY_2023_2024.pdf) are in the ```Infy financial report/``` folder.
+
+ Running the Application
+ ------------------------
+ - To start the chatbot, run:
+
+ ```streamlit run app.py --server.enableCORS false```
+
+ - Streamlit will print a local URL (e.g., http://localhost:8501); open it in your browser to interact with the chatbot.
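+
+ Programmatic Use
+ -----------------
+ - The Streamlit UI is the intended interface, but the core pipeline in utils.py can also be called directly. A minimal sketch (note that importing utils builds the indexes and loads the LLM at import time, so the first call is slow):
+
+ ```python
+ from utils import generate_answer
+
+ # Returns (answer_text, confidence); confidence is the cosine similarity
+ # between the query and answer embeddings.
+ answer, confidence = generate_answer("What was Infosys' revenue in FY2023?")
+ print(f"{answer} (confidence: {confidence})")
+ ```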
nltk_data/corpora/stopwords/english ADDED
@@ -0,0 +1,198 @@
+ a
+ about
+ above
+ after
+ again
+ against
+ ain
+ all
+ am
+ an
+ and
+ any
+ are
+ aren
+ aren't
+ as
+ at
+ be
+ because
+ been
+ before
+ being
+ below
+ between
+ both
+ but
+ by
+ can
+ couldn
+ couldn't
+ d
+ did
+ didn
+ didn't
+ do
+ does
+ doesn
+ doesn't
+ doing
+ don
+ don't
+ down
+ during
+ each
+ few
+ for
+ from
+ further
+ had
+ hadn
+ hadn't
+ has
+ hasn
+ hasn't
+ have
+ haven
+ haven't
+ having
+ he
+ he'd
+ he'll
+ her
+ here
+ hers
+ herself
+ he's
+ him
+ himself
+ his
+ how
+ i
+ i'd
+ if
+ i'll
+ i'm
+ in
+ into
+ is
+ isn
+ isn't
+ it
+ it'd
+ it'll
+ it's
+ its
+ itself
+ i've
+ just
+ ll
+ m
+ ma
+ me
+ mightn
+ mightn't
+ more
+ most
+ mustn
+ mustn't
+ my
+ myself
+ needn
+ needn't
+ no
+ nor
+ not
+ now
+ o
+ of
+ off
+ on
+ once
+ only
+ or
+ other
+ our
+ ours
+ ourselves
+ out
+ over
+ own
+ re
+ s
+ same
+ shan
+ shan't
+ she
+ she'd
+ she'll
+ she's
+ should
+ shouldn
+ shouldn't
+ should've
+ so
+ some
+ such
+ t
+ than
+ that
+ that'll
+ the
+ their
+ theirs
+ them
+ themselves
+ then
+ there
+ these
+ they
+ they'd
+ they'll
+ they're
+ they've
+ this
+ those
+ through
+ to
+ too
+ under
+ until
+ up
+ ve
+ very
+ was
+ wasn
+ wasn't
+ we
+ we'd
+ we'll
+ we're
+ were
+ weren
+ weren't
+ we've
+ what
+ when
+ where
+ which
+ while
+ who
+ whom
+ why
+ will
+ with
+ won
+ won't
+ wouldn
+ wouldn't
+ y
+ you
+ you'd
+ you'll
+ your
+ you're
+ yours
+ yourself
+ yourselves
+ you've
requirements.txt ADDED
@@ -0,0 +1,237 @@
+ pysqlite3-binary
+ absl-py==2.1.0
+ accelerate==1.4.0
+ aiohappyeyeballs==2.5.0
+ aiohttp==3.11.13
+ aiosignal==1.3.2
+ altair==5.5.0
+ annotated-types==0.7.0
+ anyio==4.8.0
+ argon2-cffi==23.1.0
+ argon2-cffi-bindings==21.2.0
+ arrow==1.3.0
+ asgiref==3.8.1
+ asttokens==3.0.0
+ astunparse==1.6.3
+ async-lru==2.0.4
+ async-timeout==4.0.3
+ attrs==25.1.0
+ babel==2.17.0
+ backoff==2.2.1
+ bcrypt==4.3.0
+ beautifulsoup4==4.13.3
+ bitsandbytes==0.45.3
+ bleach==6.2.0
+ blinker==1.9.0
+ build==1.2.2.post1
+ cachetools==5.5.2
+ certifi==2025.1.31
+ cffi==1.17.1
+ charset-normalizer==3.4.1
+ chroma-hnswlib==0.7.6
+ chromadb==0.6.3
+ click==8.1.8
+ coloredlogs==15.0.1
+ comm==0.2.2
+ dataclasses-json==0.6.7
+ debugpy==1.8.13
+ decorator==5.2.1
+ defusedxml==0.7.1
+ Deprecated==1.2.18
+ distro==1.9.0
+ durationpy==0.9
+ exceptiongroup==1.2.2
+ executing==2.2.0
+ faiss-cpu==1.10.0
+ fastapi==0.115.11
+ fastjsonschema==2.21.1
+ filelock==3.17.0
+ flatbuffers==25.2.10
+ fqdn==1.5.1
+ frozenlist==1.5.0
+ fsspec==2025.3.0
+ gast==0.6.0
+ gitdb==4.0.12
+ GitPython==3.1.44
+ google-auth==2.38.0
+ google-pasta==0.2.0
+ googleapis-common-protos==1.69.1
+ greenlet==3.1.1
+ grpcio==1.71.0
+ h11==0.14.0
+ h5py==3.13.0
+ httpcore==1.0.7
+ httptools==0.6.4
+ httpx==0.28.1
+ httpx-sse==0.4.0
+ huggingface-hub==0.29.3
+ humanfriendly==10.0
+ idna==3.10
+ importlib_metadata==8.5.0
+ importlib_resources==6.5.2
+ ipykernel==6.29.5
+ ipython==8.34.0
+ ipywidgets==8.1.5
+ isoduration==20.11.0
+ jedi==0.19.2
+ Jinja2==3.1.6
+ joblib==1.4.2
+ json5==0.10.0
+ jsonpatch==1.33
+ jsonpointer==3.0.0
+ jsonschema==4.23.0
+ jsonschema-specifications==2024.10.1
+ jupyter-events==0.12.0
+ jupyter-lsp==2.2.5
+ jupyter_client==8.6.3
+ jupyter_core==5.7.2
+ jupyter_server==2.15.0
+ jupyter_server_terminals==0.5.3
+ jupyterlab==4.3.5
+ jupyterlab_pygments==0.3.0
+ jupyterlab_server==2.27.3
+ jupyterlab_widgets==3.0.13
+ keras==3.9.0
+ kubernetes==32.0.1
+ langchain==0.3.20
+ langchain-community==0.3.19
+ langchain-core==0.3.43
+ langchain-huggingface==0.1.2
+ langchain-text-splitters==0.3.6
+ langsmith==0.3.13
+ libclang==18.1.1
+ Markdown==3.7
+ markdown-it-py==3.0.0
+ MarkupSafe==3.0.2
+ marshmallow==3.26.1
+ matplotlib-inline==0.1.7
+ mdurl==0.1.2
+ mistune==3.1.2
+ ml-dtypes==0.4.1
+ mmh3==5.1.0
+ monotonic==1.6
+ mpmath==1.3.0
+ multidict==6.1.0
+ mypy-extensions==1.0.0
+ namex==0.0.8
+ narwhals==1.30.0
+ nbclient==0.10.2
+ nbconvert==7.16.6
+ nbformat==5.10.4
+ nest-asyncio==1.6.0
+ networkx==3.4.2
+ nltk==3.9.1
+ notebook_shim==0.2.4
+ numpy==2.0.2
+ oauthlib==3.2.2
+ onnxruntime==1.21.0
+ opentelemetry-api==1.30.0
+ opentelemetry-exporter-otlp-proto-common==1.30.0
+ opentelemetry-exporter-otlp-proto-grpc==1.30.0
+ opentelemetry-instrumentation==0.51b0
+ opentelemetry-instrumentation-asgi==0.51b0
+ opentelemetry-instrumentation-fastapi==0.51b0
+ opentelemetry-proto==1.30.0
+ opentelemetry-sdk==1.30.0
+ opentelemetry-semantic-conventions==0.51b0
+ opentelemetry-util-http==0.51b0
+ opt_einsum==3.4.0
+ optree==0.14.1
+ orjson==3.10.15
+ overrides==7.7.0
+ packaging==24.2
+ pandas==2.2.3
+ pandocfilters==1.5.1
+ parso==0.8.4
+ pexpect==4.9.0
+ pillow==11.1.0
+ platformdirs==4.3.6
+ posthog==3.19.1
+ prometheus_client==0.21.1
+ prompt_toolkit==3.0.50
+ propcache==0.3.0
+ protobuf==5.29.3
+ psutil==7.0.0
+ ptyprocess==0.7.0
+ pure_eval==0.2.3
+ pyarrow==19.0.1
+ pyasn1==0.6.1
+ pyasn1_modules==0.4.1
+ pycparser==2.22
+ pydantic==2.10.6
+ pydantic-settings==2.8.1
+ pydantic_core==2.27.2
+ pydeck==0.9.1
+ Pygments==2.19.1
+ pypdf==5.3.1
+ PyPika==0.48.9
+ pyproject_hooks==1.2.0
+ python-dateutil==2.9.0.post0
+ python-dotenv==1.0.1
+ python-json-logger==3.3.0
+ pytz==2025.1
+ PyYAML==6.0.2
+ pyzmq==26.2.1
+ rank-bm25==0.2.2
+ referencing==0.36.2
+ regex==2024.11.6
+ requests==2.32.3
+ requests-oauthlib==2.0.0
+ requests-toolbelt==1.0.0
+ rfc3339-validator==0.1.4
+ rfc3986-validator==0.1.1
+ rich==13.9.4
+ rpds-py==0.23.1
+ rsa==4.9
+ safetensors==0.5.3
+ scikit-learn==1.6.1
+ scipy==1.15.2
+ Send2Trash==1.8.3
+ sentence-transformers==3.4.1
+ shellingham==1.5.4
+ six==1.17.0
+ smmap==5.0.2
+ sniffio==1.3.1
+ soupsieve==2.6
+ SQLAlchemy==2.0.38
+ stack-data==0.6.3
+ starlette==0.46.1
+ streamlit==1.43.1
+ sympy==1.13.1
+ tenacity==9.0.0
+ termcolor==2.5.0
+ terminado==0.18.1
+ tf_keras==2.18.0
+ threadpoolctl==3.5.0
+ tinycss2==1.4.0
+ tokenizers==0.21.0
+ toml==0.10.2
+ tomli==2.2.1
+ torch==2.6.0
+ tornado==6.4.2
+ tqdm==4.67.1
+ traitlets==5.14.3
+ transformers==4.49.0
+ triton==3.2.0
+ typer==0.15.2
+ types-python-dateutil==2.9.0.20241206
+ typing-inspect==0.9.0
+ typing_extensions==4.12.2
+ tzdata==2025.1
+ uri-template==1.3.0
+ urllib3==2.3.0
+ uvicorn==0.34.0
+ uvloop==0.21.0
+ watchdog==6.0.0
+ watchfiles==1.0.4
+ wcwidth==0.2.13
+ webcolors==24.11.1
+ webencodings==0.5.1
+ websocket-client==1.8.0
+ websockets==15.0.1
+ Werkzeug==3.1.3
+ widgetsnbextension==4.0.13
+ wrapt==1.17.2
+ yarl==1.18.3
+ zipp==3.21.0
+ zstandard==0.23.0
utils.py ADDED
@@ -0,0 +1,320 @@
+ # utils.py
+ """
+ Financial Chatbot Utilities
+ Core functionality for the RAG-based financial chatbot
+ """
+
+ import os
+ import re
+ import sys
+ import warnings
+ from collections import deque
+ from typing import Tuple
+
+ # Swap the stdlib sqlite3 module for pysqlite3 before anything imports
+ # chromadb, whose backend needs a newer SQLite than some hosts provide.
+ import pysqlite3
+ sys.modules["sqlite3"] = pysqlite3
+
+ sys.path.append('/mount/src/gen_ai_dev')
+
+ import nltk
+ import torch
+ import streamlit as st
+
+ # LangChain components
+ from langchain_community.document_loaders import PyPDFLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_community.vectorstores import Chroma
+ from langchain_huggingface import HuggingFaceEmbeddings
+
+ # Models and ML
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+ from rank_bm25 import BM25Okapi
+ from sentence_transformers import CrossEncoder
+ from sklearn.metrics.pairwise import cosine_similarity
+
+ # Initialize NLTK stopwords from the bundled ./nltk_data directory
+ # (avoids needing nltk.download('stopwords') at runtime).
+ nltk.data.path.append('./nltk_data')
+ stop_words = set(nltk.corpus.stopwords.words('english'))
+
+ # Configuration
+ DATA_PATH = "./Infy financial report/"
+ DATA_FILES = ["INFY_2022_2023.pdf", "INFY_2023_2024.pdf"]
+ EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
+ LLM_MODEL = "HuggingFaceH4/zephyr-7b-beta"  # alternative: "microsoft/phi-2"
+
+ # Environment settings
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
+ os.environ["CHROMA_DISABLE_TELEMETRY"] = "true"
+
+ # Suppress specific warnings
+ warnings.filterwarnings("ignore", message=".*oneDNN custom operations.*")
+ warnings.filterwarnings("ignore", message=".*cuBLAS factory.*")
+
+
+ # ------------------------------
+ # Load and Chunk Documents
+ # ------------------------------
+ def load_and_chunk_documents():
+     """Load PDF documents and split them into manageable chunks."""
+     text_splitter = RecursiveCharacterTextSplitter(
+         chunk_size=500,
+         chunk_overlap=100,
+         separators=["\n\n", "\n", ".", " ", ""]
+     )
+
+     all_chunks = []
+     for file in DATA_FILES:
+         try:
+             loader = PyPDFLoader(os.path.join(DATA_PATH, file))
+             pages = loader.load()
+             all_chunks.extend(text_splitter.split_documents(pages))
+         except Exception as e:
+             print(f"Error loading {file}: {e}")
+
+     return all_chunks
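+ # The 100-character overlap keeps sentences that straddle a chunk boundary
+ # retrievable from either neighbouring chunk, and the separator order makes
+ # the splitter prefer paragraph, then line, then sentence breaks.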
+
+
+ # ------------------------------
+ # Vector Store and Search Setup
+ # ------------------------------
+ text_chunks = load_and_chunk_documents()
+ embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
+
+
+ @st.cache_resource(show_spinner=False)
+ def load_vector_db():
+     """Create (and cache) the Chroma vector store over the document chunks."""
+     # Reuse the module-level chunks and embeddings instead of recomputing them.
+     return Chroma.from_documents(
+         documents=text_chunks,
+         embedding=embeddings,
+         persist_directory="./chroma_db"
+     )
+
+ # Initialize the vector store
+ vector_db = load_vector_db()
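+ # st.cache_resource builds the store once per process; Chroma also writes
+ # the index to disk under ./chroma_db via persist_directory.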
+
+ # BM25 setup
+ bm25_corpus = [chunk.page_content for chunk in text_chunks]
+ bm25_tokenized = [doc.split() for doc in bm25_corpus]
+ bm25 = BM25Okapi(bm25_tokenized)
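+ # BM25 scores chunks by exact term overlap, so it catches identifiers and
+ # exact figures that embedding-based search can miss.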
+
+ # Cross-encoder for re-ranking
+ cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
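+ # The cross-encoder scores each (query, passage) pair jointly; it is more
+ # accurate than the bi-encoder embeddings but slower, so it is applied only
+ # to the small candidate pool assembled in hybrid_retrieval below.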
+
+
+ # ------------------------------
+ # Conversation Memory
+ # ------------------------------
+ class ConversationMemory:
+     """Stores recent conversation context."""
+
+     def __init__(self, max_size=5):
+         # deque(maxlen=...) silently evicts the oldest turn once full
+         self.buffer = deque(maxlen=max_size)
+
+     def add_interaction(self, query: str, response: str) -> None:
+         self.buffer.append((query, response))
+
+     def get_context(self) -> str:
+         return "\n".join(
+             f"Previous Q: {q}\nPrevious A: {r}" for q, r in self.buffer
+         )
+
+
+ memory = ConversationMemory(max_size=3)
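+ # The three most recent Q/A turns are appended to retrieval results as a
+ # "[memory]" block (see hybrid_retrieval), giving follow-ups some continuity.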
+
+
+ # ------------------------------
+ # Hybrid Retrieval System
+ # ------------------------------
+ def hybrid_retrieval(query: str, top_k: int = 5) -> str:
+     try:
+         # Semantic search over the Chroma vector store
+         semantic_results = vector_db.similarity_search(query, k=top_k * 2)
+         print(f"\n\n[For Debug Only] Semantic Results: {semantic_results}")
+
+         # Keyword search via BM25
+         keyword_results = bm25.get_top_n(query.split(), bm25_corpus, n=top_k * 2)
+         print(f"\n\n[For Debug Only] Keyword Results: {keyword_results}\n\n")
+
+         # Combine and deduplicate results
+         combined = []
+         seen = set()
+
+         for doc in semantic_results:
+             content = doc.page_content
+             if content not in seen:
+                 combined.append((content, "semantic"))
+                 seen.add(content)
+
+         for doc in keyword_results:
+             if doc not in seen:
+                 combined.append((doc, "keyword"))
+                 seen.add(doc)
+
+         # Re-rank the merged pool with the cross-encoder
+         pairs = [(query, content) for content, _ in combined]
+         scores = cross_encoder.predict(pairs)
+
+         # Sort by score, best first
+         sorted_results = sorted(
+             zip(combined, scores),
+             key=lambda x: x[1],
+             reverse=True
+         )
+
+         final_results = [
+             f"[{source}] {content}"
+             for (content, source), _ in sorted_results[:top_k]
+         ]
+
+         memory_context = memory.get_context()
+         if memory_context:
+             final_results.append(f"[memory] {memory_context}")
+
+         return "\n\n".join(final_results)
+
+     except Exception as e:
+         print(f"Retrieval error: {e}")
+         return ""
+
+
+ # ------------------------------
+ # Safety Guardrails
+ # ------------------------------
+ class SafetyGuard:
+     """Validates input and filters output."""
+
+     def __init__(self):
+         self.financial_terms = {
+             'revenue', 'profit', 'ebitda', 'balance', 'cash',
+             'income', 'fiscal', 'growth', 'margin', 'expense'
+         }
+         self.blocked_topics = {
+             'politics', 'sports', 'entertainment', 'religion',
+             'medical', 'hypothetical', 'opinion', 'personal'
+         }
+
+     def validate_input(self, query: str) -> Tuple[bool, str]:
+         query_lower = query.lower()
+         if any(topic in query_lower for topic in self.blocked_topics):
+             return False, "I only discuss financial topics."
+         # Optional stricter check: require at least one financial term.
+         # if not any(term in query_lower for term in self.financial_terms):
+         #     return False, "Please ask financial questions."
+         return True, ""
+
+     def filter_output(self, response: str) -> str:
+         # Strip hedging phrases, then truncate to the first two sentences.
+         phrases_to_remove = {
+             "I'm not sure", "I don't know", "maybe",
+             "possibly", "could be", "uncertain", "perhaps"
+         }
+         for phrase in phrases_to_remove:
+             response = response.replace(phrase, "")
+
+         sentences = re.split(r'[.!?]', response)
+         if len(sentences) > 2:
+             response = '. '.join(sentences[:2]) + '.'
+
+         return response.strip()
+
+
+ guard = SafetyGuard()
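+ # Both checks are plain substring matches, so a legitimate query containing
+ # a blocked word (e.g. "personal loan") is rejected; keep that in mind when
+ # extending the topic lists.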
+
+ # ------------------------------
+ # LLM Initialization
+ # ------------------------------
+ try:
+     @st.cache_resource(show_spinner=False)
+     def load_generator():
+         tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL)
+         if torch.cuda.is_available():
+             # On GPU, load the model 4-bit quantized to fit in memory
+             model = AutoModelForCausalLM.from_pretrained(
+                 LLM_MODEL,
+                 device_map="auto",
+                 torch_dtype=torch.bfloat16,
+                 load_in_4bit=True
+             )
+         else:
+             # CPU fallback: full-precision float32
+             model = AutoModelForCausalLM.from_pretrained(
+                 LLM_MODEL,
+                 device_map="cpu",
+                 torch_dtype=torch.float32
+             )
+         return pipeline(
+             "text-generation",
+             model=model,
+             tokenizer=tokenizer,
+             max_new_tokens=400,
+             do_sample=True,
+             temperature=0.3,
+             top_k=30,
+             top_p=0.9,
+             repetition_penalty=1.2
+         )
+
+     generator = load_generator()
+ except Exception as e:
+     print(f"Error loading model: {e}")
+     raise
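+ # Sampling settings: a low temperature (0.3) with top-k/top-p filtering keeps
+ # generations close to the retrieved context, while repetition_penalty guards
+ # against the loops that near-greedy decoding can produce.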
+
+
+ # ------------------------------
+ # Response Generation
+ # ------------------------------
+ def extract_final_response(full_response: str) -> str:
+     """Keep only the text after the last assistant tag in the generated output."""
+     parts = full_response.split("<|im_start|>assistant")
+     if len(parts) > 1:
+         response = parts[-1].split("<|im_end|>")[0]
+         return re.sub(r'\s+', ' ', response).strip()
+     return full_response
+
+
+ def generate_answer(query: str) -> Tuple[str, float]:
+     try:
+         # Input validation
+         is_valid, msg = guard.validate_input(query)
+         if not is_valid:
+             return msg, 0.0
+
+         # Retrieve context
+         context = hybrid_retrieval(query)
+
+         # Generate response
+         prompt = f"""<|im_start|>system
+ You are a financial analyst. Provide a brief answer using the context.
+ Context: {context}<|im_end|>
+ <|im_start|>user
+ {query}<|im_end|>
+ <|im_start|>assistant
+ Answer:"""
+
+         response = generator(prompt)[0]['generated_text']
+         clean_response = extract_final_response(response)
+         clean_response = guard.filter_output(clean_response)
+
+         # Calculate confidence
+         query_embed = embeddings.embed_query(query)
+         response_embed = embeddings.embed_query(clean_response)
+         confidence = cosine_similarity([query_embed], [response_embed])[0][0]
+
+         # Update memory
+         memory.add_interaction(query, clean_response)
+
+         return clean_response, round(confidence, 2)
+
+     except Exception as e:
+         return f"Error processing request: {e}", 0.0