Commit · 71a7938
Parent(s): c3c5b37
push files
- .gitattributes +12 -0
- .gitignore +4 -0
- Dockerfile +14 -0
- __init__.py +0 -0
- agent.py +256 -0
- chroma_db/1cfedb52-a018-45d3-ae4d-7636faa1a650/data_level0.bin +3 -0
- chroma_db/1cfedb52-a018-45d3-ae4d-7636faa1a650/header.bin +3 -0
- chroma_db/1cfedb52-a018-45d3-ae4d-7636faa1a650/index_metadata.pickle +3 -0
- chroma_db/1cfedb52-a018-45d3-ae4d-7636faa1a650/length.bin +3 -0
- chroma_db/1cfedb52-a018-45d3-ae4d-7636faa1a650/link_lists.bin +3 -0
- chroma_db/chroma.sqlite3 +3 -0
- constants.py +23 -0
- document/HimachalPradesh.docx +3 -0
- document/IndiaSchemes.docx +3 -0
- document/MadhyaPradesh.docx +3 -0
- document/Public_health_engineering_2010.docx +3 -0
- document/UN_SDG_3_Detailed_Solutions.docx +3 -0
- document/Uttar_Pradesh_UT_Healthcare_Schemes_.docx +3 -0
- fetch.py +38 -0
- generate.py +8 -0
- main.py +32 -0
- requirements.txt +206 -0
- schemas.py +6 -0
.gitattributes
CHANGED
@@ -33,3 +33,15 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+chroma_db/1cfedb52-a018-45d3-ae4d-7636faa1a650/link_lists.bin filter=lfs diff=lfs merge=lfs -text
+chroma_db/1cfedb52-a018-45d3-ae4d-7636faa1a650/length.bin filter=lfs diff=lfs merge=lfs -text
+chroma_db/1cfedb52-a018-45d3-ae4d-7636faa1a650/index_metadata.pickle filter=lfs diff=lfs merge=lfs -text
+chroma_db/1cfedb52-a018-45d3-ae4d-7636faa1a650/header.bin filter=lfs diff=lfs merge=lfs -text
+chroma_db/1cfedb52-a018-45d3-ae4d-7636faa1a650/data_level0.bin filter=lfs diff=lfs merge=lfs -text
+document/MadhyaPradesh.docx filter=lfs diff=lfs merge=lfs -text
+document/Public_health_engineering_2010.docx filter=lfs diff=lfs merge=lfs -text
+chroma_db/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
+document/UN_SDG_3_Detailed_Solutions.docx filter=lfs diff=lfs merge=lfs -text
+document/IndiaSchemes.docx filter=lfs diff=lfs merge=lfs -text
+document/Uttar_Pradesh_UT_Healthcare_Schemes_.docx filter=lfs diff=lfs merge=lfs -text
+document/HimachalPradesh.docx filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1,4 @@
+agent_advance.py
+*.ipynb
+__pycache__/
+hackathon-healthcare-solutions-9e6f46d0a21e.json
Dockerfile
ADDED
@@ -0,0 +1,14 @@
+FROM python:3.12
+RUN useradd -m -u 1000 user
+USER user
+WORKDIR /code
+RUN chown -R user:user /code
+ENV HOME=/home/user
+ENV PATH=/home/user/.local/bin:$PATH
+WORKDIR $HOME/app
+COPY ./requirements.txt ./
+RUN pip install --no-cache-dir -r ./requirements.txt
+COPY --chown=user . $HOME/app
+# COPY --chown=user:user . /code
+# COPY . .
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
__init__.py
ADDED
File without changes
agent.py
ADDED
@@ -0,0 +1,256 @@
+import os
+import json
+from dotenv import load_dotenv
+from langchain_community.document_loaders import TextLoader, DirectoryLoader, UnstructuredPDFLoader, UnstructuredWordDocumentLoader
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_community.vectorstores import Chroma
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.chains import LLMChain
+from langchain.chains.base import Chain
+from langchain.prompts import PromptTemplate
+from google.oauth2 import service_account
+from constants import CHROMA_PATH
+
+# Load environment variables
+load_dotenv()
+
+GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
+
+# GOOGLE_APPLICATION_CREDENTIALS holds the service-account JSON itself,
+# so parse it rather than treating it as a file path.
+conf = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS')
+service_account_info = json.loads(conf)
+credentials = service_account.Credentials.from_service_account_info(service_account_info)
+
+# Directory containing the source documents
+DOCUMENT_DIR = 'document/'
+COLLECTION_NAME = "health_documents"
+
+# ChatGoogleGenerativeAI takes the API key as `google_api_key`.
+llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=GEMINI_API_KEY, temperature=0.7, credentials=credentials)
+
+print("Models initialized successfully.")
+
+# Initialize Hugging Face embeddings; any suitable embedding model works here.
+embeddings = HuggingFaceEmbeddings(
+    model_name="sentence-transformers/gtr-t5-large",
+    model_kwargs={'device': 'cpu'}
+)
+
+
+def load_documents(directory):
+    """Load documents from multiple file types."""
+    documents = []
+
+    # Load text files
+    text_loader = DirectoryLoader(
+        directory,
+        glob="**/*.txt",
+        loader_cls=TextLoader
+    )
+    documents.extend(text_loader.load())
+
+    # Load Word documents
+    docx_loader = DirectoryLoader(
+        directory,
+        glob="**/*.docx",
+        loader_cls=UnstructuredWordDocumentLoader,
+        loader_kwargs={"mode": "elements"}
+    )
+    documents.extend(docx_loader.load())
+
+    # Load PDF files
+    pdf_loader = DirectoryLoader(
+        directory,
+        glob="**/*.pdf",
+        loader_cls=UnstructuredPDFLoader
+    )
+    documents.extend(pdf_loader.load())
+
+    print(f"Loaded {len(documents)} documents.")
+    return documents
+
+
+def split_documents(documents, chunk_size=1000, chunk_overlap=200):
+    """Split documents into smaller chunks."""
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=chunk_size,
+        chunk_overlap=chunk_overlap
+    )
+    chunks = text_splitter.split_documents(documents)
+    return chunks
+
+
+def create_and_store_embeddings(chunks):
+    """Create the ChromaDB vector store from document chunks."""
+    # Ensure the Chroma path exists
+    os.makedirs(CHROMA_PATH, exist_ok=True)
+    print(f"unfiltered chunks: {len(chunks)}")
+
+    # Chroma accepts only scalar metadata values, so drop everything else.
+    filtered_chunks = []
+    for chunk in chunks:
+        filtered_metadata = {k: v for k, v in chunk.metadata.items()
+                             if isinstance(v, (str, int, float, bool))}
+        chunk.metadata = filtered_metadata
+        filtered_chunks.append(chunk)
+
+    print(f"Filtered metadata for {len(filtered_chunks)} chunks.")
+    vector_store = Chroma.from_documents(
+        documents=filtered_chunks,
+        embedding=embeddings,
+        persist_directory=CHROMA_PATH
+    )
+
+    print("Created ChromaDB vector store.")
+    return vector_store
+
+
+def load_vectordb(path: str = CHROMA_PATH):
+    """Load a persisted ChromaDB vector store from disk."""
+    if os.path.exists(path):
+        vector_store = Chroma(persist_directory=path, embedding_function=embeddings)
+        print("Loaded ChromaDB vector store.")
+        return vector_store
+    else:
+        raise ValueError(f"ChromaDB path {path} does not exist.")
+
+
+def create_health_agent(vector_store):
+    """Create a custom retrieval QA chain for health-related queries."""
+    prompt_template = """You are a helpful health assistant who talks to the user like a human and resolves their queries.
+
+Use Previous_Conversation to maintain consistency in the conversation.
+These are the Previous_Conversation turns between you and the user.
+Previous_Conversation: \n{previous_conversation}
+Thoroughly analyze the Context and use it, alongside your own knowledge, to answer the question.
+Keep the answer concise.
+
+Context: {context}
+Question: {question}
+Answer:"""
+
+    PROMPT = PromptTemplate(
+        template=prompt_template,
+        input_variables=["context", "question", "previous_conversation"]
+    )
+
+    if llm is None:
+        raise ValueError("No language model initialized. Please check the model initialization.")
+
+    # Create a retriever over the vector store
+    retriever = vector_store.as_retriever(search_kwargs={"k": 10})
+
+    class CustomRetrievalQA(Chain):
+        retriever: object
+        llm_chain: LLMChain
+
+        @property
+        def input_keys(self):
+            return ['query', 'previous_conversation']
+
+        @property
+        def output_keys(self):
+            return ['result']
+
+        def _call(self, inputs):
+            query = inputs['query']
+            previous_conversation = inputs.get('previous_conversation', '')
+
+            # Retrieve relevant documents and build the context string
+            docs = self.retriever.get_relevant_documents(query)
+            context = "\n".join([doc.page_content for doc in docs])
+
+            # Prepare inputs for the LLM chain
+            llm_inputs = {
+                'context': context,
+                'question': query,
+                'previous_conversation': previous_conversation
+            }
+
+            # Generate response
+            result = self.llm_chain(llm_inputs)
+            return {'result': result['text']}
+
+    # Create the LLM chain
+    llm_chain = LLMChain(llm=llm, prompt=PROMPT)
+
+    # Create and return the custom chain
+    return CustomRetrievalQA(retriever=retriever, llm_chain=llm_chain)
+
+
+def agent_with_db():
+    # 1. Load the persisted vector store
+    vector_store = load_vectordb(CHROMA_PATH)
+
+    # Rebuild the index when UPDATE_DB=true; defaulting to "false" avoids
+    # crashing on .lower() when the variable is unset.
+    UPDATE_DB = os.getenv("UPDATE_DB", "false").lower() == "true"
+    if vector_store is None or UPDATE_DB:
+        print("Loading documents...")
+        print(vector_store, UPDATE_DB)
+
+        documents = load_documents(DOCUMENT_DIR)
+
+        print("Splitting documents into chunks...")
+        chunks = split_documents(documents)
+        print(f"Split into {len(chunks)} chunks.")
+
+        print("Creating and storing embeddings in ChromaDB...")
+        try:
+            vector_store = create_and_store_embeddings(chunks)
+            print("Embeddings stored successfully in ChromaDB.")
+        except Exception as e:
+            print(f"An error occurred while creating or storing embeddings: {e}")
+            return
+
+    print("Creating the health agent...")
+    health_agent = create_health_agent(vector_store)
+
+    return health_agent
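
For orientation, a minimal sketch of how agent.py is meant to be driven (assuming GEMINI_API_KEY and GOOGLE_APPLICATION_CREDENTIALS are set in the environment and the chroma_db/ directory from this commit is present; the query string is only an example):

from agent import agent_with_db

# Load the persisted Chroma store and wrap it in the custom retrieval QA chain.
health_agent = agent_with_db()

# The chain expects a query plus a stringified previous conversation and
# returns the generated answer under the 'result' key.
out = health_agent({
    "query": "Which healthcare schemes apply in Himachal Pradesh?",
    "previous_conversation": "No previous conversation available, first time",
})
print(out["result"])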
chroma_db/1cfedb52-a018-45d3-ae4d-7636faa1a650/data_level0.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0403ddf49979cb439caddaba3d5d3f94b3fc7f5ff489022c9ce1e4ee1fa03676
+size 19272000
chroma_db/1cfedb52-a018-45d3-ae4d-7636faa1a650/header.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e3b214402d967712323b30b42de7e0f58436425d88c301e653c7998a07815a8
+size 100
chroma_db/1cfedb52-a018-45d3-ae4d-7636faa1a650/index_metadata.pickle
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d78373234558c5a9d409fc6c5a886f8eca22b82912e45785aa84174f6308fc2
+size 346027
chroma_db/1cfedb52-a018-45d3-ae4d-7636faa1a650/length.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:94c8d34fa33a290d05a10b3918e770eeb71a0f842bde94648bc774b890821655
+size 24000
chroma_db/1cfedb52-a018-45d3-ae4d-7636faa1a650/link_lists.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:61059079438ee819f0494817d2db28129affccf856b43bded4b19b64ae21ea91
+size 51880
chroma_db/chroma.sqlite3
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b9d40639ca308e68a3ab6709959ca7a0c120332434e488848092d3d716a2ba6
+size 50413568
constants.py
ADDED
@@ -0,0 +1,23 @@
+SYSTEM_PROMPT1 = """You are a helpful assistant. Given a question, you should answer it by first thinking about the reasoning
+process in the mind and then providing the final answer. The output format of reasoning process and final
+answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., "<think>
+reasoning process here </think><answer> final answer here </answer>". You should perform thinking
+with decomposing, reflecting, brainstorming, verifying, refining, and revising. Besides, you can perform
+searching for uncertain knowledge if necessary with the format of "<|begin_of_query|> search query
+(only keywords) here <|end_of_query|>". Then, the search system will provide you with the retrieval
+information with the format of "<|begin_of_documents|> ...search results... <|end_of_documents|>"."""
+
+SYSTEM_PROMPT2 = """The User asks a question, and the Assistant solves it. The Assistant first thinks about the reasoning
+process in the mind and then provides the User with the final answer. The output format of reasoning
+process and final answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., "<think> reasoning process here </think><answer> final answer here </answer>". During the
+thinking process, **the Assistant can perform searching** for uncertain knowledge if necessary with
+the format of "<|begin_of_query|> search query (only list keywords, such as "keyword_1 keyword_2
+...")<|end_of_query|>". **A query must involve only a single triple**. Then, the search system will
+provide the Assistant with the retrieval information with the format of "<|begin_of_documents|> ...search
+results... <|end_of_documents|>".
+"""
+
+SYSTEM_PROMPT = """You are a helpful health assistant. Given a query, you should answer it.
+"""
+
+CHROMA_PATH = 'chroma_db'
document/HimachalPradesh.docx
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66ba9023c7dee42eb0402883afc8d93a7648fde27d6201e6877d668616e5605d
+size 36581
document/IndiaSchemes.docx
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dbb92146dcec5d999fa01ed6f7426ff578999d440851749ec7aa35bf15d0bce8
+size 33410
document/MadhyaPradesh.docx
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15984a39341005cee1c114d9d13426d1eafbb4c97877138fccb1d6877c47194d
+size 6219668
document/Public_health_engineering_2010.docx
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1bfd70c623753288bc44d16ab4146a8263e9eb9780a99a2adbfab0d743aa3d20
+size 20458801
document/UN_SDG_3_Detailed_Solutions.docx
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1ca1393a81d31fc2508057ba30f4a58e54d1412722fa9594c7351fd32bdc1ef
+size 28818
document/Uttar_Pradesh_UT_Healthcare_Schemes_.docx
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da8c8f891c06114b46f034bcc4605e5b9ce6cc15835731671d885d434ca7a0e6
+size 6226144
fetch.py
ADDED
@@ -0,0 +1,38 @@
+"""Example code showing how to retrieve a response from the server with python-requests."""
+
+import requests
+
+## without previous_state
+url = "http://localhost:8000/retrieve"
+headers = {
+    "accept": "application/json",
+    "Content-Type": "application/json"
+}
+data = {
+    "query": "what's my name?"
+}
+
+response = requests.post(url, headers=headers, json=data)
+
+print(response.json())
+
+## with previous_state
+url = "http://localhost:8000/retrieve"
+headers = {
+    "accept": "application/json",
+    "Content-Type": "application/json"
+}
+data = {
+    "previous_state": [
+        {"message": "hi", "response": "Hi there! How can I help you today?\n"},
+        {"message": "my name is arpit", "response": "hi arpit. What's up with you?\n"}
+    ],
+    "query": "what's my name?"
+}
+
+response = requests.post(url, headers=headers, json=data)
+
+print(response.json())
generate.py
ADDED
@@ -0,0 +1,8 @@
+from google import genai
+from dotenv import load_dotenv
+from os import getenv
+
+load_dotenv()
+
+# The Gemini key comes from the environment; constants.py defines no
+# GEMINI_API_KEY, so importing it from there would fail.
+GEMINI_API_KEY = getenv("GEMINI_API_KEY")
main.py
ADDED
@@ -0,0 +1,32 @@
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+import os
+from agent import agent_with_db
+from schemas import request
+from dotenv import load_dotenv
+load_dotenv()
+
+app = FastAPI()
+# Fall back to allowing all origins when ALLOWED_ORIGINS is unset.
+allowed_origins = os.getenv("ALLOWED_ORIGINS", "*").split(',')
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=allowed_origins,
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Build the agent once at startup and reuse it across requests.
+agent = agent_with_db()
+
+@app.post("/retrieve", status_code=200)
+async def retrieve(request: request):
+    prev_conv = request.previous_state
+    print(prev_conv)
+    if prev_conv is None:
+        prev_conv = "No previous conversation available, first time"
+    query = request.query
+    prev_conv = str(prev_conv)
+    response = agent({"query": query, "previous_conversation": prev_conv})
+
+    return {"response": response["result"]}
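
For local testing outside Docker, a sketch using uvicorn's Python API (the module path main:app comes from this repo; port 8000 is an assumption that matches the URLs in fetch.py, while the Dockerfile serves on 7860):

import uvicorn

# Serve the FastAPI app defined in main.py on localhost:8000.
uvicorn.run("main:app", host="0.0.0.0", port=8000)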
requirements.txt
ADDED
@@ -0,0 +1,206 @@
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.11.14
+aiosignal==1.3.2
+annotated-types==0.7.0
+anyio==4.9.0
+asgiref==3.8.1
+asttokens==3.0.0
+attrs==25.3.0
+backoff==2.2.1
+bcrypt==4.3.0
+beautifulsoup4==4.13.3
+build==1.2.2.post1
+cachetools==5.5.2
+certifi==2025.1.31
+cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.4.1
+chroma-hnswlib==0.7.6
+chromadb==0.6.3
+click==8.1.8
+coloredlogs==15.0.1
+comm==0.2.2
+cryptography==44.0.2
+dataclasses-json==0.6.7
+debugpy==1.8.13
+decorator==5.2.1
+Deprecated==1.2.18
+distro==1.9.0
+dnspython==2.7.0
+durationpy==0.9
+email_validator==2.2.0
+emoji==2.14.1
+eval_type_backport==0.2.2
+executing==2.2.0
+fastapi==0.115.12
+fastapi-cli==0.0.7
+filelock==3.18.0
+filetype==1.2.0
+flatbuffers==25.2.10
+frozenlist==1.5.0
+fsspec==2025.3.0
+google-ai-generativelanguage==0.6.17
+google-api-core==2.24.2
+google-auth==2.38.0
+google-genai==1.7.0
+googleapis-common-protos==1.69.2
+greenlet==3.1.1
+grpcio==1.71.0
+grpcio-status==1.71.0
+h11==0.14.0
+html5lib==1.1
+httpcore==1.0.7
+httptools==0.6.4
+httpx==0.28.1
+httpx-sse==0.4.0
+huggingface-hub==0.29.3
+humanfriendly==10.0
+idna==3.10
+importlib_metadata==8.6.1
+importlib_resources==6.5.2
+ipykernel==6.29.5
+ipython==9.0.2
+ipython_pygments_lexers==1.1.1
+itsdangerous==2.2.0
+jedi==0.19.2
+Jinja2==3.1.6
+joblib==1.4.2
+jsonpatch==1.33
+jsonpointer==3.0.0
+jupyter_client==8.6.3
+jupyter_core==5.7.2
+kubernetes==32.0.1
+langchain==0.3.21
+langchain-community==0.3.20
+langchain-core==0.3.48
+langchain-google-genai==2.1.1
+langchain-huggingface==0.1.2
+langchain-text-splitters==0.3.7
+langdetect==1.0.9
+langsmith==0.3.18
+lxml==5.3.1
+markdown-it-py==3.0.0
+MarkupSafe==3.0.2
+marshmallow==3.26.1
+matplotlib-inline==0.1.7
+mdurl==0.1.2
+mmh3==5.1.0
+monotonic==1.6
+mpmath==1.3.0
+multidict==6.2.0
+mypy-extensions==1.0.0
+nest-asyncio==1.6.0
+networkx==3.4.2
+nltk==3.9.1
+numpy==2.2.4
+nvidia-cublas-cu12==12.4.5.8
+nvidia-cuda-cupti-cu12==12.4.127
+nvidia-cuda-nvrtc-cu12==12.4.127
+nvidia-cuda-runtime-cu12==12.4.127
+nvidia-cudnn-cu12==9.1.0.70
+nvidia-cufft-cu12==11.2.1.3
+nvidia-curand-cu12==10.3.5.147
+nvidia-cusolver-cu12==11.6.1.9
+nvidia-cusparse-cu12==12.3.1.170
+nvidia-cusparselt-cu12==0.6.2
+nvidia-nccl-cu12==2.21.5
+nvidia-nvjitlink-cu12==12.4.127
+nvidia-nvtx-cu12==12.4.127
+oauthlib==3.2.2
+olefile==0.47
+onnxruntime==1.21.0
+opentelemetry-api==1.31.1
+opentelemetry-exporter-otlp-proto-common==1.31.1
+opentelemetry-exporter-otlp-proto-grpc==1.31.1
+opentelemetry-instrumentation==0.52b1
+opentelemetry-instrumentation-asgi==0.52b1
+opentelemetry-instrumentation-fastapi==0.52b1
+opentelemetry-proto==1.31.1
+opentelemetry-sdk==1.31.1
+opentelemetry-semantic-conventions==0.52b1
+opentelemetry-util-http==0.52b1
+orjson==3.10.16
+overrides==7.7.0
+packaging==24.2
+parso==0.8.4
+pexpect==4.9.0
+pillow==11.1.0
+platformdirs==4.3.7
+posthog==3.21.0
+prompt_toolkit==3.0.50
+propcache==0.3.1
+proto-plus==1.26.1
+protobuf==5.29.4
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pyasn1==0.6.1
+pyasn1_modules==0.4.1
+pycparser==2.22
+pydantic==2.10.6
+pydantic-extra-types==2.10.3
+pydantic-settings==2.8.1
+pydantic_core==2.27.2
+Pygments==2.19.1
+pypdf==5.4.0
+PyPika==0.48.9
+pyproject_hooks==1.2.0
+python-dateutil==2.9.0.post0
+python-docx==1.1.2
+python-dotenv==1.1.0
+python-iso639==2025.2.18
+python-magic==0.4.27
+python-multipart==0.0.20
+python-oxmsg==0.0.2
+PyYAML==6.0.2
+pyzmq==26.3.0
+RapidFuzz==3.12.2
+regex==2024.11.6
+requests==2.32.3
+requests-oauthlib==2.0.0
+requests-toolbelt==1.0.0
+rich==13.9.4
+rich-toolkit==0.13.2
+rsa==4.9
+safetensors==0.5.3
+scikit-learn==1.6.1
+scipy==1.15.2
+sentence-transformers==3.4.1
+setuptools==78.1.0
+shellingham==1.5.4
+six==1.17.0
+sniffio==1.3.1
+soupsieve==2.6
+SQLAlchemy==2.0.39
+stack-data==0.6.3
+starlette==0.46.1
+sympy==1.13.1
+tenacity==9.0.0
+threadpoolctl==3.6.0
+tokenizers==0.21.1
+torch==2.6.0
+tornado==6.4.2
+tqdm==4.67.1
+traitlets==5.14.3
+transformers==4.50.1
+triton==3.2.0
+typer==0.15.2
+typing-inspect==0.9.0
+typing-inspection==0.4.0
+typing_extensions==4.13.0
+ujson==5.10.0
+unstructured==0.17.2
+unstructured-client==0.31.3
+urllib3==2.3.0
+uvicorn==0.34.0
+uvloop==0.21.0
+watchfiles==1.0.4
+wcwidth==0.2.13
+webencodings==0.5.1
+websocket-client==1.8.0
+websockets==15.0.1
+wrapt==1.17.2
+yarl==1.18.3
+zipp==3.21.0
+zstandard==0.23.0
schemas.py
ADDED
@@ -0,0 +1,6 @@
+from pydantic import BaseModel
+from typing import Optional, Literal, List, Dict, Any
+
+class request(BaseModel):
+    previous_state: Optional[List[Dict]] = None
+    query: str
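
As a rough illustration, the /retrieve payloads from fetch.py map onto this model as follows (a sketch; the field values are examples only):

from schemas import request

# previous_state is optional, so a bare query validates...
r1 = request(query="what's my name?")

# ...and so does a payload that carries earlier conversation turns.
r2 = request(
    query="what's my name?",
    previous_state=[{"message": "hi", "response": "Hi there! How can I help you today?\n"}],
)

print(r1.model_dump())
print(r2.model_dump())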