WebashalarForML committed on
Commit
5f2ea7c
·
verified ·
1 Parent(s): acff3f1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +104 -22
app.py CHANGED
@@ -1,10 +1,13 @@
1
  from flask import Flask, render_template, request, redirect, url_for, session
2
  import os
3
  from werkzeug.utils import secure_filename
4
- from retrival import generate_data_store
 
5
  from langchain_community.vectorstores import Chroma
6
  from langchain.embeddings import HuggingFaceEmbeddings
7
  from langchain.prompts import ChatPromptTemplate
 
 
8
  from huggingface_hub import InferenceClient
9
  from langchain.schema import Document
10
  from langchain_core.documents import Document
@@ -13,6 +16,7 @@ import re
13
  import glob
14
  import shutil
15
  from werkzeug.utils import secure_filename
 
16
 
17
  app = Flask(__name__)
18
 
@@ -22,16 +26,19 @@ app.secret_key = os.urandom(24)
22
  # Configurations
23
  UPLOAD_FOLDER = "uploads/"
24
  VECTOR_DB_FOLDER = "VectorDB/"
25
- NLTK_FOLDER = "nltk_data/"
26
 
27
  app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
28
- os.environ["MPLCONFIGDIR"] = "/tmp"
29
  os.makedirs(UPLOAD_FOLDER, exist_ok=True)
30
  os.makedirs(VECTOR_DB_FOLDER, exist_ok=True)
31
- os.makedirs(NLTK_FOLDER, exist_ok=True)
32
 
33
  # Global variables
34
  CHROMA_PATH = None
 
 
 
35
  PROMPT_TEMPLATE = """
36
  You are working with a retrieval-augmented generation (RAG) setup. Your task is to generate a response based on the context provided and the question asked. Consider only the following context strictly, and use it to answer the question. Do not include any external information.
37
 
@@ -45,8 +52,28 @@ Question:
45
 
46
  Response:
47
  """
48
- HFT = os.getenv('HF_TOKEN')
49
- client = InferenceClient(api_key=HFT)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  @app.route('/', methods=['GET'])
52
  def home():
@@ -60,12 +87,14 @@ def chat():
60
  print("sessionhist1",session['history'])
61
 
62
  global CHROMA_PATH
 
 
63
  old_db = session.get('old_db', None)
64
  print(f"Selected DB: {CHROMA_PATH}")
65
 
66
- if old_db != None:
67
- if CHROMA_PATH != old_db:
68
- session['history'] = []
69
 
70
  #print("sessionhist1",session['history'])
71
 
@@ -74,25 +103,46 @@ def chat():
74
  if CHROMA_PATH is None:
75
  return render_template('chat.html', error="No vector database selected!", history=[])
76
 
77
- # Load the selected database
78
- embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
 
79
  db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)
80
- results = db.similarity_search_with_relevance_scores(query_text, k=3)
81
- context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
  # Prepare the prompt and query the model
84
  prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
85
- prompt = prompt_template.format(context=context_text, question=query_text)
 
86
  print("results------------------->",prompt)
87
- response = client.chat.completions.create(
88
- model="mistralai/Mistral-7B-Instruct-v0.3",
89
- messages=[{"role": "system", "content": "You are an assistant specifically designed to generate responses based on the context provided. Your task is to answer questions strictly using the context without adding any external knowledge or information. Please ensure that your responses are relevant, accurate, and based solely on the given context."},
90
- {"role": "user", "content": prompt}],
91
- max_tokens=1000,
92
- temperature=0.3
 
 
 
 
93
  )
94
- data = response.choices[0].message.content
95
 
 
 
 
 
96
  if re.search(r'\bmention\b|\bnot mention\b|\bnot mentioned\b|\bnot contain\b|\bnot include\b|\bnot provide\b|\bdoes not\b|\bnot explicitly\b|\bnot explicitly mentioned\b', data, re.IGNORECASE):
97
  data = "We do not have information related to your query on our end."
98
 
@@ -159,12 +209,44 @@ def list_dbs():
159
 
160
  @app.route('/select-db/<db_name>', methods=['POST'])
161
  def select_db(db_name):
 
 
162
  global CHROMA_PATH
163
  print(f"Selected DB: {CHROMA_PATH}")
164
  CHROMA_PATH = os.path.join(VECTOR_DB_FOLDER, db_name)
165
  CHROMA_PATH = CHROMA_PATH.replace("\\", "/")
166
  print(f"Selected DB: {CHROMA_PATH}")
 
 
 
 
 
 
 
 
 
167
  return redirect(url_for('chat'))
168
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
  if __name__ == "__main__":
170
- app.run(debug=False, use_reloader=False)
 
 
 
1
  from flask import Flask, render_template, request, redirect, url_for, session
2
  import os
3
  from werkzeug.utils import secure_filename
4
+ #from retrival import generate_data_store
5
+ from retrival2 import generate_data_store,add_document_to_existing_db, delete_chunks_by_source
6
  from langchain_community.vectorstores import Chroma
7
  from langchain.embeddings import HuggingFaceEmbeddings
8
  from langchain.prompts import ChatPromptTemplate
9
+ from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
10
+ from langchain_huggingface import HuggingFaceEndpoint
11
  from huggingface_hub import InferenceClient
12
  from langchain.schema import Document
13
  from langchain_core.documents import Document
 
16
  import glob
17
  import shutil
18
  from werkzeug.utils import secure_filename
19
+ import asyncio
20
 
21
  app = Flask(__name__)
22
 
 
26
  # Configurations
27
  UPLOAD_FOLDER = "uploads/"
28
  VECTOR_DB_FOLDER = "VectorDB/"
29
+ #TABLE_DB_FOLDER = "TableDB/"
30
 
31
  app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
32
+
33
  os.makedirs(UPLOAD_FOLDER, exist_ok=True)
34
  os.makedirs(VECTOR_DB_FOLDER, exist_ok=True)
35
+ #os.makedirs(TABLE_DB_FOLDER, exist_ok=True)
36
 
37
  # Global variables
38
  CHROMA_PATH = None
39
+ #TABLE_PATH = None
40
+
41
+ #System prompt
42
  PROMPT_TEMPLATE = """
43
  You are working with a retrieval-augmented generation (RAG) setup. Your task is to generate a response based on the context provided and the question asked. Consider only the following context strictly, and use it to answer the question. Do not include any external information.
44
 
 
52
 
53
  Response:
54
  """
55
+
56
+ # PROMPT_TEMPLATE = """
57
+ # You are working with a retrieval-augmented generation (RAG) setup. Your task is to generate a response based on the provided context, table data, and the question asked. Consider only the given inputs strictly and use them to answer the question. Do not include any external information.
58
+
59
+ # If the table variable contains tabular data, analyze and extract all relevant details from it. Provide a structured response summarizing the table data if it is relevant to the question. If the table data is not relevant, base your answer only on the context.
60
+
61
+ # Context:
62
+ # {context}
63
+
64
+ # Table:
65
+ # {table}
66
+
67
+ # ---
68
+
69
+ # Question:
70
+ # {question}
71
+
72
+ # Response:
73
+ # """
74
+
75
+ #HFT = os.getenv('HF_TOKEN')
76
+ #client = InferenceClient(api_key=HFT)
77
 
78
  @app.route('/', methods=['GET'])
79
  def home():
 
87
  print("sessionhist1",session['history'])
88
 
89
  global CHROMA_PATH
90
+ #global TABLE_PATH
91
+
92
  old_db = session.get('old_db', None)
93
  print(f"Selected DB: {CHROMA_PATH}")
94
 
95
+ # if old_db != None:
96
+ # if CHROMA_PATH != old_db:
97
+ # session['history'] = []
98
 
99
  #print("sessionhist1",session['history'])
100
 
 
103
  if CHROMA_PATH is None:
104
  return render_template('chat.html', error="No vector database selected!", history=[])
105
 
106
+ # Load the selected Document Database
107
+ #embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
108
+ embedding_function = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")
109
  db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)
110
+ results_document = db.similarity_search_with_relevance_scores(query_text, k=3)
111
+
112
+ print("results------------------->",results_document)
113
+ context_text_document = "\n\n---\n\n".join([doc.page_content for doc, _score in results_document])
114
+
115
+
116
+ # # Load the selected Table Database
117
+ # #embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
118
+ # embedding_function = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")
119
+ # tdb = Chroma(persist_directory=TABLE_PATH, embedding_function=embedding_function)
120
+ # results_table = tdb.similarity_search_with_relevance_scores(query_text, k=2)
121
+
122
+ # print("results------------------->",results_table)
123
+ # context_text_table = "\n\n---\n\n".join([doc.page_content for doc, _score in results_table])
124
 
125
  # Prepare the prompt and query the model
126
  prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
127
+ prompt = prompt_template.format(context=context_text_document,question=query_text)
128
+ #prompt = prompt_template.format(context=context_text_document,table=context_text_table, question=query_text)
129
  print("results------------------->",prompt)
130
+
131
+
132
  + # Model definition and its usage
133
+ repo_id = "mistralai/Mistral-7B-Instruct-v0.3"
134
+ HFT = os.environ["HF_TOKEN"]
135
+ llm = HuggingFaceEndpoint(
136
+ repo_id=repo_id,
137
+ max_tokens=3000,
138
+ temperature=0.8,
139
+ huggingfacehub_api_token=HFT,
140
  )
 
141
 
142
+ data= llm(prompt)
143
+ #data = response.choices[0].message.content
144
+
145
  + # filtering the unnecessary context.
146
  if re.search(r'\bmention\b|\bnot mention\b|\bnot mentioned\b|\bnot contain\b|\bnot include\b|\bnot provide\b|\bdoes not\b|\bnot explicitly\b|\bnot explicitly mentioned\b', data, re.IGNORECASE):
147
  data = "We do not have information related to your query on our end."
148
 
 
209
 
210
  @app.route('/select-db/<db_name>', methods=['POST'])
211
  def select_db(db_name):
212
+
213
  + #Selecting the Document Vector DB
214
  global CHROMA_PATH
215
  print(f"Selected DB: {CHROMA_PATH}")
216
  CHROMA_PATH = os.path.join(VECTOR_DB_FOLDER, db_name)
217
  CHROMA_PATH = CHROMA_PATH.replace("\\", "/")
218
  print(f"Selected DB: {CHROMA_PATH}")
219
+
220
+ #Selecting the Table Vector DB
221
+ # global TABLE_PATH
222
+ # print(f"Selected DB: {TABLE_PATH}")
223
+ # TABLE_PATH = os.path.join(TABLE_DB_FOLDER, db_name)
224
+ # TABLE_PATH = TABLE_PATH.replace("\\", "/")
225
+ # print(f"Selected DB: {TABLE_PATH}")
226
+
227
+
228
  return redirect(url_for('chat'))
229
 
230
+ # @app.route('/update-dbs/<db_name>', methods=['GET','POST'])
231
+ # def update_db(db_name):
232
+ # if request.method == 'POST':
233
+ # db_name = request.form['db_name']
234
+
235
+ # # Get all files from the uploaded folder
236
+ # files = request.files.getlist('folder')
237
+ # if not files:
238
+ # return "No files uploaded", 400
239
+ # print(f"Selected DB: {db_name}")
240
+ # DB_PATH = os.path.join(VECTOR_DB_FOLDER, db_name)
241
+ # DB_PATH = DB_PATH.replace("\\", "/")
242
+ # print(f"Selected DB: {DB_PATH}")
243
+
244
+ # generate_data_store(DB_PATH, db_name)
245
+
246
+ # return redirect(url_for('list_dbs'))
247
+ # return render_template('update_db.html')
248
+
249
  if __name__ == "__main__":
250
+ app.run(debug=False, use_reloader=False)
251
+
252
+