WebashalarForML committed on
Commit bec7b04 · verified · 1 Parent(s): b0ec9c8

Update app.py

Files changed (1)
  1. app.py +165 -165
app.py CHANGED
@@ -1,166 +1,166 @@
- from flask import Flask, render_template, request, redirect, url_for, session
- import os
- from werkzeug.utils import secure_filename
- from retrival import generate_data_store
- from langchain_community.vectorstores import Chroma
- from langchain.embeddings import HuggingFaceEmbeddings
- from langchain.prompts import ChatPromptTemplate
- from huggingface_hub import InferenceClient
- from langchain.schema import Document
- from langchain_core.documents import Document
- from dotenv import load_dotenv
- import re
- import glob
- import shutil
- from werkzeug.utils import secure_filename
-
- app = Flask(__name__)
-
- # Set the secret key for session management
- app.secret_key = os.urandom(24)
-
- # Configurations
- UPLOAD_FOLDER = "uploads/"
- VECTOR_DB_FOLDER = "VectorDB/"
- app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
- os.makedirs(UPLOAD_FOLDER, exist_ok=True)
- os.makedirs(VECTOR_DB_FOLDER, exist_ok=True)
-
- # Global variables
- CHROMA_PATH = None
- PROMPT_TEMPLATE = """
- You are working with a retrieval-augmented generation (RAG) setup. Your task is to generate a response based on the context provided and the question asked. Consider only the following context strictly, and use it to answer the question. Do not include any external information.
-
- Context:
- {context}
-
- ---
-
- Question:
- {question}
-
- Response:
- """
- HFT = os.getenv('HF_TOKEN')
- client = InferenceClient(api_key=HFT)
-
- @app.route('/', methods=['GET'])
- def home():
-     return render_template('home.html')
-
- @app.route('/chat', methods=['GET', 'POST'])
- def chat():
-
-     if 'history' not in session:
-         session['history'] = []
-     print("sessionhist1",session['history'])
-
-     global CHROMA_PATH
-     old_db = session.get('old_db', None)
-     print(f"Selected DB: {CHROMA_PATH}")
-
-     if old_db != None:
-         if CHROMA_PATH != old_db:
-             session['history'] = []
-
-     #print("sessionhist1",session['history'])
-
-     if request.method == 'POST':
-         query_text = request.form['query_text']
-         if CHROMA_PATH is None:
-             return render_template('chat.html', error="No vector database selected!", history=[])
-
-         # Load the selected database
-         embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
-         db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)
-         results = db.similarity_search_with_relevance_scores(query_text, k=3)
-         context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
-
-         # Prepare the prompt and query the model
-         prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
-         prompt = prompt_template.format(context=context_text, question=query_text)
-         print("results------------------->",prompt)
-         response = client.chat.completions.create(
-             model="mistralai/Mistral-7B-Instruct-v0.3",
-             messages=[{"role": "system", "content": "You are an assistant specifically designed to generate responses based on the context provided. Your task is to answer questions strictly using the context without adding any external knowledge or information. Please ensure that your responses are relevant, accurate, and based solely on the given context."},
-                       {"role": "user", "content": prompt}],
-             max_tokens=500,
-             temperature=0.3
-         )
-         data = response.choices[0].message.content
-
-         if re.search(r'\bmention\b|\bnot mention\b|\bnot mentioned\b|\bnot contain\b|\bnot include\b|\bnot provide\b|\bdoes not\b|\bnot explicitly\b|\bnot explicitly mentioned\b', data, re.IGNORECASE):
-             data = "We do not have information related to your query on our end."
-
-         # Save the query and answer to the session history
-         session['history'].append((query_text, data))
-
-         # Mark the session as modified to ensure it gets saved
-         session.modified = True
-         print("sessionhist2",session['history'])
-
-         return render_template('chat.html', query_text=query_text, answer=data, history=session['history'],old_db=CHROMA_PATH)
-
-     return render_template('chat.html', history=session['history'], old_db=CHROMA_PATH)
-
- @app.route('/create-db', methods=['GET', 'POST'])
- def create_db():
-     if request.method == 'POST':
-         db_name = request.form['db_name']
-
-         # Get all files from the uploaded folder
-         files = request.files.getlist('folder')
-         if not files:
-             return "No files uploaded", 400
-
-         # Define the base upload path
-         upload_base_path = os.path.join(app.config['UPLOAD_FOLDER'], secure_filename(db_name))
-         #upload_base_path = upload_base_path.replace("\\", "/")
-         print(f"Base Upload Path: {upload_base_path}")
-         os.makedirs(upload_base_path, exist_ok=True)
-
-         # Save each file and recreate folder structure
-         for file in files:
-             print("file , files",files,file)
-             #relative_path = file.filename # This should contain the subfolder structure
-             file_path = os.path.join(upload_base_path)
-             #file_path = file_path.replace("\\", "/")
-
-             # Ensure the directory exists before saving the file
-             print(f"Saving to: {file_path}")
-             os.makedirs(os.path.dirname(file_path), exist_ok=True)
-
-
-             # Get the file path and save it
-             file_path = os.path.join(upload_base_path, secure_filename(file.filename))
-             file.save(file_path)
-             # with open(file_path, 'wb') as f:
-             # shutil.copyfileobj(file.stream, f)
-         # Generate datastore
-         generate_data_store(upload_base_path, db_name)
-
-         # # Clean up uploaded files (if needed)
-         # uploaded_files = glob.glob(os.path.join(app.config['UPLOAD_FOLDER'], '*'))
-         # for f in uploaded_files:
-         # os.remove(f)
-
-         return redirect(url_for('list_dbs'))
-
-     return render_template('create_db.html')
-
- @app.route('/list-dbs', methods=['GET'])
- def list_dbs():
-     vector_dbs = [name for name in os.listdir(VECTOR_DB_FOLDER) if os.path.isdir(os.path.join(VECTOR_DB_FOLDER, name))]
-     return render_template('list_dbs.html', vector_dbs=vector_dbs)
-
- @app.route('/select-db/<db_name>', methods=['POST'])
- def select_db(db_name):
-     global CHROMA_PATH
-     print(f"Selected DB: {CHROMA_PATH}")
-     CHROMA_PATH = os.path.join(VECTOR_DB_FOLDER, db_name)
-     CHROMA_PATH = CHROMA_PATH.replace("\\", "/")
-     print(f"Selected DB: {CHROMA_PATH}")
-     return redirect(url_for('chat'))
-
- if __name__ == "__main__":
+ from flask import Flask, render_template, request, redirect, url_for, session
+ import os
+ from werkzeug.utils import secure_filename
+ from retrival import generate_data_store
+ from langchain_community.vectorstores import Chroma
+ from langchain.embeddings import HuggingFaceEmbeddings
+ from langchain.prompts import ChatPromptTemplate
+ from huggingface_hub import InferenceClient
+ from langchain.schema import Document
+ from langchain_core.documents import Document
+ from dotenv import load_dotenv
+ import re
+ import glob
+ import shutil
+ from werkzeug.utils import secure_filename
+
+ app = Flask(__name__)
+
+ # Set the secret key for session management
+ app.secret_key = os.urandom(24)
+
+ # Configurations
+ UPLOAD_FOLDER = "uploads/"
+ VECTOR_DB_FOLDER = "VectorDB/"
+ app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
+ os.makedirs(UPLOAD_FOLDER, exist_ok=True)
+ os.makedirs(VECTOR_DB_FOLDER, exist_ok=True)
+
+ # Global variables
+ CHROMA_PATH = None
+ PROMPT_TEMPLATE = """
+ You are working with a retrieval-augmented generation (RAG) setup. Your task is to generate a response based on the context provided and the question asked. Consider only the following context strictly, and use it to answer the question. Do not include any external information.
+
+ Context:
+ {context}
+
+ ---
+
+ Question:
+ {question}
+
+ Response:
+ """
+ HFT = os.getenv('HF_TOKEN')
+ client = InferenceClient(api_key=HFT)
+
+ @app.route('/', methods=['GET'])
+ def home():
+     return render_template('home.html')
+
+ @app.route('/chat', methods=['GET', 'POST'])
+ def chat():
+
+     if 'history' not in session:
+         session['history'] = []
+     print("sessionhist1",session['history'])
+
+     global CHROMA_PATH
+     old_db = session.get('old_db', None)
+     print(f"Selected DB: {CHROMA_PATH}")
+
+     if old_db != None:
+         if CHROMA_PATH != old_db:
+             session['history'] = []
+
+     #print("sessionhist1",session['history'])
+
+     if request.method == 'POST':
+         query_text = request.form['query_text']
+         if CHROMA_PATH is None:
+             return render_template('chat.html', error="No vector database selected!", history=[])
+
+         # Load the selected database
+         embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
+         db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)
+         results = db.similarity_search_with_relevance_scores(query_text, k=3)
+         context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
+
+         # Prepare the prompt and query the model
+         prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
+         prompt = prompt_template.format(context=context_text, question=query_text)
+         print("results------------------->",prompt)
+         response = client.chat.completions.create(
+             model="mistralai/Mistral-7B-Instruct-v0.3",
+             messages=[{"role": "system", "content": "You are an assistant specifically designed to generate responses based on the context provided. Your task is to answer questions strictly using the context without adding any external knowledge or information. Please ensure that your responses are relevant, accurate, and based solely on the given context."},
+                       {"role": "user", "content": prompt}],
+             max_tokens=1000,
+             temperature=0.3
+         )
+         data = response.choices[0].message.content
+
+         if re.search(r'\bmention\b|\bnot mention\b|\bnot mentioned\b|\bnot contain\b|\bnot include\b|\bnot provide\b|\bdoes not\b|\bnot explicitly\b|\bnot explicitly mentioned\b', data, re.IGNORECASE):
+             data = "We do not have information related to your query on our end."
+
+         # Save the query and answer to the session history
+         session['history'].append((query_text, data))
+
+         # Mark the session as modified to ensure it gets saved
+         session.modified = True
+         print("sessionhist2",session['history'])
+
+         return render_template('chat.html', query_text=query_text, answer=data, history=session['history'],old_db=CHROMA_PATH)
+
+     return render_template('chat.html', history=session['history'], old_db=CHROMA_PATH)
+
+ @app.route('/create-db', methods=['GET', 'POST'])
+ def create_db():
+     if request.method == 'POST':
+         db_name = request.form['db_name']
+
+         # Get all files from the uploaded folder
+         files = request.files.getlist('folder')
+         if not files:
+             return "No files uploaded", 400
+
+         # if not exist
+         os.makedirs(UPLOAD_FOLDER, exist_ok=True)
+         # Define the base upload path
+         upload_base_path = os.path.join(app.config['UPLOAD_FOLDER'], secure_filename(db_name))
+         #upload_base_path = upload_base_path.replace("\\", "/")
+         print(f"Base Upload Path: {upload_base_path}")
+         os.makedirs(upload_base_path, exist_ok=True)
+
+         # Save each file and recreate folder structure
+         for file in files:
+             print("file , files",files,file)
+             #relative_path = file.filename # This should contain the subfolder structure
+             file_path = os.path.join(upload_base_path)
+             #file_path = file_path.replace("\\", "/")
+
+             # Ensure the directory exists before saving the file
+             print(f"Saving to: {file_path}")
+             os.makedirs(os.path.dirname(file_path), exist_ok=True)
+
+
+             # Get the file path and save it
+             file_path = os.path.join(upload_base_path, secure_filename(file.filename))
+             file.save(file_path)
+
+         # Generate datastore
+         generate_data_store(upload_base_path, db_name)
+
+         # # Clean up uploaded files (if needed)
+         #if os.path.exists(app.config['UPLOAD_FOLDER']):
+         # shutil.rmtree(app.config['UPLOAD_FOLDER'])
+
+         return redirect(url_for('list_dbs'))
+
+     return render_template('create_db.html')
+
+ @app.route('/list-dbs', methods=['GET'])
+ def list_dbs():
+     vector_dbs = [name for name in os.listdir(VECTOR_DB_FOLDER) if os.path.isdir(os.path.join(VECTOR_DB_FOLDER, name))]
+     return render_template('list_dbs.html', vector_dbs=vector_dbs)
+
+ @app.route('/select-db/<db_name>', methods=['POST'])
+ def select_db(db_name):
+     global CHROMA_PATH
+     print(f"Selected DB: {CHROMA_PATH}")
+     CHROMA_PATH = os.path.join(VECTOR_DB_FOLDER, db_name)
+     CHROMA_PATH = CHROMA_PATH.replace("\\", "/")
+     print(f"Selected DB: {CHROMA_PATH}")
+     return redirect(url_for('chat'))
+
+ if __name__ == "__main__":
      app.run(debug=False, use_reloader=False)
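
The substantive changes in this commit are the max_tokens increase from 500 to 1000 in the chat-completion call and the added os.makedirs(UPLOAD_FOLDER, exist_ok=True) guard in create_db; the remaining edits only swap commented-out cleanup code. As a minimal sketch (not part of the commit), the snippet below exercises the same retrieval-plus-generation path outside Flask with the new token budget. The store path VectorDB/my_db, the example query, and the shortened system prompt are hypothetical placeholders; it assumes HF_TOKEN is set and that the installed huggingface_hub and LangChain versions expose the same InferenceClient.chat.completions.create, HuggingFaceEmbeddings, and Chroma interfaces already used in app.py.

import os

from huggingface_hub import InferenceClient
from langchain_community.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings

CHROMA_PATH = "VectorDB/my_db"  # hypothetical store created earlier via /create-db
query_text = "What does the uploaded document say about pricing?"  # example query

# Retrieve the top-3 chunks, mirroring the /chat route.
embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)
results = db.similarity_search_with_relevance_scores(query_text, k=3)
context_text = "\n\n---\n\n".join(doc.page_content for doc, _score in results)

# Generate with the 1000-token budget introduced in this commit.
client = InferenceClient(api_key=os.getenv("HF_TOKEN"))
response = client.chat.completions.create(
    model="mistralai/Mistral-7B-Instruct-v0.3",
    messages=[
        # Shortened stand-in for the longer system prompt used in app.py.
        {"role": "system", "content": "Answer strictly from the provided context."},
        {"role": "user", "content": f"Context:\n{context_text}\n\n---\n\nQuestion:\n{query_text}\n\nResponse:"},
    ],
    max_tokens=1000,  # was 500 before this commit
    temperature=0.3,
)
print(response.choices[0].message.content)

With the app itself running (Flask's default port unless configured otherwise), the equivalent flow is a POST to /select-db/<db_name> followed by a POST of query_text to /chat.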