WebashalarForML committed on
Commit
1f2ba2d
·
verified ·
1 Parent(s): 0698b30

Upload 10 files

Browse files
app_3.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, render_template, request, redirect, url_for, session
2
+ import os
3
+ from werkzeug.utils import secure_filename
4
+ from retrival import generate_data_store
5
+ from langchain_community.vectorstores import Chroma
6
+ from langchain.embeddings import HuggingFaceEmbeddings
7
+ from langchain.prompts import ChatPromptTemplate
8
+ from huggingface_hub import InferenceClient
9
+ from langchain.schema import Document
10
+ from langchain_core.documents import Document
11
+ from dotenv import load_dotenv
12
+ import re
13
+ import glob
14
+ import shutil
15
+ from werkzeug.utils import secure_filename
16
+
17
app = Flask(__name__)

# Set the secret key for session management.
# NOTE(review): a random key per process means sessions do not survive a restart.
app.secret_key = os.urandom(24)

# Configurations
UPLOAD_FOLDER = "uploads/"
VECTOR_DB_FOLDER = "VectorDB/"
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
os.makedirs(VECTOR_DB_FOLDER, exist_ok=True)

# Global variables
# Path of the currently selected Chroma database; set by select_db().
CHROMA_PATH = None
PROMPT_TEMPLATE = """
You are working with a retrieval-augmented generation (RAG) setup. Your task is to generate a response based on the context provided and the question asked. Consider only the following context strictly, and use it to answer the question. Do not include any external information.

Context:
{context}

---

Question:
{question}

Response:
"""
# load_dotenv was imported but never invoked, so a token stored in a local
# .env file was never visible to os.getenv. Variables already present in the
# environment are left untouched by default.
load_dotenv()
HFT = os.getenv('HF_TOKEN')
client = InferenceClient(api_key=HFT)
46
+
47
@app.route('/', methods=['GET'])
def home():
    """Serve the landing page rendered from ``home.html``."""
    landing_page = render_template('home.html')
    return landing_page
50
+
51
@app.route('/chat', methods=['GET', 'POST'])
def chat():
    """Render the chat page and answer a submitted query from the selected DB.

    GET renders ``chat.html`` with the session's query history.
    POST reads ``query_text`` from the form, retrieves the three most similar
    chunks from the Chroma store at ``CHROMA_PATH``, sends the filled prompt
    to the hosted chat model, and appends ``(query, answer)`` to the session
    history before rendering the page.

    Returns:
        The rendered ``chat.html`` page. When no database is selected on POST,
        the page is rendered with an error message and an empty history.
    """
    global CHROMA_PATH

    if 'history' not in session:
        session['history'] = []
    print("sessionhist1", session['history'])

    old_db = session.get('old_db', None)
    print(f"Selected DB: {CHROMA_PATH}")

    # Start a fresh conversation when the selected database differs from the
    # one this session last used.
    if old_db is not None and CHROMA_PATH != old_db:
        session['history'] = []
    # Record the database this session is now using. Without this write
    # 'old_db' is never present in the session and the reset above never runs.
    session['old_db'] = CHROMA_PATH

    if request.method == 'POST':
        query_text = request.form['query_text']
        if CHROMA_PATH is None:
            return render_template('chat.html', error="No vector database selected!", history=[])

        # Load the selected database
        embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)
        results = db.similarity_search_with_relevance_scores(query_text, k=3)
        context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])

        # Prepare the prompt and query the model
        prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
        prompt = prompt_template.format(context=context_text, question=query_text)
        print("results------------------->", prompt)
        response = client.chat.completions.create(
            model="mistralai/Mistral-7B-Instruct-v0.3",
            messages=[
                {"role": "system", "content": "You are an assistant specifically designed to generate responses based on the context provided. Your task is to answer questions strictly using the context without adding any external knowledge or information. Please ensure that your responses are relevant, accurate, and based solely on the given context."},
                {"role": "user", "content": prompt},
            ],
            max_tokens=500,
            temperature=0.3,
        )
        data = response.choices[0].message.content

        # Replace answers that look like "the context does not cover this"
        # with a fixed message.
        # NOTE(review): the first alternative matches any answer containing
        # the word "mention", so some valid answers are replaced too.
        if re.search(r'\bmention\b|\bnot mention\b|\bnot mentioned\b|\bnot contain\b|\bnot include\b|\bnot provide\b|\bdoes not\b|\bnot explicitly\b|\bnot explicitly mentioned\b', data, re.IGNORECASE):
            data = "We do not have information related to your query on our end."

        # Save the query and answer to the session history
        session['history'].append((query_text, data))

        # In-place list mutation is not detected by the session object, so
        # mark it as modified to ensure it gets saved.
        session.modified = True
        print("sessionhist2", session['history'])

        return render_template('chat.html', query_text=query_text, answer=data, history=session['history'],old_db=CHROMA_PATH)

    return render_template('chat.html', history=session['history'], old_db=CHROMA_PATH)
105
+
106
@app.route('/create-db', methods=['GET', 'POST'])
def create_db():
    """Show the create-database form (GET) or build a database from an upload (POST).

    On POST the uploaded files are stored under
    ``<UPLOAD_FOLDER>/<sanitized db_name>/`` and ``generate_data_store`` is
    called on that folder.

    Returns:
        The rendered form on GET, a redirect to the database list on success,
        or a ``400`` response when the name is unusable or no file was sent.
    """
    if request.method != 'POST':
        return render_template('create_db.html')

    # The name ends up in filesystem paths (upload folder and the vector DB
    # directory, which is deleted and recreated), so sanitize it once and use
    # the sanitized form everywhere.
    safe_db_name = secure_filename(request.form['db_name'])
    if not safe_db_name:
        return "Invalid database name", 400

    # Browsers submit one empty file part when nothing is selected, so filter
    # on the filename rather than on the list being non-empty.
    files = [f for f in request.files.getlist('folder') if f.filename]
    if not files:
        return "No files uploaded", 400

    # Define the base upload path
    upload_base_path = os.path.join(app.config['UPLOAD_FOLDER'], safe_db_name)
    print(f"Base Upload Path: {upload_base_path}")
    os.makedirs(upload_base_path, exist_ok=True)

    # Save each file. secure_filename flattens any sub-folder structure in
    # the uploaded name into a single file name.
    for file in files:
        filename = secure_filename(file.filename)
        if not filename:
            # Nothing usable is left of the name after sanitizing; skip it.
            continue
        file_path = os.path.join(upload_base_path, filename)
        print(f"Saving to: {file_path}")
        file.save(file_path)

    # Generate datastore
    generate_data_store(upload_base_path, safe_db_name)

    return redirect(url_for('list_dbs'))
150
+
151
@app.route('/list-dbs', methods=['GET'])
def list_dbs():
    """Render ``list_dbs.html`` with every sub-directory of ``VECTOR_DB_FOLDER``."""
    vector_dbs = []
    for entry in os.listdir(VECTOR_DB_FOLDER):
        # Only directories are offered as databases; plain files are ignored.
        if os.path.isdir(os.path.join(VECTOR_DB_FOLDER, entry)):
            vector_dbs.append(entry)
    return render_template('list_dbs.html', vector_dbs=vector_dbs)
155
+
156
@app.route('/select-db/<db_name>', methods=['POST'])
def select_db(db_name):
    """Make ``db_name`` the active vector database and go to the chat page.

    Args:
        db_name: Name of a directory directly inside ``VECTOR_DB_FOLDER``.

    Returns:
        A redirect to the chat page, or a ``404`` response when no such
        database directory exists.
    """
    global CHROMA_PATH
    print(f"Selected DB: {CHROMA_PATH}")

    # Accept only names that are real entries of the database folder. A
    # listing never contains "." or "..", so this also rejects traversal
    # attempts and names of databases that do not exist.
    candidate = os.path.join(VECTOR_DB_FOLDER, db_name)
    if db_name not in os.listdir(VECTOR_DB_FOLDER) or not os.path.isdir(candidate):
        return "Unknown vector database", 404

    # Normalize Windows separators so the stored path has a single form.
    CHROMA_PATH = candidate.replace("\\", "/")
    print(f"Selected DB: {CHROMA_PATH}")
    return redirect(url_for('chat'))
164
+
165
# Start the Flask server only when this file is executed as a script.
if __name__ == "__main__":
    app.run(use_reloader=False, debug=False)
requirements2.txt ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==24.1.0
2
+ aiohappyeyeballs==2.4.4
3
+ aiohttp==3.11.11
4
+ aiosignal==1.3.2
5
+ annotated-types==0.7.0
6
+ ansicon==1.89.0
7
+ anyio==4.7.0
8
+ asgiref==3.8.1
9
+ attrs==24.3.0
10
+ backoff==2.2.1
11
+ bcrypt==4.2.1
12
+ beautifulsoup4==4.12.3
13
+ blessed==1.20.0
14
+ blinker==1.9.0
15
+ build==1.2.2.post1
16
+ cachetools==5.5.0
17
+ certifi==2024.12.14
18
+ cffi==1.17.1
19
+ chardet==5.2.0
20
+ charset-normalizer==3.4.0
21
+ chroma-hnswlib==0.7.3
22
+ chromadb==0.5.0
23
+ click==8.1.8
24
+ colorama==0.4.6
25
+ coloredlogs==15.0.1
26
+ cryptography==44.0.0
27
+ dataclasses-json==0.6.7
28
+ Deprecated==1.2.15
29
+ distro==1.9.0
30
+ durationpy==0.9
31
+ emoji==2.14.0
32
+ eval_type_backport==0.2.2
33
+ fastapi==0.115.6
34
+ filelock==3.16.1
35
+ filetype==1.2.0
36
+ Flask==3.1.0
37
+ flatbuffers==24.12.23
38
+ frozenlist==1.5.0
39
+ fsspec==2024.12.0
40
+ google-auth==2.37.0
41
+ googleapis-common-protos==1.66.0
42
+ greenlet==3.1.1
43
+ grpcio==1.68.1
44
+ h11==0.14.0
45
+ html5lib==1.1
46
+ httpcore==1.0.7
47
+ httptools==0.6.4
48
+ httpx==0.27.2
49
+ httpx-sse==0.4.0
50
+ huggingface-hub==0.27.0
51
+ humanfriendly==10.0
52
+ idna==3.10
53
+ importlib_metadata==8.5.0
54
+ importlib_resources==6.4.5
55
+ InstructorEmbedding==1.0.1
56
+ itsdangerous==2.2.0
57
+ Jinja2==3.1.5
58
+ jinxed==1.3.0
59
+ jiter==0.8.2
60
+ joblib==1.4.2
61
+ jsonpatch==1.33
62
+ jsonpath-python==1.0.6
63
+ jsonpointer==3.0.0
64
+ kubernetes==31.0.0
65
+ langchain==0.3.13
66
+ langchain-community==0.3.13
67
+ langchain-core==0.3.28
68
+ langchain-huggingface==0.1.2
69
+ langchain-openai==0.2.14
70
+ langchain-text-splitters==0.3.4
71
+ langdetect==1.0.9
72
+ langsmith==0.2.4
73
+ lxml==5.3.0
74
+ markdown-it-py==3.0.0
75
+ MarkupSafe==3.0.2
76
+ marshmallow==3.23.2
77
+ mdurl==0.1.2
78
+ mistralai==1.2.5
79
+ mmh3==4.1.0
80
+ monotonic==1.6
81
+ mpmath==1.3.0
82
+ multidict==6.1.0
83
+ mypy-extensions==1.0.0
84
+ nest-asyncio==1.6.0
85
+ networkx==3.4.2
86
+ nltk==3.9.1
87
+ numpy==1.26.4
88
+ oauthlib==3.2.2
89
+ olefile==0.47
90
+ onnxruntime==1.17.1
91
+ openai==1.58.1
92
+ opentelemetry-api==1.29.0
93
+ opentelemetry-exporter-otlp-proto-common==1.29.0
94
+ opentelemetry-exporter-otlp-proto-grpc==1.29.0
95
+ opentelemetry-instrumentation==0.50b0
96
+ opentelemetry-instrumentation-asgi==0.50b0
97
+ opentelemetry-instrumentation-fastapi==0.50b0
98
+ opentelemetry-proto==1.29.0
99
+ opentelemetry-sdk==1.29.0
100
+ opentelemetry-semantic-conventions==0.50b0
101
+ opentelemetry-util-http==0.50b0
102
+ orjson==3.10.12
103
+ overrides==7.7.0
104
+ packaging==24.2
105
+ pillow==11.0.0
106
+ pinecone-client==5.0.1
107
+ pinecone-notebooks==0.1.1
108
+ pinecone-plugin-inference==1.1.0
109
+ pinecone-plugin-interface==0.0.7
110
+ pinecone-text==0.9.0
111
+ posthog==3.7.4
112
+ propcache==0.2.1
113
+ protobuf==5.29.2
114
+ psutil==6.1.1
115
+ pyasn1==0.6.1
116
+ pyasn1_modules==0.4.1
117
+ pycparser==2.22
118
+ pydantic==2.9.2
119
+ pydantic-settings==2.7.0
120
+ pydantic_core==2.23.4
121
+ Pygments==2.18.0
122
+ pypdf==5.1.0
123
+ PyPika==0.48.9
124
+ pyproject_hooks==1.2.0
125
+ pyreadline3==3.5.4
126
+ python-dateutil==2.9.0.post0
127
+ python-docx==1.1.2
128
+ python-dotenv==1.0.1
129
+ python-iso639==2024.10.22
130
+ python-magic==0.4.27
131
+ python-magic-bin==0.4.14
132
+ python-oxmsg==0.0.1
133
+ PyYAML==6.0.2
134
+ RapidFuzz==3.11.0
135
+ readchar==4.2.1
136
+ regex==2024.11.6
137
+ requests==2.32.3
138
+ requests-oauthlib==2.0.0
139
+ requests-toolbelt==1.0.0
140
+ revel==0.9.1
141
+ rich==13.9.4
142
+ rsa==4.9
143
+ safetensors==0.4.5
144
+ scikit-learn==1.6.0
145
+ scipy==1.14.1
146
+ sentence-transformers==3.3.1
147
+ setuptools==75.6.0
148
+ shellingham==1.5.4
149
+ six==1.17.0
150
+ sniffio==1.3.1
151
+ soupsieve==2.6
152
+ SQLAlchemy==2.0.36
153
+ starlette==0.41.3
154
+ sympy==1.13.1
155
+ tenacity==9.0.0
156
+ threadpoolctl==3.5.0
157
+ tiktoken==0.8.0
158
+ tokenizers==0.21.0
159
+ torch==2.5.1
160
+ tqdm==4.67.1
161
+ transformers==4.47.1
162
+ typer==0.15.1
163
+ types-requests==2.32.0.20241016
164
+ typing-inspect==0.9.0
165
+ typing_extensions==4.12.2
166
+ u==1.0
167
+ unstructured==0.16.11
168
+ unstructured-client==0.28.1
169
+ urllib3==2.3.0
170
+ uvicorn==0.34.0
171
+ watchfiles==1.0.3
172
+ wcwidth==0.2.13
173
+ webencodings==0.5.1
174
+ websocket-client==1.8.0
175
+ websockets==14.1
176
+ Werkzeug==3.1.3
177
+ wget==3.2
178
+ wrapt==1.17.0
179
+ yarl==1.18.3
180
+ zipp==3.21.0
retrival.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.document_loaders import DirectoryLoader
2
+ from langchain.embeddings import HuggingFaceInstructEmbeddings,HuggingFaceEmbeddings # for embedding task
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter # for converting the large documents into smaller chunks
4
+ from langchain.schema import Document
5
+ from langchain_core.documents import Document
6
+ from langchain_openai import OpenAIEmbeddings
7
+ from langchain_community.vectorstores import Chroma
8
+ import openai
9
+ import openai
10
+ import os
11
+ import shutil
12
+ import uuid
13
+
14
+
15
# Configurations: storage locations, created on import when missing.
UPLOAD_FOLDER = "./uploads"
VECTOR_DB_FOLDER = "./VectorDB"
for _folder in (UPLOAD_FOLDER, VECTOR_DB_FOLDER):
    os.makedirs(_folder, exist_ok=True)
20
+
21
+
22
def load_document(data_path):
    """Load the files matching ``*.*`` in *data_path* and return the documents.

    Args:
        data_path: Directory handed to ``DirectoryLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for that directory.
    """
    directory_loader = DirectoryLoader(data_path, glob="*.*")
    print("loader", directory_loader)
    return directory_loader.load()
29
+
30
+ # Creating the chunks of Data from the knowledge
31
def split_text(documents: list[Document], *, chunk_size: int = 1000, chunk_overlap: int = 500):
    """Split *documents* into overlapping chunks for embedding.

    Args:
        documents: Documents to split.
        chunk_size: Maximum chunk length, measured with ``len``.
        chunk_overlap: Number of characters shared by consecutive chunks.

    Returns:
        The list of chunks produced by ``RecursiveCharacterTextSplitter``;
        ``add_start_index=True`` is passed to the splitter.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        add_start_index=True,
    )
    chunks = text_splitter.split_documents(documents)
    print(f"Split {len(documents)} documents into {len(chunks)} chunks.")

    return chunks
42
+
43
+ # # Chroma for creating the vector DB which we will use for searching relevant data.
44
+ # def save_to_chroma(chunks: list[Document],name: str):
45
+ # print
46
+ # CHROMA_PATH = f"./VectorDB/chroma_{name}"
47
+ # # Clear out the database first.
48
+ # if os.path.exists(CHROMA_PATH):
49
+ # shutil.rmtree(CHROMA_PATH)
50
+
51
+ # # Initialize SBERT embedding function
52
+ # embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
53
+ # db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)
54
+
55
+ # # Add documents and persist the database
56
+ # db.add_documents(chunks)
57
+ # db.persist()
58
+ # # Return the database instance or a success status
59
+ # return db
60
+
61
async def save_to_chroma(chunks: list[Document], name: str):
    """Rebuild the Chroma database ``./VectorDB/chroma_<name>`` from *chunks*.

    Any existing directory at that path is removed first. This is a coroutine
    function, so callers must await it (or run it in an event loop) for any
    of the work to happen.

    Args:
        chunks: Document chunks to embed and store.
        name: Suffix of the database directory name.

    Returns:
        The ``Chroma`` instance on success, or ``None`` when an exception was
        raised while building the store (the error is printed).
    """
    target_dir = f"./VectorDB/chroma_{name}"

    # Clear out the database first
    if os.path.exists(target_dir):
        shutil.rmtree(target_dir)

    try:
        # SBERT embedding function used for the stored vectors.
        store = Chroma(
            persist_directory=target_dir,
            embedding_function=HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2"),
        )

        print("Adding documents to the database...")
        store.add_documents(chunks)
        print("Persisting the database...")
        store.persist()
        print("Database successfully saved.")
    except Exception as e:
        print("Error while saving to Chroma:", e)
        return None
    return store
84
+
85
def get_unique_sources(chroma_path):
    """Return the distinct ``source`` values stored in a Chroma database.

    Args:
        chroma_path: Directory of the persisted Chroma database.

    Returns:
        A list of the unique ``source`` metadata values, in no particular
        order. Entries without metadata or without a ``source`` key are
        skipped.
    """
    # Load the Chroma database
    db = Chroma(persist_directory=chroma_path)

    # Retrieve all metadata from the database
    metadata_list = db.get()['metadatas'] or []

    # An entry stored without metadata would make the membership test raise
    # TypeError if it is None, so skip falsy entries before looking for 'source'.
    unique_sources = {
        metadata['source']
        for metadata in metadata_list
        if metadata and 'source' in metadata
    }
    return list(unique_sources)
95
+
96
def generate_data_store(file_path, db_name):
    """Load, split and store the documents in *file_path* as database *db_name*.

    Each stage prints its outcome; on the first failing stage the function
    prints the error and returns without running the later stages.

    Args:
        file_path: Directory containing the documents to index.
        db_name: Name passed to ``save_to_chroma`` for the database directory.

    Returns:
        None.
    """
    # Imported locally so this fix does not depend on the file's import block.
    import asyncio

    print(f"filepath===>{file_path} db_name =====>{db_name}")
    try:
        documents = load_document(file_path)
        print("Documents loaded successfully.")
    except Exception as e:
        print(f"Error loading documents: {e}")
        return

    try:
        chunks = split_text(documents)
        print(f"Text split into {len(chunks)} chunks.")
    except Exception as e:
        print(f"Error splitting text: {e}")
        return

    try:
        # save_to_chroma is a coroutine function: calling it only creates a
        # coroutine object, so it has to be driven by an event loop for the
        # database to be written at all.
        # NOTE(review): asyncio.run raises RuntimeError when called from a
        # thread that already runs an event loop; that case is reported by
        # the handler below.
        db = asyncio.run(save_to_chroma(chunks, db_name))
    except Exception as e:
        print(f"Error saving to Chroma: {e}")
        return

    # save_to_chroma reports its own failures by returning None.
    if db is None:
        print(f"Error saving to Chroma: database {db_name} was not created.")
        return
    print(f"Data saved to Chroma for database {db_name}.")
119
+ # def main():
120
+ # data_path = "H:\\DEV PATEL\\RAG Project\\data1"
121
+ # db_name = "Product_data"
122
+ # generate_data_store(data_path,db_name)
123
+
124
+ # if __name__ == "__main__":
125
+ # main()
126
+
127
+
templates/base.html ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Document AI</title>
7
+ <!-- Bootstrap CSS -->
8
+ <link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet">
9
+ <style>
10
+ body {
11
+ background-color: #1e1e2f;
12
+ color: #f5f5f5;
13
+ font-family: 'Arial', sans-serif;
14
+ }
15
+
16
+ .navbar {
17
+ background-color: #2c2c3e;
18
+ border-bottom: 2px solid #444;
19
+ }
20
+
21
+ .navbar-brand {
22
+ font-weight: bold;
23
+ color: #f5f5f5 !important;
24
+ }
25
+
26
+ .navbar-nav .nav-link {
27
+ color: #cfcfcf !important;
28
+ }
29
+
30
+ .navbar-nav .nav-link:hover {
31
+ color: #ffffff !important;
32
+ }
33
+
34
+ .container {
35
+ margin-top: 50px;
36
+ }
37
+
38
+ .card {
39
+ background-color: #2c2c3e;
40
+ border: none;
41
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
42
+ transition: transform 0.3s ease;
43
+ }
44
+
45
+ .card:hover {
46
+ transform: scale(1.05);
47
+ }
48
+
49
+ .card-title {
50
+ color: #ffffff;
51
+ }
52
+
53
+ .card-body {
54
+ text-align: center;
55
+ }
56
+
57
+ .btn-primary {
58
+ background-color: #4c4cff;
59
+ border-color: #4c4cff;
60
+ }
61
+
62
+ .btn-primary:hover {
63
+ background-color: #3838e8;
64
+ border-color: #3838e8;
65
+ }
66
+
67
+ footer {
68
+ margin-top: 50px;
69
+ text-align: center;
70
+ color: #888;
71
+ }
72
+ </style>
73
+ </head>
74
+
75
+ <body>
76
+ <!-- Navigation Bar -->
77
+ <nav class="navbar navbar-expand-lg">
78
+ <div class="container-fluid">
79
+ <a class="navbar-brand" href="/">Document AI</a>
80
+ <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarNav"
81
+ aria-controls="navbarNav" aria-expanded="false" aria-label="Toggle navigation">
82
+ <span class="navbar-toggler-icon"></span>
83
+ </button>
84
+ <div class="collapse navbar-collapse" id="navbarNav">
85
+ <ul class="navbar-nav ms-auto">
86
+ <li class="nav-item">
87
+ <a class="nav-link" href="/">Home</a>
88
+ </li>
89
+ <li class="nav-item">
90
+ <a class="nav-link" href="/create-db">Create DB</a>
91
+ </li>
92
+ <li class="nav-item">
93
+ <a class="nav-link" href="/list-dbs">List DBs</a>
94
+ </li>
95
+ <li class="nav-item">
96
+ <a class="nav-link" href="/chat">Chat</a>
97
+ </li>
98
+ </ul>
99
+ </div>
100
+ </div>
101
+ </nav>
102
+
103
+ <!-- Main Content -->
104
+ <div class="container">
105
+ {% block content %}
106
+ <!-- Page-specific content will go here -->
107
+ {% endblock %}
108
+ </div>
109
+
110
+ <!-- Footer -->
111
+ <footer>
112
+ <p>&copy; 2024 Document AI. All rights reserved.</p>
113
+ </footer>
114
+
115
+ <!-- Bootstrap JS -->
116
+ <script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/js/bootstrap.bundle.min.js"></script>
117
+
118
+ </body>
119
+ </html>
templates/chat.html ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% extends 'base.html' %}
2
+
3
+ {% block content %}
4
+
5
+ <div class="container">
6
+ <div class="row justify-content-center">
7
+ <!-- Left Pane -->
8
+ <div class="col-md-5">
9
+ <div class="card">
10
+ <div class="card-body">
11
+ <h5 class="card-title">Document AI</h5>
12
+ <p class="card-text">Enter a query and get an answer based on the stored context.</p>
13
+
14
+ <form method="POST" action="{{ url_for('chat') }}">
15
+ <div class="form-group">
16
+ <input type="text" name="query_text" placeholder="Enter your query" value="{{ query_text }}"
17
+ required class="form-control">
18
+ </div>
19
+ <div class="form-group mt-2">
20
+ <button type="submit" class="btn btn-primary">Submit</button>
21
+ </div>
22
+ </form>
23
+
24
+ {% if answer %}
25
+ <div class="answer-section mt-3">
26
+ <h6>Answer:</h6>
27
+ <div class="answer">{{ answer }}</div>
28
+ </div>
29
+ {% endif %}
30
+ </div>
31
+ </div>
32
+ </div>
33
+
34
+ <!-- Right Pane -->
35
+ <div class="col-md-5">
36
+ <div class="card">
37
+ <div class="card-body">
38
+ <h5 class="card-title">Previous Queries</h5>
39
+ <div class="history-section">
40
+ {% for question, answer in history %}
41
+ <div class="card mb-3">
42
+ <div class="card-body">
43
+ <div class="question">
44
+ <strong>Query:</strong> {{ question }}
45
+ </div>
46
+ <strong><hr></strong>
47
+ <div class="answer mt-2">
48
+ <strong>Answer:</strong> {{ answer }}
49
+ </div>
50
+ </div>
51
+ </div>
52
+ <!-- {% if not loop.last %}
53
+ <hr>
54
+ {% endif %} -->
55
+ {% endfor %}
56
+ </div>
57
+ </div>
58
+ </div>
59
+ </div>
60
+ </div>
61
+ </div>
62
+ {% endblock %}
templates/create_db.html ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% extends 'base.html' %}
2
+
3
+ {% block content %}
4
+ <div class="container mt-5">
5
+ <div class="card bg-dark text-white shadow">
6
+ <div class="card-header">
7
+ <h3>Create a New Vector Database</h3>
8
+ </div>
9
+ <div class="card-body">
10
+ <form method="post" enctype="multipart/form-data">
11
+ <!-- Database Name Input -->
12
+ <div class="form-group">
13
+ <label for="db_name" class="form-label">Database Name</label>
14
+ <input type="text" id="db_name" name="db_name" class="form-control" placeholder="Enter database name" required>
15
+ </div>
16
+
17
+ <!-- Folder Upload Input -->
18
+ <div class="form-group mt-3">
19
+ <label for="folder" class="form-label">Upload Folder</label>
20
+ <input type="file" id="folder" name="folder" class="form-control" webkitdirectory directory multiple required>
21
+ <small class="text-muted">Note: Folder upload is supported only in Chrome and Edge browsers.</small>
22
+ </div>
23
+
24
+ <!-- Submit Button -->
25
+ <div class="mt-4 text-center">
26
+ <button type="submit" class="btn btn-primary px-5">Create</button>
27
+ </div>
28
+ </form>
29
+ </div>
30
+ </div>
31
+ </div>
32
+ {% endblock %}
templates/home.html ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% extends 'base.html' %}
2
+
3
+ {% block content %}
4
+ <div class="row justify-content-center">
5
+ <div class="col-md-6">
6
+ <div class="card">
7
+ <div class="card-body">
8
+ <h5 class="card-title">Create a New Database</h5>
9
+ <p class="card-text">Upload your documents and create a vector database for retrieval.</p>
10
+ <a href="/create-db" class="btn btn-primary">Go to Create DB</a>
11
+ </div>
12
+ </div>
13
+ </div>
14
+ <div class="col-md-6">
15
+ <div class="card">
16
+ <div class="card-body">
17
+ <h5 class="card-title">Chat with AI</h5>
18
+ <p class="card-text">Ask questions and get answers based on your vector database.</p>
19
+ <a href="/list-dbs" class="btn btn-primary">Go to List of DB</a>
20
+ </div>
21
+ </div>
22
+ </div>
23
+ </div>
24
+ {% endblock %}
templates/index.html ADDED
@@ -0,0 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Query Answering System</title>
7
+ <link href="https://fonts.googleapis.com/css2?family=Poppins:wght@300;400;600&display=swap" rel="stylesheet">
8
+ <style>
9
+ body {
10
+ font-family: 'Poppins', sans-serif;
11
+ margin: 0;
12
+ padding: 0;
13
+ background-color: #121212;
14
+ color: #f5f5f5;
15
+ display: flex;
16
+ justify-content: center;
17
+ align-items: center;
18
+ height: 100vh;
19
+ overflow: hidden;
20
+ }
21
+
22
+ .container {
23
+ display: flex;
24
+ justify-content: space-between;
25
+ width: 100%;
26
+ max-width: 1400px;
27
+ height: 100%;
28
+ }
29
+
30
+ .left-pane {
31
+ width: 45%;
32
+ background-color: #1f1f1f;
33
+ border-radius: 10px;
34
+ box-shadow: 0 0 20px rgba(0, 0, 0, 0.5);
35
+ padding: 30px;
36
+ margin: 20px;
37
+ max-height: 90vh;
38
+ overflow-y: auto;
39
+ }
40
+
41
+ .right-pane {
42
+ width: 45%;
43
+ background-color: #1f1f1f;
44
+ border-radius: 10px;
45
+ box-shadow: 0 0 20px rgba(0, 0, 0, 0.5);
46
+ padding: 30px;
47
+ margin: 20px;
48
+ max-height: 90vh;
49
+ overflow-y: auto;
50
+ position: sticky;
51
+ top: 0;
52
+ }
53
+
54
+ .header {
55
+ margin-bottom: 20px;
56
+ }
57
+
58
+ .header h1 {
59
+ font-size: 32px;
60
+ font-weight: 600;
61
+ margin-bottom: 10px;
62
+ color: #f5f5f5;
63
+ }
64
+
65
+ .header p {
66
+ font-size: 14px;
67
+ color: #bbb;
68
+ }
69
+
70
+ .form-group {
71
+ margin-bottom: 20px;
72
+ }
73
+
74
+ .form-group input[type="text"] {
75
+ width: 100%;
76
+ padding: 12px;
77
+ border: 2px solid #444;
78
+ background-color: #333;
79
+ color: #fff;
80
+ border-radius: 8px;
81
+ font-size: 16px;
82
+ transition: all 0.3s ease;
83
+ }
84
+
85
+ .form-group input[type="text"]:focus {
86
+ border-color: #007bff;
87
+ outline: none;
88
+ }
89
+
90
+ .form-group button {
91
+ padding: 12px 25px;
92
+ background-color: #007bff;
93
+ color: #fff;
94
+ border: none;
95
+ border-radius: 8px;
96
+ cursor: pointer;
97
+ font-size: 16px;
98
+ transition: background-color 0.3s ease;
99
+ }
100
+
101
+ .form-group button:hover {
102
+ background-color: #0056b3;
103
+ }
104
+
105
+ .answer-section {
106
+ margin-top: 30px;
107
+ background-color: #333;
108
+ padding: 20px;
109
+ border-radius: 8px;
110
+ box-shadow: 0 4px 10px rgba(0, 0, 0, 0.3);
111
+ }
112
+
113
+ .answer-section h3 {
114
+ font-size: 22px;
115
+ font-weight: 600;
116
+ color: #f5f5f5;
117
+ }
118
+
119
+ .answer {
120
+ padding: 15px;
121
+ background-color: #444;
122
+ border-radius: 8px;
123
+ font-size: 16px;
124
+ white-space: pre-wrap;
125
+ color: #ddd;
126
+ }
127
+
128
+ .sources {
129
+ margin-top: 15px;
130
+ font-size: 14px;
131
+ color: #888;
132
+ }
133
+
134
+ .history-section {
135
+ margin-top: 30px;
136
+ background-color: #333;
137
+ padding: 20px;
138
+ border-radius: 8px;
139
+ }
140
+
141
+ .history-item {
142
+ margin-bottom: 15px;
143
+ background-color: #444;
144
+ padding: 15px;
145
+ border-radius: 8px;
146
+ }
147
+
148
+ .history-item .question {
149
+ font-weight: bold;
150
+ color: #f5f5f5;
151
+ }
152
+
153
+ .history-item .answer {
154
+ color: #ddd;
155
+ }
156
+
157
+ /* Responsive adjustments */
158
+ @media screen and (max-width: 768px) {
159
+ .container {
160
+ flex-direction: column;
161
+ width: 90%;
162
+ max-width: 600px;
163
+ }
164
+
165
+ .left-pane, .right-pane {
166
+ width: 100%;
167
+ margin: 10px 0;
168
+ }
169
+
170
+ .header h1 {
171
+ font-size: 28px;
172
+ }
173
+
174
+ .form-group input[type="text"],
175
+ .form-group button {
176
+ font-size: 14px;
177
+ }
178
+
179
+ .answer-section h3 {
180
+ font-size: 20px;
181
+ }
182
+ }
183
+ </style>
184
+ </head>
185
+ <body>
186
+
187
+ <div class="container">
188
+ <div class="left-pane">
189
+ <div class="header">
190
+ <h1>Document AI</h1>
191
+ <p>Enter a query and get an answer based on the stored context.</p>
192
+ </div>
193
+
194
+ <form method="POST" action="{{ url_for('chat') }}">
195
+ <div class="form-group">
196
+ <input type="text" name="query_text" placeholder="Enter your query" value="{{ query_text }}" required>
197
+ </div>
198
+ <div class="form-group">
199
+ <button type="submit">Submit</button>
200
+ </div>
201
+ </form>
202
+
203
+ {% if answer %}
204
+ <div class="answer-section">
205
+ <h3>Answer:</h3>
206
+ <div class="answer">{{ answer }}</div>
207
+ </div>
208
+ {% endif %}
209
+ </div>
210
+
211
+ <div class="right-pane">
212
+ <div class="header">
213
+ <h1>Previous Queries</h1>
214
+ </div>
215
+ <div class="history-section">
216
+ {% for question, answer in history %}
217
+ <div class="history-item">
218
+ <div class="question">{{ question }}</div>
219
+ <div class="answer">{{ answer }}</div>
220
+ </div>
221
+ {% endfor %}
222
+ </div>
223
+ </div>
224
+ </div>
225
+
226
+ </body>
227
+ </html>
templates/index_old.html ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Query Answering System</title>
7
+ <link href="https://fonts.googleapis.com/css2?family=Poppins:wght@300;400;600&display=swap" rel="stylesheet">
8
+ <style>
9
+ body {
10
+ font-family: 'Poppins', sans-serif;
11
+ margin: 0;
12
+ padding: 0;
13
+ background-color: #121212;
14
+ color: #f5f5f5;
15
+ display: flex;
16
+ justify-content: center;
17
+ align-items: center;
18
+ height: 100vh;
19
+ overflow: hidden;
20
+ }
21
+
22
+ .container {
23
+ background-color: #1f1f1f;
24
+ border-radius: 10px;
25
+ box-shadow: 0 0 20px rgba(0, 0, 0, 0.5);
26
+ width: 100%;
27
+ max-width: 700px; /* Adjusted max-width for better fit */
28
+ padding: 30px; /* Reduced padding for a more compact design */
29
+ text-align: center;
30
+ margin: 20px; /* Margin added to ensure content is not touching the edges */
31
+ max-height: 90vh; /* Limit height to 90% of the viewport height */
32
+ overflow-y: auto; /* Allow scrolling if content overflows */
33
+ }
34
+
35
+ .header {
36
+ margin-bottom: 20px;
37
+ }
38
+
39
+ .header h1 {
40
+ font-size: 32px;
41
+ font-weight: 600;
42
+ margin-bottom: 10px;
43
+ color: #f5f5f5;
44
+ }
45
+
46
+ .header p {
47
+ font-size: 14px;
48
+ color: #bbb;
49
+ }
50
+
51
+ .form-group {
52
+ margin-bottom: 20px;
53
+ }
54
+
55
+ .form-group input[type="text"] {
56
+ width: 100%;
57
+ padding: 12px;
58
+ border: 2px solid #444;
59
+ background-color: #333;
60
+ color: #fff;
61
+ border-radius: 8px;
62
+ font-size: 16px;
63
+ transition: all 0.3s ease;
64
+ }
65
+
66
+ .form-group input[type="text"]:focus {
67
+ border-color: #007bff;
68
+ outline: none;
69
+ }
70
+
71
+ .form-group button {
72
+ padding: 12px 25px;
73
+ background-color: #007bff;
74
+ color: #fff;
75
+ border: none;
76
+ border-radius: 8px;
77
+ cursor: pointer;
78
+ font-size: 16px;
79
+ transition: background-color 0.3s ease;
80
+ }
81
+
82
+ .form-group button:hover {
83
+ background-color: #0056b3;
84
+ }
85
+
86
+ .answer-section {
87
+ margin-top: 30px;
88
+ background-color: #333;
89
+ padding: 20px;
90
+ border-radius: 8px;
91
+ box-shadow: 0 4px 10px rgba(0, 0, 0, 0.3);
92
+ }
93
+
94
+ .answer-section h3 {
95
+ font-size: 22px;
96
+ font-weight: 600;
97
+ color: #f5f5f5;
98
+ }
99
+
100
+ .answer {
101
+ padding: 15px;
102
+ background-color: #444;
103
+ border-radius: 8px;
104
+ font-size: 16px;
105
+ white-space: pre-wrap;
106
+ color: #ddd;
107
+ }
108
+
109
+ .sources {
110
+ margin-top: 15px;
111
+ font-size: 14px;
112
+ color: #888;
113
+ }
114
+
115
+ /* Responsive adjustments */
116
+ @media screen and (max-width: 768px) {
117
+ .container {
118
+ padding: 20px;
119
+ width: 90%;
120
+ max-width: 600px;
121
+ }
122
+
123
+ .header h1 {
124
+ font-size: 28px;
125
+ }
126
+
127
+ .form-group input[type="text"],
128
+ .form-group button {
129
+ font-size: 14px;
130
+ }
131
+
132
+ .answer-section h3 {
133
+ font-size: 20px;
134
+ }
135
+ }
136
+ </style>
137
+ </head>
138
+ <body>
139
+
140
+ <div class="container">
141
+ <div class="header">
142
+ <h1>Document AI</h1>
143
+ <p>Enter a query and get an answer based on the stored context.</p>
144
+ </div>
145
+
146
+ <form method="POST" action="/">
147
+ <div class="form-group">
148
+ <input type="text" name="query_text" placeholder="Enter your query" value="{{ query_text }}" required>
149
+ </div>
150
+ <div class="form-group">
151
+ <button type="submit">Submit</button>
152
+ </div>
153
+ </form>
154
+
155
+ {% if answer %}
156
+ <div class="answer-section">
157
+ <h3>Answer:</h3>
158
+ <div class="answer">{{ answer }}</div>
159
+ <!-- <div class="sources">
160
+ <strong>Sources:</strong> {{ sources }}
161
+ </div> -->
162
+ </div>
163
+ {% endif %}
164
+ </div>
165
+
166
+ </body>
167
+ </html>
templates/list_dbs.html ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% extends 'base.html' %}
2
+
3
+ {% block content %}
4
+ <div class="container mt-5">
5
+ <div class="card bg-dark text-white shadow">
6
+ <div class="card-header text-center">
7
+ <h3>Available Vector Databases</h3>
8
+ </div>
9
+ <div class="card-body">
10
+ <h4 class="text-center mb-4">Select a Vector Database</h4>
11
+ <ul class="list-group">
12
+ {% for db in vector_dbs %}
13
+ <li class="list-group-item bg-dark text-white d-flex justify-content-between align-items-center">
14
+ <span>{{ db }}</span>
15
+ <form method="post" action="{{ url_for('select_db', db_name=db) }}" class="mb-0">
16
+ <button type="submit" class="btn btn-primary btn-sm">Select</button>
17
+ </form>
18
+ </li>
19
+ {% endfor %}
20
+ </ul>
21
+ </div>
22
+ </div>
23
+ </div>
24
+ {% endblock %}