puzan789 committed on
Commit
ad87194
·
1 Parent(s): 049ae4b
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +149 -0
  2. app.py +27 -0
  3. config.ini +13 -0
  4. core/__init__.py +16 -0
  5. core/api/__init__.py +0 -0
  6. core/api/chatai.py +497 -0
  7. core/api/jwt_bearer.py +27 -0
  8. core/api/user_management_api.py +152 -0
  9. core/models/__init__.py +0 -0
  10. core/models/apis_models.py +137 -0
  11. core/models/response_handling_models.py +19 -0
  12. core/models/utls.py +8 -0
  13. core/pipeline/__init__.py +0 -0
  14. core/pipeline/chataipipeline.py +61 -0
  15. core/pipeline/user_management_pipeline.py +75 -0
  16. core/prompts/__init__.py +0 -0
  17. core/prompts/custom_prompts.py +67 -0
  18. core/services/__init__.py +0 -0
  19. core/services/answer_query/__init__.py +0 -0
  20. core/services/answer_query/answerquery.py +167 -0
  21. core/services/document/__init__.py +0 -0
  22. core/services/document/add_document.py +37 -0
  23. core/services/embeddings/Qdrant_BM25_embedding.py +8 -0
  24. core/services/embeddings/__init__.py +0 -0
  25. core/services/embeddings/jina_embeddings.py +8 -0
  26. core/services/get_links/__init__.py +0 -0
  27. core/services/get_links/web_scraper.py +42 -0
  28. core/services/ocr/__init__.py +0 -0
  29. core/services/ocr/replicate_ocr/__init__.py +0 -0
  30. core/services/ocr/replicate_ocr/replicate_ocr.py +33 -0
  31. core/services/pdf_extraction/__init__.py +0 -0
  32. core/services/pdf_extraction/image_pdf/__init__.py +0 -0
  33. core/services/pdf_extraction/image_pdf/image_pdf_text_extraction.py +38 -0
  34. core/services/pdf_extraction/text_pdf/__init__.py +0 -0
  35. core/services/pdf_extraction/text_pdf/text_pdf_extraction.py +19 -0
  36. core/services/supabase/__init__.py +0 -0
  37. core/services/supabase/chat_ai_setup/__init__.py +0 -0
  38. core/services/supabase/chat_ai_setup/chataiusermanagement.py +32 -0
  39. core/services/supabase/limit/__init__.py +0 -0
  40. core/services/supabase/limit/limit_check.py +14 -0
  41. core/services/supabase/user_management/__init__.py +1 -0
  42. core/services/supabase/user_management/chat_history.py +8 -0
  43. core/services/supabase/user_management/chatbot_management.py +62 -0
  44. core/services/supabase/user_management/token_limit.py +45 -0
  45. core/services/supabase/user_management/user_service.py +155 -0
  46. core/services/vector_db/__init__.py +1 -0
  47. core/services/vector_db/qdrent/__init__.py +0 -0
  48. core/services/vector_db/qdrent/qdrentvector_db.py +34 -0
  49. core/services/vector_db/qdrent/upload_document.py +47 -0
  50. core/services/website_url/__init__.py +0 -0
.gitignore ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+ .pytest_cache/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ target/
76
+
77
+ # Jupyter Notebook
78
+ .ipynb_checkpoints
79
+
80
+ # IPython
81
+ profile_default/
82
+ ipython_config.py
83
+
84
+ # pyenv
85
+ .python-version
86
+
87
+ # pipenv
88
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
90
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
91
+ # install all needed dependencies.
92
+ #Pipfile.lock
93
+
94
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95
+ __pypackages__/
96
+
97
+ # Celery stuff
98
+ celerybeat-schedule
99
+ celerybeat.pid
100
+
101
+ # SageMath parsed files
102
+ *.sage.py
103
+
104
+ # Environments
105
+ .env
106
+ .venv
107
+ env/
108
+ venv/
109
+ ENV/
110
+ env.bak/
111
+ venv.bak/
112
+
113
+ # Spyder project settings
114
+ .spyderproject
115
+ .spyproject
116
+
117
+ # Rope project settings
118
+ .ropeproject
119
+
120
+ # mkdocs documentation
121
+ /site
122
+
123
+ # mypy
124
+ .mypy_cache/
125
+ .dmypy.json
126
+ dmypy.json
127
+
128
+ # Pyre type checker
129
+ .pyre/
130
+
131
+ # Machine Learning and Speech Libraries
132
+ # TensorFlow
133
+ *.ckpt*
134
+ *.pbtxt
135
+ *.tfevents*
136
+ # PyTorch
137
+ *.pt
138
+ # Keras
139
+ *.h5
140
+ # Scikit-learn
141
+ *.pkl
142
+ # Speech Recognition
143
+ *.wav
144
+ *.mp3
145
+ .idea/
146
+ logs
147
+ images
148
+ resources
149
+ experiments
app.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import uvicorn
3
+ from fastapi import FastAPI, Depends
4
+ from fastapi.middleware.cors import CORSMiddleware
5
+ from core import logging as logger
6
+ from core.api.chatai import chatai_api_router
7
+ from core.api.jwt_bearer import access_check_bearer
8
+ from core.api.user_management_api import user_management_api_router
9
+ from core.utils.utils import load_ini_config
10
+ config = load_ini_config("config.ini")
11
+ app = FastAPI(docs_url=config.get('fastapi_config', 'docs_url'), redoc_url=config.get('fastapi_config', 'redoc_url'),
12
+ openapi_url=config.get('fastapi_config', 'openapi_url')
13
+ )
14
+
15
+ PROTECTED = [Depends(access_check_bearer)]
16
+ app.add_middleware(
17
+ CORSMiddleware,
18
+ allow_origins=["*"],
19
+ allow_credentials=True,
20
+ allow_methods=["*"],
21
+ allow_headers=["*"],
22
+ )
23
+ app.include_router(user_management_api_router, prefix="/chatai")
24
+ app.include_router(chatai_api_router, prefix="/chatai")
25
+ if __name__ == '__main__':
26
+ uvicorn.run(app, port=config.get("fastapi_config", "port"), host=config.get('fastapi_config', 'host'),
27
+ timeout_keep_alive=300, timeout_graceful_shutdown=600)
config.ini ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+
4
+ [fastapi_config]
5
+ host = 0.0.0.0
6
+ port = 7860
7
+ docs_url = /docs
8
+ redoc_url = /redoc
9
+ openapi_url = /openapi.json
10
+
11
+
12
+ [oauth]
13
+ redirect_to : https://google.com/
core/__init__.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ import logging.config
4
+ import yaml
5
+ import os
6
+
7
+ if os.path.exists("logs"):
8
+ pass
9
+ else:
10
+ os.makedirs("logs")
11
+
12
+ log_config_path = os.path.join(os.getcwd(), "logging_config.yaml")
13
+ with open(log_config_path, 'r') as file:
14
+ config = yaml.safe_load(file.read())
15
+
16
+ logging.config.dictConfig(config)
core/api/__init__.py ADDED
File without changes
core/api/chatai.py ADDED
@@ -0,0 +1,497 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import string
4
+ import tempfile
5
+ from urllib.parse import urlparse
6
+ import requests
7
+ from fastapi import UploadFile, File, Form, HTTPException
8
+ from fastapi.requests import Request
9
+ from fastapi.routing import APIRouter
10
+ from supabase import create_client
11
+ from core import logging as logger
12
+ from core.api.user_management_api import user_management
13
+ from core.api.user_management_api import user_management as user_management_pipeline
14
+ from core.models.apis_models import *
15
+ from core.pipeline.chataipipeline import ChatAIPipeline
16
+ from core.services.supabase.user_management.token_limit import token_limit_check
17
+ from core.utils.error_handling import create_error_response, create_success_response, raise_http_exception
18
+ from core.utils.utils import get_ip_info, encode_to_base64, clean_text, decode_base64
19
+ from core.services.supabase.limit.limit_check import LimitChecker
20
+ from PyPDF2 import PdfReader
21
+ from dotenv import load_dotenv
22
+ load_dotenv()
23
+ import io
24
+
25
+ chatai_api_router = APIRouter(tags=["ChatAI"])
26
+ supabase_client = create_client(os.getenv("SUPABASE_URL"), os.getenv("SUPABASE_KEY"))
27
+ supabase_client_ = supabase_client
28
+ ChatAI_pipeline = ChatAIPipeline()
29
+ url_limit,pdf_limit,ocr_limit=LimitChecker(supabase_client)
30
+
31
+ @chatai_api_router.post("/add_text")
32
+ async def add_text(request: AddTextRequest):
33
+ logger.info(f">>>AddText API Triggered By {request.vectorstore}<<<")
34
+ try:
35
+ vectorstore, text = request.vectorstore, request.text
36
+ username, chat_bot_name = request.vectorstore.split("$")[1], request.vectorstore.split("$")[2]
37
+ cleaned_text = " ".join(text.split())
38
+ num_token = len(cleaned_text)
39
+ lim = token_limit_check(supabase_client=supabase_client, username=username, chatbot_name=chat_bot_name,
40
+ len_text=num_token)
41
+ text = clean_text(text)
42
+ if lim:
43
+ dct = {
44
+ "output": {"text": text},
45
+ "source": "Text",
46
+ }
47
+ cleaned_text = " ".join(text.split()) # handles unnencessary spaces
48
+ # Count characters
49
+ num_token = len(cleaned_text)
50
+ logger.info(f"Number of token {num_token}")
51
+ dct = json.dumps(dct, indent=1).encode("utf-8", errors="replace")
52
+ file_name = user_management_pipeline.create_data_source_name(source_name="text", username=username)
53
+ supabase_client.storage.from_("ChatAI").upload(file=dct, path=f"{file_name}_data.json")
54
+
55
+ supa = supabase_client.table("ChatAI_ChatbotDataSources").insert(
56
+ {"username": username, "chatbotName": chat_bot_name, "dataSourceName": file_name,
57
+ "numTokens": num_token, "sourceEndpoint": "/add_text",
58
+ "sourceContentURL": os.path.join(os.environ["SUPABASE_PUBLIC_BASE_URL"],
59
+ f"{file_name}_data.json")}).execute()
60
+
61
+ response = create_success_response(200, {"message": "Successfully added the text."})
62
+ logger.info(f">>>Text added successfully for {request.vectorstore}.<<<")
63
+
64
+ return response
65
+ else:
66
+ response = create_error_response(400,
67
+ "Exceeding limits, please try with a smaller chunks of information or subscribe to our premium plan.")
68
+ return response
69
+
70
+ except Exception as e:
71
+ logger.error(f">>>Error in add_text: {e} for {request.vectorstore}.<<<")
72
+ raise_http_exception(500, "Internal Server Error")
73
+
74
+
75
+ @chatai_api_router.post("/answer_query")
76
+ async def answer_query(request: AnswerQueryRequest, req: Request):
77
+ logger.info(f">>>answer_query API Triggered By {request.vectorstore}<<<")
78
+ try:
79
+ username, chatbot_name = request.vectorstore.split("$")[1], request.vectorstore.split("$")[2]
80
+ ip_address = req.client.host
81
+ city = get_ip_info(ip_address)
82
+ output, followup_questions, source = ChatAI_pipeline.answer_query_(query=request.query,
83
+ vectorstore=request.vectorstore,
84
+ llm_model=request.llm_model)
85
+ supa = supabase_client.table("ChatAI_ChatHistory").insert(
86
+ {"username": username, "chatbotName": chatbot_name, "llmModel": request.llm_model,
87
+ "question": request.query, "response": output, "IpAddress": ip_address, "ResponseTokenCount": len(output),
88
+ "vectorstore": request.vectorstore, "City": city}).execute()
89
+
90
+ response = create_success_response(200, data={"output": output, "follow_up_questions": followup_questions,
91
+ "source": source})
92
+ logger.info(f">>>Query answered successfully for {request.vectorstore}.<<<")
93
+ return response
94
+
95
+ except Exception as e:
96
+ logger.error(f">>>Error in answer_query: {e} for {request.vectorstore}.<<<")
97
+ raise e
98
+
99
+
100
+ @chatai_api_router.post("/get_links")
101
+ async def get_links(request: GetLinksRequest):
102
+ logger.info(f">>>get_links API Triggered By {request.url}<<<")
103
+ try:
104
+ response = ChatAI_pipeline.get_links_(url=request.url, timeout=30)
105
+ response = create_success_response(200, {"urls": response, "source": urlparse(request.url).netloc})
106
+ logger.info(f">>>Links fetched successfully for {request.url}.<<<")
107
+ return response
108
+
109
+ except Exception as e:
110
+ logger.error(f">>>Error in get_links: {e} for {request.url}.<<<")
111
+ raise_http_exception(500, "Internal Server Error")
112
+
113
+
114
+ @chatai_api_router.post("/image_pdf_text_extraction")
115
+ async def image_pdf_text_extraction(vectorstore: str = Form(...)
116
+ , pdf: UploadFile = File(...)):
117
+ logger.info(f">>>image_pdf_text_extraction API Triggered By {pdf.filename}<<<")
118
+ try:
119
+ username, chatbot_name = vectorstore.split("$")[1], vectorstore.split("$")[2]
120
+ pdf_bytes = await pdf.read()
121
+ source = pdf.filename
122
+ pdf_reader = PdfReader(io.BytesIO(pdf_bytes))
123
+ doc_len = len(pdf_reader.pages)
124
+ if doc_len<ocr_limit:
125
+ response = ChatAI_pipeline.image_pdf_text_extraction_(image_pdf=pdf_bytes)
126
+
127
+ num_tokens = 0
128
+ try:
129
+ num_tokens = len(" ".join([response[x] for x in response]))
130
+ except (KeyError, TypeError, AttributeError):
131
+ pass
132
+ lim = token_limit_check(supabase_client=supabase_client, username=username, chatbot_name=chatbot_name,
133
+ len_text=num_tokens)
134
+ logger.info(f"this is the {lim}")
135
+ if lim:
136
+ dct = {
137
+ "output": response,
138
+ "source": source
139
+ }
140
+
141
+ dct = json.dumps(dct, indent=1).encode("utf-8", errors="replace")
142
+ file_name = user_management_pipeline.create_data_source_name(source_name=source, username=username)
143
+ num_tokens = 0
144
+ try:
145
+ valid_responses = [response[x] for x in response if response[x] is not None]
146
+ num_tokens = len(" ".join(valid_responses))
147
+ except Exception as e:
148
+ num_tokens = 0
149
+
150
+ response = supabase_client.storage.from_("ChatAI").upload(file=dct, path=f"{file_name}_data.json")
151
+ supa = supabase_client.table("ChatAI_ChatbotDataSources").insert(
152
+ {"username": username,
153
+ "chatbotName": chatbot_name,
154
+ "dataSourceName": file_name,
155
+ "numTokens": num_tokens,
156
+ "sourceEndpoint": "/image_pdf_text_extraction",
157
+ "sourceContentURL": os.path.join(os.environ["SUPABASE_PUBLIC_BASE_URL"],
158
+ f"{file_name}_data.json")}).execute()
159
+
160
+ response = create_success_response(200,
161
+ {"source": pdf.filename, "message": "Successfully extracted the text."})
162
+ logger.info(f">>>Text extracted successfully for {pdf.filename}.<<<")
163
+ return response
164
+ else:
165
+ response = create_error_response(402,
166
+ "Exceeding limits, please try with a smaller chunks of PDF or subscribe to our premium plan.")
167
+ return response
168
+ else:
169
+ response = create_error_response(402,
170
+ "Exceeding limits, please try with a PDF having less than 20 pages for pdf .")
171
+ return response
172
+ except Exception as e:
173
+ raise e
174
+
175
+
176
+ @chatai_api_router.post("/text_pdf_extraction")
177
+ async def text_pdf_extraction(vectorstore: str = Form(...)
178
+ , pdf: UploadFile = File(...)):
179
+ logger.info(f">>>text_pdf_extraction API Triggered By {pdf.filename}<<<")
180
+ try:
181
+ username, chatbot_name = vectorstore.split("$")[1], vectorstore.split("$")[2]
182
+ content = await pdf.read()
183
+ pdf_reader = PdfReader(io.BytesIO(content))
184
+ doc_len = len(pdf_reader.pages)
185
+
186
+ if doc_len < pdf_limit :
187
+ source = pdf.filename
188
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:
189
+ temp_file.write(content)
190
+ temp_file_path = temp_file.name
191
+
192
+ response = ChatAI_pipeline.text_pdf_extraction_(pdf=temp_file_path)
193
+ numTokens = len(" ".join([response[x] for x in response]))
194
+ lim = token_limit_check(supabase_client=supabase_client, username=username, chatbot_name=chatbot_name,
195
+ len_text=numTokens)
196
+ os.remove(temp_file_path)
197
+ if lim:
198
+ dct = {
199
+ "output": response,
200
+ "source": source
201
+ }
202
+ numTokens = len(" ".join([response[x] for x in response]))
203
+ logger.info(f"Num of tokens {numTokens} text_pdf_extraction")
204
+ dct = json.dumps(dct, indent=1).encode("utf-8", errors="replace")
205
+ file_name = user_management_pipeline.create_data_source_name(source_name=source, username=username)
206
+ response = supabase_client.storage.from_("ChatAI").upload(file=dct, path=f"{file_name}_data.json")
207
+ response = (
208
+ supabase_client.table("ChatAI_ChatbotDataSources")
209
+ .insert({"username": username,
210
+ "chatbotName": chatbot_name,
211
+ "dataSourceName": file_name,
212
+ "numTokens": numTokens,
213
+ "sourceEndpoint": "/text_pdf_extraction",
214
+ "sourceContentURL": os.path.join(os.environ["SUPABASE_PUBLIC_BASE_URL"],
215
+ f"{file_name}_data.json")})
216
+ .execute()
217
+ )
218
+ response = create_success_response(200, {"source": source, "message": "Successfully extracted the text."})
219
+ logger.info(f">>>Text extracted successfully for {source}.<<<")
220
+ return response
221
+ else:
222
+ response = create_error_response(402,
223
+ "Exceeding limits, please try with a smaller chunks of PDF or subscribe to our premium plan.")
224
+ return response
225
+ else:
226
+ response = create_error_response(402,
227
+ "Exceeding limits, please try with a pdf having pages less than 200.")
228
+ return response
229
+
230
+ except Exception as e:
231
+ logger.error(f">>>Error in text_pdf_extraction: {e} for {vectorstore}.<<<")
232
+ raise_http_exception(500, "Internal Server Error")
233
+
234
+
235
+
236
+
237
+ @chatai_api_router.post("/website_url_text_extraction")
238
+ async def add_website(request: AddWebsiteRequest):
239
+ vectorstore, website_urls, source = request.vectorstore, request.website_urls, request.source
240
+
241
+ logger.info(f">>>website_url_text_extraction API Triggered By {request.website_urls}<<<")
242
+ try:
243
+ username, chatbot_name = vectorstore.split("$")[1], vectorstore.split("$")[2]
244
+ total_requested_urls=len(website_urls)
245
+
246
+ if total_requested_urls < url_limit :
247
+ text = ChatAI_pipeline.website_url_text_extraction_list_(urls=website_urls)
248
+ num_token = len(" ".join([text[x] for x in text]))
249
+
250
+ logger.info(f">>>website_url_text_extraction len{num_token}<<<")
251
+ lim = token_limit_check(supabase_client=supabase_client, username=username, chatbot_name=chatbot_name,
252
+ len_text=num_token)
253
+ if not lim:
254
+
255
+ response = create_error_response(402,
256
+ "Exceeding limits, please try with a smaller chunks of information or subscribe to our premium plan.")
257
+ return response
258
+ else:
259
+ dct = {
260
+ "output": text,
261
+ "source": source
262
+ }
263
+
264
+ dct = json.dumps(dct, indent=1).encode("utf-8", errors="replace")
265
+ file_name = user_management_pipeline.create_data_source_name(source_name=urlparse(source).netloc,
266
+ username=username)
267
+ supabase_client.storage.from_("ChatAI").upload(file=dct, path=f"{file_name}_data.json")
268
+ (
269
+ supabase_client.table("ChatAI_ChatbotDataSources")
270
+ .insert({"username": username,
271
+ "chatbotName": chatbot_name,
272
+ "dataSourceName": file_name,
273
+ "numTokens": num_token,
274
+ "sourceEndpoint": "/fetch_text/urls",
275
+ "sourceContentURL": os.path.join(os.environ["SUPABASE_PUBLIC_BASE_URL"],
276
+ f"{file_name}_data.json")})
277
+ .execute()
278
+ )
279
+ response = create_success_response(200, {"message": "Successfully fetched the website text."})
280
+ logger.info(f">>>Website text extracted successfully for {request.website_urls}.<<<")
281
+ return response
282
+ else:
283
+ response = create_error_response(402,
284
+ "Please select urls less than 50")
285
+ return response
286
+ except Exception as e:
287
+ logger.error(f">>>Error in website_url_text_extraction: {e} for {request.website_urls}.<<<")
288
+ raise HTTPException(status_code=500, detail="Internal Server Error")
289
+
290
+
291
+ @chatai_api_router.get("/get_current_count")
292
+ async def get_count(vectorstore: str):
293
+ logger.info(f">>>get_current_count API Triggered By {vectorstore}<<<")
294
+ try:
295
+ username, chatbot_name = vectorstore.split("$")[1], vectorstore.split("$")[2]
296
+ current_count = user_management_pipeline.get_current_count_(username)
297
+
298
+ response = create_success_response(200, {"current_count": current_count})
299
+ logger.info(f">>>Current count fetched successfully for {vectorstore}.<<<")
300
+ return response
301
+
302
+ except Exception as e:
303
+ logger.error(f">>>Error in get_current_count: {e} for {vectorstore}.<<<")
304
+ raise_http_exception(500, "Internal Server Error")
305
+
306
+
307
+ @chatai_api_router.post("/list_chatbots")
308
+ async def list_chatbots(request: ListChatbotsRequest):
309
+ logger.info(f">>>list_chatbots API Triggered By {request.username}<<<")
310
+ try:
311
+ chatbots = user_management.list_tables(username=request.username)
312
+ response = create_success_response(200, {"chatbots": chatbots})
313
+ logger.info(f">>>Chatbots listed successfully for {request.username}.<<<")
314
+ return response
315
+
316
+ except Exception as e:
317
+ logger.error(f">>>Error in list_chatbots: {e} for {request.username}.<<<")
318
+ raise_http_exception(500, "Internal Server Error")
319
+
320
+
321
+ @chatai_api_router.post("/get_chat_history")
322
+ async def chat_history(request: GetChatHistoryRequest):
323
+ logger.info(f">>>get_chat_history API Triggered By {request.vectorstore}<<<")
324
+ try:
325
+ _, username, chatbotName = request.vectorstore.split("$", 2)
326
+
327
+ history = supabase_client.table("ChatAI_ChatHistory").select(
328
+ "timestamp", "question", "response"
329
+ ).eq("username", username).eq("chatbotName", chatbotName).execute().data
330
+
331
+ response = create_success_response(200, {"history": history})
332
+ logger.info(f">>>Chat history fetched successfully for {request.vectorstore}.<<<")
333
+ return response
334
+
335
+
336
+ except IndexError:
337
+ logger.warning(f"Chat history not found for {request.vectorstore}")
338
+ return create_error_response(404, "Chat history not found for the given chatbot.")
339
+
340
+ except Exception as e:
341
+ logger.error(f">>>Error in get_chat_history: {e} for {request.vectorstore}.<<<")
342
+ raise_http_exception(500, "Internal Server Error")
343
+
344
+
345
+ @chatai_api_router.post("/delete_chatbot")
346
+ async def delete_chatbot(request: DeleteChatbotRequest):
347
+ logger.info(f">>>delete_chatbot API Triggered By {request.vectorstore}<<<")
348
+ try:
349
+ username, chatbot_name = request.vectorstore.split("$")[1], request.vectorstore.split("$")[2]
350
+ supabase_client.table('ChatAI_ChatbotInfo').delete().eq('user_id', username).eq('chatbotname',
351
+ chatbot_name).execute()
352
+ all_sources = supabase_client.table("ChatAI_ChatbotDataSources").select("*").eq("username", username).eq(
353
+ "chatbotName", chatbot_name).execute().data
354
+ all_sources = [x["sourceContentURL"].split("/")[-1] for x in all_sources]
355
+ supabase_client.table("ChatAI_ChatbotDataSources").delete().eq("username", username).eq("chatbotName",
356
+ chatbot_name).execute()
357
+ for source in all_sources:
358
+ supabase_client.table("ChatAI_Chatbot")
359
+ supabase_client.storage.from_("ChatAI").remove(source)
360
+ user_management.delete_table(table_name=chatbot_name)
361
+ user_management.delete_qdrant_cluster(vectorstorename=request.vectorstore)
362
+ response = create_success_response(200, {"message": "Chatbot deleted successfully"})
363
+ logger.info(f">>>Chatbot deleted successfully for {request.vectorstore}.<<<")
364
+ return response
365
+ except Exception as e:
366
+ logger.error(f">>>Error in delete_chatbot: {e} for {request.vectorstore}.<<<")
367
+ raise_http_exception(500, "Internal Server Error")
368
+
369
+
370
+
371
+
372
+
373
+
374
+ @chatai_api_router.get("/list_chatbot_sources")
375
+ async def list_chatbot_sources(vectorstore: str):
376
+ try:
377
+ logger.info(f">>>list_chatbot_sources API Triggered By {vectorstore}<<<")
378
+
379
+ username, chatbot_name = vectorstore.split("$")[1], vectorstore.split("$")[2]
380
+ result = supabase_client.table("ChatAI_ChatbotDataSources").select("*").eq("username", username).eq(
381
+ "chatbotName",
382
+ chatbot_name).execute().data
383
+
384
+ response = create_success_response(200, {"output": result})
385
+ logger.info(f">>>Chatbot listed successfully for {vectorstore}.<<<")
386
+ return response
387
+
388
+ except Exception as e:
389
+ logger.error(f">>>Error in list_chatbot_sources: {e} for {vectorstore}.<<<")
390
+ raise_http_exception(500, "Internal Server Error")
391
+
392
+
393
+ @chatai_api_router.get("/get_data_source")
394
+ async def get_data_source(vectorstore: str, source_url: str):
395
+ try:
396
+ logger.info(f">>>get_data_source API Triggered By {vectorstore}<<<")
397
+
398
+ r = requests.get(source_url)
399
+ res = encode_to_base64(eval(r.content.decode("utf-8", errors="replace")))
400
+
401
+ response = create_success_response(200, {"output": res})
402
+
403
+ return response
404
+
405
+ except Exception as e:
406
+ logger.error(f">>>Error in get_data_source: {e} for {vectorstore}.<<<")
407
+ raise_http_exception(500, "Internal Server Error")
408
+
409
+
410
+ @chatai_api_router.post("/delete_chatbot_source")
411
+ async def delete_chatbot_source(request: DeleteChatbotSourceRequest):
412
+ vectorstore, data_source_name = request.vectorstore, request.data_source_name
413
+ try:
414
+
415
+ response = supabase_client.table("ChatAI_ChatbotDataSources").delete().eq("dataSourceName",
416
+ data_source_name).execute()
417
+ response = supabase_client.storage.from_('ChatAI').remove(f"{data_source_name}_data.json")
418
+
419
+ response = create_success_response(200, {"output": f"Successfully deleted the {data_source_name} data source."})
420
+
421
+ logger.info(f">>>Data source deleted successfully for {vectorstore}.<<<")
422
+ return response
423
+
424
+
425
+ except Exception as e:
426
+ logger.error(f">>>Error in delete_chatbot_source: {e} for {vectorstore}.<<<")
427
+ raise_http_exception(500, "Internal Server Error")
428
+
429
+
430
+ @chatai_api_router.post("/train_chatbot")
431
+ async def train_chatbot(request: TrainChatbotRequest):
432
+ vectorstore, url_sources = request.vectorstore, request.urls
433
+ logger.info(f">>>train_chatbot API Triggered By {vectorstore}<<<")
434
+ try:
435
+ texts = []
436
+ sources = []
437
+ fileTypes = [
438
+ supabase_client.table("ChatAI_ChatbotDataSources").select("sourceEndpoint").eq("sourceContentURL",
439
+ x).execute().data[0][
440
+ "sourceEndpoint"] for x in url_sources]
441
+ for source, fileType in zip(url_sources, fileTypes):
442
+ if ((fileType == "/text_pdf_extraction") | (fileType == "/image_pdf_text_extraction")):
443
+ logger.info(f"Source is {source}")
444
+ r = requests.get(source)
445
+ file = eval(r.content.decode("utf-8", errors="replace"))
446
+ content = file["output"]
447
+ fileSource = file["source"]
448
+ texts.append(".".join(
449
+ [content[key] for key in content.keys()]).replace(
450
+ "\n", " "))
451
+
452
+ sources.append(fileSource)
453
+ elif fileType == "/add_text" or fileType == "/add_qa_pair":
454
+ r = requests.get(source)
455
+ file = eval(r.content.decode("utf-8", errors="replace"))
456
+ content = file["output"]["text"]
457
+ fileSource = file["source"]
458
+ texts.append(content.replace("\n", " "))
459
+ sources.append(fileSource)
460
+ elif ((fileType == "/fetch_text/urls") | (fileType == "/youtube_transcript")):
461
+ r = requests.get(source)
462
+ file = eval(r.content.decode("utf-8", errors="replace"))
463
+ content = file["output"]
464
+ fileSource = file["source"]
465
+ texts.append(".".join(
466
+ [content[key] for key in content.keys()]).replace(
467
+ "\n", " "))
468
+ sources.append(fileSource)
469
+ else:
470
+ pass
471
+ texts = [(text, source) for text, source in zip(texts, sources)]
472
+ ChatAI_pipeline.add_document_(texts, vectorstore)
473
+ response = create_success_response(200, {"message": "Chatbot trained successfully."})
474
+ logger.info(f">>>Chatbot trained successfully for {vectorstore}.<<<")
475
+
476
+ return response
477
+
478
+
479
+ except Exception as e:
480
+ logger.error(f">>>Error in train_chatbot: {e} for {vectorstore}.<<<")
481
+ raise e
482
+
483
+
484
+
485
+
486
+
487
+ @chatai_api_router.post("/new_chatbot")
488
+ async def new_chatbot(request: NewChatbotRequest):
489
+ logger.info(f">>> new_chatbot API Triggered <<<")
490
+ try:
491
+ response = user_management.new_chatbot_(chatbot_name=request.chatbot_name, username=request.username)
492
+ logger.info(f">>> Chatbot created successfully for {request.username}.<<<")
493
+ return response
494
+
495
+ except Exception as e:
496
+ logger.error(f">>>Error in new_chatbot: {e} for {request.username}.<<<")
497
+ raise_http_exception(500, "Internal Server Error")
core/api/jwt_bearer.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from fastapi import Depends
3
+ from supabase import create_client
4
+ from core import logging as logger
5
+ from core.utils.error_handling import create_error_response, raise_http_exception
6
+ from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
7
+ from dotenv import load_dotenv
8
+ load_dotenv()
9
+
10
+ security = HTTPBearer()
11
+
12
+ supabase_client = create_client(
13
+ os.getenv("SUPABASE_URL"),
14
+ os.getenv("SUPABASE_KEY")
15
+ )
16
+
17
+
18
+ async def access_check_bearer(credentials: HTTPAuthorizationCredentials = Depends(security)):
19
+ access_token = credentials.credentials
20
+ try:
21
+ supabase_client.auth.get_user(access_token)
22
+
23
+ except Exception as e:
24
+ logger.info(f">>> Invalid access token {e}<<<")
25
+ raise_http_exception(code=401,
26
+ message="Invalid Access Token", details=[
27
+ {"info": "Invalid access token or access token expired please login again"}])
core/api/user_management_api.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from fastapi import Depends
from core import logging as logger
from core.models.apis_models import *
from fastapi.routing import APIRouter
from core.api.jwt_bearer import access_check_bearer, supabase_client
from core.pipeline.user_management_pipeline import SupabaseUserManagementPipeline

from core.utils.error_handling import raise_http_exception, create_success_response, create_error_response

user_management_api_router = APIRouter(tags=["User Management"])

# Single pipeline instance shared by every endpoint on this router.
user_management = SupabaseUserManagementPipeline()


@user_management_api_router.post("/user_signup")
async def user_signup(request: UserSignupRequest):
    """Register a new user (username / email / password) via Supabase."""
    logger.info(f">>>user_signup API Triggered <<<")
    response = user_management.user_signup_(username=request.username, password=request.password, email=request.email)
    logger.info(f">>>user_signup API Success<<<")

    return response


@user_management_api_router.post("/user_signin")
async def user_signin(request: UserSigninRequest):
    """Authenticate a user; returns the session payload or a 400-style error body."""
    logger.info(f">>>user_signin API Triggered <<<")

    response = user_management.user_signin_(username=request.username, password=request.password, email=request.email)
    # Fixed: was `response != None` -- identity comparison is the idiomatic test.
    if response is not None:
        logger.info(f">>>user_signin API Success.<<<")
        return response
    else:
        logger.info(f">>> Username or password is incorrect please try again.<<<")
        response = create_error_response(400, "Username or password is incorrect please try again.")
        return response


@user_management_api_router.post("/get_user_data")
async def get_user_data(request: GetUserDataRequest):
    """Return the Supabase user record for the given access token."""
    logger.info(f">>>get_user_data API Triggered <<<")
    response = user_management.get_user_data_(access_token=request.access_token)
    return response


@user_management_api_router.post("/login_with_access_token")
async def login_with_access_token(request: LoginWithAccessTokenRequest):
    """Restore a session from an existing access/refresh token pair."""
    logger.info(f">>>login_with_access_token API Triggered <<<")

    response = user_management.login_with_access_token_(access_token=request.access_token,
                                                        refresh_token=request.refresh_token)
    logger.info(f">>>login_with_access_token API Success<<<")
    return response


@user_management_api_router.post("/set_session_data")
async def set_session_data(request: SetSessionDataRequest):
    """Persist session tokens for a user id."""
    logger.info(f">>> set_session_data API Triggered <<<")

    response = user_management.set_session_data_(access_token=request.access_token, refresh_token=request.refresh_token,
                                                 user_id=request.user_id)
    return response


@user_management_api_router.post("/sign_out")
async def sign_out():
    """Terminate the current Supabase session."""
    logger.info(f">>> sign_out API Triggered <<<")

    response = user_management.sign_out_()
    logger.info(f">>>sign_out API Success<<<")
    return response


@user_management_api_router.post("/oauth_signin")
async def oauth_signin():
    """Start an OAuth sign-in flow."""
    logger.info(f">>> oauth_signin API Triggered <<<")
    response = user_management.oauth_signin_()
    logger.info(f">>>oauth_signin API Success<<<")
    return response


@user_management_api_router.post("/check_session")
async def check_session():
    """Report whether a Supabase session is currently active."""
    logger.info(f">>>check_session API Triggered <<<")

    response = user_management.check_session_()
    return response


@user_management_api_router.get("/get_public_chatbot")
async def get_public_chatbots():
    """List all chatbots whose `isPrivate` flag is False."""
    logger.info(f">>>get_public_chatbot API Triggered<<<")
    try:
        response = supabase_client.table("ChatAI_ChatbotInfo").select("*").eq("isPrivate", False).execute().data
        logger.info(f">>>Public chatbots fetched successfully.<<<")
        return response
    except Exception as e:
        logger.error(f">>>Error in get_public_chatbot: {e}<<<")
        raise_http_exception(500, "Internal Server Error")


@user_management_api_router.post("/public_private_check")
async def public_or_private(request: PublicPrivateCheckRequest):
    """Read or update a chatbot's visibility flag.

    Without `mode`, returns the current `isPrivate` value; with `mode`,
    writes it into `isPrivate`.
    """
    vectorstore, mode = request.vectorstore, request.mode
    logger.info(f">>>public_private_check API Triggered for {vectorstore}.<<<")
    try:
        # vectorstore format: "<prefix>$<username>$<chatbot_name>" -- TODO confirm prefix segment.
        username, chatbot_name = vectorstore.split("$")[1], vectorstore.split("$")[2]
        # Fixed: was `len(mode) == 0`, which raised TypeError when `mode` was
        # omitted (its model default is None); `not mode` covers None and "".
        if not mode:
            value = (
                supabase_client.table("ChatAI_ChatbotInfo")
                .select("isPrivate")
                .eq("user_id", username)
                .eq("chatbotname", chatbot_name)
                .execute()
            )
            value = value.data[0]["isPrivate"]
            response = create_success_response(200, {"output": value})
        else:
            response = (
                supabase_client.table("ChatAI_ChatbotInfo")
                .update({"isPrivate": mode})
                .eq("user_id", username)
                .eq("chatbotname", chatbot_name)
                .execute()
            )
            response = create_success_response(200, {"output": response})
        logger.info(f">>>Public/Private check successful for {vectorstore}.<<<")
        return response

    except Exception as e:
        logger.error(f">>>Error in public_private_check: {e} for {vectorstore}.<<<")
        raise_http_exception(500, "Internal Server Error")


@user_management_api_router.post("/refresh_session", dependencies=[Depends(access_check_bearer)])
async def refresh_session(request: RefreshSessionRequest):
    """Exchange a refresh token for a new session (requires a valid bearer token)."""
    logger.info(f">>>refresh_session API Triggered <<<")
    response = user_management.refresh_session__(refresh_token=request.refresh_token)
    logger.info(f">>>refresh token fetched successfully.<<<")

    return response


@user_management_api_router.post("/username_creation_oauth", dependencies=[Depends(access_check_bearer)])
async def username_creation_oauth(request: UsernameCreationOauthRequest):
    """Create a username for an OAuth-registered user (requires a valid bearer token)."""
    logger.info(f">>> username_creation_oauth API Triggered <<<")

    response = user_management.username_creation_oauth_(username=request.username, email=request.email, user_id=request.user_id)

    logger.info(f">>>username creation successful.<<<")
    return response
core/models/__init__.py ADDED
File without changes
core/models/apis_models.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Pydantic request/response models for the ChatAI and user-management APIs."""
from pydantic import BaseModel,EmailStr
from typing import List, Optional


## ---------------------------------- Chatbot API Models ----------------------------------

class AddTextRequest(BaseModel):
    """Raw text to ingest into a chatbot's vector store."""
    vectorstore: str
    text: str


class AddWebsiteRequest(BaseModel):
    """Website URLs to scrape and ingest, tagged with a source label."""
    website_urls: List[str]
    vectorstore: str
    source: str


class AnswerQueryRequest(BaseModel):
    """A user question against a chatbot's vector store."""
    query: str
    vectorstore: str
    llm_model: str = "llama3-70b-8192"  # default Groq model name

class GetLinksRequest(BaseModel):
    """Seed URL whose same-site links should be collected."""
    url: str



class WebsiteUrlTextExtractionRequest(BaseModel):
    """Single URL whose page text should be extracted."""
    url: str


class WebsiteUrlTextExtractionListRequest(BaseModel):
    """Batch of URLs whose page text should be extracted."""
    urls: List[str]


class GetCurrentCountRequest(BaseModel):
    """Vector store whose usage count is requested."""
    vectorstore: str


class ListChatbotsRequest(BaseModel):
    """Owner whose chatbots should be listed."""
    username: str


class GetChatHistoryRequest(BaseModel):
    """Vector store whose chat history is requested."""
    vectorstore: str


class ChatHistoryItem(BaseModel):
    """One stored question/response exchange."""
    timestamp: str
    question: str
    response: str


class DeleteChatbotRequest(BaseModel):
    """Vector store of the chatbot to delete."""
    vectorstore: str


class TrainChatbotRequest(BaseModel):
    """URLs to scrape and train into the given vector store."""
    vectorstore: str
    urls: list[str]


class LoadPDFRequest(BaseModel):
    """Vector store a PDF upload should be ingested into."""
    vectorstore: str


class LoadEditedJson(BaseModel):
    """User-edited JSON payload to load back into a chatbot data source."""
    vectorstore: str
    data_source_name: str
    source_endpoint: str
    json_data: dict


class PublicPrivateCheckRequest(BaseModel):
    """Read (mode omitted) or set (mode given) a chatbot's visibility flag."""
    vectorstore: str
    mode: str | None = None  # None => read-only check; otherwise new isPrivate value


class DeleteChatbotSourceRequest(BaseModel):
    """A single data source to remove from a chatbot."""
    vectorstore: str
    data_source_name: str


## ---------------------------------- User Management API Models ----------------------------------

class UserSignupRequest(BaseModel):
    """Credentials for account creation."""
    username: str
    email:EmailStr
    password: str


class UserSigninRequest(BaseModel):
    """Credentials for signing in."""
    username:str
    email:EmailStr

    password: str


class CheckSessionRequest(BaseModel):
    """User whose session should be checked."""
    user_id: str


class GetUserDataRequest(BaseModel):
    """Access token identifying the user to look up."""
    access_token: str


class RefreshSessionRequest(BaseModel):
    """Refresh token used to mint a new session."""
    refresh_token: str


class LoginWithAccessTokenRequest(BaseModel):
    """Token pair used to restore an existing session."""
    access_token: str
    refresh_token: str


class UsernameCreationOauthRequest(BaseModel):
    """Username assignment for an OAuth-registered account."""
    username: str
    email: str
    user_id: str



class SetSessionDataRequest(BaseModel):
    """Session tokens to persist for a user."""
    access_token: str
    refresh_token: str
    user_id: str


class SignOutRequest(BaseModel):
    """User to sign out."""
    user_id: str


class NewChatbotRequest(BaseModel):
    """Name and owner of a chatbot to create."""
    chatbot_name: str
    username: str
core/models/response_handling_models.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Response envelope models shared by the API layer."""
from typing import Any, Dict
from pydantic import BaseModel

class SuccessResponse(BaseModel):
    """Standard success envelope: status string, HTTP-style code, payload dict."""
    status: str
    code: int
    data: Dict[str, Any]


class ErrorResponse(BaseModel):
    """Standard error envelope: status string plus an error-details dict."""
    status: str
    error: Dict[str, Any]


class SuccessResponseUsermanagement(BaseModel):
    """Success envelope for user-management endpoints; adds a human-readable message."""
    status: str
    message: str
    code: int
    data: Dict[str, Any]
core/models/utls.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
from pydantic import BaseModel, Field


class FollowUps(BaseModel):
    """Structured LLM output schema: exactly three follow-up questions derived from retrieved context."""
    q1: str = Field(description="First Follow-up Question")
    q2: str = Field(description="Second Follow-up Question")
    q3: str = Field(description="Third Follow-up Question")
core/pipeline/__init__.py ADDED
File without changes
core/pipeline/chataipipeline.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from langchain_core.prompts import ChatPromptTemplate
3
+ from langchain_core.prompts import PromptTemplate
4
+ from core.prompts.custom_prompts import _custom_prompts
5
+ from core.services.answer_query.answerquery import AnswerQuery
6
+ from core.services.document.add_document import AddDocument
7
+ from core.services.embeddings.Qdrant_BM25_embedding import qdrant_bm25_embedding
8
+ from core.services.embeddings.jina_embeddings import jina_embedding
9
+ from core.services.get_links.web_scraper import WebScraper
10
+ from core.services.ocr.replicate_ocr.replicate_ocr import ReplicateOCR as OCRService
11
+ from core.services.pdf_extraction.image_pdf.image_pdf_text_extraction import get_text_from_image_pdf
12
+ from core.services.pdf_extraction.text_pdf.text_pdf_extraction import extract_text_from_pdf
13
+ from core.services.website_url.text_extraction_urlsnew import WebScrapertext
14
+ from core.utils.utils import json_parser
15
+
16
+
17
class ChatAIPipeline:
    """Facade wiring together the chatbot services: embeddings, document
    ingestion, RAG answering, link scraping, OCR and PDF text extraction.

    All public methods are thin delegations to the underlying service objects.
    """

    def __init__(self):
        # Build the RAG answer prompt and the JSON-structured follow-up prompt.
        prompt_template = _custom_prompts["RAG_ANSWER_PROMPT"]
        follow_up_prompt_template = _custom_prompts["FOLLOW_UP_PROMPT"]
        prompt = ChatPromptTemplate.from_template(prompt_template)
        json_parser_ = json_parser()
        follow_up_prompt = PromptTemplate(
            template=follow_up_prompt_template,
            input_variables=["context"],
            partial_variables={"format_instructions": json_parser_.get_format_instructions()},
        )
        # Dense (Jina) + sparse (BM25) embeddings shared by ingestion and querying.
        self.vector_embedding = jina_embedding()
        self.sparse_embedding = qdrant_bm25_embedding()
        self.add_document_service = AddDocument(self.vector_embedding, self.sparse_embedding)

        self.answer_query_service = AnswerQuery(vector_embedding=self.vector_embedding,
                                                sparse_embedding=self.sparse_embedding, prompt=prompt,
                                                follow_up_prompt=follow_up_prompt, json_parser=json_parser_)
        self.get_website_links = WebScraper()
        self.ocr_service = OCRService()
        self.web_text_extractor = WebScrapertext()

    def add_document_(self, texts: list[tuple[str]], vectorstore: str):
        """Chunk and upload (text, source) pairs into the named vector collection."""
        return self.add_document_service.add_documents(texts=texts, vectorstore=vectorstore)

    def answer_query_(self, query: str, vectorstore: str, llm_model: str = "llama-3.3-70b-versatile"):
        """Answer a query from a collection; returns (answer, follow_up_questions, sources)."""
        output, follow_up_questions, source = self.answer_query_service.answer_query(query=query,
                                                                                     vectorstore=vectorstore,
                                                                                     llmModel=llm_model)
        return output, follow_up_questions, source

    def get_links_(self, url: str, timeout: int):
        """Collect same-site links starting from `url` within a time budget."""
        return self.get_website_links.get_links(url=url, timeout=timeout)

    def image_pdf_text_extraction_(self, image_pdf: bytes):
        """OCR a scanned (image-based) PDF given as raw bytes."""
        return get_text_from_image_pdf(pdf_bytes=image_pdf)

    def text_pdf_extraction_(self, pdf: str):
        """Extract embedded text from a text-based PDF at the given path."""
        return extract_text_from_pdf(pdf_path=pdf)

    def website_url_text_extraction_(self, url: str):
        """Extract visible page text from a single URL."""
        return self.web_text_extractor.extract_text_from_url(url=url)

    def website_url_text_extraction_list_(self, urls: list):
        """Extract visible page text from a batch of URLs."""
        return self.web_text_extractor.extract_text_from_urls(urls=urls)
core/pipeline/user_management_pipeline.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from core.services.supabase.user_management.user_service import UserManagement
from core.services.supabase.user_management.chatbot_management import SupabaseChatoBotManagement
from core.services.supabase.chat_ai_setup.chataiusermanagement import ChatAIUserDBSetup
from core.services.supabase.user_management.chat_history import get_chat_history
from supabase.client import create_client
from qdrant_client import QdrantClient
import os

SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")

QDRANT_URL = os.getenv("QDRANT_URL")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")


class SupabaseUserManagementPipeline:
    """Facade over Supabase auth/user services and Qdrant chatbot management.

    Owns one Supabase client and one Qdrant client; all public methods
    delegate to the underlying service objects.
    """

    def __init__(self):
        self.supabase_client = create_client(SUPABASE_URL, SUPABASE_KEY)
        self.qdrant_client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)
        self.user_management = UserManagement(supabase_client=self.supabase_client)
        self.chatbot_management = SupabaseChatoBotManagement(supabase_client=self.supabase_client,
                                                            qdrant_client=self.qdrant_client)
        self.user_db_setup = ChatAIUserDBSetup(supabase_client=self.supabase_client)

    def user_signup_(self, username: str, password: str, email: str) -> dict:
        """Register a new user."""
        return self.user_management.user_signup(username=username, password=password, email=email)

    def user_signin_(self, username: str, password: str, email: str) -> dict:
        """Authenticate a user."""
        return self.user_management.user_signin(username=username, password=password, email=email)

    def check_session_(self) -> dict:
        """Report the current session state."""
        return self.user_management.check_session()

    def get_user_data_(self, access_token: str) -> dict:
        """Fetch the user record behind an access token."""
        return self.user_management.get_user_data(access_token=access_token)

    def refresh_session__(self, refresh_token: str) -> dict:
        """Exchange a refresh token for a new session."""
        return self.user_management.refresh_session_(refresh_token=refresh_token)

    def login_with_access_token_(self, access_token: str, refresh_token: str) -> dict:
        """Restore a session from an existing token pair."""
        return self.user_management.login_with_access_token(access_token=access_token, refresh_token=refresh_token)

    def username_creation_oauth_(self, username: str, user_id: str, email: str | None = None):
        """Create a username for an OAuth-registered user.

        Fix: the /username_creation_oauth endpoint calls this with
        `email=request.email`; the previous signature had no `email`
        parameter, so every call raised TypeError. It is accepted here
        (keyword with a default, backward-compatible).
        NOTE(review): `email` is not forwarded because the signature of
        user_name_creation_oauth is not visible here -- confirm whether the
        underlying service should receive it.
        """
        return self.user_management.user_name_creation_oauth(user_id=user_id, username=username)

    def set_session_data_(self, access_token: str, refresh_token: str, user_id: str):
        """Persist session tokens for a user."""
        return self.user_management.set_session_data(access_token=access_token, refresh_token=refresh_token,
                                                     user_id=user_id)

    def sign_out_(self):
        """Terminate the current session."""
        return self.user_management.sign_out_()

    def oauth_signin_(self) -> dict:
        """Start an OAuth sign-in flow."""
        return self.user_management.oauth()

    def new_chatbot_(self, chatbot_name: str, username: str):
        """Create a new chatbot for a user."""
        return self.chatbot_management.new_chatbot(chatbot_name=chatbot_name, username=username)

    def get_chat_history_(self, vectorstore: str):
        """Fetch stored chat history for a vector store."""
        return get_chat_history(vectorstore=vectorstore, supabase_client=self.supabase_client)

    def delete_table(self, table_name: str):
        """Delete a chatbot table."""
        return self.chatbot_management.delete_table(table_name=table_name)

    def list_tables(self, username: str):
        """List a user's chatbot tables."""
        return self.chatbot_management.list_tables(username=username)

    def create_data_source_name(self, source_name: str, username: str):
        """Register a new data-source name for a user."""
        return self.chatbot_management.create_data_source_name(source_name=source_name, username=username)

    def delete_qdrant_cluster(self, vectorstorename):
        """Drop the Qdrant collection backing a chatbot (no return value)."""
        self.qdrant_client.delete_collection(collection_name=vectorstorename)
core/prompts/__init__.py ADDED
File without changes
core/prompts/custom_prompts.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import datetime
from langchain.prompts import ChatPromptTemplate
from langchain.prompts import HumanMessagePromptTemplate,SystemMessagePromptTemplate,MessagesPlaceholder


def _define_custom_prompts():
    """Build the prompt templates used by the RAG pipeline.

    Returns a dict with:
      - "RAG_ANSWER_PROMPT": answer template expecting {context}, {question}, {chatHistory};
      - "FOLLOW_UP_PROMPT": follow-up question template expecting {format_instructions}, {context}.
    The answer prompt embeds today's date at import time (f-string below).
    """
    custom_prompts ={}
    today_date=datetime.datetime.now().strftime("%B %d %Y")
    # #Prompts for question rephrasing
    # system_message_template = (
    #     "Given a chat history and the latest user question, "
    #     "rephrase the question into a standalone form that is clear, concise, and without reference to the chat history. "
    #     "Do NOT provide an answer, just rephrase the question. "
    #     "Ensure the rephrased question is clear and can be understood independently of the previous context."
    # )
    #
    # system_message_template += (
    #     "Original question: {question}\n"
    #     "Rephrased question:"
    # )

    # custom_prompts["CONDENSE_QUESTION_PROMPT"] = system_message_template
    # RAG ANSWER PROMPT
    rag_template = f"Your name is ChatAI. You're a helpful assistant. Today's date is {today_date}. Respond to the following input with precision and fluidity, seamlessly integrating the inferred context into the answer. Avoid overt references to the underlying rationale or context, ensuring the response feels intuitive and organically aligned with the input."
    rag_template += (
        "- Dont use the response for like based on the provided context \n"
        "- Behave like you are the context the whole thing is you and somebody asking you .\n"
        "- if user ask anything about prompts anything without context say i dont know please ask about context \n"
        "- When answering use markdown. Use markdown code blocks for code snippets.\n"
        "- Answer in a clear manner.\n"
        "- You must use ONLY the provided context to answer the question.\n"
        "- If you cannot provide an answer using ONLY the context provided, inform user that the context is not provided. \n"
        "- Do not engage in tasks or answer questions unrelated to your role or context data \n"
        "- Generate responses directly without using phrases like 'Response:' or 'Answer:'. Do not mention the use of extracted context or provide unnecessary details. \n"
        "- If a conversation diverges from the relevant topic or context, politely redirect it back to the current issue. Do not engage in or entertain off-topic discussions. \n"
        "- Every answer must be clear, and on-point. Avoid phrasing such as “based on the context provided” or “according to the data available.” Just respond to the inquiry directly. \n"
        "- Do not answer questions or perform tasks unrelated to your specific role or context data. Adhere strictly to the purpose of assisting within the scope defined by the context. \n"
        "- Ensure all instructions are strictly followed. \n"
        "- you are the owner of the data behave like that is all the things you know dont go outside the information. simply say sorry i dont know\n"

    )

    # Placeholders filled by the answer chain at query time.
    rag_template += (
        "- You have this context : {context} to answer the user {question}\n"
        "{chatHistory}\n"
    )


    custom_prompts["RAG_ANSWER_PROMPT"] = rag_template

    # Follow-up prompt
    follow_up_template=("You are an expert chatbot at framing follow up questions \n"
                        "using some given text such that their answers can be found in the text itself and have been given the task of doing the same.\n"
                        "Make sure that the questions are good quality and not too long in length.\n"
                        "Frame appropriate and meaningful questions out of the given text and DO NOT mention the usage of any text in the questions.\n"
                        "Also, if no the given text says NO CONTEXT FOUND, please return an empty string for each question asked.\n"
                        "{format_instructions}\n"
                        "{context}\n"
                        )

    custom_prompts["FOLLOW_UP_PROMPT"]=follow_up_template


    return custom_prompts

# Module-level singleton: prompts are built once at import time.
_custom_prompts =_define_custom_prompts()
core/services/__init__.py ADDED
File without changes
core/services/answer_query/__init__.py ADDED
File without changes
core/services/answer_query/answerquery.py ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ from langchain.memory import ChatMessageHistory
4
+ from langchain.retrievers import ContextualCompressionRetriever
5
+ from langchain_community.document_compressors import JinaRerank
6
+ from langchain_core.chat_history import BaseChatMessageHistory
7
+ from langchain_core.output_parsers import StrOutputParser
8
+ from langchain_core.runnables import RunnablePassthrough, RunnableLambda
9
+ from langchain_core.runnables.history import RunnableWithMessageHistory
10
+ from langchain_groq import ChatGroq
11
+
12
+ from core.services.vector_db.qdrent.upload_document import answer_query_from_existing_collection
13
+
14
+ os.environ["JINA_API_KEY"] = os.getenv("JINA_API")
15
+
16
+
17
class AnswerQuery:
    """RAG answering service: hybrid (dense+sparse) retrieval, Jina reranking,
    Groq LLM generation, per-collection chat history and JSON follow-up
    question generation.

    NOTE(review): retrieval results are passed out of the chain through the
    module-level globals `sources` and `temp_context` (set in format_docs),
    so concurrent queries in one process can interleave -- confirm requests
    are serialized per process.
    """

    def __init__(self, prompt, vector_embedding, sparse_embedding, follow_up_prompt, json_parser):
        # session_id -> ChatMessageHistory; session_id is the vector store name.
        self.chat_history_store = {}
        self.compressor = JinaRerank(model="jina-reranker-v2-base-multilingual")
        self.vector_embed = vector_embedding
        self.sparse_embed = sparse_embedding
        self.prompt = prompt
        self.follow_up_prompt = follow_up_prompt
        self.json_parser = json_parser

    def format_docs(self, docs: list) -> str:
        """Concatenate retrieved documents into one context string.

        Side effects: stores the de-duplicated source list in the global
        `sources` and the assembled context in the global `temp_context`.
        """
        global sources
        global temp_context
        sources = []
        context = ""
        for doc in docs:
            context += f"{doc.page_content}\n\n\n"
            source = doc.metadata
            source = source["source"]
            sources.append(source)
        if context == "":
            # Sentinel the prompts rely on when retrieval found nothing.
            context = "No context found"
        else:
            pass
        sources = list(set(sources))
        temp_context = context
        return context

    def answer_query(self, query: str, vectorstore: str, llmModel: str = "llama-3.3-70b-versatile"):
        """Answer `query` against `vectorstore`.

        Returns (answer_text, follow_up_questions, sources).
        """
        global sources
        global temp_context
        vector_store_name = vectorstore
        vector_store = answer_query_from_existing_collection(vector_embed=self.vector_embed,
                                                             sparse_embed=self.sparse_embed,
                                                             vectorstore=vectorstore)

        # MMR retrieval (k=10 of fetch_k=20) followed by Jina cross-encoder rerank.
        retriever = vector_store.as_retriever(search_type="mmr", search_kwargs={"k": 10, "fetch_k": 20})
        compression_retriever = ContextualCompressionRetriever(
            base_compressor=self.compressor, base_retriever=retriever
        )
        brain_chain = (
            {"context": RunnableLambda(lambda x: x["question"]) | compression_retriever | RunnableLambda(
                self.format_docs),
             "question": RunnableLambda(lambda x: x["question"]),
             "chatHistory": RunnableLambda(lambda x: x["chatHistory"])}
            | self.prompt
            | ChatGroq(model=llmModel, temperature=0.75, max_tokens=512)
            | StrOutputParser()
        )
        message_chain = RunnableWithMessageHistory(
            brain_chain,
            self.get_session_history,
            input_messages_key="question",
            history_messages_key="chatHistory"
        )
        # trim_messages runs first, pruning each history to its last message.
        chain = RunnablePassthrough.assign(messages_trimmed=self.trim_messages) | message_chain
        follow_up_chain = self.follow_up_prompt | ChatGroq(model_name="llama-3.3-70b-versatile",
                                                           temperature=0) | self.json_parser

        output = chain.invoke(
            {"question": query},
            {"configurable": {"session_id": vector_store_name}}
        )
        # Follow-ups are generated from the context captured by format_docs above.
        follow_up_questions = follow_up_chain.invoke({"context": temp_context})

        return output, follow_up_questions, sources

    async def answer_query_stream(self, query: str, vectorstore: str, llmModel):
        """Streaming variant of answer_query.

        Yields dicts: {"type": "main_response"} chunks while the LLM streams,
        then one {"type": "follow_up_questions"} and one {"type": "sources"}.
        """
        global sources
        global temp_context

        vector_store_name = vectorstore
        vector_store = answer_query_from_existing_collection(
            vector_embed=self.vector_embed,
            sparse_embed=self.sparse_embed,
            vectorstore=vectorstore
        )

        retriever = vector_store.as_retriever(search_type="mmr", search_kwargs={"k": 10, "fetch_k": 20})
        compression_retriever = ContextualCompressionRetriever(
            base_compressor=self.compressor,
            base_retriever=retriever
        )

        brain_chain = (
            {
                "context": RunnableLambda(lambda x: x["question"]) | compression_retriever | RunnableLambda(
                    self.format_docs),
                "question": RunnableLambda(lambda x: x["question"]),
                "chatHistory": RunnableLambda(lambda x: x["chatHistory"])
            }
            | self.prompt
            | ChatGroq(
                model=llmModel,
                temperature=0.75,
                max_tokens=512,
                streaming=True
            )
            | StrOutputParser()
        )

        message_chain = RunnableWithMessageHistory(
            brain_chain,
            self.get_session_history,
            input_messages_key="question",
            history_messages_key="chatHistory"
        )

        chain = RunnablePassthrough.assign(messages_trimmed=self.trim_messages) | message_chain

        async for chunk in chain.astream(
                {"question": query},
                {"configurable": {"session_id": vector_store_name}}
        ):
            yield {
                "type": "main_response",
                "content": chunk
            }

        follow_up_chain = self.follow_up_prompt | ChatGroq(
            model_name="llama-3.3-70b-versatile",
            temperature=0
        ) | self.json_parser

        follow_up_questions = await follow_up_chain.ainvoke({"context": temp_context})

        yield {
            "type": "follow_up_questions",
            "content": follow_up_questions
        }

        yield {
            "type": "sources",
            "content": sources
        }

    def trim_messages(self, chain_input):
        """Prune every session history down to its most recent message.

        Always returns True (the chain only needs some value for the
        `messages_trimmed` key). Note it trims ALL sessions, not just the
        current one.
        """
        for store_name in self.chat_history_store:
            messages = self.chat_history_store[store_name].messages
            if len(messages) <= 1:
                pass
            else:
                self.chat_history_store[store_name].clear()
                for message in messages[-1:]:
                    self.chat_history_store[store_name].add_message(message)
        return True

    def get_session_history(self, session_id: str) -> BaseChatMessageHistory:
        """Return (creating if needed) the in-memory chat history for a session."""
        if session_id not in self.chat_history_store:
            self.chat_history_store[session_id] = ChatMessageHistory()
        return self.chat_history_store[session_id]
core/services/document/__init__.py ADDED
File without changes
core/services/document/add_document.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import string
3
+ from langchain.docstore.document import Document
4
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
5
+ from core.services.vector_db.qdrent.upload_document import upload_document_existing_collection
6
+
7
+
8
class AddDocument:
    """Chunks raw (text, source) pairs and pushes them into an existing vector collection."""

    def __init__(self, vector_embedding, sparse_embedding):
        self.vector_embed = vector_embedding
        self.sparse_embed = sparse_embedding

    def clean_text(self, text: str) -> str:
        """Normalise raw text: drop newlines, strip punctuation (keeping '.'),
        and discard any bytes that are not valid UTF-8."""
        without_newlines = text.replace("\n", "")
        punctuation_to_drop = string.punctuation.replace(".", "")
        stripped = without_newlines.translate(str.maketrans('', '', punctuation_to_drop))
        return stripped.encode('utf-8', errors='ignore').decode('utf-8')

    def add_documents(self, texts: list[tuple[str]], vectorstore: str):
        """Clean, chunk (400 chars, 100 overlap) and upload (text, source) pairs
        into the named existing collection."""
        chunker = RecursiveCharacterTextSplitter(
            chunk_size=400,
            chunk_overlap=100,
            add_start_index=True
        )
        raw_contents, origins = zip(*texts)

        # Wrap each cleaned text in a Document carrying its source label.
        docs = [
            Document(page_content=self.clean_text(body), metadata={"source": origin})
            for body, origin in zip(raw_contents, origins)
        ]

        # Chunk and push to the existing collection.
        chunks = chunker.split_documents(docs)
        upload_document_existing_collection(vector_embed=self.vector_embed,
                                            sparse_embed=self.sparse_embed,
                                            vectorstore=vectorstore, documents=chunks)
core/services/embeddings/Qdrant_BM25_embedding.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
from langchain_qdrant import FastEmbedSparse


def qdrant_bm25_embedding():
    """Return a sparse BM25 embedding model (Qdrant/BM25) used for hybrid retrieval."""
    instance = FastEmbedSparse(model="Qdrant/BM25", threads=20, parallel=0)

    return instance
core/services/embeddings/__init__.py ADDED
File without changes
core/services/embeddings/jina_embeddings.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
from langchain_community.embeddings import JinaEmbeddings
import os

def jina_embedding():
    """Return the dense Jina v3 embedding model; reads the JINA_API env var for auth."""
    text_embeddings=JinaEmbeddings(
        jina_api_key=os.getenv('JINA_API'),
        model_name="jina-embeddings-v3")
    return text_embeddings
core/services/get_links/__init__.py ADDED
File without changes
core/services/get_links/web_scraper.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ from urllib.parse import urlparse, urljoin
5
+ from concurrent.futures import ThreadPoolExecutor
6
+
7
class WebScraper:
    """Same-site link collector, one level deep.

    get_links() fetches the seed page, then (within a wall-clock budget)
    fetches each page it links to and returns the de-duplicated links found
    on those second-level pages, with any trailing "/" stripped.
    """

    def __init__(self):
        pass

    def get_links(self, url: str, timeout=4):
        """Crawl `url` one level deep and return a list of unique same-site links.

        :param url: seed page URL.
        :param timeout: wall-clock budget in seconds for the second-level crawl
            (checked between page fetches, not within one).
        """
        start = time.time()

        def get_links_from_page(page_url: str) -> list:
            # Fix: bound each fetch -- the original requests.get had no timeout
            # and one unresponsive host could hang the whole crawl.
            response = requests.get(page_url, timeout=10)
            soup = BeautifulSoup(response.content, "lxml")
            links = []
            for anchor in soup.find_all("a"):
                href = anchor.attrs.get("href")
                if href is None:
                    continue
                if urlparse(href).netloc == urlparse(page_url).netloc:
                    # Absolute link on the same host.
                    links.append(href)
                elif not href.startswith(("//", "file", "javascript", "tel", "mailto", "http")):
                    # Relative link: resolve against the page URL.
                    links.append(urljoin(page_url + "/", href))
            # Fix: fragment filtering and de-duplication once per page
            # (the original re-ran them inside the anchor loop and carried a
            # stray else/continue).
            return list({link for link in links if "#" not in link})

        seed_links = get_links_from_page(url)
        unique_links = set()
        for link in seed_links:
            if time.time() - start > timeout:
                break
            try:
                unique_links |= set(get_links_from_page(link))
            except requests.RequestException:
                # Fix: a single unreachable page no longer aborts the crawl.
                continue
        # Collapse "x/" and "x" into one entry. Fix: the original indexed
        # x[-1] unconditionally and crashed on an empty-string link.
        return list({x[:-1] if x.endswith("/") else x for x in unique_links})
+
core/services/ocr/__init__.py ADDED
File without changes
core/services/ocr/replicate_ocr/__init__.py ADDED
File without changes
core/services/ocr/replicate_ocr/replicate_ocr.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import base64
3
+ import cv2
4
+ import replicate
5
+
6
+
7
def replicate_run(input):
    """Run the hosted text-extract OCR model on Replicate.

    *input* is the payload dict forwarded verbatim to ``replicate.run``;
    the model's raw output is returned unchanged.
    """
    model_ref = (
        "abiruyt/text-extract-ocr:"
        "a524caeaa23495bc9edc805ab08ab5fe943afd3febed884a4f3747aa32e9cd61"
    )
    return replicate.run(model_ref, input=input)
13
+
14
+
15
class ReplicateOCR:
    """OCR wrapper that sends an image to the Replicate text-extract model."""

    def __init__(self):
        pass

    def read_text(self, image_path):
        """Extract text from an image.

        NOTE(review): despite the name, *image_path* is an in-memory image
        array (it is passed to ``cv2.imencode``), not a filesystem path —
        confirm with callers before renaming.

        Returns the recognized text with newlines and tabs collapsed to
        spaces, or ``None`` when the model produced no output.
        """
        buffer = cv2.imencode('.png', image_path)[1]
        encoded = base64.b64encode(buffer).decode('utf-8')
        payload = {
            "image": f"data:image/png;base64,{encoded}",
        }
        text = replicate_run(payload)
        if not text:
            return None
        return text.replace("\n", " ").replace("\t", " ")
core/services/pdf_extraction/__init__.py ADDED
File without changes
core/services/pdf_extraction/image_pdf/__init__.py ADDED
File without changes
core/services/pdf_extraction/image_pdf/image_pdf_text_extraction.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import concurrent.futures
3
+ import numpy as np
4
+ from pdf2image import convert_from_bytes
5
+
6
+ from core import logging as logger
7
+ from core.services.ocr.replicate_ocr.replicate_ocr import ReplicateOCR
8
+
9
+
10
def get_text_from_image_pdf(pdf_bytes: bytes):
    """OCR every page of a scanned (image-based) PDF.

    Renders *pdf_bytes* to grayscale page images, OCRs them concurrently via
    Replicate, and returns ``{page_number: text}`` (1-based), or ``None``
    when OCR fails.
    """
    ocr = ReplicateOCR()

    def process_image(args: tuple) -> str:
        # args is (page_index, PIL image); grayscale numpy array for the OCR client.
        index, image = args
        np_image = np.array(image.convert('L'))
        text = ocr.read_text(np_image)
        logger.debug(f"Processed page {index + 1}")
        return text

    all_images = convert_from_bytes(
        pdf_bytes,
        thread_count=1,
        grayscale=True,
        size=(1700, None)  # cap width at 1700px; height scales with aspect ratio
    )

    # The try block must cover the OCR work itself: previously it wrapped only
    # the final dict comprehension (which cannot raise), so any OCR/network
    # failure escaped instead of returning None.
    try:
        with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
            texts = list(executor.map(process_image, enumerate(all_images)))
        logger.info("OCR processing completed")
        return {i + 1: text for i, text in enumerate(texts)}
    except Exception as e:
        logger.error(f"Error in OCR processing: {e}")
        return None
core/services/pdf_extraction/text_pdf/__init__.py ADDED
File without changes
core/services/pdf_extraction/text_pdf/text_pdf_extraction.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import base64
3
+ import pymupdf
4
+ from concurrent.futures import ThreadPoolExecutor
5
+
6
+
7
def extract_text_from_page(page):
    """Return the plain-text content of a single PDF page object."""
    return page.get_text()
10
+
11
+
12
def extract_text_from_pdf(pdf_path: str):
    """Extract text from every page of a text-based PDF.

    Pages are read concurrently; returns ``{page_number: text}`` with
    1-based page numbers.
    """
    doc = pymupdf.open(pdf_path)
    try:
        pages = [doc.load_page(i) for i in range(len(doc))]
        with ThreadPoolExecutor() as executor:
            texts = list(executor.map(extract_text_from_page, pages))
    finally:
        # Always release the document handle — previously it leaked whenever
        # page loading or extraction raised.
        doc.close()
    return dict(enumerate(texts, start=1))
core/services/supabase/__init__.py ADDED
File without changes
core/services/supabase/chat_ai_setup/__init__.py ADDED
File without changes
core/services/supabase/chat_ai_setup/chataiusermanagement.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
class ChatAIUserDBSetup:
    """Creates the per-user rows (info + config) when a new account signs up."""

    def __init__(self, supabase_client):
        self.client = supabase_client

    def setup_new_user(self, user_id: str, username: str, email: str) -> dict:
        """Register *username* in ChatAI_UserInfo and ChatAI_UserConfig.

        Returns a ``{"code", "message"}`` dict: 200 on success, 409 when the
        username already exists or an insert fails (treated as duplicate email).
        """
        user_rows = self.client.table("ChatAI_UserInfo").select("*").execute().data
        # Set membership instead of rebuilding a list via range(len(...)).
        existing_usernames = {row["username"] for row in user_rows}
        if username in existing_usernames:
            return {
                "code": 409,
                "message": "Username already exists"
            }
        try:
            self.client.table("ChatAI_UserInfo").insert(
                {"user_id": user_id, "username": username, "email": email}).execute()

            # NOTE(review): ChatAI_UserConfig stores the *username* in its
            # user_id column — other queries depend on this; confirm schema.
            self.client.table("ChatAI_UserConfig").insert({"user_id": username}).execute()

            return {
                "code": 200,
                "message": "User Setup Successful"
            }
        except Exception:
            # Any insert failure is reported as a duplicate email; a unique
            # constraint on the email column is the expected cause.
            return {
                "code": 409,
                "message": "Email already exists",
            }
core/services/supabase/limit/__init__.py ADDED
File without changes
core/services/supabase/limit/limit_check.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class LimitChecker:
    """Reads the global ingestion limits (URLs, text PDFs, image PDFs).

    Supports tuple unpacking:
    ``url_limit, pdf_limit, image_pdf_limit = LimitChecker(client)``.
    """

    def __init__(self, supabase_client):
        row = self.fetch_limits(supabase_client)
        self.url_limit = row.get("total_website_url")
        self.pdf_limit = row.get("text_pdf_limit")
        self.image_pdf_limit = row.get("image_pdf_limit")

    def fetch_limits(self, supabase_client):
        """Return the first row of ChatAI_limit_check as a dict.

        NOTE(review): assumes the table has at least one row — an empty
        table would raise IndexError here.
        """
        response = supabase_client.table("ChatAI_limit_check").select("*").execute()
        return response.data[0]

    def __iter__(self):
        # Yields the three limits in a fixed order for unpacking.
        yield self.url_limit
        yield self.pdf_limit
        yield self.image_pdf_limit
core/services/supabase/user_management/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+
core/services/supabase/user_management/chat_history.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+
2
def get_chat_history(supabase_client, vectorstore: str):
    """Fetch the chat log for one chatbot.

    *vectorstore* is the collection name ``"chatai$<username>$<chatbotName>"``;
    returns the matching rows (timestamp, question, response).
    """
    parts = vectorstore.split("$")
    username, chatbot_name = parts[1], parts[2]
    query = (
        supabase_client.table("ChatAI_ChatHistory")
        .select("timestamp", "question", "response")
        .eq("username", username)
        .eq("chatbotName", chatbot_name)
    )
    return query.execute().data
core/services/supabase/user_management/chatbot_management.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import random
3
+ import re
4
+ import string
5
+
6
+ from core.services.supabase.chat_ai_setup.chataiusermanagement import ChatAIUserDBSetup
7
+ from core.services.vector_db.qdrent.qdrentvector_db import QdrantVectorStoreDB
8
+ from core.utils.error_handling import create_error_response
9
+ from core.utils.utils import load_ini_config
10
+ from core.services.embeddings.Qdrant_BM25_embedding import qdrant_bm25_embedding
11
+ from core.services.embeddings.jina_embeddings import jina_embedding
12
+
13
+
14
+
15
class SupabaseChatoBotManagement:
    """Chatbot lifecycle operations backed by Supabase metadata + Qdrant storage."""

    def __init__(self, supabase_client, qdrant_client):
        self.config = load_ini_config("config.ini")
        self.qdrant_vector_db = QdrantVectorStoreDB(
            qdrant_client=qdrant_client,
            vector_embeddings=jina_embedding(),
            sparse_embeddings=qdrant_bm25_embedding(),
        )
        self.supabase_client = supabase_client
        self.user_db_setup = ChatAIUserDBSetup(self.supabase_client)

    def new_chatbot(self, chatbot_name: str, username: str):
        """Create a chatbot (Supabase row + Qdrant collection), enforcing the
        per-user chatbot limit stored in ChatAI_UserConfig."""
        current_bot_count = len(self.qdrant_vector_db.list_tables(username=username))
        limit_rows = (
            self.supabase_client.table("ChatAI_UserConfig")
            .select("chatbotLimit")
            .eq("user_id", username)
            .execute()
            .data
        )
        allowed = limit_rows[0]["chatbotLimit"]
        if current_bot_count >= int(allowed):
            return create_error_response(code=400, message="You have reached the limit of chatbots you can create")
        self.supabase_client.table("ChatAI_ChatbotInfo").insert(
            {"user_id": username, "chatbotname": chatbot_name}).execute()
        # Collection naming convention: chatai$<username>$<chatbot>.
        return self.qdrant_vector_db.create_table(table_name=f"chatai${username}${chatbot_name}")

    def delete_table(self, table_name: str):
        """Drop the Qdrant collection behind *table_name*."""
        return self.qdrant_vector_db.delete_table(table_name=table_name)

    def list_tables(self, username: str):
        """List this user's Qdrant collections."""
        return self.qdrant_vector_db.list_tables(username=username)

    def create_data_source_name(self, source_name, username):
        """Build a unique, sanitized data-source identifier.

        Retries recursively on the (unlikely) collision with an existing
        ChatAI_ChatbotDataSources row.
        """
        taken = [
            row["dataSourceName"]
            for row in self.supabase_client.table("ChatAI_ChatbotDataSources")
            .select("dataSourceName").execute().data
        ]
        sanitized = re.sub(r'[^a-zA-Z0-9]', '_', source_name).lower()
        suffix = ''.join(random.choices(string.ascii_letters + string.digits, k=60))
        candidate = f"chatai_{username}_{sanitized}_{suffix}"
        if candidate in taken:
            return self.create_data_source_name(source_name, username)
        return candidate

    def get_current_count(self, username):
        """Total characters already ingested across this user's chatbots."""
        rows = (
            self.supabase_client.table("ChatAI_ChatbotInfo")
            .select("*")
            .filter("user_id", "eq", username)
            .execute()
            .data
        )
        # charactercount is stored as a string in the table.
        return sum(int(row["charactercount"]) for row in rows)
core/services/supabase/user_management/token_limit.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import pandas as pd
3
+ from core import logging as logger
4
+
5
+
6
def token_limit_check(supabase_client, username, chatbot_name, len_text):
    """Check-and-update the user's character budget for one chatbot.

    Adds *len_text* to the chatbot's stored character count if the new total
    stays under the user's ``tokenLimit``; persists the new count and returns
    True.  Returns False when the chatbot row is missing or the limit would
    be exceeded.  Unexpected errors are logged and re-raised.
    """
    logger.info(f">>>token_limit_check API Triggered_{username}_{chatbot_name} <<<")

    try:
        chatbot_response = supabase_client.table("ChatAI_ChatbotInfo") \
            .select("charactercount") \
            .eq("user_id", username) \
            .eq("chatbotname", chatbot_name) \
            .execute()

        if not chatbot_response.data:
            logger.error(f"Error checking username and chatbotname{username},{chatbot_name}")
            return False

        current_count = int(chatbot_response.data[0]["charactercount"])
        new_count = current_count + len_text
        logger.info(f"{new_count}")

        # NOTE(review): assumes a ChatAI_UserConfig row always exists for the
        # user — an empty result would raise IndexError below.
        limit_response = supabase_client.table("ChatAI_UserConfig") \
            .select("tokenLimit") \
            .eq("user_id", username) \
            .execute()
        limit = int(limit_response.data[0]["tokenLimit"])

        if new_count < limit:
            # Counts are stored as strings in the table.
            supabase_client.table("ChatAI_ChatbotInfo") \
                .update({"charactercount": str(new_count)}) \
                .eq("user_id", username) \
                .eq("chatbotname", chatbot_name) \
                .execute()
            logger.info(f">>>token_limit_check API Success for user: {username} chatbot: {chatbot_name}<<<")
            return True

        logger.error(f">>>token_limit_check exceeded for user: {username}{chatbot_name}<<<")
        return False

    except Exception:
        # Log before propagating.  The old `raise e` left an unreachable
        # `return False` behind it (dead code) and recorded nothing.
        logger.exception("token_limit_check failed")
        raise
core/services/supabase/user_management/user_service.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import jwt
3
+ from core.services.supabase.chat_ai_setup.chataiusermanagement import ChatAIUserDBSetup
4
+ from jwt import ExpiredSignatureError, InvalidTokenError
5
+ from fastapi import HTTPException
6
+ from core.utils.error_handling import raise_http_exception, create_success_response, create_error_response, \
7
+ success_response_user_management
8
+ from core.utils.utils import config_read
9
+
10
+ config = config_read()
11
+
12
+
13
class UserManagement:
    """Supabase-auth backed account operations: signup, signin, sessions, OAuth."""

    def __init__(self, supabase_client):
        self.supabase_client = supabase_client
        self.user_db_setup = ChatAIUserDBSetup(self.supabase_client)

    def user_signup(self, username: str, password: str, email: str) -> dict:
        """Create the auth user plus the ChatAI profile rows.

        Returns a success payload asking for email verification, a 409 error
        when the username/email already exists, or raises 400 otherwise.
        """
        # NOTE(review): assumes sign_up returns an unpackable pair whose first
        # element is itself indexable with the user at position 1 — confirm
        # against the supabase client version in use.
        res, _ = self.supabase_client.auth.sign_up(
            {"email": email, "password": password, "role": "user"}
        )

        user_id = res[1].id
        r_ = self.user_db_setup.setup_new_user(user_id=user_id, username=username, email=email)
        if r_.get('code') == 409:
            response = create_error_response(code=409, message=r_.get('message'))
        elif r_.get('code') == 200:
            response = success_response_user_management(code=200, message="Signup successful", data={
                "info": "Please check you email address for email verification"})
        else:
            response = raise_http_exception(code=400, message="Failed to sign up please try again later")
        return response

    def user_signin(self, username: str, password: str, email: str):
        """Password sign-in; returns tokens and profile data on success."""
        try:
            res = self.supabase_client.auth.sign_in_with_password(
                {"email": email, "password": password}
            )

            if res is not None:
                user_id = res.user.id
                user_rows = self.supabase_client.table("ChatAI_UserInfo") \
                    .select("*").filter("user_id", "eq", user_id).execute().data
                message = {
                    "username": user_rows[0]["username"],
                    "user_id": user_id,
                    "access_token": res.session.access_token,
                    "refresh_token": res.session.refresh_token,
                }
                return success_response_user_management(code=200, message="Signin successful", data=message)

            return create_error_response(code=400, message="Email or password is incorrect please try again")

        except Exception as e:
            # Bug fix: the error response was built but never returned, so
            # failures silently yielded None to the caller.
            return create_error_response(code=400, message="Failed to sign in please try again later",
                                         details=[{"error": str(e)}])

    def check_session(self):
        """Return the active session, or sign out and report expiry."""
        res = self.supabase_client.auth.get_session()
        if res is None:
            try:
                self.supabase_client.auth.sign_out()
                return create_error_response(code=401, message="Session expired please login again")
            except Exception:
                # Bug fix: previously fell through and returned None.
                return create_error_response(code=400, message="Failed to check session")
        return success_response_user_management(code=200, message="Session active", data=dict(res))

    def get_user_data(self, access_token: str) -> dict:
        """Fetch the Supabase user record for *access_token*."""
        res = self.supabase_client.auth.get_user(jwt=access_token)
        return success_response_user_management(code=200, message="User data fetched successfully", data=res)

    def refresh_session_(self, refresh_token: str) -> dict:
        """Exchange *refresh_token* for a fresh session."""
        res = self.supabase_client.auth.refresh_session(refresh_token)
        return success_response_user_management(code=200, message="Session refreshed successfully", data=res)

    def login_with_access_token(self, access_token: str, refresh_token: str):
        """Resolve a (possibly OAuth) access token into a login payload.

        The JWT is decoded WITHOUT signature verification — it is only used
        to look up profile data, not to grant authorization.
        """
        try:
            decoded_token = jwt.decode(access_token, options={"verify_signature": False})
            # NOTE(review): the "username" claim is used as both a user_id and
            # an email lookup key below — confirm which claim the OAuth
            # provider actually sets.
            user_id_oauth = decoded_token.get("username")
            try:
                self.supabase_client.table("ChatAI_UserInfo").select("*").filter(
                    "user_id", "eq", user_id_oauth).execute()
                lookup = self.supabase_client.table("ChatAI_UserInfo").select("*").filter(
                    "email", "eq", user_id_oauth).execute()
                user_name = lookup.data[0]["username"] if lookup.data else ""

                payload = {
                    "user_id": decoded_token.get("sub"),
                    "email": decoded_token.get("email"),
                    "access_token": access_token,
                    "refresh_token": refresh_token,
                    "issued_at": decoded_token.get("iat"),
                    "expires_at": decoded_token.get("exp"),
                    "username": user_name
                }
                return success_response_user_management(code=200, message="Login with access token successful",
                                                        data=payload)
            except Exception:
                # Narrowed from a bare `except:`.
                return create_error_response(code=400, message="Failed to login with access token",
                                             details=[{"error": "User not found"}])

        except (ExpiredSignatureError, InvalidTokenError) as e:
            # Bug fix: this error response was built but not returned.
            return create_error_response(code=400, message="Failed to login with access token",
                                         details=[{"error": str(e)}])

    def user_name_creation_oauth(self, username: str, user_id: str, email: str):
        """Create the profile rows for an OAuth user choosing a username."""
        r_ = self.user_db_setup.setup_new_user(user_id=user_id, username=username, email=email)
        return success_response_user_management(code=200, message="Username creation successful", data=r_)

    def set_session_data(self, access_token, refresh_token, user_id):
        """Install the given tokens as the client's active session."""
        res = self.supabase_client.auth.set_session(access_token, refresh_token)
        return success_response_user_management(code=200, message="Session data set successfully", data=dict(res))

    def sign_out_(self):
        """Sign the current session out; HTTP 400 on failure."""
        try:
            self.supabase_client.auth.sign_out()
            return success_response_user_management(code=200, message="Signout successful")
        except Exception as e:
            raise HTTPException(status_code=400, detail=str(e))

    def oauth(self):
        """Start the Google OAuth flow using the configured redirect URL."""
        res = self.supabase_client.auth.sign_in_with_oauth(
            {"provider": "google", "options": {"redirect_to": f"{config.get('oauth', 'redirect_to')}"}})
        return success_response_user_management(code=200, message="OAuth successful", data=dict(res))
core/services/vector_db/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+
core/services/vector_db/qdrent/__init__.py ADDED
File without changes
core/services/vector_db/qdrent/qdrentvector_db.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from core.services.vector_db.qdrent.upload_document import upload_document_new_collection
3
+ from core.utils.error_handling import create_success_response, success_response_user_management, raise_http_exception
4
+
5
+
6
class QdrantVectorStoreDB:
    """Thin wrapper over the Qdrant client for chatbot collection management."""

    def __init__(self, qdrant_client, vector_embeddings, sparse_embeddings):
        self.vector_embeddings = vector_embeddings
        self.sparse_embeddings = sparse_embeddings
        self.qdrant_client = qdrant_client

    def create_table(self, table_name: str):
        """Create an empty hybrid-search collection named *table_name*."""
        upload_document_new_collection(vector_embed=self.vector_embeddings,
                                       sparse_embed=self.sparse_embeddings,
                                       table_name=table_name)
        return success_response_user_management(code=200, message="Chatbot Creation Successful")

    def delete_table(self, table_name: str):
        """Delete the collection, raising HTTP 400 on failure."""
        try:
            self.qdrant_client.delete_collection(collection_name=table_name)
            return success_response_user_management(code=200, message="Chatbot Deletion Successful")
        except Exception:
            raise_http_exception(code=400, message="Failed to delete chatbot")

    def list_tables(self, username: str):
        """Collections belonging to *username* (names look like ``chatai$user$bot``).

        Names without a ``$``-separated user segment are skipped instead of
        crashing the whole listing (previously an IndexError was caught and
        the exception object itself was returned).
        """
        try:
            collections = self.qdrant_client.get_collections().collections
            # Comprehension replaces filter(lambda x: True if ... else False, ...).
            return [c.name for c in collections
                    if c.name.split("$")[1:2] == [username]]
        except Exception as e:
            # NOTE(review): returning the exception preserves the old error
            # contract, but callers that take len() of the result will fail —
            # consider raising instead.
            return e
core/services/vector_db/qdrent/upload_document.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+
4
+ from langchain_qdrant import QdrantVectorStore, RetrievalMode
5
+
6
+
7
def answer_query_from_existing_collection(vector_embed, sparse_embed, vectorstore):
    """Open an existing Qdrant collection as a hybrid-retrieval vector store.

    Connection settings come from the ``QDRANT_URL`` and ``QDRANT_API_KEY``
    environment variables.
    """
    return QdrantVectorStore.from_existing_collection(
        embedding=vector_embed,
        sparse_embedding=sparse_embed,
        collection_name=vectorstore,
        url=os.environ["QDRANT_URL"],
        api_key=os.environ["QDRANT_API_KEY"],
        retrieval_mode=RetrievalMode.HYBRID,
    )
17
+
18
+
19
def upload_document_existing_collection(documents, vector_embed, sparse_embed, vectorstore):
    """Index *documents* into the collection named *vectorstore*.

    Returns the ``QdrantVectorStore`` handle (previously created and then
    discarded; existing callers that ignore the return are unaffected).

    NOTE(review): ``force_recreate=True`` drops and recreates the collection,
    which deletes previously indexed documents — verify this is really the
    intent for an "existing collection" upload.
    """
    vector_store = QdrantVectorStore.from_documents(
        documents=documents,
        embedding=vector_embed,
        sparse_embedding=sparse_embed,
        prefer_grpc=True,
        collection_name=vectorstore,
        url=os.environ["QDRANT_URL"],
        api_key=os.environ["QDRANT_API_KEY"],
        force_recreate=True,
        retrieval_mode=RetrievalMode.HYBRID
    )
    return vector_store
31
+
32
+
33
def upload_document_new_collection(vector_embed, sparse_embed, table_name):
    """Create (or recreate) the empty collection *table_name* for hybrid search.

    Returns the ``QdrantVectorStore`` on success; on failure returns the
    error message string (kept for backward compatibility — the current
    caller ignores the return value).
    """
    try:
        qdrant = QdrantVectorStore.from_documents(
            documents=[],
            embedding=vector_embed,
            sparse_embedding=sparse_embed,
            url=os.environ["QDRANT_URL"],
            prefer_grpc=True,
            api_key=os.environ["QDRANT_API_KEY"],
            collection_name=table_name,
            force_recreate=True,
            retrieval_mode=RetrievalMode.HYBRID
        )
        # Previously the store was created but never returned (implicit None),
        # making success indistinguishable from the failure path's str(e).
        return qdrant
    except Exception as e:
        return str(e)
core/services/website_url/__init__.py ADDED
File without changes