updated
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitignore +149 -0
- app.py +27 -0
- config.ini +13 -0
- core/__init__.py +16 -0
- core/api/__init__.py +0 -0
- core/api/chatai.py +497 -0
- core/api/jwt_bearer.py +27 -0
- core/api/user_management_api.py +152 -0
- core/models/__init__.py +0 -0
- core/models/apis_models.py +137 -0
- core/models/response_handling_models.py +19 -0
- core/models/utls.py +8 -0
- core/pipeline/__init__.py +0 -0
- core/pipeline/chataipipeline.py +61 -0
- core/pipeline/user_management_pipeline.py +75 -0
- core/prompts/__init__.py +0 -0
- core/prompts/custom_prompts.py +67 -0
- core/services/__init__.py +0 -0
- core/services/answer_query/__init__.py +0 -0
- core/services/answer_query/answerquery.py +167 -0
- core/services/document/__init__.py +0 -0
- core/services/document/add_document.py +37 -0
- core/services/embeddings/Qdrant_BM25_embedding.py +8 -0
- core/services/embeddings/__init__.py +0 -0
- core/services/embeddings/jina_embeddings.py +8 -0
- core/services/get_links/__init__.py +0 -0
- core/services/get_links/web_scraper.py +42 -0
- core/services/ocr/__init__.py +0 -0
- core/services/ocr/replicate_ocr/__init__.py +0 -0
- core/services/ocr/replicate_ocr/replicate_ocr.py +33 -0
- core/services/pdf_extraction/__init__.py +0 -0
- core/services/pdf_extraction/image_pdf/__init__.py +0 -0
- core/services/pdf_extraction/image_pdf/image_pdf_text_extraction.py +38 -0
- core/services/pdf_extraction/text_pdf/__init__.py +0 -0
- core/services/pdf_extraction/text_pdf/text_pdf_extraction.py +19 -0
- core/services/supabase/__init__.py +0 -0
- core/services/supabase/chat_ai_setup/__init__.py +0 -0
- core/services/supabase/chat_ai_setup/chataiusermanagement.py +32 -0
- core/services/supabase/limit/__init__.py +0 -0
- core/services/supabase/limit/limit_check.py +14 -0
- core/services/supabase/user_management/__init__.py +1 -0
- core/services/supabase/user_management/chat_history.py +8 -0
- core/services/supabase/user_management/chatbot_management.py +62 -0
- core/services/supabase/user_management/token_limit.py +45 -0
- core/services/supabase/user_management/user_service.py +155 -0
- core/services/vector_db/__init__.py +1 -0
- core/services/vector_db/qdrent/__init__.py +0 -0
- core/services/vector_db/qdrent/qdrentvector_db.py +34 -0
- core/services/vector_db/qdrent/upload_document.py +47 -0
- core/services/website_url/__init__.py +0 -0
.gitignore
ADDED
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
|
6 |
+
# C extensions
|
7 |
+
*.so
|
8 |
+
|
9 |
+
# Distribution / packaging
|
10 |
+
.Python
|
11 |
+
build/
|
12 |
+
develop-eggs/
|
13 |
+
dist/
|
14 |
+
downloads/
|
15 |
+
eggs/
|
16 |
+
.eggs/
|
17 |
+
lib/
|
18 |
+
lib64/
|
19 |
+
parts/
|
20 |
+
sdist/
|
21 |
+
var/
|
22 |
+
wheels/
|
23 |
+
pip-wheel-metadata/
|
24 |
+
share/python-wheels/
|
25 |
+
*.egg-info/
|
26 |
+
.installed.cfg
|
27 |
+
*.egg
|
28 |
+
MANIFEST
|
29 |
+
|
30 |
+
# PyInstaller
|
31 |
+
# Usually these files are written by a python script from a template
|
32 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
33 |
+
*.manifest
|
34 |
+
*.spec
|
35 |
+
|
36 |
+
# Installer logs
|
37 |
+
pip-log.txt
|
38 |
+
pip-delete-this-directory.txt
|
39 |
+
|
40 |
+
# Unit test / coverage reports
|
41 |
+
htmlcov/
|
42 |
+
.tox/
|
43 |
+
.nox/
|
44 |
+
.coverage
|
45 |
+
.coverage.*
|
46 |
+
.cache
|
47 |
+
nosetests.xml
|
48 |
+
coverage.xml
|
49 |
+
*.cover
|
50 |
+
*.py,cover
|
51 |
+
.hypothesis/
|
52 |
+
.pytest_cache/
|
53 |
+
|
54 |
+
# Translations
|
55 |
+
*.mo
|
56 |
+
*.pot
|
57 |
+
|
58 |
+
# Django stuff:
|
59 |
+
*.log
|
60 |
+
local_settings.py
|
61 |
+
db.sqlite3
|
62 |
+
db.sqlite3-journal
|
63 |
+
|
64 |
+
# Flask stuff:
|
65 |
+
instance/
|
66 |
+
.webassets-cache
|
67 |
+
|
68 |
+
# Scrapy stuff:
|
69 |
+
.scrapy
|
70 |
+
|
71 |
+
# Sphinx documentation
|
72 |
+
docs/_build/
|
73 |
+
|
74 |
+
# PyBuilder
|
75 |
+
target/
|
76 |
+
|
77 |
+
# Jupyter Notebook
|
78 |
+
.ipynb_checkpoints
|
79 |
+
|
80 |
+
# IPython
|
81 |
+
profile_default/
|
82 |
+
ipython_config.py
|
83 |
+
|
84 |
+
# pyenv
|
85 |
+
.python-version
|
86 |
+
|
87 |
+
# pipenv
|
88 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
89 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
90 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
91 |
+
# install all needed dependencies.
|
92 |
+
#Pipfile.lock
|
93 |
+
|
94 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
95 |
+
__pypackages__/
|
96 |
+
|
97 |
+
# Celery stuff
|
98 |
+
celerybeat-schedule
|
99 |
+
celerybeat.pid
|
100 |
+
|
101 |
+
# SageMath parsed files
|
102 |
+
*.sage.py
|
103 |
+
|
104 |
+
# Environments
|
105 |
+
.env
|
106 |
+
.venv
|
107 |
+
env/
|
108 |
+
venv/
|
109 |
+
ENV/
|
110 |
+
env.bak/
|
111 |
+
venv.bak/
|
112 |
+
|
113 |
+
# Spyder project settings
|
114 |
+
.spyderproject
|
115 |
+
.spyproject
|
116 |
+
|
117 |
+
# Rope project settings
|
118 |
+
.ropeproject
|
119 |
+
|
120 |
+
# mkdocs documentation
|
121 |
+
/site
|
122 |
+
|
123 |
+
# mypy
|
124 |
+
.mypy_cache/
|
125 |
+
.dmypy.json
|
126 |
+
dmypy.json
|
127 |
+
|
128 |
+
# Pyre type checker
|
129 |
+
.pyre/
|
130 |
+
|
131 |
+
# Machine Learning and Speech Libraries
|
132 |
+
# TensorFlow
|
133 |
+
*.ckpt*
|
134 |
+
*.pbtxt
|
135 |
+
*.tfevents*
|
136 |
+
# PyTorch
|
137 |
+
*.pt
|
138 |
+
# Keras
|
139 |
+
*.h5
|
140 |
+
# Scikit-learn
|
141 |
+
*.pkl
|
142 |
+
# Speech Recognition
|
143 |
+
*.wav
|
144 |
+
*.mp3
|
145 |
+
.idea/
|
146 |
+
logs
|
147 |
+
images
|
148 |
+
resources
|
149 |
+
experiments
|
app.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import uvicorn
|
3 |
+
from fastapi import FastAPI, Depends
|
4 |
+
from fastapi.middleware.cors import CORSMiddleware
|
5 |
+
from core import logging as logger
|
6 |
+
from core.api.chatai import chatai_api_router
|
7 |
+
from core.api.jwt_bearer import access_check_bearer
|
8 |
+
from core.api.user_management_api import user_management_api_router
|
9 |
+
from core.utils.utils import load_ini_config
|
10 |
+
config = load_ini_config("config.ini")
|
11 |
+
app = FastAPI(docs_url=config.get('fastapi_config', 'docs_url'), redoc_url=config.get('fastapi_config', 'redoc_url'),
|
12 |
+
openapi_url=config.get('fastapi_config', 'openapi_url')
|
13 |
+
)
|
14 |
+
|
15 |
+
PROTECTED = [Depends(access_check_bearer)]
|
16 |
+
app.add_middleware(
|
17 |
+
CORSMiddleware,
|
18 |
+
allow_origins=["*"],
|
19 |
+
allow_credentials=True,
|
20 |
+
allow_methods=["*"],
|
21 |
+
allow_headers=["*"],
|
22 |
+
)
|
23 |
+
app.include_router(user_management_api_router, prefix="/chatai")
|
24 |
+
app.include_router(chatai_api_router, prefix="/chatai")
|
25 |
+
if __name__ == '__main__':
|
26 |
+
uvicorn.run(app, port=config.get("fastapi_config", "port"), host=config.get('fastapi_config', 'host'),
|
27 |
+
timeout_keep_alive=300, timeout_graceful_shutdown=600)
|
config.ini
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
|
3 |
+
|
4 |
+
[fastapi_config]
|
5 |
+
host = 0.0.0.0
|
6 |
+
port = 7860
|
7 |
+
docs_url = /docs
|
8 |
+
redoc_url = /redoc
|
9 |
+
openapi_url = /openapi.json
|
10 |
+
|
11 |
+
|
12 |
+
[oauth]
|
13 |
+
redirect_to : https://google.com/
|
core/__init__.py
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
|
3 |
+
import logging.config
|
4 |
+
import yaml
|
5 |
+
import os
|
6 |
+
|
7 |
+
if os.path.exists("logs"):
|
8 |
+
pass
|
9 |
+
else:
|
10 |
+
os.makedirs("logs")
|
11 |
+
|
12 |
+
log_config_path = os.path.join(os.getcwd(), "logging_config.yaml")
|
13 |
+
with open(log_config_path, 'r') as file:
|
14 |
+
config = yaml.safe_load(file.read())
|
15 |
+
|
16 |
+
logging.config.dictConfig(config)
|
core/api/__init__.py
ADDED
File without changes
|
core/api/chatai.py
ADDED
@@ -0,0 +1,497 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import os
|
3 |
+
import string
|
4 |
+
import tempfile
|
5 |
+
from urllib.parse import urlparse
|
6 |
+
import requests
|
7 |
+
from fastapi import UploadFile, File, Form, HTTPException
|
8 |
+
from fastapi.requests import Request
|
9 |
+
from fastapi.routing import APIRouter
|
10 |
+
from supabase import create_client
|
11 |
+
from core import logging as logger
|
12 |
+
from core.api.user_management_api import user_management
|
13 |
+
from core.api.user_management_api import user_management as user_management_pipeline
|
14 |
+
from core.models.apis_models import *
|
15 |
+
from core.pipeline.chataipipeline import ChatAIPipeline
|
16 |
+
from core.services.supabase.user_management.token_limit import token_limit_check
|
17 |
+
from core.utils.error_handling import create_error_response, create_success_response, raise_http_exception
|
18 |
+
from core.utils.utils import get_ip_info, encode_to_base64, clean_text, decode_base64
|
19 |
+
from core.services.supabase.limit.limit_check import LimitChecker
|
20 |
+
from PyPDF2 import PdfReader
|
21 |
+
from dotenv import load_dotenv
|
22 |
+
load_dotenv()
|
23 |
+
import io
|
24 |
+
|
25 |
+
chatai_api_router = APIRouter(tags=["ChatAI"])
|
26 |
+
supabase_client = create_client(os.getenv("SUPABASE_URL"), os.getenv("SUPABASE_KEY"))
|
27 |
+
supabase_client_ = supabase_client
|
28 |
+
ChatAI_pipeline = ChatAIPipeline()
|
29 |
+
url_limit,pdf_limit,ocr_limit=LimitChecker(supabase_client)
|
30 |
+
|
31 |
+
@chatai_api_router.post("/add_text")
|
32 |
+
async def add_text(request: AddTextRequest):
|
33 |
+
logger.info(f">>>AddText API Triggered By {request.vectorstore}<<<")
|
34 |
+
try:
|
35 |
+
vectorstore, text = request.vectorstore, request.text
|
36 |
+
username, chat_bot_name = request.vectorstore.split("$")[1], request.vectorstore.split("$")[2]
|
37 |
+
cleaned_text = " ".join(text.split())
|
38 |
+
num_token = len(cleaned_text)
|
39 |
+
lim = token_limit_check(supabase_client=supabase_client, username=username, chatbot_name=chat_bot_name,
|
40 |
+
len_text=num_token)
|
41 |
+
text = clean_text(text)
|
42 |
+
if lim:
|
43 |
+
dct = {
|
44 |
+
"output": {"text": text},
|
45 |
+
"source": "Text",
|
46 |
+
}
|
47 |
+
cleaned_text = " ".join(text.split()) # handles unnencessary spaces
|
48 |
+
# Count characters
|
49 |
+
num_token = len(cleaned_text)
|
50 |
+
logger.info(f"Number of token {num_token}")
|
51 |
+
dct = json.dumps(dct, indent=1).encode("utf-8", errors="replace")
|
52 |
+
file_name = user_management_pipeline.create_data_source_name(source_name="text", username=username)
|
53 |
+
supabase_client.storage.from_("ChatAI").upload(file=dct, path=f"{file_name}_data.json")
|
54 |
+
|
55 |
+
supa = supabase_client.table("ChatAI_ChatbotDataSources").insert(
|
56 |
+
{"username": username, "chatbotName": chat_bot_name, "dataSourceName": file_name,
|
57 |
+
"numTokens": num_token, "sourceEndpoint": "/add_text",
|
58 |
+
"sourceContentURL": os.path.join(os.environ["SUPABASE_PUBLIC_BASE_URL"],
|
59 |
+
f"{file_name}_data.json")}).execute()
|
60 |
+
|
61 |
+
response = create_success_response(200, {"message": "Successfully added the text."})
|
62 |
+
logger.info(f">>>Text added successfully for {request.vectorstore}.<<<")
|
63 |
+
|
64 |
+
return response
|
65 |
+
else:
|
66 |
+
response = create_error_response(400,
|
67 |
+
"Exceeding limits, please try with a smaller chunks of information or subscribe to our premium plan.")
|
68 |
+
return response
|
69 |
+
|
70 |
+
except Exception as e:
|
71 |
+
logger.error(f">>>Error in add_text: {e} for {request.vectorstore}.<<<")
|
72 |
+
raise_http_exception(500, "Internal Server Error")
|
73 |
+
|
74 |
+
|
75 |
+
@chatai_api_router.post("/answer_query")
|
76 |
+
async def answer_query(request: AnswerQueryRequest, req: Request):
|
77 |
+
logger.info(f">>>answer_query API Triggered By {request.vectorstore}<<<")
|
78 |
+
try:
|
79 |
+
username, chatbot_name = request.vectorstore.split("$")[1], request.vectorstore.split("$")[2]
|
80 |
+
ip_address = req.client.host
|
81 |
+
city = get_ip_info(ip_address)
|
82 |
+
output, followup_questions, source = ChatAI_pipeline.answer_query_(query=request.query,
|
83 |
+
vectorstore=request.vectorstore,
|
84 |
+
llm_model=request.llm_model)
|
85 |
+
supa = supabase_client.table("ChatAI_ChatHistory").insert(
|
86 |
+
{"username": username, "chatbotName": chatbot_name, "llmModel": request.llm_model,
|
87 |
+
"question": request.query, "response": output, "IpAddress": ip_address, "ResponseTokenCount": len(output),
|
88 |
+
"vectorstore": request.vectorstore, "City": city}).execute()
|
89 |
+
|
90 |
+
response = create_success_response(200, data={"output": output, "follow_up_questions": followup_questions,
|
91 |
+
"source": source})
|
92 |
+
logger.info(f">>>Query answered successfully for {request.vectorstore}.<<<")
|
93 |
+
return response
|
94 |
+
|
95 |
+
except Exception as e:
|
96 |
+
logger.error(f">>>Error in answer_query: {e} for {request.vectorstore}.<<<")
|
97 |
+
raise e
|
98 |
+
|
99 |
+
|
100 |
+
@chatai_api_router.post("/get_links")
|
101 |
+
async def get_links(request: GetLinksRequest):
|
102 |
+
logger.info(f">>>get_links API Triggered By {request.url}<<<")
|
103 |
+
try:
|
104 |
+
response = ChatAI_pipeline.get_links_(url=request.url, timeout=30)
|
105 |
+
response = create_success_response(200, {"urls": response, "source": urlparse(request.url).netloc})
|
106 |
+
logger.info(f">>>Links fetched successfully for {request.url}.<<<")
|
107 |
+
return response
|
108 |
+
|
109 |
+
except Exception as e:
|
110 |
+
logger.error(f">>>Error in get_links: {e} for {request.url}.<<<")
|
111 |
+
raise_http_exception(500, "Internal Server Error")
|
112 |
+
|
113 |
+
|
114 |
+
@chatai_api_router.post("/image_pdf_text_extraction")
|
115 |
+
async def image_pdf_text_extraction(vectorstore: str = Form(...)
|
116 |
+
, pdf: UploadFile = File(...)):
|
117 |
+
logger.info(f">>>image_pdf_text_extraction API Triggered By {pdf.filename}<<<")
|
118 |
+
try:
|
119 |
+
username, chatbot_name = vectorstore.split("$")[1], vectorstore.split("$")[2]
|
120 |
+
pdf_bytes = await pdf.read()
|
121 |
+
source = pdf.filename
|
122 |
+
pdf_reader = PdfReader(io.BytesIO(pdf_bytes))
|
123 |
+
doc_len = len(pdf_reader.pages)
|
124 |
+
if doc_len<ocr_limit:
|
125 |
+
response = ChatAI_pipeline.image_pdf_text_extraction_(image_pdf=pdf_bytes)
|
126 |
+
|
127 |
+
num_tokens = 0
|
128 |
+
try:
|
129 |
+
num_tokens = len(" ".join([response[x] for x in response]))
|
130 |
+
except (KeyError, TypeError, AttributeError):
|
131 |
+
pass
|
132 |
+
lim = token_limit_check(supabase_client=supabase_client, username=username, chatbot_name=chatbot_name,
|
133 |
+
len_text=num_tokens)
|
134 |
+
logger.info(f"this is the {lim}")
|
135 |
+
if lim:
|
136 |
+
dct = {
|
137 |
+
"output": response,
|
138 |
+
"source": source
|
139 |
+
}
|
140 |
+
|
141 |
+
dct = json.dumps(dct, indent=1).encode("utf-8", errors="replace")
|
142 |
+
file_name = user_management_pipeline.create_data_source_name(source_name=source, username=username)
|
143 |
+
num_tokens = 0
|
144 |
+
try:
|
145 |
+
valid_responses = [response[x] for x in response if response[x] is not None]
|
146 |
+
num_tokens = len(" ".join(valid_responses))
|
147 |
+
except Exception as e:
|
148 |
+
num_tokens = 0
|
149 |
+
|
150 |
+
response = supabase_client.storage.from_("ChatAI").upload(file=dct, path=f"{file_name}_data.json")
|
151 |
+
supa = supabase_client.table("ChatAI_ChatbotDataSources").insert(
|
152 |
+
{"username": username,
|
153 |
+
"chatbotName": chatbot_name,
|
154 |
+
"dataSourceName": file_name,
|
155 |
+
"numTokens": num_tokens,
|
156 |
+
"sourceEndpoint": "/image_pdf_text_extraction",
|
157 |
+
"sourceContentURL": os.path.join(os.environ["SUPABASE_PUBLIC_BASE_URL"],
|
158 |
+
f"{file_name}_data.json")}).execute()
|
159 |
+
|
160 |
+
response = create_success_response(200,
|
161 |
+
{"source": pdf.filename, "message": "Successfully extracted the text."})
|
162 |
+
logger.info(f">>>Text extracted successfully for {pdf.filename}.<<<")
|
163 |
+
return response
|
164 |
+
else:
|
165 |
+
response = create_error_response(402,
|
166 |
+
"Exceeding limits, please try with a smaller chunks of PDF or subscribe to our premium plan.")
|
167 |
+
return response
|
168 |
+
else:
|
169 |
+
response = create_error_response(402,
|
170 |
+
"Exceeding limits, please try with a PDF having less than 20 pages for pdf .")
|
171 |
+
return response
|
172 |
+
except Exception as e:
|
173 |
+
raise e
|
174 |
+
|
175 |
+
|
176 |
+
@chatai_api_router.post("/text_pdf_extraction")
|
177 |
+
async def text_pdf_extraction(vectorstore: str = Form(...)
|
178 |
+
, pdf: UploadFile = File(...)):
|
179 |
+
logger.info(f">>>text_pdf_extraction API Triggered By {pdf.filename}<<<")
|
180 |
+
try:
|
181 |
+
username, chatbot_name = vectorstore.split("$")[1], vectorstore.split("$")[2]
|
182 |
+
content = await pdf.read()
|
183 |
+
pdf_reader = PdfReader(io.BytesIO(content))
|
184 |
+
doc_len = len(pdf_reader.pages)
|
185 |
+
|
186 |
+
if doc_len < pdf_limit :
|
187 |
+
source = pdf.filename
|
188 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:
|
189 |
+
temp_file.write(content)
|
190 |
+
temp_file_path = temp_file.name
|
191 |
+
|
192 |
+
response = ChatAI_pipeline.text_pdf_extraction_(pdf=temp_file_path)
|
193 |
+
numTokens = len(" ".join([response[x] for x in response]))
|
194 |
+
lim = token_limit_check(supabase_client=supabase_client, username=username, chatbot_name=chatbot_name,
|
195 |
+
len_text=numTokens)
|
196 |
+
os.remove(temp_file_path)
|
197 |
+
if lim:
|
198 |
+
dct = {
|
199 |
+
"output": response,
|
200 |
+
"source": source
|
201 |
+
}
|
202 |
+
numTokens = len(" ".join([response[x] for x in response]))
|
203 |
+
logger.info(f"Num of tokens {numTokens} text_pdf_extraction")
|
204 |
+
dct = json.dumps(dct, indent=1).encode("utf-8", errors="replace")
|
205 |
+
file_name = user_management_pipeline.create_data_source_name(source_name=source, username=username)
|
206 |
+
response = supabase_client.storage.from_("ChatAI").upload(file=dct, path=f"{file_name}_data.json")
|
207 |
+
response = (
|
208 |
+
supabase_client.table("ChatAI_ChatbotDataSources")
|
209 |
+
.insert({"username": username,
|
210 |
+
"chatbotName": chatbot_name,
|
211 |
+
"dataSourceName": file_name,
|
212 |
+
"numTokens": numTokens,
|
213 |
+
"sourceEndpoint": "/text_pdf_extraction",
|
214 |
+
"sourceContentURL": os.path.join(os.environ["SUPABASE_PUBLIC_BASE_URL"],
|
215 |
+
f"{file_name}_data.json")})
|
216 |
+
.execute()
|
217 |
+
)
|
218 |
+
response = create_success_response(200, {"source": source, "message": "Successfully extracted the text."})
|
219 |
+
logger.info(f">>>Text extracted successfully for {source}.<<<")
|
220 |
+
return response
|
221 |
+
else:
|
222 |
+
response = create_error_response(402,
|
223 |
+
"Exceeding limits, please try with a smaller chunks of PDF or subscribe to our premium plan.")
|
224 |
+
return response
|
225 |
+
else:
|
226 |
+
response = create_error_response(402,
|
227 |
+
"Exceeding limits, please try with a pdf having pages less than 200.")
|
228 |
+
return response
|
229 |
+
|
230 |
+
except Exception as e:
|
231 |
+
logger.error(f">>>Error in text_pdf_extraction: {e} for {vectorstore}.<<<")
|
232 |
+
raise_http_exception(500, "Internal Server Error")
|
233 |
+
|
234 |
+
|
235 |
+
|
236 |
+
|
237 |
+
@chatai_api_router.post("/website_url_text_extraction")
|
238 |
+
async def add_website(request: AddWebsiteRequest):
|
239 |
+
vectorstore, website_urls, source = request.vectorstore, request.website_urls, request.source
|
240 |
+
|
241 |
+
logger.info(f">>>website_url_text_extraction API Triggered By {request.website_urls}<<<")
|
242 |
+
try:
|
243 |
+
username, chatbot_name = vectorstore.split("$")[1], vectorstore.split("$")[2]
|
244 |
+
total_requested_urls=len(website_urls)
|
245 |
+
|
246 |
+
if total_requested_urls < url_limit :
|
247 |
+
text = ChatAI_pipeline.website_url_text_extraction_list_(urls=website_urls)
|
248 |
+
num_token = len(" ".join([text[x] for x in text]))
|
249 |
+
|
250 |
+
logger.info(f">>>website_url_text_extraction len{num_token}<<<")
|
251 |
+
lim = token_limit_check(supabase_client=supabase_client, username=username, chatbot_name=chatbot_name,
|
252 |
+
len_text=num_token)
|
253 |
+
if not lim:
|
254 |
+
|
255 |
+
response = create_error_response(402,
|
256 |
+
"Exceeding limits, please try with a smaller chunks of information or subscribe to our premium plan.")
|
257 |
+
return response
|
258 |
+
else:
|
259 |
+
dct = {
|
260 |
+
"output": text,
|
261 |
+
"source": source
|
262 |
+
}
|
263 |
+
|
264 |
+
dct = json.dumps(dct, indent=1).encode("utf-8", errors="replace")
|
265 |
+
file_name = user_management_pipeline.create_data_source_name(source_name=urlparse(source).netloc,
|
266 |
+
username=username)
|
267 |
+
supabase_client.storage.from_("ChatAI").upload(file=dct, path=f"{file_name}_data.json")
|
268 |
+
(
|
269 |
+
supabase_client.table("ChatAI_ChatbotDataSources")
|
270 |
+
.insert({"username": username,
|
271 |
+
"chatbotName": chatbot_name,
|
272 |
+
"dataSourceName": file_name,
|
273 |
+
"numTokens": num_token,
|
274 |
+
"sourceEndpoint": "/fetch_text/urls",
|
275 |
+
"sourceContentURL": os.path.join(os.environ["SUPABASE_PUBLIC_BASE_URL"],
|
276 |
+
f"{file_name}_data.json")})
|
277 |
+
.execute()
|
278 |
+
)
|
279 |
+
response = create_success_response(200, {"message": "Successfully fetched the website text."})
|
280 |
+
logger.info(f">>>Website text extracted successfully for {request.website_urls}.<<<")
|
281 |
+
return response
|
282 |
+
else:
|
283 |
+
response = create_error_response(402,
|
284 |
+
"Please select urls less than 50")
|
285 |
+
return response
|
286 |
+
except Exception as e:
|
287 |
+
logger.error(f">>>Error in website_url_text_extraction: {e} for {request.website_urls}.<<<")
|
288 |
+
raise HTTPException(status_code=500, detail="Internal Server Error")
|
289 |
+
|
290 |
+
|
291 |
+
@chatai_api_router.get("/get_current_count")
|
292 |
+
async def get_count(vectorstore: str):
|
293 |
+
logger.info(f">>>get_current_count API Triggered By {vectorstore}<<<")
|
294 |
+
try:
|
295 |
+
username, chatbot_name = vectorstore.split("$")[1], vectorstore.split("$")[2]
|
296 |
+
current_count = user_management_pipeline.get_current_count_(username)
|
297 |
+
|
298 |
+
response = create_success_response(200, {"current_count": current_count})
|
299 |
+
logger.info(f">>>Current count fetched successfully for {vectorstore}.<<<")
|
300 |
+
return response
|
301 |
+
|
302 |
+
except Exception as e:
|
303 |
+
logger.error(f">>>Error in get_current_count: {e} for {vectorstore}.<<<")
|
304 |
+
raise_http_exception(500, "Internal Server Error")
|
305 |
+
|
306 |
+
|
307 |
+
@chatai_api_router.post("/list_chatbots")
|
308 |
+
async def list_chatbots(request: ListChatbotsRequest):
|
309 |
+
logger.info(f">>>list_chatbots API Triggered By {request.username}<<<")
|
310 |
+
try:
|
311 |
+
chatbots = user_management.list_tables(username=request.username)
|
312 |
+
response = create_success_response(200, {"chatbots": chatbots})
|
313 |
+
logger.info(f">>>Chatbots listed successfully for {request.username}.<<<")
|
314 |
+
return response
|
315 |
+
|
316 |
+
except Exception as e:
|
317 |
+
logger.error(f">>>Error in list_chatbots: {e} for {request.username}.<<<")
|
318 |
+
raise_http_exception(500, "Internal Server Error")
|
319 |
+
|
320 |
+
|
321 |
+
@chatai_api_router.post("/get_chat_history")
|
322 |
+
async def chat_history(request: GetChatHistoryRequest):
|
323 |
+
logger.info(f">>>get_chat_history API Triggered By {request.vectorstore}<<<")
|
324 |
+
try:
|
325 |
+
_, username, chatbotName = request.vectorstore.split("$", 2)
|
326 |
+
|
327 |
+
history = supabase_client.table("ChatAI_ChatHistory").select(
|
328 |
+
"timestamp", "question", "response"
|
329 |
+
).eq("username", username).eq("chatbotName", chatbotName).execute().data
|
330 |
+
|
331 |
+
response = create_success_response(200, {"history": history})
|
332 |
+
logger.info(f">>>Chat history fetched successfully for {request.vectorstore}.<<<")
|
333 |
+
return response
|
334 |
+
|
335 |
+
|
336 |
+
except IndexError:
|
337 |
+
logger.warning(f"Chat history not found for {request.vectorstore}")
|
338 |
+
return create_error_response(404, "Chat history not found for the given chatbot.")
|
339 |
+
|
340 |
+
except Exception as e:
|
341 |
+
logger.error(f">>>Error in get_chat_history: {e} for {request.vectorstore}.<<<")
|
342 |
+
raise_http_exception(500, "Internal Server Error")
|
343 |
+
|
344 |
+
|
345 |
+
@chatai_api_router.post("/delete_chatbot")
|
346 |
+
async def delete_chatbot(request: DeleteChatbotRequest):
|
347 |
+
logger.info(f">>>delete_chatbot API Triggered By {request.vectorstore}<<<")
|
348 |
+
try:
|
349 |
+
username, chatbot_name = request.vectorstore.split("$")[1], request.vectorstore.split("$")[2]
|
350 |
+
supabase_client.table('ChatAI_ChatbotInfo').delete().eq('user_id', username).eq('chatbotname',
|
351 |
+
chatbot_name).execute()
|
352 |
+
all_sources = supabase_client.table("ChatAI_ChatbotDataSources").select("*").eq("username", username).eq(
|
353 |
+
"chatbotName", chatbot_name).execute().data
|
354 |
+
all_sources = [x["sourceContentURL"].split("/")[-1] for x in all_sources]
|
355 |
+
supabase_client.table("ChatAI_ChatbotDataSources").delete().eq("username", username).eq("chatbotName",
|
356 |
+
chatbot_name).execute()
|
357 |
+
for source in all_sources:
|
358 |
+
supabase_client.table("ChatAI_Chatbot")
|
359 |
+
supabase_client.storage.from_("ChatAI").remove(source)
|
360 |
+
user_management.delete_table(table_name=chatbot_name)
|
361 |
+
user_management.delete_qdrant_cluster(vectorstorename=request.vectorstore)
|
362 |
+
response = create_success_response(200, {"message": "Chatbot deleted successfully"})
|
363 |
+
logger.info(f">>>Chatbot deleted successfully for {request.vectorstore}.<<<")
|
364 |
+
return response
|
365 |
+
except Exception as e:
|
366 |
+
logger.error(f">>>Error in delete_chatbot: {e} for {request.vectorstore}.<<<")
|
367 |
+
raise_http_exception(500, "Internal Server Error")
|
368 |
+
|
369 |
+
|
370 |
+
|
371 |
+
|
372 |
+
|
373 |
+
|
374 |
+
@chatai_api_router.get("/list_chatbot_sources")
|
375 |
+
async def list_chatbot_sources(vectorstore: str):
|
376 |
+
try:
|
377 |
+
logger.info(f">>>list_chatbot_sources API Triggered By {vectorstore}<<<")
|
378 |
+
|
379 |
+
username, chatbot_name = vectorstore.split("$")[1], vectorstore.split("$")[2]
|
380 |
+
result = supabase_client.table("ChatAI_ChatbotDataSources").select("*").eq("username", username).eq(
|
381 |
+
"chatbotName",
|
382 |
+
chatbot_name).execute().data
|
383 |
+
|
384 |
+
response = create_success_response(200, {"output": result})
|
385 |
+
logger.info(f">>>Chatbot listed successfully for {vectorstore}.<<<")
|
386 |
+
return response
|
387 |
+
|
388 |
+
except Exception as e:
|
389 |
+
logger.error(f">>>Error in list_chatbot_sources: {e} for {vectorstore}.<<<")
|
390 |
+
raise_http_exception(500, "Internal Server Error")
|
391 |
+
|
392 |
+
|
393 |
+
@chatai_api_router.get("/get_data_source")
|
394 |
+
async def get_data_source(vectorstore: str, source_url: str):
|
395 |
+
try:
|
396 |
+
logger.info(f">>>get_data_source API Triggered By {vectorstore}<<<")
|
397 |
+
|
398 |
+
r = requests.get(source_url)
|
399 |
+
res = encode_to_base64(eval(r.content.decode("utf-8", errors="replace")))
|
400 |
+
|
401 |
+
response = create_success_response(200, {"output": res})
|
402 |
+
|
403 |
+
return response
|
404 |
+
|
405 |
+
except Exception as e:
|
406 |
+
logger.error(f">>>Error in get_data_source: {e} for {vectorstore}.<<<")
|
407 |
+
raise_http_exception(500, "Internal Server Error")
|
408 |
+
|
409 |
+
|
410 |
+
@chatai_api_router.post("/delete_chatbot_source")
|
411 |
+
async def delete_chatbot_source(request: DeleteChatbotSourceRequest):
|
412 |
+
vectorstore, data_source_name = request.vectorstore, request.data_source_name
|
413 |
+
try:
|
414 |
+
|
415 |
+
response = supabase_client.table("ChatAI_ChatbotDataSources").delete().eq("dataSourceName",
|
416 |
+
data_source_name).execute()
|
417 |
+
response = supabase_client.storage.from_('ChatAI').remove(f"{data_source_name}_data.json")
|
418 |
+
|
419 |
+
response = create_success_response(200, {"output": f"Successfully deleted the {data_source_name} data source."})
|
420 |
+
|
421 |
+
logger.info(f">>>Data source deleted successfully for {vectorstore}.<<<")
|
422 |
+
return response
|
423 |
+
|
424 |
+
|
425 |
+
except Exception as e:
|
426 |
+
logger.error(f">>>Error in delete_chatbot_source: {e} for {vectorstore}.<<<")
|
427 |
+
raise_http_exception(500, "Internal Server Error")
|
428 |
+
|
429 |
+
|
430 |
+
def _fetch_source_payload(source_url: str) -> dict:
    """Download one stored data-source file and parse it as a Python literal.

    SECURITY FIX: this parsing used to be eval() on remote file contents,
    which can execute arbitrary code; ast.literal_eval only accepts
    literal data structures.
    """
    import ast

    r = requests.get(source_url)
    return ast.literal_eval(r.content.decode("utf-8", errors="replace"))


@chatai_api_router.post("/train_chatbot")
async def train_chatbot(request: TrainChatbotRequest):
    """Ingest the selected data sources into the chatbot's vector store.

    For each URL in request.urls, look up the endpoint that originally
    produced it, download the stored payload, flatten it to plain text,
    and hand (text, source) pairs to the ingestion pipeline.

    Raises:
        Exception: re-raised unchanged after logging (unlike the sibling
        endpoints, this one propagates the original error).
    """
    vectorstore, url_sources = request.vectorstore, request.urls
    logger.info(f">>>train_chatbot API Triggered By {vectorstore}<<<")
    try:
        # Endpoint families sharing a payload layout:
        #   dict payloads: {"output": {key: text, ...}, "source": ...}
        #   text payloads: {"output": {"text": ...}, "source": ...}
        dict_payload_endpoints = {"/text_pdf_extraction", "/image_pdf_text_extraction",
                                  "/fetch_text/urls", "/youtube_transcript"}
        text_payload_endpoints = {"/add_text", "/add_qa_pair"}

        texts = []
        sources = []
        # One Supabase lookup per URL to recover its original endpoint.
        fileTypes = [
            supabase_client.table("ChatAI_ChatbotDataSources").select("sourceEndpoint").eq(
                "sourceContentURL", x).execute().data[0]["sourceEndpoint"]
            for x in url_sources]
        for source, fileType in zip(url_sources, fileTypes):
            if fileType in dict_payload_endpoints:
                logger.info(f"Source is {source}")
                file = _fetch_source_payload(source)
                content = ".".join(file["output"].values())
            elif fileType in text_payload_endpoints:
                file = _fetch_source_payload(source)
                content = file["output"]["text"]
            else:
                # Unknown endpoint: skip silently, as before.
                continue
            texts.append(content.replace("\n", " "))
            sources.append(file["source"])

        texts = list(zip(texts, sources))
        ChatAI_pipeline.add_document_(texts, vectorstore)
        response = create_success_response(200, {"message": "Chatbot trained successfully."})
        logger.info(f">>>Chatbot trained successfully for {vectorstore}.<<<")
        return response

    except Exception as e:
        logger.error(f">>>Error in train_chatbot: {e} for {vectorstore}.<<<")
        # FIX: bare `raise` preserves the full traceback (was `raise e`).
        raise
|
482 |
+
|
483 |
+
|
484 |
+
|
485 |
+
|
486 |
+
|
487 |
+
@chatai_api_router.post("/new_chatbot")
async def new_chatbot(request: NewChatbotRequest):
    """Provision a brand-new chatbot for the given user."""
    logger.info(f">>> new_chatbot API Triggered <<<")
    try:
        created = user_management.new_chatbot_(chatbot_name=request.chatbot_name, username=request.username)
        logger.info(f">>> Chatbot created successfully for {request.username}.<<<")
        return created

    except Exception as e:
        logger.error(f">>>Error in new_chatbot: {e} for {request.username}.<<<")
        raise_http_exception(500, "Internal Server Error")
|
core/api/jwt_bearer.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from fastapi import Depends
|
3 |
+
from supabase import create_client
|
4 |
+
from core import logging as logger
|
5 |
+
from core.utils.error_handling import create_error_response, raise_http_exception
|
6 |
+
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
|
7 |
+
from dotenv import load_dotenv
|
8 |
+
load_dotenv()
|
9 |
+
|
10 |
+
# Shared HTTP Bearer scheme used by the auth dependency below.
security = HTTPBearer()

# Module-level Supabase client; credentials come from the environment
# (.env loaded above).
supabase_client = create_client(
    os.getenv("SUPABASE_URL"),
    os.getenv("SUPABASE_KEY")
)
|
16 |
+
|
17 |
+
|
18 |
+
async def access_check_bearer(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """FastAPI dependency: respond 401 unless Supabase validates the bearer token."""
    token = credentials.credentials
    try:
        # Supabase raises when the token is invalid or expired.
        supabase_client.auth.get_user(token)

    except Exception as e:
        logger.info(f">>> Invalid access token {e}<<<")
        raise_http_exception(
            code=401,
            message="Invalid Access Token",
            details=[{"info": "Invalid access token or access token expired please login again"}],
        )
|
core/api/user_management_api.py
ADDED
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import Depends
|
2 |
+
from core import logging as logger
|
3 |
+
from core.models.apis_models import *
|
4 |
+
from fastapi.routing import APIRouter
|
5 |
+
from core.api.jwt_bearer import access_check_bearer, supabase_client
|
6 |
+
from core.pipeline.user_management_pipeline import SupabaseUserManagementPipeline
|
7 |
+
|
8 |
+
from core.utils.error_handling import raise_http_exception, create_success_response, create_error_response
|
9 |
+
|
10 |
+
# Router for all user/session endpoints, grouped under one OpenAPI tag.
user_management_api_router = APIRouter(tags=["User Management"])

# Single pipeline instance shared by every request handler in this module.
user_management = SupabaseUserManagementPipeline()
|
13 |
+
|
14 |
+
|
15 |
+
@user_management_api_router.post("/user_signup")
async def user_signup(request: UserSignupRequest):
    """Register a new account via the user-management pipeline."""
    logger.info(f">>>user_signup API Triggered <<<")
    signup_result = user_management.user_signup_(
        username=request.username,
        password=request.password,
        email=request.email,
    )
    logger.info(f">>>user_signup API Success<<<")
    return signup_result
|
22 |
+
|
23 |
+
|
24 |
+
@user_management_api_router.post("/user_signin")
async def user_signin(request: UserSigninRequest):
    """Authenticate with username/e-mail and password.

    Returns the pipeline's session payload on success, or a 400 error
    envelope when the pipeline returns None (credentials rejected).
    """
    logger.info(f">>>user_signin API Triggered <<<")

    response = user_management.user_signin_(username=request.username, password=request.password, email=request.email)
    # FIX: identity comparison with None (`is not None`), not `!= None`.
    if response is not None:
        logger.info(f">>>user_signin API Success.<<<")
        return response

    logger.info(f">>> Username or password is incorrect please try again.<<<")
    return create_error_response(400, "Username or password is incorrect please try again.")
|
36 |
+
|
37 |
+
|
38 |
+
@user_management_api_router.post("/get_user_data")
async def get_user_data(request: GetUserDataRequest):
    """Resolve the profile belonging to an access token."""
    logger.info(f">>>get_user_data API Triggered <<<")
    return user_management.get_user_data_(access_token=request.access_token)
|
43 |
+
|
44 |
+
|
45 |
+
@user_management_api_router.post("/login_with_access_token")
async def login_with_access_token(request: LoginWithAccessTokenRequest):
    """Restore a session from an existing access/refresh token pair."""
    logger.info(f">>>login_with_access_token API Triggered <<<")
    session = user_management.login_with_access_token_(
        access_token=request.access_token,
        refresh_token=request.refresh_token,
    )
    logger.info(f">>>login_with_access_token API Success<<<")
    return session
|
53 |
+
|
54 |
+
|
55 |
+
@user_management_api_router.post("/set_session_data")
async def set_session_data(request: SetSessionDataRequest):
    """Persist an access/refresh token pair for a user session."""
    logger.info(f">>> set_session_data API Triggered <<<")
    return user_management.set_session_data_(
        access_token=request.access_token,
        refresh_token=request.refresh_token,
        user_id=request.user_id,
    )
|
62 |
+
|
63 |
+
|
64 |
+
@user_management_api_router.post("/sign_out")
async def sign_out():
    """Terminate the current Supabase session."""
    logger.info(f">>> sign_out API Triggered <<<")
    result = user_management.sign_out_()
    logger.info(f">>>sign_out API Success<<<")
    return result
|
71 |
+
|
72 |
+
|
73 |
+
@user_management_api_router.post("/oauth_signin")
async def oauth_signin():
    """Start an OAuth sign-in flow via the pipeline."""
    logger.info(f">>> oauth_signin API Triggered <<<")
    result = user_management.oauth_signin_()
    logger.info(f">>>oauth_signin API Success<<<")
    return result
|
79 |
+
|
80 |
+
|
81 |
+
@user_management_api_router.post("/check_session")
async def check_session():
    """Report whether a Supabase session is currently active."""
    logger.info(f">>>check_session API Triggered <<<")
    return user_management.check_session_()
|
87 |
+
|
88 |
+
|
89 |
+
@user_management_api_router.get("/get_public_chatbot")
async def get_public_chatbots():
    """List every chatbot whose isPrivate flag is False."""
    logger.info(f">>>get_public_chatbot API Triggered<<<")
    try:
        rows = (
            supabase_client.table("ChatAI_ChatbotInfo")
            .select("*")
            .eq("isPrivate", False)
            .execute()
            .data
        )
        logger.info(f">>>Public chatbots fetched successfully.<<<")
        return rows
    except Exception as e:
        logger.error(f">>>Error in get_public_chatbot: {e}<<<")
        raise_http_exception(500, "Internal Server Error")
|
99 |
+
|
100 |
+
|
101 |
+
@user_management_api_router.post("/public_private_check")
async def public_or_private(request: PublicPrivateCheckRequest):
    """Read or update a chatbot's visibility flag.

    With an empty/missing `mode` the current `isPrivate` value is returned;
    otherwise `isPrivate` is set to `mode`.
    """
    vectorstore, mode = request.vectorstore, request.mode
    logger.info(f">>>public_private_check API Triggered for {vectorstore}.<<<")
    try:
        # vectorstore format assumed "<prefix>$<username>$<chatbot_name>..."
        parts = vectorstore.split("$")
        username, chatbot_name = parts[1], parts[2]
        # BUG FIX: `mode` defaults to None, so the old `len(mode) == 0`
        # raised TypeError and every read request came back as a 500.
        # `not mode` covers both None and the empty string.
        if not mode:
            value = (
                supabase_client.table("ChatAI_ChatbotInfo")
                .select("isPrivate")
                .eq("user_id", username)
                .eq("chatbotname", chatbot_name)
                .execute()
            )
            response = create_success_response(200, {"output": value.data[0]["isPrivate"]})
        else:
            result = (
                supabase_client.table("ChatAI_ChatbotInfo")
                .update({"isPrivate": mode})
                .eq("user_id", username)
                .eq("chatbotname", chatbot_name)
                .execute()
            )
            response = create_success_response(200, {"output": result})
        logger.info(f">>>Public/Private check successful for {vectorstore}.<<<")
        return response

    except Exception as e:
        logger.error(f">>>Error in public_private_check: {e} for {vectorstore}.<<<")
        raise_http_exception(500, "Internal Server Error")
|
134 |
+
|
135 |
+
|
136 |
+
@user_management_api_router.post("/refresh_session", dependencies=[Depends(access_check_bearer)])
async def refresh_session(request: RefreshSessionRequest):
    """Exchange a refresh token for a fresh session (bearer-protected)."""
    logger.info(f">>>refresh_session API Triggered <<<")
    refreshed = user_management.refresh_session__(refresh_token=request.refresh_token)
    logger.info(f">>>refresh token fetched successfully.<<<")
    return refreshed
|
143 |
+
|
144 |
+
|
145 |
+
@user_management_api_router.post("/username_creation_oauth", dependencies=[Depends(access_check_bearer)])
async def username_creation_oauth(request: UsernameCreationOauthRequest):
    """Attach a username to an OAuth-created account (bearer-protected)."""
    logger.info(f">>> username_creation_oauth API Triggered <<<")
    result = user_management.username_creation_oauth_(
        username=request.username,
        email=request.email,
        user_id=request.user_id,
    )
    logger.info(f">>>username creation successful.<<<")
    return result
|
core/models/__init__.py
ADDED
File without changes
|
core/models/apis_models.py
ADDED
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pydantic import BaseModel,EmailStr
|
2 |
+
from typing import List, Optional
|
3 |
+
|
4 |
+
|
5 |
+
|
6 |
+
|
7 |
+
## ---------------------------------- Chatbot API Models ----------------------------------
|
8 |
+
|
9 |
+
class AddTextRequest(BaseModel):
    """Body of POST /add_text: raw text to ingest into `vectorstore`."""
    vectorstore: str
    text: str
|
12 |
+
|
13 |
+
|
14 |
+
class AddWebsiteRequest(BaseModel):
    """Body of the add-website endpoint: pages to scrape into `vectorstore`."""
    website_urls: List[str]
    vectorstore: str
    source: str
|
18 |
+
|
19 |
+
|
20 |
+
class AnswerQueryRequest(BaseModel):
    """Body of the answer-query endpoint."""
    query: str
    vectorstore: str
    # NOTE(review): default differs from the pipeline default
    # ("llama-3.3-70b-versatile") -- confirm which model is intended.
    llm_model: str = "llama3-70b-8192"
|
24 |
+
|
25 |
+
class GetLinksRequest(BaseModel):
    """Body of the link-discovery endpoint: site URL to crawl for links."""
    url: str
|
27 |
+
|
28 |
+
|
29 |
+
|
30 |
+
class WebsiteUrlTextExtractionRequest(BaseModel):
    """Body for extracting text from a single web page."""
    url: str
|
32 |
+
|
33 |
+
|
34 |
+
class WebsiteUrlTextExtractionListRequest(BaseModel):
    """Body for extracting text from several web pages at once."""
    urls: List[str]
|
36 |
+
|
37 |
+
|
38 |
+
class GetCurrentCountRequest(BaseModel):
    """Body for querying a chatbot's current usage count."""
    vectorstore: str
|
40 |
+
|
41 |
+
|
42 |
+
class ListChatbotsRequest(BaseModel):
    """Body for listing every chatbot owned by `username`."""
    username: str
|
44 |
+
|
45 |
+
|
46 |
+
class GetChatHistoryRequest(BaseModel):
    """Body for fetching the stored chat history of one chatbot."""
    vectorstore: str
|
48 |
+
|
49 |
+
|
50 |
+
class ChatHistoryItem(BaseModel):
    """One chat-history record: when it happened, what was asked, what was answered."""
    timestamp: str
    question: str
    response: str
|
54 |
+
|
55 |
+
|
56 |
+
class DeleteChatbotRequest(BaseModel):
    """Body for deleting an entire chatbot."""
    vectorstore: str
|
58 |
+
|
59 |
+
|
60 |
+
class TrainChatbotRequest(BaseModel):
    """Body of POST /train_chatbot."""
    vectorstore: str
    # Stored sourceContentURL links of the data sources to ingest.
    urls: list[str]
|
63 |
+
|
64 |
+
|
65 |
+
class LoadPDFRequest(BaseModel):
    """Body for the PDF-loading endpoint."""
    vectorstore: str
|
67 |
+
|
68 |
+
|
69 |
+
class LoadEditedJson(BaseModel):
    """Body for re-uploading a user-edited data-source JSON payload."""
    vectorstore: str
    data_source_name: str
    source_endpoint: str
    json_data: dict
|
74 |
+
|
75 |
+
|
76 |
+
class PublicPrivateCheckRequest(BaseModel):
    """Body of /public_private_check.

    mode: new isPrivate value; None (or empty) means "read current value".
    """
    vectorstore: str
    mode: str | None = None
|
79 |
+
|
80 |
+
|
81 |
+
class DeleteChatbotSourceRequest(BaseModel):
    """Body of POST /delete_chatbot_source."""
    vectorstore: str
    data_source_name: str
|
84 |
+
|
85 |
+
|
86 |
+
## ---------------------------------- User Management API Models ----------------------------------
|
87 |
+
|
88 |
+
class UserSignupRequest(BaseModel):
    """Body of POST /user_signup."""
    username: str
    email:EmailStr
    password: str
|
92 |
+
|
93 |
+
|
94 |
+
class UserSigninRequest(BaseModel):
    """Body of POST /user_signin."""
    username:str
    email:EmailStr

    password: str
|
99 |
+
|
100 |
+
|
101 |
+
class CheckSessionRequest(BaseModel):
    """Body for checking the session of a specific user."""
    user_id: str
|
103 |
+
|
104 |
+
|
105 |
+
class GetUserDataRequest(BaseModel):
    """Body of POST /get_user_data."""
    access_token: str
|
107 |
+
|
108 |
+
|
109 |
+
class RefreshSessionRequest(BaseModel):
    """Body of POST /refresh_session."""
    refresh_token: str
|
111 |
+
|
112 |
+
|
113 |
+
class LoginWithAccessTokenRequest(BaseModel):
    """Body of POST /login_with_access_token."""
    access_token: str
    refresh_token: str
|
116 |
+
|
117 |
+
|
118 |
+
class UsernameCreationOauthRequest(BaseModel):
    """Body of POST /username_creation_oauth."""
    username: str
    email: str
    user_id: str
|
122 |
+
|
123 |
+
|
124 |
+
|
125 |
+
class SetSessionDataRequest(BaseModel):
    """Body of POST /set_session_data."""
    access_token: str
    refresh_token: str
    user_id: str
|
129 |
+
|
130 |
+
|
131 |
+
class SignOutRequest(BaseModel):
    """Body for signing a specific user out (unused by /sign_out, which takes no body)."""
    user_id: str
|
133 |
+
|
134 |
+
|
135 |
+
class NewChatbotRequest(BaseModel):
    """Body of POST /new_chatbot."""
    chatbot_name: str
    username: str
|
core/models/response_handling_models.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Any, Dict
|
2 |
+
from pydantic import BaseModel
|
3 |
+
|
4 |
+
class SuccessResponse(BaseModel):
    """Standard success envelope: status string, HTTP-style code, payload dict."""
    status: str
    code: int
    data: Dict[str, Any]
|
8 |
+
|
9 |
+
|
10 |
+
class ErrorResponse(BaseModel):
    """Standard error envelope: status string plus an error-detail dict."""
    status: str
    error: Dict[str, Any]
|
13 |
+
|
14 |
+
|
15 |
+
class SuccessResponseUsermanagement(BaseModel):
    """Success envelope used by user-management endpoints (adds a message)."""
    status: str
    message: str
    code: int
    data: Dict[str, Any]
|
core/models/utls.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pydantic import BaseModel, Field
|
2 |
+
|
3 |
+
|
4 |
+
class FollowUps(BaseModel):
    """Structured-output schema: exactly three follow-up questions."""
    q1: str = Field(description="First Follow-up Question")
    q2: str = Field(description="Second Follow-up Question")
    q3: str = Field(description="Third Follow-up Question")
|
8 |
+
|
core/pipeline/__init__.py
ADDED
File without changes
|
core/pipeline/chataipipeline.py
ADDED
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
from langchain_core.prompts import ChatPromptTemplate
|
3 |
+
from langchain_core.prompts import PromptTemplate
|
4 |
+
from core.prompts.custom_prompts import _custom_prompts
|
5 |
+
from core.services.answer_query.answerquery import AnswerQuery
|
6 |
+
from core.services.document.add_document import AddDocument
|
7 |
+
from core.services.embeddings.Qdrant_BM25_embedding import qdrant_bm25_embedding
|
8 |
+
from core.services.embeddings.jina_embeddings import jina_embedding
|
9 |
+
from core.services.get_links.web_scraper import WebScraper
|
10 |
+
from core.services.ocr.replicate_ocr.replicate_ocr import ReplicateOCR as OCRService
|
11 |
+
from core.services.pdf_extraction.image_pdf.image_pdf_text_extraction import get_text_from_image_pdf
|
12 |
+
from core.services.pdf_extraction.text_pdf.text_pdf_extraction import extract_text_from_pdf
|
13 |
+
from core.services.website_url.text_extraction_urlsnew import WebScrapertext
|
14 |
+
from core.utils.utils import json_parser
|
15 |
+
|
16 |
+
|
17 |
+
class ChatAIPipeline:
    """Facade wiring embeddings, retrieval-QA, scraping, OCR and PDF services.

    Built once at startup; every method is a thin delegation to one of the
    services constructed in __init__.
    """

    def __init__(self):
        # Build the RAG answer prompt and the JSON-structured follow-up prompt.
        prompt_template = _custom_prompts["RAG_ANSWER_PROMPT"]
        follow_up_prompt_template = _custom_prompts["FOLLOW_UP_PROMPT"]
        prompt = ChatPromptTemplate.from_template(prompt_template)
        json_parser_ = json_parser()
        follow_up_prompt = PromptTemplate(
            template=follow_up_prompt_template,
            input_variables=["context"],
            partial_variables={"format_instructions": json_parser_.get_format_instructions()},
        )
        # Dense + sparse embeddings shared by ingestion and retrieval.
        self.vector_embedding = jina_embedding()
        self.sparse_embedding = qdrant_bm25_embedding()
        self.add_document_service = AddDocument(self.vector_embedding, self.sparse_embedding)

        self.answer_query_service = AnswerQuery(vector_embedding=self.vector_embedding,
                                                sparse_embedding=self.sparse_embedding, prompt=prompt,
                                                follow_up_prompt=follow_up_prompt, json_parser=json_parser_)
        self.get_website_links = WebScraper()
        self.ocr_service = OCRService()
        self.web_text_extractor = WebScrapertext()

    def add_document_(self, texts: list[tuple[str]], vectorstore: str):
        # Ingest (text, source) pairs into the named vector store.
        return self.add_document_service.add_documents(texts=texts, vectorstore=vectorstore)

    def answer_query_(self, query: str, vectorstore: str, llm_model: str = "llama-3.3-70b-versatile"):
        # Returns (answer, follow-up questions, sources) for a user query.
        output, follow_up_questions, source = self.answer_query_service.answer_query(query=query,
                                                                                     vectorstore=vectorstore,
                                                                                     llmModel=llm_model)
        return output, follow_up_questions, source

    def get_links_(self, url: str, timeout: int):
        # Discover links on a site, bounded by `timeout`.
        return self.get_website_links.get_links(url=url, timeout=timeout)

    def image_pdf_text_extraction_(self, image_pdf: bytes):
        # OCR-based text extraction for scanned/image PDFs.
        return get_text_from_image_pdf(pdf_bytes=image_pdf)

    def text_pdf_extraction_(self, pdf: str):
        # Direct text extraction for text-layer PDFs.
        return extract_text_from_pdf(pdf_path=pdf)

    def website_url_text_extraction_(self, url: str):
        return self.web_text_extractor.extract_text_from_url(url=url)

    def website_url_text_extraction_list_(self, urls: list):
        return self.web_text_extractor.extract_text_from_urls(urls=urls)
|
core/pipeline/user_management_pipeline.py
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
from core.services.supabase.user_management.user_service import UserManagement
|
3 |
+
from core.services.supabase.user_management.chatbot_management import SupabaseChatoBotManagement
|
4 |
+
from core.services.supabase.chat_ai_setup.chataiusermanagement import ChatAIUserDBSetup
|
5 |
+
from core.services.supabase.user_management.chat_history import get_chat_history
|
6 |
+
from supabase.client import create_client
|
7 |
+
from qdrant_client import QdrantClient
|
8 |
+
import os
|
9 |
+
|
10 |
+
# Supabase + Qdrant connection settings, sourced from the environment.
SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")

QDRANT_URL = os.getenv("QDRANT_URL")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
|
15 |
+
|
16 |
+
|
17 |
+
class SupabaseUserManagementPipeline:
    """Facade over Supabase/Qdrant user, session and chatbot management."""

    def __init__(self):
        self.supabase_client = create_client(SUPABASE_URL, SUPABASE_KEY)
        self.qdrant_client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)
        self.user_management = UserManagement(supabase_client=self.supabase_client)
        self.chatbot_management = SupabaseChatoBotManagement(supabase_client=self.supabase_client,
                                                            qdrant_client=self.qdrant_client)
        self.user_db_setup = ChatAIUserDBSetup(supabase_client=self.supabase_client)

    def user_signup_(self, username: str, password: str, email: str) -> dict:
        """Create a new account."""
        return self.user_management.user_signup(username=username, password=password, email=email)

    def user_signin_(self, username: str, password: str, email: str) -> dict:
        """Password sign-in; returns session data, or None on bad credentials."""
        return self.user_management.user_signin(username=username, password=password, email=email)

    def check_session_(self) -> dict:
        return self.user_management.check_session()

    def get_user_data_(self, access_token: str) -> dict:
        return self.user_management.get_user_data(access_token=access_token)

    def refresh_session__(self, refresh_token: str) -> dict:
        return self.user_management.refresh_session_(refresh_token=refresh_token)

    def login_with_access_token_(self, access_token: str, refresh_token: str) -> dict:
        return self.user_management.login_with_access_token(access_token=access_token, refresh_token=refresh_token)

    def username_creation_oauth_(self, username: str, user_id: str, email: str | None = None):
        # BUG FIX (backward-compatible): the /username_creation_oauth endpoint
        # calls this with an `email=` keyword, which the old two-parameter
        # signature rejected with a TypeError. Accept it with a default of
        # None; it is not forwarded because the underlying service signature
        # takes only user_id/username here -- TODO confirm whether the
        # service should also receive the email.
        return self.user_management.user_name_creation_oauth(user_id=user_id, username=username)

    def set_session_data_(self, access_token: str, refresh_token: str, user_id: str):
        return self.user_management.set_session_data(access_token=access_token, refresh_token=refresh_token,
                                                     user_id=user_id)

    def sign_out_(self):
        return self.user_management.sign_out_()

    def oauth_signin_(self) -> dict:
        return self.user_management.oauth()

    def new_chatbot_(self, chatbot_name: str, username: str):
        return self.chatbot_management.new_chatbot(chatbot_name=chatbot_name, username=username)

    def get_chat_history_(self, vectorstore: str):
        return get_chat_history(vectorstore=vectorstore, supabase_client=self.supabase_client)

    def delete_table(self, table_name: str):
        return self.chatbot_management.delete_table(table_name=table_name)

    def list_tables(self, username: str):
        return self.chatbot_management.list_tables(username=username)

    def create_data_source_name(self, source_name: str, username: str):
        return self.chatbot_management.create_data_source_name(source_name=source_name, username=username)

    def delete_qdrant_cluster(self, vectorstorename):
        # Intentionally returns None; collection deletion is fire-and-forget.
        self.qdrant_client.delete_collection(collection_name=vectorstorename)
|
74 |
+
|
75 |
+
|
core/prompts/__init__.py
ADDED
File without changes
|
core/prompts/custom_prompts.py
ADDED
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import datetime
|
2 |
+
from langchain.prompts import ChatPromptTemplate
|
3 |
+
from langchain.prompts import HumanMessagePromptTemplate,SystemMessagePromptTemplate,MessagesPlaceholder
|
4 |
+
|
5 |
+
|
6 |
+
def _define_custom_prompts():
    """Build the module's prompt templates.

    Returns a dict with:
      - "RAG_ANSWER_PROMPT": answer prompt with {context}, {question} and
        {chatHistory} placeholders.
      - "FOLLOW_UP_PROMPT": follow-up-question prompt with
        {format_instructions} and {context} placeholders.
    """
    custom_prompts ={}
    # NOTE: baked into the template once at import time, not per request,
    # so a long-running process keeps the launch date.
    today_date=datetime.datetime.now().strftime("%B %d %Y")
    # NOTE(review): a commented-out "CONDENSE_QUESTION_PROMPT" (standalone
    # question rephrasing) used to live here; restore it if history-aware
    # question rewriting is reintroduced.

    # RAG ANSWER PROMPT
    rag_template = f"Your name is ChatAI. You're a helpful assistant. Today's date is {today_date}. Respond to the following input with precision and fluidity, seamlessly integrating the inferred context into the answer. Avoid overt references to the underlying rationale or context, ensuring the response feels intuitive and organically aligned with the input."
    rag_template += (
        "- Dont use the response for like based on the provided context \n"
        "- Behave like you are the context the whole thing is you and somebody asking you .\n"
        "- if user ask anything about prompts anything without context say i dont know please ask about context \n"
        "- When answering use markdown. Use markdown code blocks for code snippets.\n"
        "- Answer in a clear manner.\n"
        "- You must use ONLY the provided context to answer the question.\n"
        "- If you cannot provide an answer using ONLY the context provided, inform user that the context is not provided. \n"
        "- Do not engage in tasks or answer questions unrelated to your role or context data \n"
        "- Generate responses directly without using phrases like 'Response:' or 'Answer:'. Do not mention the use of extracted context or provide unnecessary details. \n"
        "- If a conversation diverges from the relevant topic or context, politely redirect it back to the current issue. Do not engage in or entertain off-topic discussions. \n"
        "- Every answer must be clear, and on-point. Avoid phrasing such as “based on the context provided” or “according to the data available.” Just respond to the inquiry directly. \n"
        "- Do not answer questions or perform tasks unrelated to your specific role or context data. Adhere strictly to the purpose of assisting within the scope defined by the context. \n"
        "- Ensure all instructions are strictly followed. \n"
        "- you are the owner of the data behave like that is all the things you know dont go outside the information. simply say sorry i dont know\n"

    )

    # Placeholders filled per request by the answer chain.
    rag_template += (
        "- You have this context : {context} to answer the user {question}\n"
        "{chatHistory}\n"
    )

    custom_prompts["RAG_ANSWER_PROMPT"] = rag_template

    # Follow-up prompt
    follow_up_template=("You are an expert chatbot at framing follow up questions \n"
                        "using some given text such that their answers can be found in the text itself and have been given the task of doing the same.\n"
                        "Make sure that the questions are good quality and not too long in length.\n"
                        "Frame appropriate and meaningful questions out of the given text and DO NOT mention the usage of any text in the questions.\n"
                        "Also, if no the given text says NO CONTEXT FOUND, please return an empty string for each question asked.\n"
                        "{format_instructions}\n"
                        "{context}\n"
                        )

    custom_prompts["FOLLOW_UP_PROMPT"]=follow_up_template

    return custom_prompts
|
66 |
+
|
67 |
+
_custom_prompts =_define_custom_prompts()
|
core/services/__init__.py
ADDED
File without changes
|
core/services/answer_query/__init__.py
ADDED
File without changes
|
core/services/answer_query/answerquery.py
ADDED
@@ -0,0 +1,167 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
from langchain.memory import ChatMessageHistory
|
4 |
+
from langchain.retrievers import ContextualCompressionRetriever
|
5 |
+
from langchain_community.document_compressors import JinaRerank
|
6 |
+
from langchain_core.chat_history import BaseChatMessageHistory
|
7 |
+
from langchain_core.output_parsers import StrOutputParser
|
8 |
+
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
|
9 |
+
from langchain_core.runnables.history import RunnableWithMessageHistory
|
10 |
+
from langchain_groq import ChatGroq
|
11 |
+
|
12 |
+
from core.services.vector_db.qdrent.upload_document import answer_query_from_existing_collection
|
13 |
+
|
14 |
+
# JinaRerank reads JINA_API_KEY; mirror it from the JINA_API env var.
os.environ["JINA_API_KEY"] = os.getenv("JINA_API")
|
15 |
+
|
16 |
+
|
17 |
+
class AnswerQuery:
|
18 |
+
    def __init__(self, prompt, vector_embedding, sparse_embedding, follow_up_prompt, json_parser):
        # Per-session chat histories, keyed by vector-store name.
        self.chat_history_store = {}
        # Cross-encoder reranker applied on top of the base retriever.
        self.compressor = JinaRerank(model="jina-reranker-v2-base-multilingual")
        self.vector_embed = vector_embedding
        self.sparse_embed = sparse_embedding
        self.prompt = prompt
        self.follow_up_prompt = follow_up_prompt
        self.json_parser = json_parser
|
26 |
+
|
27 |
+
    def format_docs(self, docs: str):
        """Concatenate retrieved documents into a single context string.

        Side effects: publishes the de-duplicated source list and the built
        context through module-level globals (`sources`, `temp_context`)
        that answer_query() reads afterwards.

        NOTE(review): module-global state is not safe under concurrent
        requests -- consider returning (context, sources) instead.
        """
        global sources
        global temp_context
        sources = []
        context = ""
        for doc in docs:
            context += f"{doc.page_content}\n\n\n"
            source = doc.metadata
            source = source["source"]
            sources.append(source)
        if context == "":
            # Empty retrieval: sentinel the follow-up prompt also checks for.
            context = "No context found"
        else:
            pass
        # De-duplicate sources (order is not preserved by set()).
        sources = list(set(sources))
        temp_context = context
        return context
|
44 |
+
|
45 |
+
def answer_query(self, query: str, vectorstore: str, llmModel: str = "llama-3.3-70b-versatile"):
|
46 |
+
global sources
|
47 |
+
global temp_context
|
48 |
+
vector_store_name = vectorstore
|
49 |
+
vector_store = answer_query_from_existing_collection(vector_embed=self.vector_embed,
|
50 |
+
sparse_embed=self.sparse_embed,
|
51 |
+
vectorstore=vectorstore)
|
52 |
+
|
53 |
+
retriever = vector_store.as_retriever(search_type="mmr", search_kwargs={"k": 10, "fetch_k": 20})
|
54 |
+
compression_retriever = ContextualCompressionRetriever(
|
55 |
+
base_compressor=self.compressor, base_retriever=retriever
|
56 |
+
)
|
57 |
+
brain_chain = (
|
58 |
+
{"context": RunnableLambda(lambda x: x["question"]) | compression_retriever | RunnableLambda(
|
59 |
+
self.format_docs),
|
60 |
+
"question": RunnableLambda(lambda x: x["question"]),
|
61 |
+
"chatHistory": RunnableLambda(lambda x: x["chatHistory"])}
|
62 |
+
| self.prompt
|
63 |
+
| ChatGroq(model=llmModel, temperature=0.75, max_tokens=512)
|
64 |
+
| StrOutputParser()
|
65 |
+
)
|
66 |
+
message_chain = RunnableWithMessageHistory(
|
67 |
+
brain_chain,
|
68 |
+
self.get_session_history,
|
69 |
+
input_messages_key="question",
|
70 |
+
history_messages_key="chatHistory"
|
71 |
+
)
|
72 |
+
chain = RunnablePassthrough.assign(messages_trimmed=self.trim_messages) | message_chain
|
73 |
+
follow_up_chain = self.follow_up_prompt | ChatGroq(model_name="llama-3.3-70b-versatile",
|
74 |
+
temperature=0) | self.json_parser
|
75 |
+
|
76 |
+
output = chain.invoke(
|
77 |
+
{"question": query},
|
78 |
+
{"configurable": {"session_id": vector_store_name}}
|
79 |
+
)
|
80 |
+
follow_up_questions = follow_up_chain.invoke({"context": temp_context})
|
81 |
+
|
82 |
+
return output, follow_up_questions, sources
|
83 |
+
|
84 |
+
async def answer_query_stream(self, query: str, vectorstore: str, llmModel):
|
85 |
+
global sources
|
86 |
+
global temp_context
|
87 |
+
|
88 |
+
vector_store_name = vectorstore
|
89 |
+
vector_store = answer_query_from_existing_collection(
|
90 |
+
vector_embed=self.vector_embed,
|
91 |
+
sparse_embed=self.sparse_embed,
|
92 |
+
vectorstore=vectorstore
|
93 |
+
)
|
94 |
+
|
95 |
+
retriever = vector_store.as_retriever(search_type="mmr", search_kwargs={"k": 10, "fetch_k": 20})
|
96 |
+
compression_retriever = ContextualCompressionRetriever(
|
97 |
+
base_compressor=self.compressor,
|
98 |
+
base_retriever=retriever
|
99 |
+
)
|
100 |
+
|
101 |
+
brain_chain = (
|
102 |
+
{
|
103 |
+
"context": RunnableLambda(lambda x: x["question"]) | compression_retriever | RunnableLambda(
|
104 |
+
self.format_docs),
|
105 |
+
"question": RunnableLambda(lambda x: x["question"]),
|
106 |
+
"chatHistory": RunnableLambda(lambda x: x["chatHistory"])
|
107 |
+
}
|
108 |
+
| self.prompt
|
109 |
+
| ChatGroq(
|
110 |
+
model=llmModel,
|
111 |
+
temperature=0.75,
|
112 |
+
max_tokens=512,
|
113 |
+
streaming=True
|
114 |
+
)
|
115 |
+
| StrOutputParser()
|
116 |
+
)
|
117 |
+
|
118 |
+
message_chain = RunnableWithMessageHistory(
|
119 |
+
brain_chain,
|
120 |
+
self.get_session_history,
|
121 |
+
input_messages_key="question",
|
122 |
+
history_messages_key="chatHistory"
|
123 |
+
)
|
124 |
+
|
125 |
+
chain = RunnablePassthrough.assign(messages_trimmed=self.trim_messages) | message_chain
|
126 |
+
|
127 |
+
async for chunk in chain.astream(
|
128 |
+
{"question": query},
|
129 |
+
{"configurable": {"session_id": vector_store_name}}
|
130 |
+
):
|
131 |
+
yield {
|
132 |
+
"type": "main_response",
|
133 |
+
"content": chunk
|
134 |
+
}
|
135 |
+
|
136 |
+
follow_up_chain = self.follow_up_prompt | ChatGroq(
|
137 |
+
model_name="llama-3.3-70b-versatile",
|
138 |
+
temperature=0
|
139 |
+
) | self.json_parser
|
140 |
+
|
141 |
+
follow_up_questions = await follow_up_chain.ainvoke({"context": temp_context})
|
142 |
+
|
143 |
+
yield {
|
144 |
+
"type": "follow_up_questions",
|
145 |
+
"content": follow_up_questions
|
146 |
+
}
|
147 |
+
|
148 |
+
yield {
|
149 |
+
"type": "sources",
|
150 |
+
"content": sources
|
151 |
+
}
|
152 |
+
|
153 |
+
def trim_messages(self, chain_input):
|
154 |
+
for store_name in self.chat_history_store:
|
155 |
+
messages = self.chat_history_store[store_name].messages
|
156 |
+
if len(messages) <= 1:
|
157 |
+
pass
|
158 |
+
else:
|
159 |
+
self.chat_history_store[store_name].clear()
|
160 |
+
for message in messages[-1:]:
|
161 |
+
self.chat_history_store[store_name].add_message(message)
|
162 |
+
return True
|
163 |
+
|
164 |
+
def get_session_history(self, session_id: str) -> BaseChatMessageHistory:
|
165 |
+
if session_id not in self.chat_history_store:
|
166 |
+
self.chat_history_store[session_id] = ChatMessageHistory()
|
167 |
+
return self.chat_history_store[session_id]
|
core/services/document/__init__.py
ADDED
File without changes
|
core/services/document/add_document.py
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import string
|
3 |
+
from langchain.docstore.document import Document
|
4 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
5 |
+
from core.services.vector_db.qdrent.upload_document import upload_document_existing_collection
|
6 |
+
|
7 |
+
|
8 |
+
class AddDocument:
    """Cleans, chunks, and uploads raw (text, source) pairs into an existing
    Qdrant collection using the configured dense + sparse embeddings."""

    def __init__(self, vector_embedding, sparse_embedding):
        self.vector_embed = vector_embedding
        self.sparse_embed = sparse_embedding

    def clean_text(self, text: str) -> str:
        """Normalize raw text: drop newlines, strip punctuation (periods are
        kept so sentence boundaries survive), and discard non-UTF-8 bytes."""
        without_newlines = text.replace("\n", "")
        keep_periods = str.maketrans('', '', string.punctuation.replace(".", ""))
        stripped = without_newlines.translate(keep_periods)
        return stripped.encode('utf-8', errors='ignore').decode('utf-8')

    def add_documents(self, texts: list[tuple[str]], vectorstore: str):
        """Split each (content, source) pair into overlapping 400-char chunks
        and push them into the *vectorstore* collection."""
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=400,
            chunk_overlap=100,
            add_start_index=True  # records each chunk's offset in its metadata
        )
        contents, sources = zip(*texts)

        # One Document per input text, tagged with where it came from.
        docs = [
            Document(page_content=self.clean_text(content), metadata={"source": source})
            for content, source in zip(contents, sources)
        ]

        chunks = splitter.split_documents(docs)
        upload_document_existing_collection(vector_embed=self.vector_embed,
                                            sparse_embed=self.sparse_embed,
                                            vectorstore=vectorstore,
                                            documents=chunks)
|
core/services/embeddings/Qdrant_BM25_embedding.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
from langchain_qdrant import FastEmbedSparse
|
3 |
+
|
4 |
+
|
5 |
+
def qdrant_bm25_embedding():
    """Build the sparse BM25 embedding model used for hybrid retrieval."""
    return FastEmbedSparse(model="Qdrant/BM25", threads=20, parallel=0)
|
core/services/embeddings/__init__.py
ADDED
File without changes
|
core/services/embeddings/jina_embeddings.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain_community.embeddings import JinaEmbeddings
|
2 |
+
import os
|
3 |
+
|
4 |
+
def jina_embedding():
    """Build the dense Jina embedding model (API key taken from the JINA_API
    environment variable)."""
    return JinaEmbeddings(
        jina_api_key=os.getenv('JINA_API'),
        model_name="jina-embeddings-v3",
    )
|
core/services/get_links/__init__.py
ADDED
File without changes
|
core/services/get_links/web_scraper.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import time
|
2 |
+
import requests
|
3 |
+
from bs4 import BeautifulSoup
|
4 |
+
from urllib.parse import urlparse, urljoin
|
5 |
+
from concurrent.futures import ThreadPoolExecutor
|
6 |
+
|
7 |
+
class WebScraper:
    """Shallow same-site crawler: collects links from a page and from each page
    it links to, within a wall-clock budget."""

    def __init__(self):
        pass

    def get_links(self, url: str, timeout=4):
        """Return de-duplicated same-site links reachable from *url* (depth 2).

        :param url: starting page URL.
        :param timeout: overall crawl budget in seconds; once exceeded, the
            remaining child pages are skipped (each HTTP request itself is
            NOT bounded — a slow server can still overshoot the budget).
        :return: list of absolute URLs with any single trailing slash removed.
        """
        start = time.time()

        def get_links_from_page(page_url: str) -> list:
            # Inner param renamed from `url` (the original shadowed the outer arg).
            response = requests.get(page_url)
            soup = BeautifulSoup(response.content, "lxml")
            links = []
            for anchor in soup.find_all("a"):
                if "href" not in anchor.attrs:
                    continue
                href = anchor.attrs["href"]
                if urlparse(href).netloc == urlparse(page_url).netloc:
                    # Absolute link on the same host.
                    links.append(href)
                elif not href.startswith(("//", "file", "javascript", "tel", "mailto", "http")):
                    # Relative link: resolve against the current page.
                    links.append(urljoin(page_url + "/", href))
            # Filter fragments and de-duplicate ONCE, after the loop — the
            # original rebuilt both lists on every anchor (quadratic work).
            links = [link for link in links if "#" not in link]
            return list(set(links))

        links = get_links_from_page(url)
        unique_links = set()
        for link in links:
            if time.time() - start > timeout:
                break  # budget exhausted; return what we have so far
            unique_links |= set(get_links_from_page(link))
        # Strip one trailing slash; endswith() also guards against the empty
        # string, where the original's x[-1] raised IndexError.
        return list({x[:-1] if x.endswith("/") else x for x in unique_links})
|
42 |
+
|
core/services/ocr/__init__.py
ADDED
File without changes
|
core/services/ocr/replicate_ocr/__init__.py
ADDED
File without changes
|
core/services/ocr/replicate_ocr/replicate_ocr.py
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import base64
|
3 |
+
import cv2
|
4 |
+
import replicate
|
5 |
+
|
6 |
+
|
7 |
+
def replicate_run(input):
    """Run the hosted text-extract OCR model on Replicate and return its output."""
    model_ref = "abiruyt/text-extract-ocr:a524caeaa23495bc9edc805ab08ab5fe943afd3febed884a4f3747aa32e9cd61"
    return replicate.run(model_ref, input=input)
|
13 |
+
|
14 |
+
|
15 |
+
class ReplicateOCR:
    """OCR backend that sends PNG-encoded images to a hosted Replicate model."""

    def __init__(self):
        pass

    def read_text(self, image_path):
        """Return OCR'd text for an image, or None if the model returned nothing.

        NOTE(review): despite the name, ``image_path`` is an in-memory image
        (it is passed straight to ``cv2.imencode``, e.g. a numpy array) — not
        a filesystem path. Confirm and consider renaming at the call sites.
        """
        # Encode to PNG bytes, then to a base64 data URI (Replicate accepts
        # data URIs for image inputs).
        _, buffer = cv2.imencode('.png', image_path)
        base_64_image = base64.b64encode(buffer).decode('utf-8')

        input_image_uri = f"data:image/png;base64,{base_64_image}"
        input = {
            "image": input_image_uri,
        }
        output = replicate_run(input)
        if output:
            # Flatten whitespace so downstream chunking sees continuous text.
            output = output.replace("\n", " ").replace("\t", " ")
            return output

        else:
            return None
|
core/services/pdf_extraction/__init__.py
ADDED
File without changes
|
core/services/pdf_extraction/image_pdf/__init__.py
ADDED
File without changes
|
core/services/pdf_extraction/image_pdf/image_pdf_text_extraction.py
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import concurrent.futures
|
3 |
+
import numpy as np
|
4 |
+
from pdf2image import convert_from_bytes
|
5 |
+
|
6 |
+
from core import logging as logger
|
7 |
+
from core.services.ocr.replicate_ocr.replicate_ocr import ReplicateOCR
|
8 |
+
|
9 |
+
|
10 |
+
def get_text_from_image_pdf(pdf_bytes: bytes):
    """OCR every page of an image-based PDF.

    :param pdf_bytes: raw PDF file contents.
    :return: ``{page_number (1-based): extracted_text}`` on success, or
        ``None`` if OCR processing fails.
    """
    ocr = ReplicateOCR()

    def process_image(args: tuple) -> str:
        # args is (0-based page index, PIL image) from enumerate() below.
        index, image = args
        gray_image = image.convert('L')  # OCR works on grayscale
        np_image = np.array(gray_image)
        text = ocr.read_text(np_image)
        logger.debug(f"Processed page {index + 1}")
        return text

    all_images = convert_from_bytes(
        pdf_bytes,
        thread_count=1,
        grayscale=True,
        size=(1700, None)  # cap width at 1700px; height keeps aspect ratio
    )

    # The try now covers the actual OCR work: in the original it wrapped only
    # the final dict comprehension, so OCR/thread-pool failures escaped
    # unlogged instead of producing the documented None return.
    try:
        with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
            texts = list(executor.map(process_image, enumerate(all_images)))
        logger.info("OCR processing completed")
        return {i + 1: text for i, text in enumerate(texts)}
    except Exception as e:
        logger.error(f"Error in OCR processing: {e}")
        return None
|
core/services/pdf_extraction/text_pdf/__init__.py
ADDED
File without changes
|
core/services/pdf_extraction/text_pdf/text_pdf_extraction.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import base64
|
3 |
+
import pymupdf
|
4 |
+
from concurrent.futures import ThreadPoolExecutor
|
5 |
+
|
6 |
+
|
7 |
+
def extract_text_from_page(page):
    """Return the plain text of a single PyMuPDF page."""
    return page.get_text()
|
10 |
+
|
11 |
+
|
12 |
+
def extract_text_from_pdf(pdf_path: str):
    """Extract text from every page of a text-based PDF in parallel.

    :param pdf_path: path to the PDF file.
    :return: ``{page_number (1-based): page_text}``.
    """
    doc = pymupdf.open(pdf_path)
    try:
        pages = [doc.load_page(i) for i in range(len(doc))]
        with ThreadPoolExecutor() as executor:
            texts = list(executor.map(extract_text_from_page, pages))
    finally:
        # Original leaked the document handle if extraction raised.
        doc.close()
    return {x + 1: y for x, y in enumerate(texts)}
|
core/services/supabase/__init__.py
ADDED
File without changes
|
core/services/supabase/chat_ai_setup/__init__.py
ADDED
File without changes
|
core/services/supabase/chat_ai_setup/chataiusermanagement.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
class ChatAIUserDBSetup:
    """Provisions the Supabase rows a brand-new ChatAI user needs."""

    def __init__(self, supabase_client):
        self.client = supabase_client

    def setup_new_user(self, user_id: str, username: str, email: str) -> dict:
        """Create the ChatAI_UserInfo / ChatAI_UserConfig rows for a new user.

        :param user_id: auth provider's user id.
        :param username: desired (unique) username.
        :param email: user email address.
        :return: ``{"code": int, "message": str}`` — 200 on success, 409 when
            the username or (per the insert constraint) the email is taken.
        """
        # Ask the DB for this exact username instead of downloading the entire
        # user table and scanning it client-side (the original was
        # O(total users) per signup).
        existing = self.client.table("ChatAI_UserInfo").select("username") \
            .eq("username", username).execute().data
        if existing:
            return {
                "code": 409,
                "message": "Username already exists"
            }
        try:
            self.client.table("ChatAI_UserInfo").insert(
                {"user_id": user_id, "username": username, "email": email}).execute()

            # NOTE(review): UserConfig is keyed by username here while UserInfo
            # uses the auth user_id — looks intentional elsewhere in the
            # codebase, but confirm.
            self.client.table("ChatAI_UserConfig").insert({"user_id": username}).execute()

            return {
                "code": 200,
                "message": "User Setup Successful"
            }
        except Exception:
            # Insert failures are assumed to be the unique-email constraint
            # firing (matches the original's catch-all message).
            return {
                "code": 409,
                "message": "Email already exists",
            }
|
core/services/supabase/limit/__init__.py
ADDED
File without changes
|
core/services/supabase/limit/limit_check.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
class LimitChecker:
    """Caches the global ChatAI ingestion limits from the ChatAI_limit_check table."""

    def __init__(self, supabase_client):
        limits = self.fetch_limits(supabase_client)
        self.url_limit = limits.get("total_website_url")
        self.pdf_limit = limits.get("text_pdf_limit")
        self.image_pdf_limit = limits.get("image_pdf_limit")

    def fetch_limits(self, supabase_client):
        """Return the first (and only) row of the limits table as a dict."""
        rows = supabase_client.table("ChatAI_limit_check").select("*").execute()
        return rows.data[0]

    def __iter__(self):
        # Supports tuple-unpacking:
        #   url_limit, pdf_limit, image_pdf_limit = LimitChecker(client)
        yield self.url_limit
        yield self.pdf_limit
        yield self.image_pdf_limit
|
core/services/supabase/user_management/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
|
core/services/supabase/user_management/chat_history.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
def get_chat_history(supabase_client, vectorstore: str):
    """Fetch Q/A history rows for the chatbot encoded in a collection name.

    *vectorstore* has the form ``chatai$<username>$<chatbotName>``.
    Returns a list of row dicts with timestamp, question, and response.
    """
    parts = vectorstore.split("$")
    username, chatbotName = parts[1], parts[2]
    rows = (supabase_client.table("ChatAI_ChatHistory")
            .select("timestamp", "question", "response")
            .eq("username", username)
            .eq("chatbotName", chatbotName)
            .execute()
            .data)
    return rows
|
core/services/supabase/user_management/chatbot_management.py
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import random
|
3 |
+
import re
|
4 |
+
import string
|
5 |
+
|
6 |
+
from core.services.supabase.chat_ai_setup.chataiusermanagement import ChatAIUserDBSetup
|
7 |
+
from core.services.vector_db.qdrent.qdrentvector_db import QdrantVectorStoreDB
|
8 |
+
from core.utils.error_handling import create_error_response
|
9 |
+
from core.utils.utils import load_ini_config
|
10 |
+
from core.services.embeddings.Qdrant_BM25_embedding import qdrant_bm25_embedding
|
11 |
+
from core.services.embeddings.jina_embeddings import jina_embedding
|
12 |
+
|
13 |
+
|
14 |
+
|
15 |
+
class SupabaseChatoBotManagement:
    """Chatbot lifecycle operations backed by Supabase (metadata) and Qdrant
    (vector collections)."""

    def __init__(self, supabase_client, qdrant_client):
        # Embedding models are built once per instance and shared by the
        # Qdrant wrapper.
        vector_embeddings = jina_embedding()
        sparse_embeddings = qdrant_bm25_embedding()
        self.config = load_ini_config("config.ini")
        self.qdrant_vector_db = QdrantVectorStoreDB(qdrant_client=qdrant_client, vector_embeddings=vector_embeddings,
                                                    sparse_embeddings=sparse_embeddings)
        self.supabase_client = supabase_client
        self.user_db_setup = ChatAIUserDBSetup(self.supabase_client)

    def new_chatbot(self, chatbot_name: str, username: str):
        """Create a chatbot (metadata row + Qdrant collection) unless the
        user's chatbot quota is exhausted."""
        currentBotCount = len(self.qdrant_vector_db.list_tables(username=username))
        # NOTE(review): list_tables returns the exception object on failure,
        # so len() here would raise TypeError — confirm error path.
        limit = self.supabase_client.table("ChatAI_UserConfig").select("chatbotLimit").eq("user_id",
                                                                                          username).execute().data[0][
            "chatbotLimit"]
        if currentBotCount >= int(limit):
            return create_error_response(code=400, message="You have reached the limit of chatbots you can create")
        self.supabase_client.table("ChatAI_ChatbotInfo").insert(
            {"user_id": username, "chatbotname": chatbot_name}).execute()
        # Collection naming convention: chatai$<username>$<chatbot_name>.
        chat_bot_name = f"chatai${username}${chatbot_name}"
        return self.qdrant_vector_db.create_table(table_name=chat_bot_name)

    def delete_table(self, table_name: str):
        """Drop a chatbot's Qdrant collection."""
        return self.qdrant_vector_db.delete_table(table_name=table_name)

    def list_tables(self, username: str):
        """List the user's chatbot collection names."""
        return self.qdrant_vector_db.list_tables(username=username)

    def create_data_source_name(self, source_name, username):
        """Generate a unique, sanitized data-source identifier.

        Retries recursively on (extremely unlikely) collisions with the
        60-char random suffix.
        """
        sources = [x["dataSourceName"] for x in
                   self.supabase_client.table("ChatAI_ChatbotDataSources").select("dataSourceName").execute().data]
        # Replace anything non-alphanumeric so the name is identifier-safe.
        n_sources = re.sub(r'[^a-zA-Z0-9]', '_', source_name)
        n_sources = n_sources.lower()
        new_source_name = f"chatai_{username}_{n_sources}_{''.join(random.choices(string.ascii_letters + string.digits, k=60))}"
        if new_source_name not in sources:
            return new_source_name
        else:
            return self.create_data_source_name(source_name, username)

    def get_current_count(self, username):
        """Sum the character counts across all of the user's chatbots."""
        data_ = self.supabase_client.table("ChatAI_ChatbotInfo").select("*").filter("user_id", "eq",
                                                                                    username).execute().data
        current_count = 0
        for data in data_:
            current_count += int(data["charactercount"])

        return current_count
|
core/services/supabase/user_management/token_limit.py
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import pandas as pd
|
3 |
+
from core import logging as logger
|
4 |
+
|
5 |
+
|
6 |
+
def token_limit_check(supabase_client, username, chatbot_name, len_text):
    """Charge *len_text* characters against the user's token quota.

    :param supabase_client: Supabase client instance.
    :param username: owner of the chatbot.
    :param chatbot_name: target chatbot.
    :param len_text: number of characters about to be ingested.
    :return: True if the quota allows the addition (count is updated),
        False if the chatbot row is missing or the quota would be exceeded.
    :raises Exception: re-raises any unexpected Supabase/client error.

    NOTE(review): read-then-update is not atomic; concurrent ingests could
    both pass the check. Confirm single-writer usage or move to a DB-side
    increment.
    """
    logger.info(f">>>token_limit_check API Triggered_{username}_{chatbot_name} <<<")

    try:
        chatbot_response = supabase_client.table("ChatAI_ChatbotInfo") \
            .select("charactercount") \
            .eq("user_id", username) \
            .eq("chatbotname", chatbot_name) \
            .execute()

        if not chatbot_response.data:
            logger.error(f"Error checking username and chatbotname{username},{chatbot_name}")
            return False

        currentCount = int(chatbot_response.data[0]["charactercount"])
        newCount = currentCount + len_text
        logger.info(f"{newCount}")

        limit_response = supabase_client.table("ChatAI_UserConfig") \
            .select("tokenLimit") \
            .eq("user_id", username) \
            .execute()

        limit = int(limit_response.data[0]["tokenLimit"])

        if newCount < limit:
            # Stored as text in the table, hence str().
            supabase_client.table("ChatAI_ChatbotInfo") \
                .update({"charactercount": str(newCount)}) \
                .eq("user_id", username) \
                .eq("chatbotname", chatbot_name) \
                .execute()

            logger.info(f">>>token_limit_check API Success for user: {username} chatbot: {chatbot_name}<<<")
            return True
        else:
            logger.error(f">>>token_limit_check exceeded for user: {username}{chatbot_name}<<<")
            return False

    except Exception:
        # Bare re-raise preserves the original traceback; the original's
        # `raise e` was followed by an unreachable `return False`.
        raise
|
core/services/supabase/user_management/user_service.py
ADDED
@@ -0,0 +1,155 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import jwt
|
3 |
+
from core.services.supabase.chat_ai_setup.chataiusermanagement import ChatAIUserDBSetup
|
4 |
+
from jwt import ExpiredSignatureError, InvalidTokenError
|
5 |
+
from fastapi import HTTPException
|
6 |
+
from core.utils.error_handling import raise_http_exception, create_success_response, create_error_response, \
|
7 |
+
success_response_user_management
|
8 |
+
from core.utils.utils import config_read
|
9 |
+
|
10 |
+
config = config_read()
|
11 |
+
|
12 |
+
|
13 |
+
class UserManagement:
    """Authentication and session flows on top of Supabase auth, plus the
    ChatAI-specific user provisioning tables."""

    def __init__(self, supabase_client):
        self.supabase_client = supabase_client
        self.user_db_setup = ChatAIUserDBSetup(self.supabase_client)

    def user_signup(self, username: str, password: str, email: str) -> dict:
        """Register a new user with Supabase auth, then provision ChatAI rows.

        Returns a success/error response dict; 409 when username/email taken.
        """
        res, _ = self.supabase_client.auth.sign_up(
            {"email": email, "password": password, "role": "user"}
        )

        # NOTE(review): res[1].id assumes the tuple-shaped return of older
        # supabase-py versions — confirm against the installed client.
        user_id = res[1].id
        r_ = self.user_db_setup.setup_new_user(user_id=user_id, username=username, email=email)
        if r_.get('code') == 409:
            response = create_error_response(code=409, message=r_.get('message'))
        elif r_.get('code') == 200:
            response = success_response_user_management(code=200, message="Signup successful", data={
                "info": "Please check you email address for email verification"})
        else:
            response = raise_http_exception(code=400, message="Failed to sign up please try again later")
        return response

    def user_signin(self, username: str, password: str, email: str):
        """Sign in with email/password; returns tokens plus the stored username.

        NOTE(review): on exception the error response is built but NOT
        returned (implicit None) — confirm whether callers handle None.
        """
        try:
            res = self.supabase_client.auth.sign_in_with_password(
                {"email": email, "password": password}
            )

            if res != None:
                user_id = res.user.id
                access_token = res.session.access_token
                refresh_token = res.session.refresh_token

                # The canonical username lives in ChatAI_UserInfo, not auth.
                userData = self.supabase_client.table("ChatAI_UserInfo").select("*").filter("user_id", "eq",
                                                                                            user_id).execute().data
                username = userData[0]["username"]

                message = {
                    "username": username,
                    "user_id": user_id,
                    "access_token": access_token,
                    "refresh_token": refresh_token,
                }
                response = success_response_user_management(code=200, message="Signin successful", data=message)

                return response
            else:
                response = create_error_response(code=400, message="Email or password is incorrect please try again",)
                return response

        except Exception as e:
            create_error_response(code=400, message="Failed to sign in please try again later",
                                  details=[{"error": str(e)}])

    def check_session(self):
        """Return the active session, or sign out and report 401 if expired."""
        res = self.supabase_client.auth.get_session()
        if res == None:
            try:
                self.supabase_client.auth.sign_out()

                return create_error_response(code=401, message="Session expired please login again")
            except Exception as e:
                # NOTE(review): response built but not returned (implicit None).
                create_error_response(code=400, message="Failed to check session")

        else:
            response = success_response_user_management(code=200, message="Session active", data=dict(res))
            return response

    def get_user_data(self, access_token: str) -> dict:
        """Fetch the auth user record for a given access token."""
        res = self.supabase_client.auth.get_user(jwt=access_token)
        response = success_response_user_management(code=200, message="User data fetched successfully", data=res)
        return response

    def refresh_session_(self, refresh_token: str) -> dict:
        """Exchange a refresh token for a new session."""
        res = self.supabase_client.auth.refresh_session(refresh_token)
        response = success_response_user_management(code=200, message="Session refreshed successfully", data=res)

        return response

    def login_with_access_token(self, access_token: str, refresh_token: str):
        """Decode an OAuth access token (signature NOT verified) and resolve
        the local username for the bearer.

        NOTE(review): jwt.decode with verify_signature=False trusts the token
        contents — acceptable only if the token was already verified upstream.
        """
        try:
            decoded_token = jwt.decode(access_token, options={"verify_signature": False})
            user_id_oauth = decoded_token.get("username")
            try:
                self.supabase_client.table("ChatAI_UserInfo").select("*").filter("user_id", "eq",
                                                                                 user_id_oauth).execute()
                user_id = self.supabase_client.table("ChatAI_UserInfo").select("*").filter("email", "eq",
                                                                                           user_id_oauth).execute()
                # Empty username signals the frontend to run username creation.
                if len(user_id.data) == 0:
                    user_name = ""
                else:
                    user_name = user_id.data[0]["username"]

                json = {
                    "user_id": decoded_token.get("sub"),
                    "email": decoded_token.get("email"),
                    "access_token": access_token,
                    "refresh_token": refresh_token,
                    "issued_at": decoded_token.get("iat"),
                    "expires_at": decoded_token.get("exp"),
                    "username": user_name

                }
                response = success_response_user_management(code=200, message="Login with access token successful",
                                                            data=json)
                return response
            except:
                # NOTE(review): bare except hides the real failure; narrow it.
                response = create_error_response(code=400, message="Failed to login with access token",
                                                 details=[{"error": "User not found"}])

                return response

        except (ExpiredSignatureError, InvalidTokenError) as e:
            # NOTE(review): response built but not returned (implicit None).
            create_error_response(code=400, message="Failed to login with access token", details=[{"error": str(e)}])

    def user_name_creation_oauth(self, username: str, user_id: str, email: str):
        """Provision ChatAI rows for a user who signed in via OAuth."""
        r_ = self.user_db_setup.setup_new_user(user_id=user_id, username=username, email=email)
        response = success_response_user_management(code=200, message="Username creation successful", data=r_)

        return response

    def set_session_data(self, access_token, refresh_token, user_id):
        """Install the given token pair as the client's active session."""
        res = self.supabase_client.auth.set_session(access_token, refresh_token)
        response = success_response_user_management(code=200, message="Session data set successfully", data=dict(res))
        return response

    def sign_out_(self):
        """Sign the current user out; 400 HTTPException on failure."""
        try:

            res = self.supabase_client.auth.sign_out()
            response = success_response_user_management(code=200, message="Signout successful")
            return response
        except Exception as e:
            raise HTTPException(status_code=400, detail=str(e))

    def oauth(self):
        """Start a Google OAuth flow; redirect target comes from config.ini."""
        res = self.supabase_client.auth.sign_in_with_oauth(
            {"provider": "google", "options": {"redirect_to": f"{config.get('oauth', 'redirect_to')}"}})
        response = success_response_user_management(code=200, message="OAuth successful", data=dict(res))
        return response
|
core/services/vector_db/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
|
core/services/vector_db/qdrent/__init__.py
ADDED
File without changes
|
core/services/vector_db/qdrent/qdrentvector_db.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
from core.services.vector_db.qdrent.upload_document import upload_document_new_collection
|
3 |
+
from core.utils.error_handling import create_success_response, success_response_user_management, raise_http_exception
|
4 |
+
|
5 |
+
|
6 |
+
class QdrantVectorStoreDB:
    """Thin wrapper over the Qdrant client for chatbot collection lifecycle."""

    def __init__(self, qdrant_client, vector_embeddings, sparse_embeddings):
        self.vector_embeddings = vector_embeddings
        self.sparse_embeddings = sparse_embeddings
        self.qdrant_client = qdrant_client

    def create_table(self, table_name: str):
        """Create an empty hybrid collection named *table_name*.

        NOTE(review): upload_document_new_collection swallows errors and
        returns a string, so success is reported unconditionally here —
        confirm whether failures should propagate.
        """
        upload_document_new_collection(vector_embed=self.vector_embeddings, sparse_embed=self.sparse_embeddings,
                                       table_name=table_name)
        response = success_response_user_management(code=200, message="Chatbot Creation Successful")
        return response

    def delete_table(self, table_name: str):
        """Delete a collection; raises a 400 HTTP error on failure."""
        try:
            self.qdrant_client.delete_collection(collection_name=table_name)
            return success_response_user_management(code=200, message="Chatbot Deletion Successful")
        except Exception:
            raise_http_exception(code=400, message="Failed to delete chatbot")

    def list_tables(self, username: str):
        """Return collection names of the form ``chatai$<username>$<bot>``
        owned by *username*."""
        try:
            qdrant_collections = self.qdrant_client.get_collections()
            # Replaced the original `lambda x: True if cond else False` filter
            # with a direct comprehension over the same condition.
            return [c.name for c in qdrant_collections.collections
                    if c.name.split("$")[1] == username]
        except Exception as e:
            # NOTE(review): returning the exception object (instead of raising)
            # means callers like new_chatbot will crash on len(); kept for
            # interface compatibility, but flag for cleanup.
            return e
|
core/services/vector_db/qdrent/upload_document.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import os
|
3 |
+
|
4 |
+
from langchain_qdrant import QdrantVectorStore, RetrievalMode
|
5 |
+
|
6 |
+
|
7 |
+
def answer_query_from_existing_collection(vector_embed, sparse_embed, vectorstore):
    """Open an existing Qdrant collection as a hybrid (dense+sparse) store.

    :param vector_embed: dense embedding model.
    :param sparse_embed: sparse (BM25) embedding model.
    :param vectorstore: existing collection name.
    :return: QdrantVectorStore ready for retrieval.
    """
    vector_store = QdrantVectorStore.from_existing_collection(
        embedding=vector_embed,
        sparse_embedding=sparse_embed,
        collection_name=vectorstore,
        url=os.environ["QDRANT_URL"],
        api_key=os.environ["QDRANT_API_KEY"],
        retrieval_mode=RetrievalMode.HYBRID
    )
    return vector_store
|
17 |
+
|
18 |
+
|
19 |
+
def upload_document_existing_collection(documents, vector_embed, sparse_embed, vectorstore):
    """Upload chunked documents into the collection *vectorstore*.

    Returns None; the constructed store object is discarded.

    NOTE(review): force_recreate=True will DROP and recreate the collection,
    which seems to contradict "existing collection" in the name — confirm
    whether previously uploaded documents are meant to survive this call.
    """
    vector_store = QdrantVectorStore.from_documents(
        documents=documents,
        embedding=vector_embed,
        sparse_embedding=sparse_embed,
        prefer_grpc=True,
        collection_name=vectorstore,
        url=os.environ["QDRANT_URL"],
        api_key=os.environ["QDRANT_API_KEY"],
        force_recreate=True,
        retrieval_mode=RetrievalMode.HYBRID
    )
|
31 |
+
|
32 |
+
|
33 |
+
def upload_document_new_collection(vector_embed, sparse_embed, table_name):
    """Create a fresh (empty) hybrid collection named *table_name*.

    Returns None on success, or the stringified exception on failure.

    NOTE(review): the only caller (QdrantVectorStoreDB.create_table) ignores
    the return value, so creation failures are silently swallowed — confirm
    whether this should raise instead.
    """
    try:
        qdrant = QdrantVectorStore.from_documents(
            documents=[],  # empty seed: creates the collection schema only
            embedding=vector_embed,
            sparse_embedding=sparse_embed,
            url=os.environ["QDRANT_URL"],
            prefer_grpc=True,
            api_key=os.environ["QDRANT_API_KEY"],
            collection_name=table_name,
            force_recreate=True,
            retrieval_mode=RetrievalMode.HYBRID
        )
    except Exception as e:
        return str(e)
|
core/services/website_url/__init__.py
ADDED
File without changes
|