Spaces:
Sleeping
Sleeping
Merge pull request #1 from almutareb/starter_docsqa
Browse files- .devcontainer/Dockerfile +44 -0
- .devcontainer/devcontainer.json +25 -0
- .github/dependabot.yml +12 -0
- .github/workflows/pylint.yml +23 -0
- .gitignore +3 -0
- CONTRIBUTION.md +21 -29
- README.md +33 -32
- app_gui.py +65 -0
- core-langchain-rag.py +315 -0
- docs/template.md +16 -0
- docs/workflow-advanced-rag.drawio +83 -0
- example.env +15 -0
- mail_automation_draft.drawio +0 -140
- rag_app/create_embedding.py +50 -0
- rag_app/generate_summary.py +69 -0
- rag_app/get_db_retriever.py +29 -0
- rag_app/handle_vector_store.py +82 -0
- rag_app/load_data_from_urls.py +50 -0
- rag_app/load_vector_stores.py +70 -0
- rag_app/react_agent.py +0 -0
- rag_app/simple_qa_chain.py +0 -0
- requirements.txt +13 -0
- test_this.py +17 -0
- vectorstore/placeholder.txt +1 -0
.devcontainer/Dockerfile
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM mcr.microsoft.com/devcontainers/base:jammy
|
2 |
+
# FROM mcr.microsoft.com/devcontainers/base:jammy
|
3 |
+
|
4 |
+
ARG DEBIAN_FRONTEND=noninteractive
|
5 |
+
ARG USER=vscode
|
6 |
+
|
7 |
+
RUN DEBIAN_FRONTEND=noninteractive \
|
8 |
+
&& apt-get update \
|
9 |
+
&& apt-get install -y build-essential --no-install-recommends make \
|
10 |
+
ca-certificates \
|
11 |
+
git \
|
12 |
+
libssl-dev \
|
13 |
+
zlib1g-dev \
|
14 |
+
libbz2-dev \
|
15 |
+
libreadline-dev \
|
16 |
+
libsqlite3-dev \
|
17 |
+
wget \
|
18 |
+
curl \
|
19 |
+
llvm \
|
20 |
+
libncurses5-dev \
|
21 |
+
xz-utils \
|
22 |
+
tk-dev \
|
23 |
+
libxml2-dev \
|
24 |
+
libxmlsec1-dev \
|
25 |
+
libffi-dev \
|
26 |
+
liblzma-dev
|
27 |
+
|
28 |
+
# Python and poetry installation
|
29 |
+
USER $USER
|
30 |
+
ARG HOME="/home/$USER"
|
31 |
+
ARG PYTHON_VERSION=3.11
|
32 |
+
|
33 |
+
ENV PYENV_ROOT="${HOME}/.pyenv"
|
34 |
+
ENV PATH="${PYENV_ROOT}/shims:${PYENV_ROOT}/bin:${HOME}/.local/bin:$PATH"
|
35 |
+
|
36 |
+
RUN echo "done 0" \
|
37 |
+
&& curl https://pyenv.run | bash \
|
38 |
+
&& echo "done 1" \
|
39 |
+
&& pyenv install ${PYTHON_VERSION} \
|
40 |
+
&& echo "done 2" \
|
41 |
+
&& pyenv global ${PYTHON_VERSION} \
|
42 |
+
&& echo "done 3" \
|
43 |
+
&& curl -sSL https://install.python-poetry.org | python3 - \
|
44 |
+
&& poetry config virtualenvs.in-project true
|
.devcontainer/devcontainer.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"name": "poetry3-poetry-pyenv",
|
3 |
+
"build": {
|
4 |
+
"dockerfile": "Dockerfile"
|
5 |
+
},
|
6 |
+
|
7 |
+
// 👇 Features to add to the Dev Container. More info: https://containers.dev/implementors/features.
|
8 |
+
// "features": {},
|
9 |
+
|
10 |
+
// 👇 Use 'forwardPorts' to make a list of ports inside the container available locally.
|
11 |
+
// "forwardPorts": [],
|
12 |
+
|
13 |
+
// 👇 Use 'postCreateCommand' to run commands after the container is created.
|
14 |
+
// "postCreateCommand": "",
|
15 |
+
|
16 |
+
// 👇 Configure tool-specific properties.
|
17 |
+
"customizations": {
|
18 |
+
"vscode": {
|
19 |
+
"extensions":["ms-python.python", "njpwerner.autodocstring","ms-azuretools.vscode-docker", "qwtel.sqlite-viewer"]
|
20 |
+
}
|
21 |
+
}
|
22 |
+
|
23 |
+
// 👇 Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
|
24 |
+
// "remoteUser": "root"
|
25 |
+
}
|
.github/dependabot.yml
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# To get started with Dependabot version updates, you'll need to specify which
|
2 |
+
# package ecosystems to update and where the package manifests are located.
|
3 |
+
# Please see the documentation for more information:
|
4 |
+
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
|
5 |
+
# https://containers.dev/guide/dependabot
|
6 |
+
|
7 |
+
version: 2
|
8 |
+
updates:
|
9 |
+
- package-ecosystem: "devcontainers"
|
10 |
+
directory: "/"
|
11 |
+
schedule:
|
12 |
+
interval: weekly
|
.github/workflows/pylint.yml
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: Pylint
|
2 |
+
|
3 |
+
on: [push]
|
4 |
+
|
5 |
+
jobs:
|
6 |
+
build:
|
7 |
+
runs-on: ubuntu-latest
|
8 |
+
strategy:
|
9 |
+
matrix:
|
10 |
+
python-version: ["3.8", "3.9", "3.10"]
|
11 |
+
steps:
|
12 |
+
- uses: actions/checkout@v3
|
13 |
+
- name: Set up Python ${{ matrix.python-version }}
|
14 |
+
uses: actions/setup-python@v3
|
15 |
+
with:
|
16 |
+
python-version: ${{ matrix.python-version }}
|
17 |
+
- name: Install dependencies
|
18 |
+
run: |
|
19 |
+
python -m pip install --upgrade pip
|
20 |
+
pip install pylint
|
21 |
+
- name: Analysing the code with pylint
|
22 |
+
run: |
|
23 |
+
pylint $(git ls-files '*.py')
|
.gitignore
CHANGED
@@ -158,3 +158,6 @@ cython_debug/
|
|
158 |
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
159 |
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
160 |
#.idea/
|
|
|
|
|
|
|
|
158 |
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
159 |
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
160 |
#.idea/
|
161 |
+
*.zip
|
162 |
+
*.pkl
|
163 |
+
*.faiss
|
CONTRIBUTION.md
CHANGED
@@ -1,36 +1,28 @@
|
|
1 |
-
#
|
2 |
|
3 |
-
|
|
|
4 |
|
5 |
-
##
|
|
|
|
|
6 |
|
7 |
-
|
|
|
8 |
|
9 |
-
|
|
|
10 |
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
-
|
|
|
14 |
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
When reporting issues, please use the provided issue template. Your report should include:
|
20 |
-
|
21 |
-
- A clear, descriptive title
|
22 |
-
- A detailed description of the issue
|
23 |
-
- Steps to reproduce the issue
|
24 |
-
- Logs, if applicable
|
25 |
-
- Screenshots, if applicable
|
26 |
-
|
27 |
-
This information is crucial in diagnosing and fixing the issue you're experiencing.
|
28 |
-
|
29 |
-
### Suggestions
|
30 |
-
|
31 |
-
We're always looking for new ideas to improve our project. If you have a suggestion, please:
|
32 |
-
|
33 |
-
- Clearly describe your suggestion, including the purpose and intended outcome.
|
34 |
-
- Explain why you believe this change would be beneficial to the project.
|
35 |
-
|
36 |
-
We appreciate your contributions and look forward to collaborating with you!
|
|
|
1 |
+
# Pull Request Template
|
2 |
|
3 |
+
## Description
|
4 |
+
Please include a brief description of the changes introduced by this PR.
|
5 |
|
6 |
+
## Related Issue(s)
|
7 |
+
- If this PR addresses a particular issue, please reference it here using GitHub's linking syntax, e.g., "Fixes #123".
|
8 |
+
- If there's no related issue, briefly explain the motivation behind these changes.
|
9 |
|
10 |
+
## Changes Made
|
11 |
+
Please provide a list of the changes made in this PR.
|
12 |
|
13 |
+
## Screenshots (if applicable)
|
14 |
+
If the changes include UI updates or visual changes, please attach relevant screenshots here.
|
15 |
|
16 |
+
## Checklist
|
17 |
+
- [ ] I have tested my changes locally and ensured that they work as expected.
|
18 |
+
- [ ] I have updated the documentation (if applicable).
|
19 |
+
- [ ] My code follows the project's coding conventions and style guidelines.
|
20 |
+
- [ ] I have added appropriate test cases (if applicable).
|
21 |
+
- [ ] I have reviewed my own code to ensure its quality.
|
22 |
|
23 |
+
## Additional Notes
|
24 |
+
Add any additional notes or context about this PR here.
|
25 |
|
26 |
+
## Reviewer(s)
|
27 |
+
- @reviewer1
|
28 |
+
- @reviewer2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
@@ -1,32 +1,33 @@
|
|
1 |
-
#
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
-
|
9 |
-
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
-
|
25 |
-
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
|
|
|
1 |
+
# Anatomy of Advanced Enterprise Rag Systems
|
2 |
+
|
3 |
+
This repository accompanies the blog series "The Anatomy of Advanced Enterprise Rag Systems" and provides a hands-on learning experience for building sophisticated Rag systems. Dive deep into each component, from setup and evaluation to security and multi-agent interactions.
|
4 |
+
|
5 |
+
Explore these key topics:
|
6 |
+
|
7 |
+
- Test Setup and Evaluation Metrics: Learn how to assess the performance and effectiveness of your Rag system.
|
8 |
+
- Data Preparation and Management: Discover techniques for organizing and optimizing your knowledge base.
|
9 |
+
- User Input Processing: Understand how to handle diverse user queries and extract relevant information.
|
10 |
+
- Retrieval System: Unleash the power of retrieving relevant passages from your knowledge base.
|
11 |
+
- Information Processing and Generation: Craft accurate and informative responses using state-of-the-art techniques.
|
12 |
+
- Feedback and Continuous Improvement: Enhance your Rag system over time using user feedback and data analysis.
|
13 |
+
- Multi-agents and Agent-services: Explore advanced architectures for distributed and collaborative Rag systems.
|
14 |
+
- Monitoring and Security: Ensure the robustness and trustworthiness of your Rag system with proper monitoring and security practices.
|
15 |
+
|
16 |
+
What you'll find here:
|
17 |
+
|
18 |
+
- Code examples: Implementations of key concepts from each topic, ready to use and adapt.
|
19 |
+
- Data samples: Pre-prepared data sets for experimentation and testing.
|
20 |
+
- Additional resources: Links to relevant articles, libraries, and tools to deepen your understanding.
|
21 |
+
|
22 |
+
Getting started:
|
23 |
+
|
24 |
+
- Clone this repository: git clone https://github.com/<username>/advanced-enterprise-rag-systems.git
|
25 |
+
- Follow the instructions in each topic directory.
|
26 |
+
|
27 |
+
Contributing:
|
28 |
+
|
29 |
+
We welcome your contributions! Share your expertise, improve existing code examples, or add new ones. Submit a pull request to share your valuable additions.
|
30 |
+
|
31 |
+
License:
|
32 |
+
|
33 |
+
This project is licensed under the MIT License: LICENSE.
|
app_gui.py
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Import Gradio for UI, along with other necessary libraries
|
2 |
+
import gradio as gr
|
3 |
+
# need to import the qa!
|
4 |
+
|
5 |
+
# Function to add a new input to the chat history
|
6 |
+
def add_text(history, text):
|
7 |
+
# Append the new text to the history with a placeholder for the response
|
8 |
+
history = history + [(text, None)]
|
9 |
+
return history, ""
|
10 |
+
|
11 |
+
# Function representing the bot's response mechanism
|
12 |
+
def bot(history):
|
13 |
+
# Obtain the response from the 'infer' function using the latest input
|
14 |
+
response = infer(history[-1][0], history)
|
15 |
+
sources = [doc.metadata.get("source") for doc in response['source_documents']]
|
16 |
+
src_list = '\n'.join(sources)
|
17 |
+
print_this = response['result'] + "\n\n\n Sources: \n\n\n" + src_list
|
18 |
+
|
19 |
+
|
20 |
+
history[-1][1] = print_this #response['answer']
|
21 |
+
# Update the history with the bot's response
|
22 |
+
#history[-1][1] = response['result']
|
23 |
+
return history
|
24 |
+
|
25 |
+
# Function to infer the response using the RAG model
|
26 |
+
def infer(question, history):
|
27 |
+
# Use the question and history to query the RAG model
|
28 |
+
result = qa({"query": question, "history": history, "question": question})
|
29 |
+
return result
|
30 |
+
|
31 |
+
# CSS styling for the Gradio interface
|
32 |
+
css = """
|
33 |
+
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
|
34 |
+
"""
|
35 |
+
|
36 |
+
# HTML content for the Gradio interface title
|
37 |
+
title = """
|
38 |
+
<div style="text-align:left;">
|
39 |
+
<p>Hello, I BotTina 2.0, your intelligent AI assistant. I can help you explore Wuerttembergische Versicherungs products.<br />
|
40 |
+
</div>
|
41 |
+
"""
|
42 |
+
|
43 |
+
# Building the Gradio interface
|
44 |
+
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
45 |
+
with gr.Column(elem_id="col-container"):
|
46 |
+
gr.HTML(title) # Add the HTML title to the interface
|
47 |
+
chatbot = gr.Chatbot([], elem_id="chatbot",
|
48 |
+
bubble_full_width=False,
|
49 |
+
avatar_images=(None, "https://dacodi-production.s3.amazonaws.com/store/87bc00b6727589462954f2e3ff6f531c.png"),
|
50 |
+
height=680,) # Initialize the chatbot component
|
51 |
+
clear = gr.Button("Clear") # Add a button to clear the chat
|
52 |
+
|
53 |
+
# Create a row for the question input
|
54 |
+
with gr.Row():
|
55 |
+
question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter ")
|
56 |
+
|
57 |
+
# Define the action when the question is submitted
|
58 |
+
question.submit(add_text, [chatbot, question], [chatbot, question], queue=False).then(
|
59 |
+
bot, chatbot, chatbot
|
60 |
+
)
|
61 |
+
# Define the action for the clear button
|
62 |
+
clear.click(lambda: None, None, chatbot, queue=False)
|
63 |
+
|
64 |
+
# Launch the Gradio demo interface
|
65 |
+
demo.launch(share=False, debug=True)
|
core-langchain-rag.py
ADDED
@@ -0,0 +1,315 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Importing necessary libraries
|
2 |
+
import sys
|
3 |
+
import os
|
4 |
+
import time
|
5 |
+
|
6 |
+
# # Importing RecursiveUrlLoader for web scraping and BeautifulSoup for HTML parsing
|
7 |
+
# from langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader
|
8 |
+
# from bs4 import BeautifulSoup as Soup
|
9 |
+
# import mimetypes
|
10 |
+
|
11 |
+
# # List of URLs to scrape
|
12 |
+
# urls = ["https://langchain-doc.readthedocs.io/en/latest"]
|
13 |
+
|
14 |
+
# # Initialize an empty list to store the documents
|
15 |
+
# docs = []
|
16 |
+
|
17 |
+
# # Looping through each URL in the list - this could take some time!
|
18 |
+
# stf = time.time() # Start time for performance measurement
|
19 |
+
# for url in urls:
|
20 |
+
# try:
|
21 |
+
# st = time.time() # Start time for performance measurement
|
22 |
+
# # Create a RecursiveUrlLoader instance with a specified URL and depth
|
23 |
+
# # The extractor function uses BeautifulSoup to parse the HTML content and extract text
|
24 |
+
# loader = RecursiveUrlLoader(url=url, max_depth=5, extractor=lambda x: Soup(x, "html.parser").text)
|
25 |
+
|
26 |
+
# # Load the documents from the URL and extend the docs list
|
27 |
+
# docs.extend(loader.load())
|
28 |
+
|
29 |
+
# et = time.time() - st # Calculate time taken for splitting
|
30 |
+
# print(f'Time taken for downloading documents from {url}: {et} seconds.')
|
31 |
+
# except Exception as e:
|
32 |
+
# # Print an error message if there is an issue with loading or parsing the URL
|
33 |
+
# print(f"Failed to load or parse the URL {url}. Error: {e}", file=sys.stderr)
|
34 |
+
# etf = time.time() - stf # Calculate time taken for splitting
|
35 |
+
# print(f'Total time taken for downloading {len(docs)} documents: {etf} seconds.')
|
36 |
+
|
37 |
+
# # Import necessary modules for text splitting and vectorization
|
38 |
+
# from langchain.text_splitter import RecursiveCharacterTextSplitter
|
39 |
+
# import time
|
40 |
+
# from langchain_community.vectorstores import FAISS
|
41 |
+
# from langchain.vectorstores.utils import filter_complex_metadata
|
42 |
+
# from langchain_community.embeddings import HuggingFaceEmbeddings
|
43 |
+
|
44 |
+
# # Configure the text splitter
|
45 |
+
# text_splitter = RecursiveCharacterTextSplitter(
|
46 |
+
# separators=["\n\n", "\n", "(?<=\. )", " ", ""], # Define the separators for splitting text
|
47 |
+
# chunk_size=500, # The size of each text chunk
|
48 |
+
# chunk_overlap=50, # Overlap between chunks to ensure continuity
|
49 |
+
# length_function=len, # Function to determine the length of each chunk
|
50 |
+
# )
|
51 |
+
|
52 |
+
# try:
|
53 |
+
# # Stage one: Splitting the documents into chunks for vectorization
|
54 |
+
# st = time.time() # Start time for performance measurement
|
55 |
+
# print('Loading documents and creating chunks ...')
|
56 |
+
# # Split each document into chunks using the configured text splitter
|
57 |
+
# chunks = text_splitter.create_documents([doc.page_content for doc in docs], metadatas=[doc.metadata for doc in docs])
|
58 |
+
# et = time.time() - st # Calculate time taken for splitting
|
59 |
+
# print(f"created "+chunks+" chunks")
|
60 |
+
# print(f'Time taken for document chunking: {et} seconds.')
|
61 |
+
# except Exception as e:
|
62 |
+
# print(f"Error during document chunking: {e}", file=sys.stderr)
|
63 |
+
|
64 |
+
# # Path for saving the FAISS index
|
65 |
+
# FAISS_INDEX_PATH = "./vectorstore/lc-faiss-multi-mpnet-500"
|
66 |
+
|
67 |
+
# try:
|
68 |
+
# # Stage two: Vectorization of the document chunks
|
69 |
+
# model_name = "sentence-transformers/multi-qa-mpnet-base-dot-v1" # Model used for embedding
|
70 |
+
|
71 |
+
# # Initialize HuggingFace embeddings with the specified model
|
72 |
+
# embeddings = HuggingFaceEmbeddings(model_name=model_name)
|
73 |
+
|
74 |
+
# print(f'Loading chunks into vector store ...')
|
75 |
+
# st = time.time() # Start time for performance measurement
|
76 |
+
# # Create a FAISS vector store from the document chunks and save it locally
|
77 |
+
# db = FAISS.from_documents(filter_complex_metadata(chunks), embeddings)
|
78 |
+
# db.save_local(FAISS_INDEX_PATH)
|
79 |
+
# et = time.time() - st # Calculate time taken for vectorization
|
80 |
+
# print(f'Time taken for vectorization and saving: {et} seconds.')
|
81 |
+
# except Exception as e:
|
82 |
+
# print(f"Error during vectorization or FAISS index saving: {e}", file=sys.stderr)
|
83 |
+
|
84 |
+
# alternatively download a preparaed vectorized index from S3 and load the index into vectorstore
|
85 |
+
# Import necessary libraries for AWS S3 interaction, file handling, and FAISS vector stores
|
86 |
+
import boto3
|
87 |
+
from botocore import UNSIGNED
|
88 |
+
from botocore.client import Config
|
89 |
+
import zipfile
|
90 |
+
from langchain_community.vectorstores import FAISS
|
91 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
92 |
+
from dotenv import load_dotenv
|
93 |
+
|
94 |
+
# Load environment variables from a .env file
|
95 |
+
config = load_dotenv(".env")
|
96 |
+
|
97 |
+
# Retrieve the Hugging Face API token from environment variables
|
98 |
+
HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')
|
99 |
+
S3_LOCATION = os.getenv("S3_LOCATION")
|
100 |
+
S3_FILE_NAME = os.getenv("FAISS_VS_NAME")
|
101 |
+
FAISS_INDEX_PATH = os.getenv("FAISS_INDEX_PATH")
|
102 |
+
|
103 |
+
# try:
|
104 |
+
# # Initialize an S3 client with unsigned configuration for public access
|
105 |
+
# s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))
|
106 |
+
|
107 |
+
# # Define the FAISS index path and the destination for the downloaded file
|
108 |
+
# #FAISS_INDEX_PATH = './vectorstore/lc-faiss-multi-mpnet-500-markdown'
|
109 |
+
# VS_DESTINATION = FAISS_INDEX_PATH + ".zip"
|
110 |
+
|
111 |
+
# # Download the pre-prepared vectorized index from the S3 bucket
|
112 |
+
# print("Downloading the pre-prepared vectorized index from S3...")
|
113 |
+
# s3.download_file(S3_LOCATION, S3_FILE_NAME, VS_DESTINATION)
|
114 |
+
|
115 |
+
# # Extract the downloaded zip file
|
116 |
+
# with zipfile.ZipFile(VS_DESTINATION, 'r') as zip_ref:
|
117 |
+
# zip_ref.extractall('./vectorstore/')
|
118 |
+
# print("Download and extraction completed.")
|
119 |
+
|
120 |
+
# except Exception as e:
|
121 |
+
# print(f"Error during downloading or extracting from S3: {e}", file=sys.stderr)
|
122 |
+
|
123 |
+
# Define the model name for embeddings
|
124 |
+
model_name = os.getenv("EMBEDDING_MODEL")
|
125 |
+
|
126 |
+
try:
|
127 |
+
# Initialize HuggingFace embeddings with the specified model
|
128 |
+
embeddings = HuggingFaceEmbeddings(model_name=model_name)
|
129 |
+
|
130 |
+
# Load the local FAISS index with the specified embeddings
|
131 |
+
db = FAISS.load_local(FAISS_INDEX_PATH, embeddings, allow_dangerous_deserialization=True)
|
132 |
+
print("FAISS index loaded successfully.")
|
133 |
+
except Exception as e:
|
134 |
+
print(f"Error during FAISS index loading: {e}", file=sys.stderr)
|
135 |
+
|
136 |
+
# Import necessary modules for environment variable management and HuggingFace integration
|
137 |
+
from langchain_huggingface import HuggingFaceEndpoint
|
138 |
+
|
139 |
+
# Initialize the vector store as a retriever for the RAG pipeline
|
140 |
+
retriever = db.as_retriever()#search_type="mmr", search_kwargs={'k': 3, 'lambda_mult': 0.25})
|
141 |
+
|
142 |
+
llm_model = os.getenv("LLM_MODEL")
|
143 |
+
|
144 |
+
try:
|
145 |
+
# Load the model from the Hugging Face Hub
|
146 |
+
model_id = HuggingFaceEndpoint(repo_id=llm_model,
|
147 |
+
temperature=0.1, # Controls randomness in response generation (lower value means less random)
|
148 |
+
max_new_tokens=1024, # Maximum number of new tokens to generate in responses
|
149 |
+
repetition_penalty=1.2, # Penalty for repeating the same words (higher value increases penalty)
|
150 |
+
return_full_text=False # If False, only the newly generated text is returned; if True, the input is included as well
|
151 |
+
)
|
152 |
+
print("Model loaded successfully from Hugging Face Hub.")
|
153 |
+
except Exception as e:
|
154 |
+
print(f"Error loading model from Hugging Face Hub: {e}", file=sys.stderr)
|
155 |
+
|
156 |
+
|
157 |
+
|
158 |
+
# Importing necessary modules for retrieval-based question answering and prompt handling
|
159 |
+
from langchain.chains import RetrievalQA
|
160 |
+
from langchain.chains import LLMChain
|
161 |
+
from langchain_core.prompts import PromptTemplate
|
162 |
+
from langchain.memory import ConversationBufferMemory
|
163 |
+
from langchain_core.output_parsers import StrOutputParser
|
164 |
+
|
165 |
+
# Declare a global variable 'qa' for the retrieval-based question answering system
|
166 |
+
global qa
|
167 |
+
|
168 |
+
# Define a prompt template for guiding the model's responses
|
169 |
+
template = """
|
170 |
+
You are a friendly insurance product advisor, your task is to help customers find the best products from Württembergische GmbH.\
|
171 |
+
You help the user find the answers to all his questions queries. Answer in short and simple terms and offer to explain the product and terms to the user.\
|
172 |
+
Use the following context (delimited by <ctx></ctx>) and the chat history (delimited by <hs></hs>) to help find the best product for the user:
|
173 |
+
------
|
174 |
+
<ctx>
|
175 |
+
{context}
|
176 |
+
</ctx>
|
177 |
+
------
|
178 |
+
<hs>
|
179 |
+
{history}
|
180 |
+
</hs>
|
181 |
+
------
|
182 |
+
{question}
|
183 |
+
Answer:
|
184 |
+
"""
|
185 |
+
|
186 |
+
# Create a PromptTemplate object with specified input variables and the defined template
|
187 |
+
prompt = PromptTemplate.from_template(
|
188 |
+
#input_variables=["history", "context", "question"], # Variables to be included in the prompt
|
189 |
+
template=template, # The prompt template as defined above
|
190 |
+
)
|
191 |
+
prompt.format(context="context", history="history", question="question")
|
192 |
+
# Create a memory buffer to manage conversation history
|
193 |
+
memory = ConversationBufferMemory(
|
194 |
+
memory_key="history", # Key for storing the conversation history
|
195 |
+
input_key="question" # Key for the input question
|
196 |
+
)
|
197 |
+
|
198 |
+
# Initialize the RetrievalQA object with the specified model, retriever, and additional configurations
|
199 |
+
qa = RetrievalQA.from_chain_type(
|
200 |
+
llm=model_id, # Language model loaded from Hugging Face Hub
|
201 |
+
retriever=retriever, # The vector store retriever initialized earlier
|
202 |
+
return_source_documents=True, # Option to return source documents along with responses
|
203 |
+
chain_type_kwargs={
|
204 |
+
"verbose": True, # Enables verbose output for debugging and analysis
|
205 |
+
"memory": memory, # Memory buffer for managing conversation history
|
206 |
+
"prompt": prompt # Prompt template for guiding the model's responses
|
207 |
+
}
|
208 |
+
)
|
209 |
+
|
210 |
+
def generate_qa_retriever(history: dict, question: str, llm_model:HuggingFaceEndpoint = model_id) -> dict:
|
211 |
+
""" Generare a response to queries using the retriever"""
|
212 |
+
|
213 |
+
# Define a prompt template for guiding the model's responses
|
214 |
+
template = """
|
215 |
+
You are a friendly insurance product advisor, your task is to help customers find the best products from Württembergische GmbH.\
|
216 |
+
You help the user find the answers to all his questions. Answer in short and simple terms and offer to explain the product and terms to the user.\
|
217 |
+
Respond only using the provided context (delimited by <ctx></ctx>) and only in German or Englisch, depending on the question's language.
|
218 |
+
Use the chat history (delimited by <hs></hs>) to help find the best product for the user:
|
219 |
+
------
|
220 |
+
<ctx>
|
221 |
+
{context}
|
222 |
+
</ctx>
|
223 |
+
------
|
224 |
+
<hs>
|
225 |
+
{history}
|
226 |
+
</hs>
|
227 |
+
------
|
228 |
+
{question}
|
229 |
+
Answer:
|
230 |
+
"""
|
231 |
+
|
232 |
+
# Create a PromptTemplate object with specified input variables and the defined template
|
233 |
+
prompt = PromptTemplate.from_template(
|
234 |
+
template=template, # The prompt template as defined above
|
235 |
+
)
|
236 |
+
prompt.format(context="context", history="history", question="question")
|
237 |
+
# Create a memory buffer to manage conversation history
|
238 |
+
memory = ConversationBufferMemory(
|
239 |
+
memory_key="history", # Key for storing the conversation history
|
240 |
+
input_key="question" # Key for the input question
|
241 |
+
)
|
242 |
+
|
243 |
+
llm_chain = prompt | llm_model
|
244 |
+
result = llm_chain.invoke({"context": retriever, "history": history, "question": question})
|
245 |
+
print(result)
|
246 |
+
return result
|
247 |
+
|
248 |
+
# Import Gradio for UI, along with other necessary libraries
|
249 |
+
import gradio as gr
|
250 |
+
|
251 |
+
# Function to add a new input to the chat history
|
252 |
+
def add_text(history, text):
|
253 |
+
# Append the new text to the history with a placeholder for the response
|
254 |
+
history = history + [(text, None)]
|
255 |
+
return history, ""
|
256 |
+
|
257 |
+
# Function representing the bot's response mechanism
|
258 |
+
def bot(history):
|
259 |
+
# Obtain the response from the 'infer' function using the latest input
|
260 |
+
response = infer(history[-1][0], history)
|
261 |
+
sources = [doc.metadata.get("source") for doc in response['source_documents']]
|
262 |
+
src_list = '\n'.join(sources)
|
263 |
+
print_this = response['result'] + "\n\n\n Sources: \n\n\n" + src_list
|
264 |
+
|
265 |
+
|
266 |
+
#history[-1][1] = response #print_this #response['answer']
|
267 |
+
history[-1][1] = print_this #response['answer']
|
268 |
+
# Update the history with the bot's response
|
269 |
+
#history[-1][1] = response['result']
|
270 |
+
return history
|
271 |
+
|
272 |
+
# Function to infer the response using the RAG model
|
273 |
+
def infer(question, history):
|
274 |
+
# Use the question and history to query the RAG model
|
275 |
+
#result = generate_qa_retriever(history, question)
|
276 |
+
result = qa({"query": question, "history": history, "question": question})
|
277 |
+
print(*result)
|
278 |
+
return result
|
279 |
+
|
280 |
+
# CSS styling for the Gradio interface
|
281 |
+
css = """
|
282 |
+
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
|
283 |
+
"""
|
284 |
+
|
285 |
+
# HTML content for the Gradio interface title
|
286 |
+
title = """
|
287 |
+
<div style="text-align:left;">
|
288 |
+
<p>Hello, I BotTina 2.0, your intelligent AI assistant. I can help you explore Wuerttembergische Versicherungs products.<br />
|
289 |
+
</div>
|
290 |
+
"""
|
291 |
+
|
292 |
+
# Building the Gradio interface
|
293 |
+
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
294 |
+
with gr.Column(elem_id="col-container"):
|
295 |
+
gr.HTML(title) # Add the HTML title to the interface
|
296 |
+
chatbot = gr.Chatbot([], elem_id="chatbot",
|
297 |
+
label="BotTina 2.0",
|
298 |
+
bubble_full_width=False,
|
299 |
+
avatar_images=(None, "https://dacodi-production.s3.amazonaws.com/store/87bc00b6727589462954f2e3ff6f531c.png"),
|
300 |
+
height=680,) # Initialize the chatbot component
|
301 |
+
clear = gr.Button("Clear") # Add a button to clear the chat
|
302 |
+
|
303 |
+
# Create a row for the question input
|
304 |
+
with gr.Row():
|
305 |
+
question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter ")
|
306 |
+
|
307 |
+
# Define the action when the question is submitted
|
308 |
+
question.submit(add_text, [chatbot, question], [chatbot, question], queue=False).then(
|
309 |
+
bot, chatbot, chatbot
|
310 |
+
)
|
311 |
+
# Define the action for the clear button
|
312 |
+
clear.click(lambda: None, None, chatbot, queue=False)
|
313 |
+
|
314 |
+
# Launch the Gradio demo interface
|
315 |
+
demo.launch(debug=True)
|
docs/template.md
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Template
|
2 |
+
|
3 |
+
1. **Architecture of advanced RAG**
|
4 |
+
2. **Test setup and Evaluation metrics**
|
5 |
+
3. **Data preparation (vectorization & chunking)**
|
6 |
+
4. **Search indexing**
|
7 |
+
5. **Query transformation**
|
8 |
+
6. **Chat logic and query routing**
|
9 |
+
7. **Multi agents and agent-services**
|
10 |
+
8. **Monitoring responses and adding security**
|
11 |
+
|
12 |
+
## Additional Resources
|
13 |
+
|
14 |
+
[Enterprise Rag](https://www.rungalileo.io/blog/mastering-rag-how-to-architect-an-enterprise-rag-systemhttps://www.rungalileo.io/blog/mastering-rag-how-to-architect-an-enterprise-rag-system)
|
15 |
+
|
16 |
+
[Advanced RAG](https://medium.com/towards-artificial-intelligence/advanced-rag-techniques-an-illustrated-overview-04d193d8fec6)
|
docs/workflow-advanced-rag.drawio
ADDED
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<mxfile host="app.diagrams.net" modified="2024-02-02T11:21:08.029Z" agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:122.0) Gecko/20100101 Firefox/122.0" etag="EvpGiXuqtWkE4FAqL8_g" version="22.1.21" type="github">
|
2 |
+
<diagram id="C5RBs43oDa-KdzZeNtuy" name="Page-1">
|
3 |
+
<mxGraphModel dx="1434" dy="774" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="827" pageHeight="1169" math="0" shadow="0">
|
4 |
+
<root>
|
5 |
+
<mxCell id="WIyWlLk6GJQsqaUBKTNV-0" />
|
6 |
+
<mxCell id="WIyWlLk6GJQsqaUBKTNV-1" parent="WIyWlLk6GJQsqaUBKTNV-0" />
|
7 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-1" value="" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="WIyWlLk6GJQsqaUBKTNV-3" target="eFb6EC-VP60E3mpf6WAh-0">
|
8 |
+
<mxGeometry relative="1" as="geometry" />
|
9 |
+
</mxCell>
|
10 |
+
<mxCell id="WIyWlLk6GJQsqaUBKTNV-3" value="User Authentication and Input" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#d5e8d4;strokeColor=#82b366;" parent="WIyWlLk6GJQsqaUBKTNV-1" vertex="1">
|
11 |
+
<mxGeometry x="24" y="80" width="120" height="40" as="geometry" />
|
12 |
+
</mxCell>
|
13 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-12" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="WIyWlLk6GJQsqaUBKTNV-7" target="WIyWlLk6GJQsqaUBKTNV-11">
|
14 |
+
<mxGeometry relative="1" as="geometry" />
|
15 |
+
</mxCell>
|
16 |
+
<mxCell id="WIyWlLk6GJQsqaUBKTNV-7" value="Query Processing" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#d5e8d4;strokeColor=#82b366;" parent="WIyWlLk6GJQsqaUBKTNV-1" vertex="1">
|
17 |
+
<mxGeometry x="180" y="160" width="120" height="40" as="geometry" />
|
18 |
+
</mxCell>
|
19 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-13" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="WIyWlLk6GJQsqaUBKTNV-11" target="eFb6EC-VP60E3mpf6WAh-3">
|
20 |
+
<mxGeometry relative="1" as="geometry" />
|
21 |
+
</mxCell>
|
22 |
+
<mxCell id="WIyWlLk6GJQsqaUBKTNV-11" value="Data Preparation and Management" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#ffe6cc;strokeColor=#d79b00;" parent="WIyWlLk6GJQsqaUBKTNV-1" vertex="1">
|
23 |
+
<mxGeometry x="330" y="160" width="120" height="40" as="geometry" />
|
24 |
+
</mxCell>
|
25 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-11" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="eFb6EC-VP60E3mpf6WAh-0" target="WIyWlLk6GJQsqaUBKTNV-7">
|
26 |
+
<mxGeometry relative="1" as="geometry" />
|
27 |
+
</mxCell>
|
28 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-0" value="Input Guardrails" style="whiteSpace=wrap;html=1;rounded=1;glass=0;strokeWidth=1;shadow=0;fillColor=#f8cecc;strokeColor=#b85450;" vertex="1" parent="WIyWlLk6GJQsqaUBKTNV-1">
|
29 |
+
<mxGeometry x="24" y="160" width="120" height="40" as="geometry" />
|
30 |
+
</mxCell>
|
31 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-14" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="eFb6EC-VP60E3mpf6WAh-3" target="eFb6EC-VP60E3mpf6WAh-5">
|
32 |
+
<mxGeometry relative="1" as="geometry" />
|
33 |
+
</mxCell>
|
34 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-3" value="Retrieval System" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#e1d5e7;strokeColor=#9673a6;" vertex="1" parent="WIyWlLk6GJQsqaUBKTNV-1">
|
35 |
+
<mxGeometry x="480" y="160" width="120" height="40" as="geometry" />
|
36 |
+
</mxCell>
|
37 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-15" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=0;exitDx=0;exitDy=0;entryX=0.5;entryY=1;entryDx=0;entryDy=0;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="eFb6EC-VP60E3mpf6WAh-5" target="eFb6EC-VP60E3mpf6WAh-9">
|
38 |
+
<mxGeometry relative="1" as="geometry" />
|
39 |
+
</mxCell>
|
40 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-18" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0;exitY=0;exitDx=0;exitDy=0;entryX=0.75;entryY=1;entryDx=0;entryDy=0;dashed=1;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="eFb6EC-VP60E3mpf6WAh-5" target="eFb6EC-VP60E3mpf6WAh-6">
|
41 |
+
<mxGeometry relative="1" as="geometry">
|
42 |
+
<Array as="points">
|
43 |
+
<mxPoint x="630" y="130" />
|
44 |
+
<mxPoint x="450" y="130" />
|
45 |
+
</Array>
|
46 |
+
</mxGeometry>
|
47 |
+
</mxCell>
|
48 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-5" value="Information Processing and Augmentation" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#fff2cc;strokeColor=#d6b656;" vertex="1" parent="WIyWlLk6GJQsqaUBKTNV-1">
|
49 |
+
<mxGeometry x="630" y="160" width="120" height="40" as="geometry" />
|
50 |
+
</mxCell>
|
51 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-6" value="Observability and Feedback" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;" vertex="1" parent="WIyWlLk6GJQsqaUBKTNV-1">
|
52 |
+
<mxGeometry x="360" y="40" width="120" height="40" as="geometry" />
|
53 |
+
</mxCell>
|
54 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-21" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.98;exitY=0.02;exitDx=0;exitDy=0;exitPerimeter=0;entryX=0.575;entryY=1;entryDx=0;entryDy=0;dashed=1;entryPerimeter=0;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="eFb6EC-VP60E3mpf6WAh-8" target="eFb6EC-VP60E3mpf6WAh-3">
|
55 |
+
<mxGeometry relative="1" as="geometry" />
|
56 |
+
</mxCell>
|
57 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-8" value="Caching" style="strokeWidth=2;html=1;shape=mxgraph.flowchart.multi-document;whiteSpace=wrap;fillColor=#ffe6cc;strokeColor=#d79b00;" vertex="1" parent="WIyWlLk6GJQsqaUBKTNV-1">
|
58 |
+
<mxGeometry x="376" y="240" width="88" height="60" as="geometry" />
|
59 |
+
</mxCell>
|
60 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-16" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0;exitY=0;exitDx=0;exitDy=0;entryX=1;entryY=0.5;entryDx=0;entryDy=0;dashed=1;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="eFb6EC-VP60E3mpf6WAh-9" target="eFb6EC-VP60E3mpf6WAh-6">
|
61 |
+
<mxGeometry relative="1" as="geometry" />
|
62 |
+
</mxCell>
|
63 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-9" value="Output and Response" style="rounded=1;whiteSpace=wrap;html=1;fontSize=12;glass=0;strokeWidth=1;shadow=0;fillColor=#e1d5e7;strokeColor=#9673a6;" vertex="1" parent="WIyWlLk6GJQsqaUBKTNV-1">
|
64 |
+
<mxGeometry x="630" y="80" width="120" height="40" as="geometry" />
|
65 |
+
</mxCell>
|
66 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-17" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0;exitDx=0;exitDy=0;entryX=0.325;entryY=0.975;entryDx=0;entryDy=0;entryPerimeter=0;dashed=1;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="eFb6EC-VP60E3mpf6WAh-0" target="eFb6EC-VP60E3mpf6WAh-6">
|
67 |
+
<mxGeometry relative="1" as="geometry">
|
68 |
+
<Array as="points">
|
69 |
+
<mxPoint x="144" y="130" />
|
70 |
+
<mxPoint x="399" y="130" />
|
71 |
+
</Array>
|
72 |
+
</mxGeometry>
|
73 |
+
</mxCell>
|
74 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-19" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.75;exitY=1;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;entryPerimeter=0;dashed=1;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="WIyWlLk6GJQsqaUBKTNV-7" target="eFb6EC-VP60E3mpf6WAh-8">
|
75 |
+
<mxGeometry relative="1" as="geometry" />
|
76 |
+
</mxCell>
|
77 |
+
<mxCell id="eFb6EC-VP60E3mpf6WAh-20" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=1;entryY=0.5;entryDx=0;entryDy=0;entryPerimeter=0;dashed=1;" edge="1" parent="WIyWlLk6GJQsqaUBKTNV-1" source="eFb6EC-VP60E3mpf6WAh-9" target="eFb6EC-VP60E3mpf6WAh-8">
|
78 |
+
<mxGeometry relative="1" as="geometry" />
|
79 |
+
</mxCell>
|
80 |
+
</root>
|
81 |
+
</mxGraphModel>
|
82 |
+
</diagram>
|
83 |
+
</mxfile>
|
example.env
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# API Keys for services
|
2 |
+
HUGGINGFACEHUB_API_TOKEN=""
|
3 |
+
GOOGLE_CSE_ID=""
|
4 |
+
GOOGLE_API_KEY=""
|
5 |
+
|
6 |
+
# AWS S3 object storage
|
7 |
+
S3_LOCATION=""
|
8 |
+
S3_FILE_NAME=""
|
9 |
+
|
10 |
+
# Local vectorstore storage
|
11 |
+
FAISS_INDEX_PATH=""
|
12 |
+
|
13 |
+
# llm and embedding models
|
14 |
+
embedding_model=""
|
15 |
+
llm_model=""
|
mail_automation_draft.drawio
DELETED
@@ -1,140 +0,0 @@
|
|
1 |
-
<mxfile host="app.diagrams.net" modified="2024-05-21T13:40:30.482Z" agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:126.0) Gecko/20100101 Firefox/126.0" etag="dPw-sfjJEinoEt61k41I" version="24.4.4" type="github">
|
2 |
-
<diagram name="Page-1" id="JIRIjCkc2eHKuGLTyx-l">
|
3 |
-
<mxGraphModel dx="1434" dy="-1564" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="827" pageHeight="1169" math="0" shadow="0">
|
4 |
-
<root>
|
5 |
-
<mxCell id="0" />
|
6 |
-
<mxCell id="1" parent="0" />
|
7 |
-
<mxCell id="0ipz06AKfEBxF18oT8nj-1" value="" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;" parent="1" source="0ipz06AKfEBxF18oT8nj-19" target="0ipz06AKfEBxF18oT8nj-6" edge="1">
|
8 |
-
<mxGeometry relative="1" as="geometry" />
|
9 |
-
</mxCell>
|
10 |
-
<mxCell id="0ipz06AKfEBxF18oT8nj-2" value="" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;" parent="1" source="0ipz06AKfEBxF18oT8nj-3" target="0ipz06AKfEBxF18oT8nj-31" edge="1">
|
11 |
-
<mxGeometry relative="1" as="geometry" />
|
12 |
-
</mxCell>
|
13 |
-
<mxCell id="0ipz06AKfEBxF18oT8nj-3" value="Ticket Creation" style="ellipse;whiteSpace=wrap;html=1;" parent="1" vertex="1">
|
14 |
-
<mxGeometry x="217.5" y="2640" width="120" height="80" as="geometry" />
|
15 |
-
</mxCell>
|
16 |
-
<mxCell id="0ipz06AKfEBxF18oT8nj-4" value="" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;" parent="1" source="0ipz06AKfEBxF18oT8nj-6" target="0ipz06AKfEBxF18oT8nj-8" edge="1">
|
17 |
-
<mxGeometry relative="1" as="geometry" />
|
18 |
-
</mxCell>
|
19 |
-
<mxCell id="0ipz06AKfEBxF18oT8nj-5" value="" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;" parent="1" source="0ipz06AKfEBxF18oT8nj-6" target="0ipz06AKfEBxF18oT8nj-21" edge="1">
|
20 |
-
<mxGeometry relative="1" as="geometry" />
|
21 |
-
</mxCell>
|
22 |
-
<mxCell id="0ipz06AKfEBxF18oT8nj-6" value="Fetch Emails" style="ellipse;whiteSpace=wrap;html=1;" parent="1" vertex="1">
|
23 |
-
<mxGeometry x="210.5" y="2480" width="120" height="80" as="geometry" />
|
24 |
-
</mxCell>
|
25 |
-
<mxCell id="0ipz06AKfEBxF18oT8nj-7" value="" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;" parent="1" source="0ipz06AKfEBxF18oT8nj-8" target="0ipz06AKfEBxF18oT8nj-10" edge="1">
|
26 |
-
<mxGeometry relative="1" as="geometry" />
|
27 |
-
</mxCell>
|
28 |
-
<mxCell id="0ipz06AKfEBxF18oT8nj-8" value="Entity Recognition" style="ellipse;whiteSpace=wrap;html=1;" parent="1" vertex="1">
|
29 |
-
<mxGeometry x="410.5" y="2480" width="120" height="80" as="geometry" />
|
30 |
-
</mxCell>
|
31 |
-
<mxCell id="0ipz06AKfEBxF18oT8nj-9" value="" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;" parent="1" source="0ipz06AKfEBxF18oT8nj-10" target="0ipz06AKfEBxF18oT8nj-14" edge="1">
|
32 |
-
<mxGeometry relative="1" as="geometry" />
|
33 |
-
</mxCell>
|
34 |
-
<mxCell id="0ipz06AKfEBxF18oT8nj-10" value="Classification &amp; Summarization" style="ellipse;whiteSpace=wrap;html=1;" parent="1" vertex="1">
|
35 |
-
<mxGeometry x="630" y="2480" width="120" height="80" as="geometry" />
|
36 |
-
</mxCell>
|
37 |
-
<mxCell id="0ipz06AKfEBxF18oT8nj-11" value="" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;" parent="1" source="0ipz06AKfEBxF18oT8nj-17" target="0ipz06AKfEBxF18oT8nj-15" edge="1">
|
38 |
-
<mxGeometry relative="1" as="geometry" />
|
39 |
-
</mxCell>
|
40 |
-
<mxCell id="0ipz06AKfEBxF18oT8nj-12" value="" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;" parent="1" source="0ipz06AKfEBxF18oT8nj-14" target="0ipz06AKfEBxF18oT8nj-17" edge="1">
|
41 |
-
<mxGeometry relative="1" as="geometry" />
|
42 |
-
</mxCell>
|
43 |
-
<mxCell id="0ipz06AKfEBxF18oT8nj-13" value="" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;" parent="1" source="0ipz06AKfEBxF18oT8nj-14" target="0ipz06AKfEBxF18oT8nj-24" edge="1">
|
44 |
-
<mxGeometry relative="1" as="geometry" />
|
45 |
-
</mxCell>
|
46 |
-
<mxCell id="0ipz06AKfEBxF18oT8nj-14" value="Data Enrichment" style="ellipse;whiteSpace=wrap;html=1;" parent="1" vertex="1">
|
47 |
-
<mxGeometry x="630" y="2640" width="120" height="80" as="geometry" />
|
48 |
-
</mxCell>
|
49 |
-
<mxCell id="0ipz06AKfEBxF18oT8nj-15" value="Draft Response" style="ellipse;whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf;" parent="1" vertex="1">
|
50 |
-
<mxGeometry x="410.5" y="2800" width="120" height="80" as="geometry" />
|
51 |
-
</mxCell>
|
52 |
-
<mxCell id="0ipz06AKfEBxF18oT8nj-16" value="" style="curved=1;endArrow=classic;html=1;rounded=0;exitX=0.75;exitY=1;exitDx=0;exitDy=0;entryX=0.25;entryY=1;entryDx=0;entryDy=0;" parent="1" source="0ipz06AKfEBxF18oT8nj-19" target="0ipz06AKfEBxF18oT8nj-19" edge="1">
|
53 |
-
<mxGeometry width="50" height="50" relative="1" as="geometry">
|
54 |
-
<mxPoint x="200" y="2730" as="sourcePoint" />
|
55 |
-
<mxPoint x="80" y="2580" as="targetPoint" />
|
56 |
-
<Array as="points">
|
57 |
-
<mxPoint x="130" y="2600" />
|
58 |
-
<mxPoint x="90" y="2610" />
|
59 |
-
<mxPoint x="60" y="2620" />
|
60 |
-
<mxPoint x="53" y="2570" />
|
61 |
-
</Array>
|
62 |
-
</mxGeometry>
|
63 |
-
</mxCell>
|
64 |
-
<mxCell id="0ipz06AKfEBxF18oT8nj-17" value="Data Complete" style="rhombus;whiteSpace=wrap;html=1;" parent="1" vertex="1">
|
65 |
-
<mxGeometry x="430.5" y="2640" width="80" height="80" as="geometry" />
|
66 |
-
</mxCell>
|
67 |
-
<mxCell id="0ipz06AKfEBxF18oT8nj-18" value="No" style="text;html=1;align=center;verticalAlign=middle;resizable=0;points=[];autosize=1;strokeColor=none;fillColor=none;" parent="1" vertex="1">
|
68 |
-
<mxGeometry x="460.5" y="2748" width="40" height="30" as="geometry" />
|
69 |
-
</mxCell>
|
70 |
-
<mxCell id="0ipz06AKfEBxF18oT8nj-19" value="Periodically Check Emails" style="shape=process;whiteSpace=wrap;html=1;backgroundOutline=1;" parent="1" vertex="1">
|
71 |
-
<mxGeometry x="30" y="2490" width="120" height="60" as="geometry" />
|
72 |
-
</mxCell>
|
73 |
-
<mxCell id="0ipz06AKfEBxF18oT8nj-20" value="" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;" parent="1" source="0ipz06AKfEBxF18oT8nj-21" target="0ipz06AKfEBxF18oT8nj-22" edge="1">
|
74 |
-
<mxGeometry relative="1" as="geometry" />
|
75 |
-
</mxCell>
|
76 |
-
<mxCell id="0ipz06AKfEBxF18oT8nj-21" value="Download Attachements" style="whiteSpace=wrap;html=1;" parent="1" vertex="1">
|
77 |
-
<mxGeometry x="210.5" y="2360" width="120" height="60" as="geometry" />
|
78 |
-
</mxCell>
|
79 |
-
<mxCell id="0ipz06AKfEBxF18oT8nj-22" value="Extract Text and Images from PDF" style="whiteSpace=wrap;html=1;" parent="1" vertex="1">
|
80 |
-
<mxGeometry x="399.5" y="2360" width="120" height="60" as="geometry" />
|
81 |
-
</mxCell>
|
82 |
-
<mxCell id="0ipz06AKfEBxF18oT8nj-23" value="" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.596;exitY=1.033;exitDx=0;exitDy=0;exitPerimeter=0;" parent="1" source="0ipz06AKfEBxF18oT8nj-22" target="0ipz06AKfEBxF18oT8nj-8" edge="1">
|
83 |
-
<mxGeometry relative="1" as="geometry">
|
84 |
-
<mxPoint x="281" y="2490" as="sourcePoint" />
|
85 |
-
<mxPoint x="281" y="2430" as="targetPoint" />
|
86 |
-
</mxGeometry>
|
87 |
-
</mxCell>
|
88 |
-
<mxCell id="0ipz06AKfEBxF18oT8nj-24" value="CRM, SQL, Product KB" style="ellipse;whiteSpace=wrap;html=1;" parent="1" vertex="1">
|
89 |
-
<mxGeometry x="650" y="2800" width="80" height="80" as="geometry" />
|
90 |
-
</mxCell>
|
91 |
-
<mxCell id="0ipz06AKfEBxF18oT8nj-25" value="" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0;exitY=0.5;exitDx=0;exitDy=0;entryX=1;entryY=0.5;entryDx=0;entryDy=0;" parent="1" source="0ipz06AKfEBxF18oT8nj-17" target="0ipz06AKfEBxF18oT8nj-3" edge="1">
|
92 |
-
<mxGeometry relative="1" as="geometry">
|
93 |
-
<mxPoint x="620.5" y="2690" as="sourcePoint" />
|
94 |
-
<mxPoint x="520.5" y="2690" as="targetPoint" />
|
95 |
-
</mxGeometry>
|
96 |
-
</mxCell>
|
97 |
-
<mxCell id="0ipz06AKfEBxF18oT8nj-26" value="Yes" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="0ipz06AKfEBxF18oT8nj-25" vertex="1" connectable="0">
|
98 |
-
<mxGeometry x="-0.1183" y="-5" relative="1" as="geometry">
|
99 |
-
<mxPoint as="offset" />
|
100 |
-
</mxGeometry>
|
101 |
-
</mxCell>
|
102 |
-
<mxCell id="0ipz06AKfEBxF18oT8nj-27" value="" style="endArrow=classic;html=1;rounded=0;exitX=0.5;exitY=1;exitDx=0;exitDy=0;entryX=1;entryY=0;entryDx=0;entryDy=0;" parent="1" source="0ipz06AKfEBxF18oT8nj-15" target="0ipz06AKfEBxF18oT8nj-8" edge="1">
|
103 |
-
<mxGeometry width="50" height="50" relative="1" as="geometry">
|
104 |
-
<mxPoint x="520" y="2980" as="sourcePoint" />
|
105 |
-
<mxPoint x="570" y="2930" as="targetPoint" />
|
106 |
-
<Array as="points">
|
107 |
-
<mxPoint x="470" y="2930" />
|
108 |
-
<mxPoint x="770" y="2930" />
|
109 |
-
<mxPoint x="770" y="2690" />
|
110 |
-
<mxPoint x="770" y="2450" />
|
111 |
-
<mxPoint x="513" y="2450" />
|
112 |
-
</Array>
|
113 |
-
</mxGeometry>
|
114 |
-
</mxCell>
|
115 |
-
<mxCell id="0ipz06AKfEBxF18oT8nj-28" value="Mail / WhatsApp /..." style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="0ipz06AKfEBxF18oT8nj-27" vertex="1" connectable="0">
|
116 |
-
<mxGeometry x="-0.6717" y="3" relative="1" as="geometry">
|
117 |
-
<mxPoint as="offset" />
|
118 |
-
</mxGeometry>
|
119 |
-
</mxCell>
|
120 |
-
<mxCell id="0ipz06AKfEBxF18oT8nj-29" value="" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;" parent="1" source="0ipz06AKfEBxF18oT8nj-31" target="0ipz06AKfEBxF18oT8nj-32" edge="1">
|
121 |
-
<mxGeometry relative="1" as="geometry" />
|
122 |
-
</mxCell>
|
123 |
-
<mxCell id="0ipz06AKfEBxF18oT8nj-30" value="slack / discord / teams" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="0ipz06AKfEBxF18oT8nj-29" vertex="1" connectable="0">
|
124 |
-
<mxGeometry x="-0.3" y="2" relative="1" as="geometry">
|
125 |
-
<mxPoint as="offset" />
|
126 |
-
</mxGeometry>
|
127 |
-
</mxCell>
|
128 |
-
<mxCell id="0ipz06AKfEBxF18oT8nj-31" value="Fetch Relevant historical data &amp; BP" style="whiteSpace=wrap;html=1;" parent="1" vertex="1">
|
129 |
-
<mxGeometry x="217.5" y="2810" width="120" height="60" as="geometry" />
|
130 |
-
</mxCell>
|
131 |
-
<mxCell id="0ipz06AKfEBxF18oT8nj-32" value="Recommendations to Agent" style="whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf;" parent="1" vertex="1">
|
132 |
-
<mxGeometry x="217.5" y="2950" width="120" height="60" as="geometry" />
|
133 |
-
</mxCell>
|
134 |
-
<mxCell id="1TZoFbZY8AwqjJc08LRQ-1" value="email, customer name, contract id, date, category, summary, issue category,T&amp;C, Process descriptions, customer history, issue summary" style="text;html=1;align=center;verticalAlign=middle;resizable=0;points=[];autosize=1;strokeColor=none;fillColor=none;" vertex="1" parent="1">
|
135 |
-
<mxGeometry x="265" y="3028" width="770" height="30" as="geometry" />
|
136 |
-
</mxCell>
|
137 |
-
</root>
|
138 |
-
</mxGraphModel>
|
139 |
-
</diagram>
|
140 |
-
</mxfile>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
rag_app/create_embedding.py
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# embeddings functions
|
2 |
+
#from langchain_community.vectorstores import FAISS
|
3 |
+
#from langchain_community.document_loaders import ReadTheDocsLoader
|
4 |
+
#from langchain_community.vectorstores.utils import filter_complex_metadata
|
5 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
6 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
7 |
+
import time
|
8 |
+
from langchain_core.documents import Document
|
9 |
+
|
10 |
+
|
11 |
+
def create_embeddings(
    docs: list[Document],
    chunk_size: int = 500,
    chunk_overlap: int = 50,
    embedding_model: str = "sentence-transformers/multi-qa-mpnet-base-dot-v1",
) -> tuple[HuggingFaceEmbeddings, list[Document]]:
    """Chunk a sequence of `Document` objects and build an embedding function for them.

    Args:
        docs: Documents to split into chunks.
        chunk_size: Maximum chunk size (in characters). Defaults to 500.
        chunk_overlap: Number of characters overlapping between consecutive
            chunks. Defaults to 50.
        embedding_model: Hugging Face model name used to embed the chunks.

    Returns:
        Tuple of (embeddings, chunks): the `HuggingFaceEmbeddings` instance
        and the list of chunked documents.
    """
    # Raw string fixes the invalid "\." escape (DeprecationWarning today,
    # SyntaxError in future Python); the lookbehind keeps sentence-ending
    # periods attached to the preceding chunk.
    text_splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n", r"(?<=\. )", " ", ""],
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )

    # Stage one: split all docs into chunks, carrying each source document's
    # metadata onto the chunks produced from it.
    st = time.time()
    print('Loading documents and creating chunks ...')
    chunks = text_splitter.create_documents(
        [doc.page_content for doc in docs],
        metadatas=[doc.metadata for doc in docs],
    )
    et = time.time() - st
    print(f'Time taken to chunk {len(docs)} documents: {et} seconds.')

    # Stage two: build the embedding function for the chosen model.
    embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
    print(f"created a total of {len(chunks)} chunks")

    return embeddings, chunks
|
rag_app/generate_summary.py
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain_huggingface import HuggingFaceEndpoint
|
2 |
+
from langchain_core.prompts import PromptTemplate
|
3 |
+
from langchain_core.output_parsers import StrOutputParser
|
4 |
+
import json
|
5 |
+
from dotenv import load_dotenv
|
6 |
+
import os
|
7 |
+
|
8 |
+
# Load environment variables (API token and model id) from a local .env file.
load_dotenv()

# NOTE(review): example.env declares HUGGINGFACEHUB_API_TOKEN and llm_model,
# but these lookups use HUGGINGFACE_API_TOKEN and LLM_MODEL — confirm which
# variable names are actually set in deployment.
HF_API_TOKEN = os.getenv('HUGGINGFACE_API_TOKEN')
model_id=os.getenv('LLM_MODEL')

# Shared Hugging Face inference endpoint used by the summary/keyword chains.
# Low temperature keeps the metadata generation mostly deterministic.
LLM = HuggingFaceEndpoint(
    repo_id=model_id,
    temperature=0.1,
    max_new_tokens=512,
    repetition_penalty=1.2,
    return_full_text=False,
    huggingfacehub_api_token=HF_API_TOKEN)
|
20 |
+
|
21 |
+
def generate_keywords(document: dict,
                      llm_model: HuggingFaceEndpoint = LLM) -> str:
    """Generate a meaningful list of meta keywords for the provided document or chunk.

    Args:
        document: Document (or chunk) whose content is condensed into keywords.
        llm_model: Endpoint used to run the prompt; defaults to the module-level LLM.

    Returns:
        The model's comma-separated keyword list, stripped of surrounding whitespace.
    """

    template = (
        """
        You are a SEO expert bot. Your task is to craft a meaningful list of 5 keywords to organize documents.
        The keywords should help us in searching and retrieving the documents later.

        You will only respond with the clear, concise and meaningful 5 of keywords separated by comma.

        <<<
        Document: {document}
        >>>

        Keywords:
        """
    )

    # Prompt -> LLM -> plain-string output, wired as an LCEL chain.
    prompt = PromptTemplate.from_template(template=template)

    chain = prompt | llm_model | StrOutputParser()
    result = chain.invoke({'document': document})
    return result.strip()
|
45 |
+
|
46 |
+
def generate_description(document: dict,
                         llm_model: HuggingFaceEndpoint = LLM) -> str:
    """Generate a meaningful document description based on document content.

    Args:
        document: Document (or chunk) to summarize.
        llm_model: Endpoint used to run the prompt; defaults to the module-level LLM.

    Returns:
        The model's description text, stripped of surrounding whitespace.
    """

    # NOTE(review): "descripe" below is a typo inside the prompt text sent to
    # the model; left as-is here since it is runtime behavior, not a comment.
    template = (
        """
        You are a SEO expert bot. Your task is to craft a meaningful summary to descripe and organize documents.
        The description should be a meaningful summary of the document's content and help us in searching and retrieving the documents later.

        You will only respond with the clear, concise and meaningful description.

        <<<
        Document: {document}
        >>>

        Description:
        """
    )

    # Prompt -> LLM -> plain-string output, wired as an LCEL chain.
    prompt = PromptTemplate.from_template(template=template)

    chain = prompt | llm_model | StrOutputParser()
    result = chain.invoke({'document': document})
    return result.strip()
|
rag_app/get_db_retriever.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# retriever and qa_chain function
|
2 |
+
|
3 |
+
# HF libraries
|
4 |
+
from langchain.llms import HuggingFaceHub
|
5 |
+
from langchain.embeddings import HuggingFaceHubEmbeddings
|
6 |
+
# vectorestore
|
7 |
+
from langchain.vectorstores import FAISS
|
8 |
+
# retrieval chain
|
9 |
+
from langchain.chains import RetrievalQA
|
10 |
+
# prompt template
|
11 |
+
from langchain.prompts import PromptTemplate
|
12 |
+
from langchain.memory import ConversationBufferMemory
|
13 |
+
|
14 |
+
|
15 |
+
def get_db_retriever(vector_db: str = None):
    """Build a retriever over a locally stored FAISS index.

    Args:
        vector_db: Path to a FAISS index directory. When falsy, a bundled
            default index path is used instead.

    Returns:
        A retriever backed by the loaded FAISS vector store.
    """
    embedder = HuggingFaceHubEmbeddings(
        repo_id="sentence-transformers/multi-qa-mpnet-base-dot-v1"
    )

    # Fall back to the default local index when no explicit path was given.
    index_path = vector_db if vector_db else './vectorstore/py-faiss-multi-mpnet-500'
    vector_store = FAISS.load_local(index_path, embedder)

    return vector_store.as_retriever()
|
rag_app/handle_vector_store.py
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# vectorization functions
|
2 |
+
from langchain_community.vectorstores import FAISS
|
3 |
+
from langchain_community.vectorstores import Chroma
|
4 |
+
from langchain_community.document_loaders import ReadTheDocsLoader
|
5 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
6 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
7 |
+
from langchain_community.retrievers import BM25Retriever
|
8 |
+
from rag_app.create_embedding import create_embeddings
|
9 |
+
from rag_app.generate_summary import generate_description, generate_keywords
|
10 |
+
import time
|
11 |
+
import os
|
12 |
+
from dotenv import load_dotenv
|
13 |
+
|
14 |
+
def build_vector_store(
    docs: list,
    db_path: str,
    embedding_model: str,
    new_db: bool = False,
    chunk_size: int = 500,
    chunk_overlap: int = 50,
):
    """Chunk, enrich, and index documents into FAISS and Chroma vector stores.

    Args:
        docs: Documents to index.
        db_path: Directory for the FAISS index; falls back to the
            FAISS_INDEX_PATH environment variable when None.
        embedding_model: Hugging Face model name used for embeddings.
        new_db: When True, create a fresh FAISS index; otherwise load the
            existing index at the target path and append the new chunks.
        chunk_size: Chunk size passed to the splitter. Defaults to 500.
        chunk_overlap: Overlap between chunks. Defaults to 50.

    Returns:
        A short status message naming the FAISS index location.
    """
    if db_path is None:
        FAISS_INDEX_PATH = os.getenv("FAISS_INDEX_PATH")
    else:
        FAISS_INDEX_PATH = db_path

    embeddings, chunks = create_embeddings(docs, chunk_size, chunk_overlap, embedding_model)

    # Enrich every chunk with LLM-generated metadata used later for retrieval.
    for chunk in chunks:
        chunk.metadata['chunk_keywords'] = generate_keywords(chunk)
        chunk.metadata['chunk_description'] = generate_description(chunk)

    # Load chunks into the FAISS vector store.
    print('Loading chunks into faiss vector store ...')
    st = time.time()
    if new_db:
        db_faiss = FAISS.from_documents(chunks, embeddings)
    else:
        # BUGFIX: FAISS.add_documents / BM25Retriever.add_documents are
        # instance methods and raised TypeError when called on the class.
        # Load the existing index first, then append the new chunks to it.
        db_faiss = FAISS.load_local(FAISS_INDEX_PATH, embeddings)
        db_faiss.add_documents(chunks)
    # Keyword (BM25) retriever over the same chunks; currently not persisted
    # or returned — kept for parity with the original code. TODO: return or
    # persist it if hybrid retrieval is intended.
    bm25_retriever = BM25Retriever.from_documents(chunks)
    db_faiss.save_local(FAISS_INDEX_PATH)
    et = time.time() - st
    print(f'Time taken: {et} seconds.')

    # Mirror the chunks into a local Chroma store as well.
    print('Loading chunks into chroma vector store ...')
    st = time.time()
    persist_directory = './vectorstore/chroma-insurance-agent-1500'
    db_chroma = Chroma.from_documents(chunks, embeddings, persist_directory=persist_directory)
    et = time.time() - st
    print(f'Time taken: {et} seconds.')

    result = f"built vector store at {FAISS_INDEX_PATH}"
    return result
|
59 |
+
|
60 |
+
|
61 |
+
# # Path for saving the FAISS index
|
62 |
+
# FAISS_INDEX_PATH = "./vectorstore/lc-faiss-multi-mpnet-500"
|
63 |
+
|
64 |
+
# try:
|
65 |
+
# # Stage two: Vectorization of the document chunks
|
66 |
+
# model_name = "sentence-transformers/multi-qa-mpnet-base-dot-v1" # Model used for embedding
|
67 |
+
|
68 |
+
# # Initialize HuggingFace embeddings with the specified model
|
69 |
+
# embeddings = HuggingFaceEmbeddings(model_name=model_name)
|
70 |
+
|
71 |
+
# print(f'Loading chunks into vector store ...')
|
72 |
+
# st = time.time() # Start time for performance measurement
|
73 |
+
# # Create a FAISS vector store from the document chunks and save it locally
|
74 |
+
# db = FAISS.from_documents(filter_complex_metadata(chunks), embeddings)
|
75 |
+
# db.save_local(FAISS_INDEX_PATH)
|
76 |
+
# et = time.time() - st # Calculate time taken for vectorization
|
77 |
+
# print(f'Time taken for vectorization and saving: {et} seconds.')
|
78 |
+
# except Exception as e:
|
79 |
+
# print(f"Error during vectorization or FAISS index saving: {e}", file=sys.stderr)
|
80 |
+
|
81 |
+
# alternatively download a preparaed vectorized index from S3 and load the index into vectorstore
|
82 |
+
# Import necessary libraries for AWS S3 interaction, file handling, and FAISS vector stores
|
rag_app/load_data_from_urls.py
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# documents loader function
|
2 |
+
from langchain_community.document_loaders import RecursiveUrlLoader
|
3 |
+
from bs4 import BeautifulSoup as Soup
|
4 |
+
from validators import url as url_validator
|
5 |
+
from langchain_core.documents import Document
|
6 |
+
import time
|
7 |
+
import logging
|
8 |
+
import sys
|
9 |
+
|
10 |
+
logger = logging.getLogger(__name__)
|
11 |
+
|
12 |
+
def load_docs_from_urls(
    urls: list = None,
    max_depth: int = 5,
) -> list[Document]:
    """
    Load documents recursively from a list of URLs.

    ## Args:
        urls (list, optional): A list of URLs to load documents from. Defaults to ["https://docs.python.org/3/"].
        max_depth (int, optional): Maximum depth to recursively load documents from each URL. Defaults to 5.

    ## Returns:
        list: A list of documents loaded from the given URLs.

    ## Raises:
        ValueError: If any URL in the provided list is invalid.
    """
    # Avoid the mutable-default-argument pitfall; fall back to the documented default.
    if urls is None:
        urls = ["https://docs.python.org/3/"]

    stf = time.time()  # Start time for overall performance measurement
    docs = []
    for url in urls:
        # Validate up front so a bad URL fails fast instead of mid-crawl.
        if not url_validator(url):
            raise ValueError(f"Invalid URL: {url}")
        try:
            # Per-URL timing (the previous duplicate outer assignment was a dead store).
            st = time.time()
            loader = RecursiveUrlLoader(
                url=url,
                max_depth=max_depth,
                # Strip HTML down to plain text for downstream processing.
                extractor=lambda x: Soup(x, "html.parser").text,
            )
            docs.extend(loader.load())

            et = time.time() - st  # Calculate time taken for downloading
            log_message = f'Time taken for downloading documents from {url}: {et} seconds.'
            logger.info(log_message)
            print(log_message)
        except Exception as e:
            # Best-effort crawl: log the failure and continue with the remaining URLs.
            log_message = f"Failed to load or parse the URL {url}. Error: {e}"
            logger.error(log_message)
            print(log_message, file=sys.stderr)
    etf = time.time() - stf  # Calculate time taken for scraping all URLs
    print(f'Total time taken for downloading {len(docs)} documents: {etf} seconds.')
    return docs
|
rag_app/load_vector_stores.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# preprocessed vectorstore retrieval
|
2 |
+
import boto3
|
3 |
+
from botocore import UNSIGNED
|
4 |
+
from botocore.client import Config
|
5 |
+
import zipfile
|
6 |
+
from langchain_community.vectorstores import FAISS
|
7 |
+
from langchain_community.vectorstores import Chroma
|
8 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
9 |
+
from dotenv import load_dotenv
|
10 |
+
import os
|
11 |
+
import sys
|
12 |
+
import logging
|
13 |
+
|
14 |
+
# Load environment variables from a .env file
# NOTE: load_dotenv returns a bool (whether a .env file was found), not a config mapping.
config = load_dotenv(".env")

# Retrieve the Hugging Face API token from environment variables
HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')
# S3 bucket that hosts the pre-built, zipped vector-store archives
S3_LOCATION = os.getenv("S3_LOCATION")
# Object key of the zipped FAISS index inside the bucket
FAISS_VS_NAME = os.getenv("FAISS_VS_NAME")
# Local path the FAISS index is extracted to and loaded from
FAISS_INDEX_PATH = os.getenv("FAISS_INDEX_PATH")
# Local persist directory for the Chroma store
CHROMA_DIRECTORY = os.getenv("CHROMA_DIRECTORY")
# Object key of the zipped Chroma store inside the bucket
CHROMA_VS_NAME = os.getenv("CHROMA_VS_NAME")
# Name of the sentence-embedding model shared by both stores
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL")

model_name = EMBEDDING_MODEL
#model_kwargs = {"device": "cuda"}

# Shared embedding function used when loading both the FAISS and Chroma stores below.
# NOTE(review): model_name is None unless EMBEDDING_MODEL is set in the environment — confirm .env always defines it.
embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    # model_kwargs=model_kwargs
)
|
33 |
+
|
34 |
+
## FAISS
|
35 |
+
def get_faiss_vs():
    """Download the pre-built FAISS index archive from S3 and load it.

    Fetches the zip named FAISS_VS_NAME from the public S3_LOCATION bucket,
    unpacks it into ./vectorstore/, and returns the loaded FAISS store.
    On download/extraction failure, logs to stderr (implicitly returns None).
    """
    # Unsigned client: the bucket is publicly readable, so no credentials are required.
    s3_client = boto3.client('s3', config=Config(signature_version=UNSIGNED))

    # Local path the zipped index is written to before extraction.
    archive_path = FAISS_INDEX_PATH + ".zip"
    try:
        print("Downloading the pre-prepared vectorized index from S3...")
        s3_client.download_file(S3_LOCATION, FAISS_VS_NAME, archive_path)

        # Unpack the archive next to the other local vector stores.
        with zipfile.ZipFile(archive_path, 'r') as archive:
            archive.extractall('./vectorstore/')
        print("Download and extraction completed.")
        return FAISS.load_local(FAISS_INDEX_PATH, embeddings)

    except Exception as e:
        print(f"Error during downloading or extracting from S3: {e}", file=sys.stderr)
|
54 |
+
#faissdb = FAISS.load_local(FAISS_INDEX_PATH, embeddings)
|
55 |
+
|
56 |
+
|
57 |
+
## Chroma DB
|
58 |
+
def get_chroma_vs():
    """Download the pre-built Chroma vector store from S3 and load it.

    Fetches the zip named CHROMA_VS_NAME from the public S3_LOCATION bucket,
    extracts it into ./vectorstore/, and returns the loaded Chroma store —
    consistent with get_faiss_vs, which returns its store. Returns None and
    logs to stderr if the download or extraction fails.
    """
    # Initialize an S3 client with unsigned configuration for public access
    s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))

    VS_DESTINATION = CHROMA_DIRECTORY+".zip"
    try:
        s3.download_file(S3_LOCATION, CHROMA_VS_NAME, VS_DESTINATION)
        with zipfile.ZipFile(VS_DESTINATION, 'r') as zip_ref:
            zip_ref.extractall('./vectorstore/')
        chromadb = Chroma(persist_directory=CHROMA_DIRECTORY, embedding_function=embeddings)
        chromadb.get()  # sanity call: forces the persisted collection to be readable
        # Bug fix: the store was previously built but never returned to the caller.
        return chromadb
    except Exception as e:
        print(f"Error during downloading or extracting from S3: {e}", file=sys.stderr)
|
rag_app/react_agent.py
ADDED
File without changes
|
rag_app/simple_qa_chain.py
ADDED
File without changes
|
requirements.txt
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
langchain
|
2 |
+
langchain-community
|
3 |
+
langchain-huggingface
|
4 |
+
langchain-text-splitters
|
5 |
+
beautifulsoup4
|
6 |
+
faiss-cpu
|
7 |
+
chromadb
|
8 |
+
validators
|
9 |
+
sentence_transformers
|
10 |
+
typing-extensions
|
11 |
+
unstructured
|
12 |
+
gradio
|
13 |
+
boto3
|
test_this.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from rag_app.load_data_from_urls import load_docs_from_urls
|
2 |
+
from rag_app.create_embedding import create_embeddings
|
3 |
+
from rag_app.generate_summary import generate_description, generate_keywords
|
4 |
+
from rag_app.handle_vector_store import build_vector_store
|
5 |
+
|
6 |
+
docs = load_docs_from_urls(["https://www.wuerttembergische.de/"],5)
|
7 |
+
|
8 |
+
for doc in docs:
|
9 |
+
keywords=generate_keywords(doc)
|
10 |
+
description=generate_description(doc)
|
11 |
+
doc.metadata['keywords']=keywords
|
12 |
+
doc.metadata['description']=description
|
13 |
+
|
14 |
+
build_vector_store(docs, './vectorstore/faiss-insurance-agent-1500','sentence-transformers/multi-qa-mpnet-base-dot-v1',True,1500,150)
|
15 |
+
|
16 |
+
|
17 |
+
#print(create_embeddings(docs))
|
vectorstore/placeholder.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
This file keeps the folder from being deleted for now
|