# Create vecdb - notebook

In [1]:
# Imports for this cell, grouped stdlib -> third-party so dependencies are
# visible in one place instead of being scattered between statements.
import os

from dotenv import load_dotenv
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_together.embeddings import TogetherEmbeddings

# --- Configuration: everything a reader might want to tune ---
TOGETHER_EMBEDDING_MODEL = "togethercomputer/m2-bert-80M-2k-retrieval"
TRANSCRIPT_URL = "https://lexfridman.com/sam-altman-2-transcript/"
CHUNK_SIZE = 2000      # passed as chunk_size to RecursiveCharacterTextSplitter
CHUNK_OVERLAP = 250    # passed as chunk_overlap to RecursiveCharacterTextSplitter

# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()
together_api_key = os.getenv("TOGETHER_API_KEY")
if not together_api_key:
    # Fail fast with an actionable message; otherwise the missing key only
    # surfaces later as a less obvious error from the embeddings client.
    raise RuntimeError(
        "TOGETHER_API_KEY is not set. Add it to your environment or to a .env "
        "file before creating Together embeddings."
    )

# The key is not passed explicitly here; the client is expected to read
# TOGETHER_API_KEY from the environment populated above.
embeddings = TogetherEmbeddings(model=TOGETHER_EMBEDDING_MODEL)

# Load
loader = WebBaseLoader(TRANSCRIPT_URL)
data = loader.load()

# Split
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
all_splits = text_splitter.split_documents(data)
if not all_splits:
    # An empty result means the page could not be fetched or parsed; stop here
    # rather than letting the vector-store cells fail on an empty document list.
    raise ValueError(f"No text chunks were produced from {TRANSCRIPT_URL}")

In [4]:
# Add to vectorDB
import hashlib  # stdlib; only used to derive stable chunk ids below

# Without explicit ids, every run of this cell stores the chunks under freshly
# generated ids, so re-running it keeps appending duplicate copies to the
# persisted "vecdb_test" directory. Deterministic ids (chunk position + content
# hash) make a re-run over identical splits overwrite the same records instead.
# The position prefix keeps ids unique even if two chunks share identical text.
chunk_ids = [
    f"{position}-{hashlib.sha1(doc.page_content.encode('utf-8')).hexdigest()}"
    for position, doc in enumerate(all_splits)
]

vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=embeddings,
    ids=chunk_ids,
    collection_name="rag-chroma",
    persist_directory="vecdb_test",
)
retriever = vectorstore.as_retriever()

### Huggingface transformers embeddings

A more complicated but "free" way of creating embeddings: the model runs locally instead of being called through the Together API.
You will need to install:
```
sentence-transformers
einops
opt_einsum
```

In [7]:
from langchain_community.embeddings import HuggingFaceEmbeddings

In [8]:
# Local embedding model: same checkpoint name as the Together-hosted one,
# loaded through HuggingFaceEmbeddings instead of a remote API.
model_name = "togethercomputer/m2-bert-80M-2k-retrieval"

# Options handed to the underlying model loader: run on CPU.
# NOTE(review): trust_remote_code=True presumably allows code shipped in the
# model repository to execute locally -- confirm the repo is trusted.
model_kwargs = dict(device="cpu", trust_remote_code=True)

# Options handed to the encode step: embeddings are left un-normalized.
encode_kwargs = dict(normalize_embeddings=False)

hf = HuggingFaceEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=model_kwargs,
    model_name=model_name,
)

  from .autonotebook import tqdm as notebook_tqdm
No sentence-transformers model found with name togethercomputer/m2-bert-80M-2k-retrieval. Creating a new one with MEAN pooling.
You are using a model of type m2_bert to instantiate a model of type bert. This is not supported for all configurations of models and can yield errors.


-- Bidirectional: True
-- Using Long Conv Residual: True
-- Hyena w: 10
-- Hyena w mod: 1
-- Hyena filter order: 128
-- Hyena filter dropout: 0.2
-- Hyena filter wd: 0.1
-- Hyena filter emb dim: 5
-- Hyena filter lr: 0.001
-- Hyena filter lr pos emb: 1e-05


In [9]:
# Add to vectorDB
import hashlib  # stdlib; only used to derive stable chunk ids below

# Without explicit ids, every run of this cell stores the chunks under freshly
# generated ids, so re-running it keeps appending duplicate copies to the
# persisted "vecdb_hf_test" directory. Deterministic ids (chunk position +
# content hash) make a re-run over identical splits overwrite the same records.
# The position prefix keeps ids unique even if two chunks share identical text.
chunk_ids = [
    f"{position}-{hashlib.sha1(doc.page_content.encode('utf-8')).hexdigest()}"
    for position, doc in enumerate(all_splits)
]

# NOTE: this rebinds `vectorstore` and `retriever`; after this cell they refer
# to the store built with the local `hf` embeddings in "vecdb_hf_test".
vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=hf,
    ids=chunk_ids,
    collection_name="rag-chroma",
    persist_directory="vecdb_hf_test",
)
retriever = vectorstore.as_retriever()