# SexBot / different_index_strategies.py
# Pew404's picture
# Upload folder using huggingface_hub
# 13fbd2e verified
from llama_index.core import SimpleDirectoryReader, get_response_synthesizer
from llama_index.core.node_parser import SentenceSplitter
from llama_index.llms.ollama import Ollama
from langchain_community.embeddings.ollama import OllamaEmbeddings
from llama_index.core import Settings
import nest_asyncio
# Patch asyncio so event loops can be nested (presumably needed because the
# summarization step later in this script is requested with use_async=True).
nest_asyncio.apply()

# One local Ollama model is used both for generation and for embeddings.
OLLAMA_MODEL = "llama3:instruct"
llm = Ollama(model=OLLAMA_MODEL)
embed_model = OllamaEmbeddings(model=OLLAMA_MODEL)

# Register both as the global llama_index defaults (LLM first, then embedder).
Settings.llm, Settings.embed_model = llm, embed_model

# Sentence-based splitter: chunks of 500 with an overlap of 50.
node_parser = SentenceSplitter(chunk_overlap=50, chunk_size=500)

# Load every document found in the source text directory.
reader = SimpleDirectoryReader(
    input_dir="/data1/home/purui/projects/chatbot/tests/data/txt"
)
docs = reader.load_data()
# nodes = node_parser.get_nodes_from_documents(documents=docs, show_progress=True)
# Document Summary Index
from llama_index.core import DocumentSummaryIndex

# Synthesizer used while building the index: "tree_summarize" mode, with
# async execution enabled.
response_synthesizer = get_response_synthesizer(
    use_async=True,
    response_mode="tree_summarize",
)

# Build the index from the loaded documents, chunking them with the
# explicitly configured node_parser and using the Ollama LLM.
doc_summary_index = DocumentSummaryIndex.from_documents(
    docs,
    show_progress=True,
    response_synthesizer=response_synthesizer,
    transformations=[node_parser],
    llm=llm,
)

# Persist the index's storage context to its own directory.
doc_summary_persist_dir = (
    "/data1/home/purui/projects/chatbot/tests/index/sexual_knowdledge_index"
)
doc_summary_index.storage_context.persist(doc_summary_persist_dir)
# Vector Store Index
from llama_index.core import VectorStoreIndex

# Build a vector index over the same documents.
# NOTE(review): no `transformations` are passed here, so node_parser is not
# applied to this index and library defaults are used instead -- confirm
# that this difference from the document summary index is intended.
vector_store_index = VectorStoreIndex.from_documents(docs, show_progress=True)

# Persist the index's storage context to its own directory.
vector_persist_dir = (
    "/data1/home/purui/projects/chatbot/tests/index/sexual_knowdledge_vectorindex"
)
vector_store_index.storage_context.persist(vector_persist_dir)
# Summary Index
from llama_index.core import SummaryIndex

# Build a summary index over the same documents.
# NOTE(review): as no `transformations` are passed, node_parser is not used
# for this index -- confirm that relying on library defaults is intended.
summary_index = SummaryIndex.from_documents(docs, show_progress=True)

# Persist the index's storage context to its own directory.
summary_persist_dir = (
    "/data1/home/purui/projects/chatbot/tests/index/sexual_knowdledge_summaryindex"
)
summary_index.storage_context.persist(summary_persist_dir)