# Build llama-index indexes (document-summary, vector, flat summary) over a
# local directory of text files using a local Ollama model.
# (source revision: 13fbd2e)
# --- Environment & model setup --------------------------------------------
# Configure a local Ollama model as both LLM and embedding backend, define the
# shared chunking strategy, and load the source documents.
from llama_index.core import (
    Settings,
    SimpleDirectoryReader,
    get_response_synthesizer,
)
from llama_index.core.node_parser import SentenceSplitter
from llama_index.llms.ollama import Ollama
from langchain_community.embeddings.ollama import OllamaEmbeddings
import nest_asyncio

# Allow re-entrant event loops (needed for use_async=True when this script is
# run inside a notebook whose loop is already running).
nest_asyncio.apply()

llm = Ollama(model="llama3:instruct")
# NOTE(review): this is the *langchain* embeddings class, not the native
# llama_index.embeddings.ollama.OllamaEmbedding. llama-index can wrap
# langchain embeddings, but confirm the wrapper resolves in this version —
# otherwise Settings.embed_model will reject it at assignment time.
embed_model = OllamaEmbeddings(model="llama3:instruct")
Settings.llm = llm
Settings.embed_model = embed_model

# Chunking strategy shared by the index builds below.
node_parser = SentenceSplitter(chunk_size=500, chunk_overlap=50)

# Load every file under the input directory as Documents.
docs = SimpleDirectoryReader(
    input_dir="/data1/home/purui/projects/chatbot/tests/data/txt"
).load_data()
# --- Document summary index ------------------------------------------------
from llama_index.core import DocumentSummaryIndex

# Summarize each document with the tree_summarize strategy; async mode lets
# the per-chunk LLM calls overlap instead of running serially.
tree_synthesizer = get_response_synthesizer(
    response_mode="tree_summarize",
    use_async=True,
)

doc_summary_index = DocumentSummaryIndex.from_documents(
    docs,
    llm=llm,
    transformations=[node_parser],
    response_synthesizer=tree_synthesizer,
    show_progress=True,
)

# Persist to disk. The "knowdledge" spelling is kept as-is so anything that
# already reads this path keeps working.
doc_summary_index.storage_context.persist("/data1/home/purui/projects/chatbot/tests/index/sexual_knowdledge_index")
# --- Vector index ----------------------------------------------------------
from llama_index.core import VectorStoreIndex

# Pass the shared SentenceSplitter explicitly so chunking matches the
# document-summary index (previously this build silently fell back to the
# library's default splitter, producing differently sized chunks).
vector_store_index = VectorStoreIndex.from_documents(
    docs,
    transformations=[node_parser],
    show_progress=True,
)
# Persist to disk ("knowdledge" spelling kept so existing readers of this
# path still find the index).
vector_store_index.storage_context.persist("/data1/home/purui/projects/chatbot/tests/index/sexual_knowdledge_vectorindex")
# --- Flat summary index ----------------------------------------------------
from llama_index.core import SummaryIndex

# Use the shared splitter here too, for consistent chunking across all three
# index builds (previously this one used the library default).
summary_index = SummaryIndex.from_documents(
    docs,
    transformations=[node_parser],
    show_progress=True,
)
# Persist to disk ("knowdledge" spelling kept so existing readers of this
# path still find the index).
summary_index.storage_context.persist("/data1/home/purui/projects/chatbot/tests/index/sexual_knowdledge_summaryindex")