File size: 1,729 Bytes
13fbd2e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
"""Build and persist three llama_index indexes over a directory of text files.

Uses a local Ollama ``llama3:instruct`` model for both LLM calls and
embeddings.  Side effects: reads every file under ``DATA_DIR``, calls the
local Ollama server, and writes three persisted index directories under
``INDEX_DIR``.
"""

from llama_index.core import (
    DocumentSummaryIndex,
    Settings,
    SimpleDirectoryReader,
    SummaryIndex,
    VectorStoreIndex,
    get_response_synthesizer,
)
from llama_index.core.node_parser import SentenceSplitter
from llama_index.llms.ollama import Ollama
from langchain_community.embeddings.ollama import OllamaEmbeddings
import nest_asyncio

# tree_summarize with use_async=True drives an asyncio event loop; nest_asyncio
# lets that work inside environments that already run one (e.g. Jupyter).
nest_asyncio.apply()

# Hoisted path constants — the absolute prefix was previously repeated inline
# four times.  NOTE(review): "knowdledge" in the persist paths below is a typo,
# but it is preserved byte-for-byte because other code may load the persisted
# indexes from these exact directories.
DATA_DIR = "/data1/home/purui/projects/chatbot/tests/data/txt"
INDEX_DIR = "/data1/home/purui/projects/chatbot/tests/index"

llm = Ollama(model="llama3:instruct")
# NOTE(review): this is the *langchain* embeddings class, relied on here via
# llama_index's langchain-embedding wrapping when assigned to Settings.
# Consider the native llama_index.embeddings.ollama.OllamaEmbedding — confirm.
embed_model = OllamaEmbeddings(model="llama3:instruct")
Settings.llm = llm
Settings.embed_model = embed_model

# One splitter shared by all three index builds so every index chunks the
# corpus identically.  Previously only DocumentSummaryIndex received it, so
# the vector and summary indexes silently used the default splitter settings.
node_parser = SentenceSplitter(chunk_size=500, chunk_overlap=50)

docs = SimpleDirectoryReader(input_dir=DATA_DIR).load_data()

# 1) Document-summary index: one LLM-generated summary per source document,
#    synthesized bottom-up (tree_summarize) with async fan-out.
response_synthesizer = get_response_synthesizer(
    response_mode="tree_summarize", use_async=True
)
doc_summary_index = DocumentSummaryIndex.from_documents(
    docs,
    llm=llm,
    transformations=[node_parser],
    response_synthesizer=response_synthesizer,
    show_progress=True,
)
doc_summary_index.storage_context.persist(f"{INDEX_DIR}/sexual_knowdledge_index")

# 2) Vector index over the same chunks; embeddings come from Settings.embed_model.
vector_store_index = VectorStoreIndex.from_documents(
    docs,
    transformations=[node_parser],
    show_progress=True,
)
vector_store_index.storage_context.persist(
    f"{INDEX_DIR}/sexual_knowdledge_vectorindex"
)

# 3) Plain list (summary) index — no embeddings, sequential scan at query time.
summary_index = SummaryIndex.from_documents(
    docs,
    transformations=[node_parser],
    show_progress=True,
)
summary_index.storage_context.persist(
    f"{INDEX_DIR}/sexual_knowdledge_summaryindex"
)