"""Build and persist three LlamaIndex indexes over one directory of text files.

The script loads every document under ``DATA_DIR`` once, then builds a
``DocumentSummaryIndex``, a ``VectorStoreIndex`` and a ``SummaryIndex`` from
those documents and persists each one to its own directory.
"""

import nest_asyncio
from langchain_community.embeddings.ollama import OllamaEmbeddings
from llama_index.core import (
    DocumentSummaryIndex,
    Settings,
    SimpleDirectoryReader,
    SummaryIndex,
    VectorStoreIndex,
    get_response_synthesizer,
)
from llama_index.core.node_parser import SentenceSplitter
from llama_index.llms.ollama import Ollama

# Input directory and the three persist targets. The on-disk names are kept
# exactly as they were (including their spelling) because other code may
# load the indexes from these locations.
DATA_DIR = "/data1/home/purui/projects/chatbot/tests/data/txt"
DOC_SUMMARY_INDEX_DIR = (
    "/data1/home/purui/projects/chatbot/tests/index/sexual_knowdledge_index"
)
VECTOR_INDEX_DIR = (
    "/data1/home/purui/projects/chatbot/tests/index/sexual_knowdledge_vectorindex"
)
SUMMARY_INDEX_DIR = (
    "/data1/home/purui/projects/chatbot/tests/index/sexual_knowdledge_summaryindex"
)

# Patch asyncio to allow nested event loops; presumably needed because the
# response synthesizer below is created with use_async=True.
nest_asyncio.apply()

# Use the same Ollama model for generation and for embeddings, and register
# both as the global LlamaIndex defaults.
llm = Ollama(model="llama3:instruct")
# NOTE(review): this is a LangChain embeddings object, not a LlamaIndex one;
# confirm the LlamaIndex <-> LangChain embeddings integration is installed so
# that Settings accepts it.
embed_model = OllamaEmbeddings(model="llama3:instruct")
Settings.llm = llm
Settings.embed_model = embed_model

node_parser = SentenceSplitter(chunk_size=500, chunk_overlap=50)

# Load the corpus once; all three indexes below are built from it.
docs = SimpleDirectoryReader(input_dir=DATA_DIR).load_data()

# --- Document summary index -------------------------------------------------
response_synthesizer = get_response_synthesizer(
    response_mode="tree_summarize",
    use_async=True,
)
doc_summary_index = DocumentSummaryIndex.from_documents(
    docs,
    llm=llm,
    transformations=[node_parser],
    response_synthesizer=response_synthesizer,
    show_progress=True,
)
doc_summary_index.storage_context.persist(DOC_SUMMARY_INDEX_DIR)

# --- Vector store index -----------------------------------------------------
# NOTE(review): unlike the document summary index, this index and the summary
# index are not given `node_parser`, so they use whatever transformations
# LlamaIndex applies by default -- confirm that is intended.
vector_store_index = VectorStoreIndex.from_documents(
    docs,
    show_progress=True,
)
vector_store_index.storage_context.persist(VECTOR_INDEX_DIR)

# --- Summary (list) index ---------------------------------------------------
summary_index = SummaryIndex.from_documents(
    docs,
    show_progress=True,
)
summary_index.storage_context.persist(SUMMARY_INDEX_DIR)