|
from llama_index.core import SimpleDirectoryReader, get_response_synthesizer |
|
from llama_index.core import DocumentSummaryIndex |
|
from llama_index.llms.ollama import Ollama |
|
from llama_index.core.node_parser import SentenceSplitter |
|
from langchain_community.embeddings import OllamaEmbeddings |
|
from llama_index.core import Settings |
|
import nest_asyncio |
|
|
|
# Patch the already-running event loop so the async summarization path
# (use_async=True below) works inside notebooks / REPLs that own a loop.
nest_asyncio.apply()


# Load every PDF under ../data/pdf, tagging each file with the same metadata.
# FIX: SimpleDirectoryReader's `file_metadata` parameter expects a callable
# (filename -> dict), not a plain dict — the reader calls it per file, so a
# bare dict raised "TypeError: 'dict' object is not callable" at load time.
react_doc = SimpleDirectoryReader(
    input_dir="../data/pdf",
    file_metadata=lambda _filename: {"category": "AI applications"},
).load_data()


# Local Ollama-served llama3; generous timeout since summarization calls are slow.
llm = Ollama(model="llama3", request_timeout=120, base_url="http://localhost:11434")

# NOTE(review): OllamaEmbeddings here is the *LangChain* class; assigning it to
# llama_index Settings only works through the langchain bridge
# (llama-index-embeddings-langchain) — confirm that package is installed, or
# switch to llama_index's native OllamaEmbedding.
embed_model = OllamaEmbeddings(model="llama3")

# Register both as the process-wide defaults for all llama_index components.
Settings.llm = llm
Settings.embed_model = embed_model


# Split documents into ~500-token, non-overlapping sentence-aligned chunks.
sentence_splitter = SentenceSplitter(chunk_size=500, chunk_overlap=0)


# Tree-summarize folds chunk summaries bottom-up into one per-document summary;
# use_async parallelizes the many per-chunk LLM calls (enabled by nest_asyncio).
response_synthesizer = get_response_synthesizer(
    response_mode="tree_summarize",
    use_async=True,
)


# Build the index: one LLM-generated summary per source document, later usable
# for summary-based retrieval.
index = DocumentSummaryIndex.from_documents(
    react_doc,
    llm=llm,
    transformations=[sentence_splitter],
    response_synthesizer=response_synthesizer,
    show_progress=True,
)


# Persist the full storage context (docstore, index store, summaries) to disk.
index.storage_context.persist("../kb/index")
|
|