from llama_index.core import SimpleDirectoryReader, get_response_synthesizer
from llama_index.core import DocumentSummaryIndex
from llama_index.core import Settings
from llama_index.core.node_parser import SentenceSplitter
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding
import nest_asyncio

nest_asyncio.apply()

# Load data; file_metadata must be a callable mapping a file path to a metadata dict
react_doc = SimpleDirectoryReader(
    input_dir="../data/pdf",
    file_metadata=lambda file_path: {"category": "AI applications"},
).load_data()

llm = Ollama(model="llama3", request_timeout=120.0, base_url="http://localhost:11434")
# Use LlamaIndex's native Ollama embedding (instead of LangChain's OllamaEmbeddings)
# so it plugs into Settings directly
embed_model = OllamaEmbedding(model_name="llama3", base_url="http://localhost:11434")
Settings.llm = llm
Settings.embed_model = embed_model

# Text splitter
sentence_splitter = SentenceSplitter(chunk_size=500, chunk_overlap=0)

# response_mode: how responses are synthesized from the retrieved nodes
response_synthesizer = get_response_synthesizer(
    response_mode="tree_summarize",
    use_async=True,
)

# Build index: transformations specifies document preprocessing,
# response_synthesizer controls how the per-document summaries are generated
index = DocumentSummaryIndex.from_documents(
    react_doc,
    llm=llm,
    transformations=[sentence_splitter],
    response_synthesizer=response_synthesizer,
    show_progress=True,
)

# Persist the index to disk
index.storage_context.persist("../kb/index")
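
# --- Usage sketch (an assumption, not from the original): reload the persisted
# --- index in a fresh session and query it. Settings.llm / Settings.embed_model
# --- must be configured as above before loading, or LlamaIndex falls back to
# --- its defaults. The query string below is a hypothetical example.
from llama_index.core import StorageContext, load_index_from_storage

storage_context = StorageContext.from_defaults(persist_dir="../kb/index")
loaded_index = load_index_from_storage(storage_context)

# Query the document summaries with the same tree_summarize response mode
query_engine = loaded_index.as_query_engine(response_mode="tree_summarize")
response = query_engine.query("Summarize the main ideas of the ReAct paper.")
print(response)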