# (removed non-code web-scrape residue: file-size line, commit hash, and line-number gutter)
# Build a DocumentSummaryIndex over local PDF documents using a local
# Ollama LLM, then persist the index to disk for later retrieval.
from llama_index.core import SimpleDirectoryReader, get_response_synthesizer
from llama_index.core import DocumentSummaryIndex
from llama_index.llms.ollama import Ollama
from llama_index.core.node_parser import SentenceSplitter
from langchain_community.embeddings import OllamaEmbeddings
from llama_index.core import Settings
import nest_asyncio

# Allow re-entrant event loops: `use_async=True` below may need to run
# inside an already-running loop (e.g. a Jupyter notebook).
nest_asyncio.apply()

# Load data.
# BUG FIX: `file_metadata` must be a callable (filename -> metadata dict),
# not a plain dict — SimpleDirectoryReader calls it once per file, so
# passing a dict raises a TypeError during load_data().
react_doc = SimpleDirectoryReader(
    input_dir="../data/pdf",
    file_metadata=lambda _filename: {"category": "AI applications"},
).load_data()

llm = Ollama(model="llama3", request_timeout=120, base_url="http://localhost:11434")
# NOTE(review): OllamaEmbeddings here is LangChain's class; llama_index only
# accepts it via its LangChain wrapper when that integration is installed.
# Consider llama_index.embeddings.ollama.OllamaEmbedding instead — TODO confirm.
embed_model = OllamaEmbeddings(model="llama3")
Settings.llm = llm
Settings.embed_model = embed_model

# Text splitter: chunk documents into pieces of at most 500 tokens, no overlap.
sentence_splitter = SentenceSplitter(chunk_size=500, chunk_overlap=0)

# response_mode controls how retrieved nodes are combined into a summary.
response_synthesizer = get_response_synthesizer(
    response_mode="tree_summarize",
    use_async=True,
)

# Build the index: `transformations` preprocesses documents (sentence
# splitting here); `response_synthesizer` controls per-document summary
# generation.
index = DocumentSummaryIndex.from_documents(
    react_doc,
    llm=llm,
    transformations=[sentence_splitter],
    response_synthesizer=response_synthesizer,
    show_progress=True,
)

# Persist the index to disk.
index.storage_context.persist("../kb/index")