Pudding48 committed
Commit fd4565a · verified · 1 parent: 45e2c69

Update qabot.py

Files changed (1)
  1. qabot.py +68 -66
qabot.py CHANGED
@@ -1,66 +1,68 @@
- from langchain_community.llms import CTransformers
- from langchain.prompts import PromptTemplate
- from langchain_core.runnables import RunnableSequence
- from langchain.chains import RetrievalQA
- from langchain_community.embeddings import GPT4AllEmbeddings
- from langchain_community.vectorstores import FAISS
-
- from huggingface_hub import hf_hub_download
-
- model_file = hf_hub_download(
-     repo_id="Pudding48/TinyLlamaTest", # Replace with your model repo
-     filename="tinyllama-1.1b-chat-v1.0.Q8_0.gguf",
-     cache_dir="model" # Will be created in the Space's environment
- )
-
- # Configuration
- #model_file = "model/tinyllama-1.1b-chat-v1.0.Q8_0.gguf"
- vector_dp_path = "vectorstores/db_faiss"
-
- # Load LLM
- def load_llm(model_file):
-     llm = CTransformers(
-         model=model_file,
-         model_type="llama",
-         temperature=0.01,
-         config={'gpu_layers': 0},
-         max_new_tokens=128,
-         context_length=512
-     )
-     return llm
-
- # Create the prompt template
- def creat_prompt(template):
-     prompt = PromptTemplate(template=template, input_variables=["context","question"])
-     return prompt
-
- # Create the pipeline chain (replacing LLMChain)
- def create_qa_chain(prompt, llm, db):
-     llm_chain = RetrievalQA.from_chain_type(
-         llm = llm,
-         chain_type = "stuff",
-         retriever =db.as_retriever(search_kwargs = {"k":1}),
-         return_source_documents = False,
-         chain_type_kwargs={'prompt':prompt}
-     )
-     return llm_chain
-
- def read_vector_db():
-     embedding_model = GPT4AllEmbeddings(model_file = "model/all-minilm-l6-v2-q4_0.gguf")
-     db = FAISS.load_local(vector_dp_path, embedding_model,allow_dangerous_deserialization=True)
-     return db
-
- db = read_vector_db()
- llm = load_llm(model_file)
- # Prompt template
- template = """<|im_start|>system\nSử dụng thông tin sau đây để trả lời câu hỏi. Nếu bạn không biết câu trả lời, hãy nói không biết, đừng cố tạo ra câu trả lời\n
- {context}<|im_end|>\n<|im_start|>user\n{question}<|im_end|>\n<|im_start|>assistant"""
-
- # Initialize the components
- prompt = creat_prompt(template)
- llm_chain =create_qa_chain(prompt, llm, db)
-
- # Test-run the chain
- question = "Khoa công nghệ thông tin thành lập năm nào ?" # "When was the Faculty of Information Technology founded?"
- response = llm_chain.invoke({"query": question})
- print(response)
+ from langchain_community.llms import CTransformers
+ from langchain.prompts import PromptTemplate
+ from langchain_core.runnables import RunnableSequence  # imported but currently unused
+ from langchain.chains import RetrievalQA
+ from langchain_community.embeddings import GPT4AllEmbeddings
+ from langchain_community.vectorstores import FAISS
+
+ from huggingface_hub import hf_hub_download
+ # !pip install llama-cpp-python
+
+ # from llama_cpp import Llama
+
+ # model_file = Llama.from_pretrained(
+ #     repo_id="Pudding48/TinyLLamaTest",
+ #     filename="tinyllama-1.1b-chat-v1.0.Q8_0.gguf",
+ # )
+
+ model_file = hf_hub_download(
+     repo_id="Pudding48/TinyLlamaTest", # 🟢 This must be a model repo, not a Space
+     filename="tinyllama-1.1b-chat-v1.0.Q8_0.gguf",
+     cache_dir="model"
+ )
+
+ # Vector store location
+ vector_dp_path = "vectorstores/db_faiss"
+
+ from prepare_vector_dp import create_db_from_text
+ create_db_from_text()  # rebuilds the FAISS index at startup; a hypothetical sketch of this helper follows the diff
+
+ # Load LLM with CTransformers
+ def load_llm(model_file):
+     return CTransformers(
+         model=model_file,
+         model_type="llama",
+         temperature=0.01,
+         config={'gpu_layers': 0},
+         max_new_tokens=128,
+         context_length=512
+     )
+
+ # Create the prompt
+ def creat_prompt(template):
+     return PromptTemplate(template=template, input_variables=["context", "question"])
+
+ # Create QA pipeline
+ def create_qa_chain(prompt, llm, db):
+     return RetrievalQA.from_chain_type(
+         llm=llm,
+         chain_type="stuff",
+         retriever=db.as_retriever(search_kwargs={"k": 1}),
+         return_source_documents=False,
+         chain_type_kwargs={'prompt': prompt}
+     )
+
+ # Load vector DB
+ def read_vector_db():
+     embedding_model = GPT4AllEmbeddings(model_file=model_file)  # NOTE: now the LLM's GGUF; the previous revision used model/all-minilm-l6-v2-q4_0.gguf here
+     return FAISS.load_local(vector_dp_path, embedding_model, allow_dangerous_deserialization=True)
+
+ # Build everything
+ db = read_vector_db()
+ llm = load_llm(model_file)
+
+ # Vietnamese system prompt: "Use the following information to answer the question.
+ # If you don't know the answer, say you don't know; don't try to make up an answer."
+ template = """<|im_start|>system\nSử dụng thông tin sau đây để trả lời câu hỏi. Nếu bạn không biết câu trả lời, hãy nói không biết, đừng cố tạo ra câu trả lời\n
+ {context}<|im_end|>\n<|im_start|>user\n{question}<|im_end|>\n<|im_start|>assistant"""
+
+ prompt = creat_prompt(template)
+ llm_chain = create_qa_chain(prompt, llm, db)
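
The commit imports create_db_from_text from a prepare_vector_dp module that is not part of this diff. A plausible minimal sketch of that helper, assuming it splits one local text file and embeds it with the GPT4All embedding model the previous revision loaded; the source path, chunk sizes, and embedding file are guesses, not the author's code:

# prepare_vector_dp.py -- hypothetical sketch; the real module is not shown in this commit
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.vectorstores import FAISS

def create_db_from_text(source_path="data/source.txt"):  # assumed input path
    with open(source_path, encoding="utf-8") as f:
        raw_text = f.read()
    # Split the document into overlapping chunks small enough for the 512-token context
    splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)
    chunks = splitter.split_text(raw_text)
    # Embed with the model the previous revision paired with this index
    embeddings = GPT4AllEmbeddings(model_file="model/all-minilm-l6-v2-q4_0.gguf")
    db = FAISS.from_texts(chunks, embeddings)
    db.save_local("vectorstores/db_faiss")  # must match vector_dp_path in qabot.py
    return db

The previous revision ended with a smoke test that this commit removes, so qabot.py now only builds llm_chain. A minimal way to exercise the chain, mirroring the removed lines; the file name run_qabot.py is illustrative:

# run_qabot.py -- usage sketch; importing qabot runs the whole setup at import time
from qabot import llm_chain

question = "Khoa công nghệ thông tin thành lập năm nào ?"  # "When was the Faculty of Information Technology founded?"
response = llm_chain.invoke({"query": question})  # RetrievalQA expects its input under the "query" key
print(response["result"])  # invoke() returns a dict; the generated answer is under "result"

Note that because create_db_from_text() runs at module import time, every start of the Space re-embeds the source text before the first question can be answered.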