"""Build a retrieval-augmented QA chain over a local FAISS index.

Importing this module has side effects: it downloads the GGUF model file,
rebuilds the vector store through ``create_db_from_text()``, loads the index
and the LLM, and exposes the ready-to-use chain as ``llm_chain``.
"""

import os

from langchain_community.llms import CTransformers
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnableSequence
from langchain.chains import RetrievalQA
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from huggingface_hub import hf_hub_download

# NOTE: earlier experiments used GPT4AllEmbeddings for embeddings and
# llama-cpp-python (Llama.from_pretrained) for model loading; the current
# pipeline uses HuggingFaceEmbeddings and CTransformers instead.

# Download the quantized model into an app-local Hugging Face cache.
cache_path = "/home/user/app/hf_cache"
os.makedirs(cache_path, exist_ok=True)

model_file = hf_hub_download(
    repo_id="Pudding48/TinyLlamaTest",
    filename="tinyllama-1.1b-chat-v1.0.Q8_0.gguf",
    cache_dir=cache_path,
)

# Vector store location
vector_dp_path = "/home/user/app/vectorstores/db_faiss"

# Kept after the path constants (rather than at the top of the file) so the
# original order of import-time side effects is unchanged.
from prepare_vector_dp import create_db_from_text  # noqa: E402

# The index is rebuilt on every import, before it is loaded below.
create_db_from_text()


def load_llm(model_file: str) -> CTransformers:
    """Load the GGUF model through the CTransformers wrapper.

    Args:
        model_file: Path to the model file on disk.

    Returns:
        A CTransformers LLM configured for CPU-only inference.
    """
    # Generation settings must go inside ``config``: the wrapper forwards only
    # that dict to ctransformers. Passing temperature / max_new_tokens /
    # context_length as top-level keyword arguments does not reach the model.
    return CTransformers(
        model=model_file,
        model_type="llama",
        config={
            "gpu_layers": 0,  # CPU only
            "temperature": 0.01,
            "max_new_tokens": 128,
            "context_length": 512,
        },
    )


def creat_prompt(template: str) -> PromptTemplate:
    """Wrap *template* in a PromptTemplate with ``context`` and ``question`` slots.

    The misspelled name is kept so existing callers keep working.
    """
    return PromptTemplate(template=template, input_variables=["context", "question"])


def create_qa_chain(prompt, llm, db):
    """Create a "stuff" RetrievalQA chain.

    Args:
        prompt: Prompt passed to the chain via ``chain_type_kwargs``.
        llm: Language model that generates the answer.
        db: Vector store; its retriever returns the single best match (k=1).

    Returns:
        The chain built by ``RetrievalQA.from_chain_type``; source documents
        are not included in its output.
    """
    retriever = db.as_retriever(search_kwargs={"k": 1})
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=False,
        chain_type_kwargs={"prompt": prompt},
    )


def read_vector_db():
    """Load the FAISS index stored at ``vector_dp_path``.

    Returns:
        The FAISS vector store, using the all-MiniLM-L6-v2 sentence embeddings.
    """
    embedding_model = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    # SECURITY: allow_dangerous_deserialization=True unpickles the stored
    # index. Presumably acceptable because the index is produced locally by
    # create_db_from_text() -- never point this at an untrusted file.
    return FAISS.load_local(
        vector_dp_path, embedding_model, allow_dangerous_deserialization=True
    )


# Build everything
db = read_vector_db()
llm = load_llm(model_file)

# Vietnamese system prompt: answer from the supplied context and say
# "don't know" instead of inventing an answer.
# NOTE(review): the markers below are ChatML-style; TinyLlama-1.1B-Chat-v1.0
# is usually documented with a Zephyr-style template -- confirm which format
# this checkpoint expects.
template = (
    "<|im_start|>system\n"
    "Sử dụng thông tin sau đây để trả lời câu hỏi. \n"
    "Nếu bạn không biết câu trả lời, hãy nói không biết, đừng cố tạo ra câu trả lời\n"
    " {context}<|im_end|>\n"
    "<|im_start|>user\n"
    "{question}<|im_end|>\n"
    "<|im_start|>assistant"
)

prompt = creat_prompt(template)
llm_chain = create_qa_chain(prompt, llm, db)