Spaces:
Runtime error
Runtime error
File size: 3,554 Bytes
23d152f c7e71a9 23d152f 1275a3a 23d152f f2838c8 23d152f 0b9b0c3 84f477c 23d152f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
import torch
from auto_gptq import AutoGPTQForCausalLM
from langchain import HuggingFacePipeline, PromptTemplate
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from pdf2image import convert_from_path
from transformers import AutoTokenizer, TextStreamer, pipeline, AutoModelForCausalLM
from chatBot.common.pdfToText import loadLatestPdf
from transformers import LlamaTokenizer
from langchain.document_loaders import PyPDFLoader
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
print(DEVICE)
data = loadLatestPdf()
embeddings = HuggingFaceInstructEmbeddings(
model_name="hkunlp/instructor-large", model_kwargs={"device": DEVICE}
)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
texts = text_splitter.split_documents(data)
db = Chroma.from_documents(texts, embeddings, persist_directory="db")
model_name_or_path = "TheBloke/Llama-2-13B-chat-GPTQ"
model_name_or_path = "TheBloke/Llama-2-13B-chat-GGUF"
model_basename = "model"
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
# model = AutoGPTQForCausalLM.from_quantized(
# model_name_or_path,
# device_map="auto",
# revision="gptq-4bit-128g-actorder_True",
# model_basename=model_basename,
# use_safetensors=True,
# trust_remote_code=True,
# inject_fused_attention=False,
# device=DEVICE,
# quantize_config=None,
# )
model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
device_map="auto",
trust_remote_code=False,
revision="main")
DEFAULT_SYSTEM_PROMPT = """
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
""".strip()
def generate_prompt(prompt: str, system_prompt: str = DEFAULT_SYSTEM_PROMPT) -> str:
return f"""
[INST] <<SYS>>
{system_prompt}
<</SYS>>
{prompt} [/INST]
""".strip()
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
text_pipeline = pipeline(
"text-generation",
model=model,
tokenizer=tokenizer,
max_new_tokens=1024,
temperature=0,
top_p=0.95,
repetition_penalty=1.15,
streamer=streamer,
)
llm = HuggingFacePipeline(pipeline=text_pipeline, model_kwargs={"temperature": 0})
SYSTEM_PROMPT = "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer."
template = generate_prompt(
"""
{context}
Question: {question}
""",
system_prompt=SYSTEM_PROMPT,
)
prompt = PromptTemplate(template=template, input_variables=["context", "question"])
llamaModel = RetrievalQA.from_chain_type(
llm=llm,
chain_type="stuff",
retriever=db.as_retriever(search_kwargs={"k": 2}),
return_source_documents=True,
chain_type_kwargs={"prompt": prompt},
) |