Spaces:
Runtime error
Runtime error
File size: 4,529 Bytes
7593f06 e5ef682 7593f06 e5ef682 7593f06 e5ef682 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
import gradio as gr
from datasets import load_dataset
import os
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig
import torch
from threading import Thread
from sentence_transformers import SentenceTransformer
from datasets import load_dataset
import time
# Hugging Face access token; a missing HF_TOKEN raises KeyError at import time.
token = os.environ["HF_TOKEN"]

# Sentence-embedding model used to embed incoming queries.
ST = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")

# Load the pre-embedded corpus and build a FAISS index over the column that
# holds each row's embedding.
dataset = load_dataset("AI-4-Health/embedded-dataset")
data = dataset["train"].add_faiss_index("embeddings")

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

# 4-bit NF4 quantization (with double quantization) to lower GPU memory usage.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=token,
    quantization_config=bnb_config,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)

# Token ids passed to generate() as eos_token_id, i.e. where generation stops.
terminators = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|eot_id|>")]

# System message sent with every chat request.
SYS_PROMPT = """You are an assistant for answering questions.
You are given the extracted parts of a long document and a question. Provide a conversational answer.
If you don't know the answer, just say "I do not know." Don't make up an answer."""
def search(query: str, k: int = 3):
    """Embed ``query`` and fetch its ``k`` nearest rows from the indexed dataset.

    Args:
        query: Free-text query to embed.
        k: Number of nearest examples to retrieve.

    Returns:
        The ``(scores, retrieved_examples)`` pair produced by the nearest-example
        lookup on the ``"embeddings"`` index.
    """
    query_vector = ST.encode(query)
    nearest = data.get_nearest_examples("embeddings", query_vector, k=k)
    scores, retrieved_examples = nearest
    return scores, retrieved_examples
def format_prompt(prompt, retrieved_documents, k):
    """Build the user message from the question and the retrieved passages.

    Args:
        prompt: The user's question.
        retrieved_documents: Mapping whose ``'text'`` entry is a sequence of
            retrieved passages.
        k: Maximum number of passages to include.

    Returns:
        ``"Question:<prompt>\\nContext:"`` followed by each included passage
        terminated with a newline.
    """
    # Slice instead of indexing range(k): retrieval may return fewer than k
    # passages, which previously raised IndexError. A negative k is clamped to
    # zero so it still yields no passages, as the range() loop did.
    passages = retrieved_documents['text'][:max(k, 0)]
    context = "".join(f"{passage}\n" for passage in passages)
    return f"Question:{prompt}\nContext:" + context
# Markdown-style heading and short description strings.
# NOTE(review): neither name is referenced by the code visible in this file.
TITLE = "# RAG"
DESCRIPTION = "\nHPP Chatbot\n"
@spaces.GPU(duration=150)
def talk(prompt):
    """Answer ``prompt`` using the single nearest retrieved document as context.

    Args:
        prompt: The user's question.

    Returns:
        A 3-tuple ``(answer, filename, filename)``: the generated text followed
        by the ``filename`` field of the retrieved document, repeated once per
        remaining output component wired to this handler.
    """
    k = 1  # number of retrieved documents
    _scores, retrieved_documents = search(prompt, k)
    filename = retrieved_documents['filename'][0]
    print("filename is ", filename)

    formatted_prompt = format_prompt(prompt, retrieved_documents, k)
    formatted_prompt = formatted_prompt[:2000]  # to avoid GPU OOM
    messages = [
        {"role": "system", "content": SYS_PROMPT},
        {"role": "user", "content": formatted_prompt},
    ]
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    # Generate exactly once. A second, blocking model.generate() call used to
    # run before this one and its result was thrown away, doubling GPU time
    # per request without affecting the returned text.
    streamer = TextIteratorStreamer(
        tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
    )
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=1024,
        do_sample=True,
        top_p=0.95,
        temperature=0.75,
        eos_token_id=terminators,
    )
    # generate() runs on a worker thread so this thread can drain the streamer.
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()
    answer = "".join(streamer)
    t.join()  # make sure generation has fully finished before returning
    return answer, filename, filename
def update_document(filename):
    """Return the text of ``datasets/<filename>`` for display.

    Args:
        filename: Name of a file under the ``datasets/`` directory. The value
            comes from a user-editable textbox, so it is treated as untrusted.

    Returns:
        The file's content decoded as ISO-8859-15, or ``""`` when ``filename``
        is empty.

    Raises:
        ValueError: If ``filename`` resolves outside ``datasets/``.
        OSError: If the file cannot be opened (e.g. it does not exist).
    """
    if not filename:
        # Nothing selected yet; do not try to open the directory itself.
        return ""

    base_dir = os.path.abspath("datasets")
    target = os.path.abspath("datasets/" + filename)
    # Reject "../" style names that would escape the datasets directory.
    if os.path.commonpath([base_dir, target]) != base_dir:
        raise ValueError(f"filename escapes the datasets directory: {filename!r}")

    with open(target, "r", encoding='iso-8859-15') as file:
        return file.read()
# NOTE(review): re-assigns the same values as the identical TITLE/DESCRIPTION
# definitions earlier in this file; the repetition is redundant.
TITLE = "# RAG"
DESCRIPTION = "\nHPP Chatbot\n"
# Assemble the demo UI and wire its events.
# NOTE(review): the original indentation was lost, so the nesting below (the
# Row holding only the prompt box and the Submit button) is an assumption;
# confirm against the intended layout.
with gr.Blocks() as demo:
    with gr.Row():
        prompt_input = gr.Textbox(label="Enter your prompt")
        submit_button = gr.Button("Submit")
    chat_output = gr.Textbox(label="Chat Response", lines=5)
    filename = gr.Textbox(label="File Name", lines=1)
    file_display = gr.Textbox(label="File Content", lines=10)

    # talk() fills the answer box and the file-name box, and also writes the
    # file name into the file-content box.
    submit_button.click(talk, prompt_input, [chat_output, filename, file_display])

    # Whenever the file-content box changes, overwrite it with the text of the
    # file named in the file-name box.
    file_display.change(update_document, filename, file_display)

demo.launch(debug=True, share=True)