# Source listing of a Hugging Face Space application script.
# Space status at capture time: running on CPU Upgrade. File size: 6,032 bytes.
# (The viewer's per-line commit hashes and line-number gutter are not part of the program.)
import torch
import transformers
import gradio as gr
from ragatouille import RAGPretrainedModel
from huggingface_hub import InferenceClient
import re
from datetime import datetime
import json
# Number of documents requested from the retriever for each query.
retrieve_results = 10

# When true, placeholder texts are read from sample_outputs.json at start-up.
show_examples = False

# Entries offered in the model dropdown; the literal string 'None' turns the LLM step off.
llm_models_to_choose = [
    'mistralai/Mixtral-8x7B-Instruct-v0.1',
    'mistralai/Mistral-7B-Instruct-v0.2',
    'google/gemma-7b-it',
    'None',
]

# Keyword arguments forwarded to the text-generation call.
generate_kwargs = {
    'temperature': None,
    'max_new_tokens': 512,
    'top_p': None,
    'do_sample': False,
}
# Load the prebuilt index. This sits outside the try block below, so a load
# failure propagates and stops start-up.
RAG = RAGPretrainedModel.from_index("colbert/indexes/arxiv_colbert")

# Run one throwaway query at start-up to check that the retriever responds.
try:
    gr.Info("Setting up retriever, please wait...")
    rag_initial_output = RAG.search("what is Mistral?", k = 1)
    gr.Info("Retriever working successfully!")
except Exception:
    # Best-effort check: report the problem and keep starting the app.
    # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
    gr.Warning("Retriever not working!")
# Heading placed above the rendered search results.
mark_text = '# 🔍 Search Results\n'
# Page header; the index date is appended below when it can be determined.
header_text = "# ArXivCS RAG \n"

# Best effort: the header simply stays without a date when README.md is
# missing, unreadable, has no "Index Last Updated" line, or carries a date
# that is not a valid calendar day.
try:
    with open("README.md", "r", encoding="utf-8") as f:
        mdfile = f.read()
except (OSError, UnicodeDecodeError):
    mdfile = ''

date_pattern = r'Index Last Updated : \d{4}-\d{2}-\d{2}'
match = re.search(date_pattern, mdfile)
if match is not None:
    date = match.group().split(': ')[1]
    try:
        formatted_date = datetime.strptime(date, '%Y-%m-%d').strftime('%d %b %Y')
    except ValueError:
        # The digits matched the pattern but do not form a real date (e.g. month 13).
        pass
    else:
        header_text += f'Index Last Updated: {formatted_date}\n'
# Initial contents of the answer box placeholder and of the results pane.
if not show_examples:
    output_placeholder = None
    md_text_initial = ''
else:
    # Example texts are kept in a JSON file next to the script.
    with open("sample_outputs.json", "r") as f:
        sample_outputs = json.load(f)
    output_placeholder = sample_outputs['output_placeholder']
    md_text_initial = sample_outputs['search_placeholder']
def rag_cleaner(inp):
    """Format one retrieved document as a numbered text entry.

    Reads ``rank`` and ``content`` from ``inp`` and ``title`` / ``_time`` from
    ``inp['document_metadata']``. A missing key raises ``KeyError``.
    """
    rank = inp['rank']
    metadata = inp['document_metadata']
    title = metadata['title']
    abstract = inp['content']
    published = metadata['_time']
    return "{}. <b> {} </b> \n Date : {} \n Abstract: {}".format(
        rank, title, published, abstract
    )
def get_prompt_text(question, context, formatted = True, llm_model_picked = 'mistralai/Mistral-7B-Instruct-v0.2'):
    """Build the prompt that is sent to the LLM.

    Args:
        question: The user's question.
        context: Text of the retrieved abstracts, inserted verbatim.
        formatted: When true, wrap the prompt in the chat markup of the
            selected model family.
        llm_model_picked: Model identifier; the family is detected by the
            substrings ``'mistralai'`` and ``'gemma'``. ``None`` is treated
            like an unknown model.

    Returns:
        The model-specific prompt when ``formatted`` is true and the family is
        recognised, otherwise a plain-text prompt without chat markup.
    """
    if formatted:
        model_name = llm_model_picked or ''
        sys_instruction = f"Context:\n {context} \n Given the following scientific paper abstracts, take a deep breath and lets think step by step to answer the question. Cite the titles of your sources when answering, do not cite links or dates."
        message = f"Question: {question}"

        if 'mistralai' in model_name:
            return f"<s>[INST] {sys_instruction} {message}[/INST]"

        if 'gemma' in model_name:
            # Gemma's chat format expects the model turn to be opened after
            # the user turn; without it the model is not cued to answer.
            return f"<bos><start_of_turn>user\n{sys_instruction} {message}<end_of_turn>\n<start_of_turn>model\n"

    # Unformatted request or unrecognised model family: plain prompt.
    return f"Context:\n {context} \n Given the following info, take a deep breath and lets think step by step to answer the question: {question}. Cite the titles of your sources when answering.\n\n"
def get_references(question, retriever, k = retrieve_results):
    """Return what ``retriever.search`` produces for ``question``.

    ``k`` is passed straight through to the search call; its default is the
    value ``retrieve_results`` had when this function was defined.
    """
    return retriever.search(query=question, k=k)
def get_rag(message):
    """Search the module-level ``RAG`` retriever for ``message`` with the default ``k``."""
    return get_references(question=message, retriever=RAG)
# Page layout.
# NOTE(review): the nesting of the containers is reconstructed from the order
# of the components — confirm against the deployed layout.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    header = gr.Markdown(header_text)

    with gr.Group():
        # Search box; submitting it starts the retrieval + answer chain.
        msg = gr.Textbox(label='Search', placeholder='What is Mistral?')

        with gr.Accordion("Advanced Settings", open=False), gr.Row(equal_height=True):
            llm_model = gr.Dropdown(
                choices=llm_models_to_choose,
                value='mistralai/Mistral-7B-Instruct-v0.2',
                label='LLM Model',
            )
            llm_results = gr.Slider(
                minimum=4,
                maximum=10,
                value=5,
                step=1,
                interactive=True,
                label="Top n results as context",
            )
            stream_results = gr.Checkbox(value=True, label="Stream output")

    output_text = gr.Textbox(
        show_label=True,
        container=True,
        label='LLM Answer',
        visible=True,
        placeholder=output_placeholder,
    )
    # Invisible textbox; the submit chain uses it to hand the prompt from the
    # retrieval step to the LLM step.
    input = gr.Textbox(show_label=False, visible=False)
    gr_md = gr.Markdown(mark_text + md_text_initial)
def update_with_rag_md(message, llm_results_use = 5, llm_model_picked = 'mistralai/Mistral-7B-Instruct-v0.2'):
    """Run retrieval for ``message`` and prepare both UI outputs.

    Args:
        message: The search query.
        llm_results_use: How many of the top results are put into the LLM
            prompt as context.
        llm_model_picked: Model identifier, forwarded to ``get_prompt_text``.

    Returns:
        A ``(markdown, prompt)`` tuple: the markdown listing of the retrieved
        papers and the prompt built from the first ``llm_results_use`` results.
    """
    rag_out = get_rag(message)

    sections = [mark_text]
    # Iterate over what was actually returned (at most `retrieve_results`
    # entries) instead of indexing 0..retrieve_results-1, which raised
    # IndexError whenever the retriever came back with fewer hits.
    for rag_answer in rag_out[:retrieve_results]:
        metadata = rag_answer['document_metadata']
        doc_id = rag_answer['document_id']
        # Line breaks are removed from titles and author lists before rendering.
        title = metadata['title'].replace('\n','')
        date = metadata['_time']
        authors = metadata['authors'].replace('\n','')

        paper_title = f'''### {date} | [{title}](https://arxiv.org/abs/{doc_id}) | [⬇️](https://arxiv.org/pdf/{doc_id})\n'''
        authors_formatted = f'*{authors}*' + ' \n\n'
        paper_abs = rag_answer['content']
        sections.append(paper_title + authors_formatted + paper_abs + '\n---------------\n'+ '\n')

    md_text_updated = ''.join(sections)
    context = '\n\n'.join(rag_cleaner(out) for out in rag_out[:llm_results_use])
    prompt = get_prompt_text(message, context, llm_model_picked = llm_model_picked)
    return md_text_updated, prompt
def ask_llm(prompt, llm_model_picked = 'mistralai/Mistral-7B-Instruct-v0.2', stream_outputs = False):
    """Generate the LLM answer for ``prompt``, yielding text for the answer box.

    This is a generator function, so every value meant for the UI has to be
    yielded; a value passed to ``return`` inside a generator is discarded by
    the caller iterating over it.

    Args:
        prompt: The fully built prompt.
        llm_model_picked: Model identifier; the literal string ``'None'``
            disables generation and produces a fixed notice instead.
        stream_outputs: When true, yield the growing answer piece by piece;
            otherwise yield the complete answer once.

    Yields:
        The answer text accumulated so far (streaming) or the whole answer
        (non-streaming). An empty string is yielded when the inference call
        fails.
    """
    model_disabled_text = "LLM Model is disabled"

    if llm_model_picked == 'None':
        if stream_outputs:
            output = ""
            for char in model_disabled_text:
                output += char
                yield output
        else:
            yield model_disabled_text
        return

    client = InferenceClient(llm_model_picked)
    try:
        response = client.text_generation(prompt, **generate_kwargs, stream=stream_outputs, details=False, return_full_text=False)
    except Exception:
        gr.Warning("LLM Inference rate limit reached, try again later!")
        yield ""
        return

    if stream_outputs:
        # With stream=True the call's result is iterated chunk by chunk.
        output = ""
        for chunk in response:
            output += chunk
            yield output
    else:
        yield response
# Event listeners must be registered while a Blocks context is active, so the
# app is re-entered explicitly here instead of relying on surrounding indentation.
with demo:
    # Step 1 renders the search results and writes the prompt into the hidden
    # `input` textbox; step 2 runs only after step 1 succeeded and sends that
    # prompt to the LLM.
    msg.submit(
        update_with_rag_md,
        [msg, llm_results, llm_model],
        [gr_md, input],
    ).success(
        ask_llm,
        [input, llm_model, stream_results],
        output_text,
    )

demo.queue(default_concurrency_limit=10).launch()