from pathlib import Path
from urllib.parse import urlparse

import gradio as gr
import psutil
from ctransformers import AutoModelForCausalLM
from huggingface_hub import hf_hub_download
_ = """ | |
snapshot_download( | |
repo_id="TheBloke/Luna-AI-Llama2-Uncensored-GGML", | |
allow_patterns="luna-ai-llama2-uncensored.ggmlv3.q2_K.bin", | |
revision="main", | |
local_dir="models", | |
local_dir_use_symlinks=False, # default "auto" | |
) | |
hf_hub_download( | |
repo_id=repo_id, | |
filename=model_filename, | |
local_dir=local_path, | |
local_dir_use_symlinks=True, | |
) | |
# """ | |
# 4.06G
_ = """
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Luna-AI-Llama2-Uncensored-GGML",
    model_file="luna-ai-llama2-uncensored.ggmlv3.q2_K.bin",
    model_type="llama", gpu_layers=32, threads=2,
)
# """
# _ = Path("models", "luna-ai-llama2-uncensored.ggmlv3.q2_K.bin").absolute().as_posix()
# assert Path(_).exists(), f"{_} does not exist, perhaps snapshot_download failed?"

# URL = "https://huggingface.co/TheBloke/falcon-7b-instruct-GGML/blob/main/falcon-7b-instruct.ggccv1.q4_1.bin"
URL = "https://huggingface.co/TheBloke/Luna-AI-Llama2-Uncensored-GGML/resolve/main/luna-ai-llama2-uncensored.ggmlv3.q2_K.bin"
repo_id = "/".join(urlparse(URL).path.strip("/").split("/")[:2])
model_file = Path(URL).name
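# For the URL above, the two expressions evaluate to (illustrative sanity
# check, derived from the /<owner>/<repo>/resolve/<rev>/<file> layout of
# Hub download URLs):
#   repo_id    == "TheBloke/Luna-AI-Llama2-Uncensored-GGML"
#   model_file == "luna-ai-llama2-uncensored.ggmlv3.q2_K.bin"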
model_path = hf_hub_download(
    repo_id=repo_id,
    revision="main",
    filename=model_file,
    local_dir="models",
    # local_dir_use_symlinks=True,
)

llm = AutoModelForCausalLM.from_pretrained(
    # repo_id,  # alternatively, let ctransformers fetch the file itself
    # model_file=model_file,
    model_path,  # local path to the downloaded GGML weights
    model_type="llama",
    threads=psutil.cpu_count(logical=False),  # one thread per physical core
)
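# Optional smoke test (a minimal sketch; uncomment to check that the model
# loads and streams tokens before wiring up the UI):
# for token in llm("User: Hello!\nAssistant:", max_new_tokens=16, stream=True):
#     print(token, end="", flush=True)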
TITLE = f"""<h2 align="center">chat-ggml ({model_file})</h2>"""
USER_NAME = "User"
BOT_NAME = "Assistant"
DEFAULT_INSTRUCTIONS = """The following is a conversation between a highly knowledgeable and intelligent AI assistant and a human User. In the following interactions, User and Assistant will converse and Assistant will answer User's questions.
"""
RETRY_COMMAND = "/retry"
STOP_STR = f"\n{USER_NAME}:"
# Tokens that could be the start of STOP_STR; they are buffered during
# streaming until the next token disambiguates them.
STOP_SUSPECT_LIST = [":", "\n", "User"]
def chat_accordion():
    with gr.Accordion("Parameters", open=False):
        temperature = gr.Slider(
            minimum=0.1,
            maximum=2.0,
            value=0.8,
            step=0.1,
            interactive=True,
            label="Temperature",
        )
        top_p = gr.Slider(
            minimum=0.1,
            maximum=0.99,
            value=0.9,
            step=0.01,
            interactive=True,
            label="p (nucleus sampling)",
        )
    return temperature, top_p
def format_chat_prompt(message: str, chat_history, instructions: str) -> str:
    instructions = instructions.strip()
    prompt = instructions
    for user_message, bot_message in chat_history:
        prompt = f"{prompt}\n{USER_NAME}: {user_message}\n{BOT_NAME}: {bot_message}"
    prompt = f"{prompt}\n{USER_NAME}: {message}\n{BOT_NAME}:"
    return prompt
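# Illustrative example:
#   format_chat_prompt("Hi", [["Hello", "Hey there!"]], "Be brief.")
# returns:
#   Be brief.
#   User: Hello
#   Assistant: Hey there!
#   User: Hi
#   Assistant: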
def chat():
    with gr.Column(elem_id="chat_container"):
        with gr.Row():
            chatbot = gr.Chatbot(elem_id="chatbot")
        with gr.Row():
            inputs = gr.Textbox(
                placeholder=f"Hello {BOT_NAME} !!",
                label="Type an input and press Enter",
                max_lines=3,
            )

    with gr.Row(elem_id="button_container"):
        with gr.Column():
            retry_button = gr.Button("♻️ Retry last turn")
        with gr.Column():
            delete_turn_button = gr.Button("🧽 Delete last turn")
        with gr.Column():
            clear_chat_button = gr.Button("✨ Delete all history")
    gr.Examples(
        [
            ["Hey! Any recommendations for my holidays in Abu Dhabi?"],
            ["What's the Everett interpretation of quantum mechanics?"],
            [
                "Give me a list of the top 10 dive sites you would recommend around the world."
            ],
            ["Can you tell me more about deep-water soloing?"],
            [
                "Can you write a short tweet about the Apache 2.0 release of our latest AI model?"
            ],
        ],
        inputs=inputs,
        label="Click on any example and press Enter in the input textbox!",
    )
    with gr.Row(elem_id="param_container"):
        with gr.Column():
            temperature, top_p = chat_accordion()
        with gr.Column():
            with gr.Accordion("Instructions", open=False):
                instructions = gr.Textbox(
                    placeholder="LLM instructions",
                    value=DEFAULT_INSTRUCTIONS,
                    lines=10,
                    interactive=True,
                    label="Instructions",
                    max_lines=16,
                    show_label=False,
                )
    def run_chat(
        message: str, chat_history, instructions: str, temperature: float, top_p: float
    ):
        if not message or (message == RETRY_COMMAND and len(chat_history) == 0):
            yield chat_history
            return

        if message == RETRY_COMMAND and chat_history:
            # Replay the last user message against the truncated history.
            prev_turn = chat_history.pop(-1)
            user_message, _ = prev_turn
            message = user_message

        prompt = format_chat_prompt(message, chat_history, instructions)
        chat_history = chat_history + [[message, ""]]
        stream = llm(
            prompt,
            max_new_tokens=2048,
            stop=[STOP_STR, "<|endoftext|>"],
            temperature=temperature,
            top_p=top_p,
            stream=True,
        )
        acc_text = ""
        for idx, text_token in enumerate(stream):
            if text_token in STOP_SUSPECT_LIST:
                # Might be the start of STOP_STR: buffer it instead of
                # yielding. If the stream ends here, the buffered text
                # (e.g. a trailing "\nUser:") is silently dropped.
                acc_text += text_token
                continue
            if idx == 0 and text_token.startswith(" "):
                text_token = text_token[1:]
            acc_text += text_token
            last_turn = list(chat_history.pop(-1))
            last_turn[-1] += acc_text
            chat_history = chat_history + [last_turn]
            yield chat_history
            acc_text = ""
    def delete_last_turn(chat_history):
        if chat_history:
            chat_history.pop(-1)
        return {chatbot: gr.update(value=chat_history)}

    def run_retry(
        message: str, chat_history, instructions: str, temperature: float, top_p: float
    ):
        yield from run_chat(RETRY_COMMAND, chat_history, instructions, temperature, top_p)

    def clear_chat():
        return []
    inputs.submit(
        run_chat,
        [inputs, chatbot, instructions, temperature, top_p],
        outputs=[chatbot],
        show_progress="minimal",
    )
    inputs.submit(lambda: "", inputs=None, outputs=inputs)  # clear the textbox
    delete_turn_button.click(delete_last_turn, inputs=[chatbot], outputs=[chatbot])
    retry_button.click(
        run_retry,
        [inputs, chatbot, instructions, temperature, top_p],
        outputs=[chatbot],
        show_progress="minimal",
    )
    clear_chat_button.click(clear_chat, [], chatbot)
def get_demo():
    with gr.Blocks(
        # css="""#chat_container {width: 700px; margin-left: auto; margin-right: auto;}
        #     #button_container {width: 700px; margin-left: auto; margin-right: auto;}
        #     #param_container {width: 700px; margin-left: auto; margin-right: auto;}""",
        css="""#chatbot {
            font-size: 14px;
            min-height: 300px;
        }"""
    ) as demo:
        gr.HTML(TITLE)
        with gr.Row():
            with gr.Column():
                gr.Markdown(
                    f"""**Chat with [{model_file}](https://huggingface.co/{repo_id}), brainstorm ideas, discuss your holiday plans, and more!**

✨ This demo runs a quantized GGML build of [Luna AI Llama2 Uncensored](https://huggingface.co/{repo_id}) on CPU via [ctransformers](https://github.com/marella/ctransformers). The UI is adapted from the [Falcon-Chat demo](https://huggingface.co/spaces/HuggingFaceH4/falcon-chat) by the [HuggingFace H4 team](https://huggingface.co/HuggingFaceH4).

⚠️ **Limitations**: the model can and will produce factually incorrect information, hallucinating facts and actions. As it has not undergone any advanced tuning/alignment, it can produce problematic outputs, especially if prompted to do so.
"""
                )
        chat()

    return demo
if __name__ == "__main__":
    demo = get_demo()
    demo.queue(max_size=64, concurrency_count=8)
    demo.launch(server_name="0.0.0.0", server_port=7860)
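# To run locally (assuming the Space's usual dependencies, e.g.
# `pip install "gradio==3.*" ctransformers huggingface_hub psutil`;
# `queue(concurrency_count=...)` is the Gradio 3.x signature):
#     python app.py
# then open http://localhost:7860 in a browser.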