Spaces:

TeamTonic
/

TonicsYI-6B-200k

Paused

File size: 8,062 Bytes

9ff18cc
2e98411
9ff18cc
d43a99c
 
ab4ecf4
 
6e49c29
9ff18cc
50d589f
 
db5b405
b70a398
50d589f
390738c
 
9ff18cc
97d635c
f8c306d
9ff18cc
68f6e9e
9ff18cc
 
 
a926d81
 
 
6ea968f
a926d81
91aaa3e
9ff18cc
 
 
 
 
f8c306d
 
9ff18cc
 
 
 
 
f8c306d
9ff18cc
f8c306d
9ff18cc
 
cf9bb0c
 
 
5d8f4a6
 
9ff18cc
cf9bb0c
deda174
 
 
9ff18cc
 
 
 
 
 
 
 
 
 
 
 
 
 
f8c306d
c7eff8d
9ff18cc
 
 
68f6e9e
 
 
9ff18cc
 
f8c306d
9ff18cc
 
 
 
f8c306d
9ff18cc
 
 
 
 
 
 
 
 
 
49774f4
9ff18cc
 
 
 
97d635c
9ff18cc
 
 
 
 
 
 
 
 
 
 
 
f8c306d
9ff18cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8c306d
9ff18cc
 
 
 
 
 
 
 
 
 
 
 
 
85b4edc
9ff18cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8c306d
9ff18cc
 
 
 
 
 
 
 
 
 
 
 
 
85b4edc
9ff18cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173da86

from transformers import AutoModelForCausalLM, AutoTokenizer
from tokenization_yi import YiTokenizer
import torch
import os
import gradio as gr
import sentencepiece

# Hugging Face Hub repository the model weights are loaded from.
model_id = "01-ai/Yi-34B-200K"

# Allocator tuning for PyTorch's CUDA caching allocator (max split size, in MB).
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:50'
device = "cuda" if torch.cuda.is_available() else "cpu"

# The tokenizer is built from the local `./tokenizer.model` vocabulary file.
tokenizer = YiTokenizer(vocab_file="./tokenizer.model")
# `device_map` has to be a placement strategy ("auto", "balanced", ...), a real
# device name, or an explicit mapping. The previous literal string "device" is
# none of these, so "auto" is used to let the loader place the 8-bit weights.
# NOTE(review): 8-bit loading presumably needs a CUDA device — confirm the
# target hardware before deploying on CPU-only machines.
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_8bit=True, trust_remote_code=True)

def run(message, chat_history, max_new_tokens=4056, temperature=3.5, top_p=0.9, top_k=800):
    """Sample a continuation of the conversation from the model.

    The prompt produced by ``get_prompt`` is tokenized, moved to the model's
    device and handed to ``model.generate`` with sampling enabled.

    Args:
        message: Latest user text.
        chat_history: Earlier ``(user_input, response)`` pairs.
        max_new_tokens: Token budget added on top of the prompt length.
        temperature: Sampling temperature forwarded to ``generate``.
        top_p: Nucleus-sampling threshold forwarded to ``generate``.
        top_k: Top-k cutoff forwarded to ``generate``.

    Returns:
        The decoded text of the output positions after the prompt, with
        special tokens skipped.
    """
    prompt_ids = tokenizer.encode(
        get_prompt(message, chat_history),
        return_tensors='pt',
    ).to(model.device)
    prompt_length = prompt_ids.shape[1]

    sampling_options = dict(
        # Total length limit = prompt tokens + requested new tokens.
        max_length=max_new_tokens + prompt_length,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True,
    )
    generated = model.generate(prompt_ids, **sampling_options)

    # Decode only the positions that follow the prompt in the first sequence.
    continuation = generated[:, prompt_length:][0]
    return tokenizer.decode(continuation, skip_special_tokens=True)

def get_prompt(message, chat_history):
    """Flatten the conversation and the new message into one prompt string.

    Every past turn is rendered as ``" {user_input} {response} "`` in
    chronological order (user text first, then the model's reply), and the
    new message is appended last. Previously each turn was emitted with the
    response before the user input, which reversed the conversation order.

    Args:
        message: The new user text to append at the end of the prompt.
        chat_history: Iterable of ``(user_input, response)`` pairs, oldest
            first. ``None`` is treated as an empty history.

    Returns:
        The concatenated prompt string.
    """
    turns = chat_history or []
    parts = []
    for index, (user_input, response) in enumerate(turns):
        # The very first user input is kept verbatim; later ones are stripped.
        if index:
            user_input = user_input.strip()
        parts.append(f" {user_input} {response.strip()} ")
    # The new message is only stripped when it follows earlier turns.
    if turns:
        message = message.strip()
    parts.append(f"{message}")
    return ''.join(parts)

# Markdown/HTML shown at the top of the page via gr.Markdown(DESCRIPTION).
# The heading no longer ends with a stray double quote and the badge link is
# no longer followed by an unmatched closing </h3> tag.
DESCRIPTION = """
# 👋🏻Welcome to 🙋🏻‍♂️Tonic's🧑🏻‍🚀YI-200K🚀
You can use this Space to test out the current model [Tonic/YI](https://huggingface.co/01-ai/Yi-34B)
You can also use 🧑🏻‍🚀YI-200K🚀 by cloning this space. 🧬🔬🔍 Simply click here: <a style="display:inline-block" href="https://huggingface.co/spaces/Tonic1/YiTonic?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a>
Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder's🛠️community on 👻Discord: [Discord](https://discord.gg/nXx5wbX9) On 🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Polytonic](https://github.com/tonic-ai) & contribute to 🌟 [PolyGPT](https://github.com/tonic-ai/polygpt-alpha)
"""

# Upper bound of the "Max New Tokens" slider; generate() rejects larger requests.
MAX_MAX_NEW_TOKENS = 4056
# Initial value of the "Max New Tokens" slider.
DEFAULT_MAX_NEW_TOKENS = 1256
# Threshold that check_input_token_length() compares the accumulated input against.
MAX_INPUT_TOKEN_LENGTH = 120000

def clear_and_save_textbox(message):
    """Return an empty string for the textbox and the message to stash.

    The first element of the pair clears the input field; the second is the
    submitted text, passed through unchanged.
    """
    cleared_text = ''
    return cleared_text, message

def display_input(message, history=None):
    """Append the pending user message to the chat history.

    The new turn is added as ``(message, '')`` so the message is shown
    before a response exists.

    Args:
        message: The user text to show.
        history: Existing list of ``(user_input, response)`` pairs. It is
            extended in place. When omitted or ``None`` a fresh list is
            created on every call; the previous ``history=[]`` default was
            shared between calls and accumulated turns.

    Returns:
        The history list including the new turn.
    """
    if history is None:
        history = []
    history.append((message, ''))
    return history

def delete_prev_fn(history=None):
    """Remove the most recent turn from the history and return its message.

    Args:
        history: List of ``(user_input, response)`` pairs, modified in
            place. When omitted or ``None`` an empty list is used instead
            of a shared mutable default.

    Returns:
        A ``(history, message)`` pair: the history without its last turn and
        the user text of the removed turn, or ``''`` when there was nothing
        to remove or the removed message was falsy.
    """
    if history is None:
        history = []
    try:
        message, _ = history.pop()
    except IndexError:
        # Nothing to undo: report an empty message.
        message = ''
    return history, message or ''

def generate(message, history_with_input, max_new_tokens, temperature, top_p, top_k):
    """Produce the model's reply and yield the updated chat history.

    Args:
        message: The user text to respond to.
        history_with_input: Chat history whose last entry is the pending
            turn for ``message``; that entry is dropped before the prompt
            is built.
        max_new_tokens: Requested generation budget; must not exceed
            ``MAX_MAX_NEW_TOKENS``.
        temperature: Sampling temperature forwarded to ``run``.
        top_p: Nucleus-sampling threshold forwarded to ``run``.
        top_k: Top-k cutoff forwarded to ``run``.

    Yields:
        The history without the pending turn, followed by a completed
        ``(message, response)`` turn. Exactly one value is yielded.

    Raises:
        ValueError: If ``max_new_tokens`` is above ``MAX_MAX_NEW_TOKENS``.
    """
    requested_tokens = int(max_new_tokens)
    if requested_tokens > MAX_MAX_NEW_TOKENS:
        # Same exception type as before, now with a message naming the limit.
        raise ValueError(
            f"max_new_tokens ({requested_tokens}) exceeds the allowed maximum ({MAX_MAX_NEW_TOKENS})."
        )

    history = history_with_input[:-1]
    response = run(message, history, max_new_tokens, temperature, top_p, top_k)
    yield history + [(message, response)]


def process_example(message):
    """Run ``generate`` on a single message with fixed sampling settings.

    The generator is drained and only its final yielded history is kept.

    Returns:
        A pair of an empty string and the last history yielded by
        ``generate``.
    """
    *_, final_history = generate(message, [], 1024, 2.5, 0.95, 900)
    return '', final_history

def check_input_token_length(message, chat_history):
    """Raise a UI error when the prompt exceeds ``MAX_INPUT_TOKEN_LENGTH`` tokens.

    The previous implementation added the character count of ``message`` to
    the number of turns in ``chat_history``, which is not a token count.
    The prompt is now built with ``get_prompt`` and measured with the
    tokenizer.

    Args:
        message: The pending user text.
        chat_history: Chat history whose last entry is the pending turn for
            ``message``. In the UI chain ``display_input`` runs before this
            check, and ``generate`` drops that entry the same way.

    Raises:
        gr.Error: If the tokenized prompt is longer than
            ``MAX_INPUT_TOKEN_LENGTH``.
    """
    # Drop the pending (message, '') turn so the message is not counted twice.
    previous_turns = chat_history[:-1] if chat_history else []
    prompt = get_prompt(message, previous_turns)
    input_token_length = len(tokenizer.encode(prompt))
    if input_token_length > MAX_INPUT_TOKEN_LENGTH:
        raise gr.Error(f"The accumulated input is too long ({input_token_length} > {MAX_INPUT_TOKEN_LENGTH}). Clear your chat history and try again.")

# Build the page: description header, chatbot pane with an input row,
# Retry/Undo/Clear buttons, sampling sliders, and the event wiring between them.
with gr.Blocks(theme='ParityError/Anime') as demo:
    gr.Markdown(DESCRIPTION)


    
    with gr.Group():
        chatbot = gr.Chatbot(label='TonicYi-30B-200K')
        with gr.Row():
            textbox = gr.Textbox(
                container=False,
                show_label=False,
                placeholder='As the dawn approached, they leant in and said',
                scale=10
            )
            submit_button = gr.Button('Submit', variant='primary', scale=1, min_width=0)

    with gr.Row():
        retry_button = gr.Button('Retry', variant='secondary')
        undo_button = gr.Button('Undo', variant='secondary')
        clear_button = gr.Button('Clear', variant='secondary')

    # Holds the submitted message so later steps of a chain can read it after
    # the textbox has been cleared.
    saved_input = gr.State()

    # Sampling controls; their values are passed to generate() by the chains below.
    with gr.Accordion(label='Advanced options', open=False):
#       system_prompt = gr.Textbox(label='System prompt', value=DEFAULT_SYSTEM_PROMPT, lines=5, interactive=False)
        max_new_tokens = gr.Slider(label='Max New Tokens', minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS)
        temperature = gr.Slider(label='Temperature', minimum=0.1, maximum=4.0, step=0.1, value=0.1)
        top_p = gr.Slider(label='Top-P (nucleus sampling)', minimum=0.05, maximum=1.0, step=0.05, value=0.9)
        top_k = gr.Slider(label='Top-K', minimum=1, maximum=1000, step=1, value=10)

    # Submitting the textbox: stash and clear the text, show it in the chat,
    # run the length check, and call generate() only if the check succeeded.
    textbox.submit(
        fn=clear_and_save_textbox,
        inputs=textbox,
        outputs=[textbox, saved_input],
        api_name=False,
        queue=False,
    ).then(
        fn=display_input,
        inputs=[saved_input, chatbot],
        outputs=chatbot,
        api_name=False,
        queue=False,
    ).then(
        fn=check_input_token_length,
        inputs=[saved_input, chatbot],
        api_name=False,
        queue=False,
    ).success(
        fn=generate,
        inputs=[
            saved_input,
            chatbot,
            max_new_tokens,
            temperature,
            top_p,
            top_k,
        ],
        outputs=chatbot,
        api_name="Generate",
    )

    # Clicking Submit runs the same four-step pipeline as textbox.submit above;
    # only the api_name of the final step differs.
    # NOTE(review): button_event_preprocess is not referenced again in the visible file.
    button_event_preprocess = submit_button.click(
        fn=clear_and_save_textbox,
        inputs=textbox,
        outputs=[textbox, saved_input],
        api_name=False,
        queue=False,
    ).then(
        fn=display_input,
        inputs=[saved_input, chatbot],
        outputs=chatbot,
        api_name=False,
        queue=False,
    ).then(
        fn=check_input_token_length,
        inputs=[saved_input, chatbot],
        api_name=False,
        queue=False,
    ).success(
        fn=generate,
        inputs=[
            saved_input,
            chatbot,
            max_new_tokens,
            temperature,
            top_p,
            top_k,
        ],
        outputs=chatbot,
        api_name="Cgenerate",
    )

    # Retry: pop the last turn, re-add its message as a pending turn, then
    # regenerate. This chain does not call check_input_token_length.
    retry_button.click(
        fn=delete_prev_fn,
        inputs=chatbot,
        outputs=[chatbot, saved_input],
        api_name=False,
        queue=False,
    ).then(
        fn=display_input,
        inputs=[saved_input, chatbot],
        outputs=chatbot,
        api_name=False,
        queue=False,
    ).then(
        fn=generate,
        inputs=[
            saved_input,
            chatbot,
            max_new_tokens,
            temperature,
            top_p,
            top_k,
        ],
        outputs=chatbot,
        api_name=False,
    )

    # Undo: pop the last turn and put its message back into the textbox.
    undo_button.click(
        fn=delete_prev_fn,
        inputs=chatbot,
        outputs=[chatbot, saved_input],
        api_name=False,
        queue=False,
    ).then(
        fn=lambda x: x,
        inputs=[saved_input],
        outputs=textbox,
        api_name=False,
        queue=False,
    )

    # Clear: reset the chat history and the saved message.
    clear_button.click(
        fn=lambda: ([], ''),
        outputs=[chatbot, saved_input],
        queue=False,
        api_name=False,
    )

# Enable request queuing and start the app with the API page shown.
demo.queue().launch(show_api=True)