File size: 3,908 Bytes
ca5433e 2682883 1b11cae 0256a46 2682883 ca5433e 2682883 1b11cae 4d0d5e0 ea82e95 755e1ba 2682883 ea82e95 2682883 d53c674 2682883 755e1ba 2682883 ea82e95 755e1ba 2682883 755e1ba ea82e95 2682883 755e1ba ea82e95 755e1ba 2682883 f6df3e8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os
import gradio as gr
import sentencepiece
# from tokenization_yi import YiTokenizer
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:120'
model_id = "larryvrh/Yi-34B-200K-Llamafied"
tokenizer = AutoTokenizer.from_pretrained("larryvrh/Yi-34B-200K-Llamafied")
model = AutoModelForCausalLM.from_pretrained("larryvrh/Yi-34B-200K-Llamafied", device_map="auto", torch_dtype="bfloat16", trust_remote_code=True)
# gptq_config = GPTQConfig(bits=4, exllama_config={"version": 2})
# tokenizer = YiTokenizer.from_pretrained("./")
# model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype="auto", trust_remote_code=True, quantization_config=gptq_config)
def predict(message, max_new_tokens=4056, temperature=3.5, top_p=0.9, top_k=800, do_sample=False):
prompt = message.strip()
input_ids = tokenizer.encode(prompt, return_tensors='pt')
input_ids = input_ids.to(model.device)
response_ids = model.generate(
input_ids,
max_length=max_new_tokens + input_ids.shape[1],
temperature=temperature,
top_p=top_p,
top_k=top_k,
pad_token_id=tokenizer.eos_token_id,
do_sample=do_sample
)
response = tokenizer.decode(response_ids[:, input_ids.shape[-1]:][0], skip_special_tokens=True)
return [("bot", response)]
DESCRIPTION = """
# 👋🏻Welcome to 🙋🏻♂️Tonic's🧑🏻🚀YI-200K🚀"
You can use this Space to test out the current model [Tonic/YI](https://huggingface.co/01-ai/Yi-34B)
You can also use 🧑🏻🚀YI-200K🚀 by cloning this space. 🧬🔬🔍 Simply click here: <a style="display:inline-block" href="https://huggingface.co/spaces/Tonic1/YiTonic?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=&logoWidth=14" alt="Duplicate Space"></a></h3>
Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder's🛠️community on 👻Discord: [Discord](https://discord.gg/nXx5wbX9) On 🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Polytonic](https://github.com/tonic-ai) & contribute to 🌟 [PolyGPT](https://github.com/tonic-ai/polygpt-alpha)
"""
with gr.Blocks(theme='ParityError/Anime') as demo:
gr.Markdown(DESCRIPTION)
with gr.Group():
textbox = gr.Textbox(placeholder='Enter your message here', label='Your Message', lines=2)
submit_button = gr.Button('Submit', variant='primary')
chatbot = gr.Chatbot(label='TonicYi-30B-200K')
with gr.Accordion(label='Advanced options', open=False):
max_new_tokens = gr.Slider(label='Max New Tokens', minimum=1, maximum=55000, step=1, value=8000)
temperature = gr.Slider(label='Temperature', minimum=0.1, maximum=4.0, step=0.1, value=1.2)
top_p = gr.Slider(label='Top-P (nucleus sampling)', minimum=0.05, maximum=1.0, step=0.05, value=0.9)
top_k = gr.Slider(label='Top-K', minimum=1, maximum=1000, step=1, value=900)
do_sample_checkbox = gr.Checkbox(label='Do Sample', value=True, tooltip="Disable for faster inference")
submit_button.click(
fn=predict,
inputs=[textbox, max_new_tokens, temperature, top_p, top_k, do_sample_checkbox],
outputs=chatbot
)
demo.launch() |