Spaces:
Paused
Paused
| import os | |
| import gradio as gr | |
| import torch | |
| from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline | |
| token = os.environ["HUGGINGFACEHUB_API_TOKEN"] | |
| model_id = 'Deci-early-access/DeciLM-7B-instruct-early' | |
| SYSTEM_PROMPT_TEMPLATE = """### System: You are an AI assistant that follows instruction extremely well. Help as much as you can. | |
| ### User: | |
| {instruction} | |
| ### Assistant: | |
| """ | |
| DESCRIPTION = """ | |
| # <p style="text-align: center; color: #292b47;"> 🤖 <span style='color: #3264ff;'>DeciLM-7B-Instruct:</span> A Fast Instruction-Tuned Model💨 </p> | |
| <span style='color: #292b47;'>Welcome to <a href="https://huggingface.co/Deci/DeciLM-7B-instruct" style="color: #3264ff;">DeciLM-7B-Instruct</a>! DeciLM-6B-Instruct is a 6B parameter instruction-tuned language model and released under the Llama license. It's an instruction-tuned model, not a chat-tuned model; you should prompt the model with an instruction that describes a task, and the model will respond appropriately to complete the task.</span> | |
| <p><span style='color: #292b47;'>Learn more about the base model <a href="" style="color: #3264ff;">DeciLM-7B.</a></span></p> | |
| """ | |
| bnb_config = BitsAndBytesConfig( | |
| load_in_4bit = True, | |
| bnb_4bit_compute_dtype=torch.bfloat16 | |
| ) | |
| if not torch.cuda.is_available(): | |
| DESCRIPTION += 'You need a GPU for this example. Try using colab: ' | |
| if torch.cuda.is_available(): | |
| model = AutoModelForCausalLM.from_pretrained(model_id, | |
| device_map="auto", | |
| trust_remote_code=True, | |
| quantization_config=bnb_config, | |
| # use_flash_attention_2=True, #DeciLM doesn't use flash_attention_2 | |
| # this token will be deleted | |
| token=token | |
| ) | |
| else: | |
| model = None | |
| tokenizer = AutoTokenizer.from_pretrained(model_id, token=token) | |
| tokenizer.pad_token = tokenizer.eos_token | |
| # Function to construct the prompt using the new system prompt template | |
| def get_prompt_with_template(message: str) -> str: | |
| return SYSTEM_PROMPT_TEMPLATE.format(instruction=message) | |
| # Function to generate the model's response | |
| def generate_model_response(message: str) -> str: | |
| prompt = get_prompt_with_template(message) | |
| inputs = tokenizer(prompt, return_tensors='pt') | |
| if torch.cuda.is_available(): | |
| inputs = inputs.to('cuda') | |
| # Include **generate_kwargs to include the user-defined options | |
| output = model.generate(**inputs, | |
| max_new_tokens=4096, | |
| do_sample=True, | |
| temperature=0.1 | |
| ) | |
| return tokenizer.decode(output[0], skip_special_tokens=True) | |
| def extract_response_content(full_response: str) -> str: | |
| response_start_index = full_response.find("### Assistant:") | |
| if response_start_index != -1: | |
| return full_response[response_start_index + len("### Assistant:"):].strip() | |
| else: | |
| return full_response | |
| def get_response_with_template(message: str) -> str: | |
| full_response = generate_model_response(message) | |
| return extract_response_content(full_response) | |
| with gr.Blocks(css="style.css") as demo: | |
| gr.Markdown(DESCRIPTION) | |
| gr.DuplicateButton(value='Duplicate Space for private use', | |
| elem_id='duplicate-button') | |
| with gr.Group(): | |
| chatbot = gr.Textbox(label='DeciLM-6B-Instruct Output:') | |
| with gr.Row(): | |
| textbox = gr.Textbox( | |
| container=False, | |
| show_label=False, | |
| placeholder='Type an instruction...', | |
| scale=10, | |
| elem_id="textbox" | |
| ) | |
| submit_button = gr.Button( | |
| '💬 Submit', | |
| variant='primary', | |
| scale=1, | |
| min_width=0, | |
| elem_id="submit_button" | |
| ) | |
| # Clear button to clear the chat history | |
| clear_button = gr.Button( | |
| '🗑️ Clear', | |
| variant='secondary', | |
| ) | |
| clear_button.click( | |
| fn=lambda: ('',''), | |
| outputs=[textbox, chatbot], | |
| queue=False, | |
| api_name=False, | |
| ) | |
| submit_button.click( | |
| fn=get_response_with_template, | |
| inputs=textbox, | |
| outputs= chatbot, | |
| queue=False, | |
| api_name=False, | |
| ) | |
| gr.Examples( | |
| examples=[ | |
| 'Write detailed instructions for making chocolate chip pancakes.', | |
| 'Write a 250-word article about your love of pancakes.', | |
| 'Explain the plot of Back to the Future in three sentences.', | |
| 'How do I make a trap beat?', | |
| 'A step-by-step guide to learning Python in one month.', | |
| ], | |
| inputs=textbox, | |
| outputs=chatbot, | |
| fn=get_response_with_template, | |
| cache_examples=True, | |
| elem_id="examples" | |
| ) | |
| gr.HTML(label="Keep in touch", value="<img src='https://huggingface.co/spaces/Deci/DeciLM-6b-instruct/resolve/main/deci-coder-banner.png' alt='Keep in touch' style='display: block; color: #292b47; margin: auto; max-width: 800px;'>") | |
| demo.launch() |