import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
description = """# <p style="text-align: center; color: #292b47;"> ποΈ <span style='color: #3264ff;'>DeciCoder-6B:</span> A Fast Code Generation Modelπ¨ </p>
<span style='color: #292b47;'>Welcome to <a href="https://huggingface.co/Deci/DeciCoder-6B" style="color: #3264ff;">DeciCoder</a>!
DeciCoder-6B was trained on the Python, Java, Javascript, Rust, C++, C, and C# subset of the Starcoder Training Dataset, and it's released under the Apache 2.0 license. This model is capable of code-completion and instruction following. It surpasses CodeGen 2.5 7B, CodeLlama 7B, abd StarCoder 7B in its supported languages on HumanEval, and leads by 3 points in Python over StarCoderBase 15.5B."""
GENERATION_TITLE= "<p style='font-size: 24px; color: #292b47;'>π» Your generated code:</p>"
def instantiate_huggingface_model(
    model_name,
    quantization_config=None,
    device_map="auto",
    use_cache=True,
    trust_remote_code=None,
    pad_token=None,
    padding_side="left"
):
"""
Instantiate a HuggingFace model with optional quantization using the BitsAndBytes library.
Parameters:
- model_name (str): The name of the model to load from HuggingFace's model hub.
- quantization_config (BitsAndBytesConfig, optional): Configuration for model quantization.
If None, defaults to a pre-defined quantization configuration for 4-bit quantization.
- device_map (str, optional): Device placement strategy for model layers ('auto' by default).
- use_cache (bool, optional): Whether to cache model outputs (False by default).
- trust_remote_code (bool, optional): Whether to trust remote code for custom layers (True by default).
- pad_token (str, optional): The pad token to be used by the tokenizer. If None, uses the EOS token.
- padding_side (str, optional): The side on which to pad the sequences ('left' by default).
Returns:
- model (PreTrainedModel): The instantiated model ready for inference or fine-tuning.
- tokenizer (PreTrainedTokenizer): The tokenizer associated with the model.
The function will throw an exception if model loading fails.
"""
    # If no quantization_config is provided, fall back to 8-bit quantization
    if quantization_config is None:
        quantization_config = BitsAndBytesConfig(
            load_in_8bit=True,
        )

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=quantization_config,
        device_map=device_map,
        use_cache=use_cache,
        trust_remote_code=trust_remote_code,
        low_cpu_mem_usage=True,  # a from_pretrained argument, not a BitsAndBytesConfig field
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name,
                                              trust_remote_code=trust_remote_code)

    if pad_token is not None:
        tokenizer.pad_token = pad_token
    else:
        tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = padding_side

    return model, tokenizer
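
# A hedged illustration (not part of the original Space): instantiate_huggingface_model
# also accepts a custom BitsAndBytesConfig, e.g. 4-bit NF4 quantization to cut memory
# further, assuming the GPU supports bfloat16 compute:
#
#     import torch
#     nf4_config = BitsAndBytesConfig(
#         load_in_4bit=True,
#         bnb_4bit_quant_type="nf4",
#         bnb_4bit_compute_dtype=torch.bfloat16,
#     )
#     model, tokenizer = instantiate_huggingface_model(
#         "Deci-early-access/DeciCoder-6B",
#         quantization_config=nf4_config,
#         trust_remote_code=True,
#     )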
model, tokenizer = instantiate_huggingface_model("Deci-early-access/DeciCoder-6B", trust_remote_code=True)
pipe = pipeline("text-generation",
model=model,
tokenizer=tokenizer,
device_map="auto",
max_length=2048,
temperature=1e-3,
)
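# Note: max_length caps prompt plus completion tokens combined; if completions are
# truncated, max_new_tokens would bound only the generated portion instead. A quick
# smoke test of the pipeline (illustrative, not part of the app) could look like:
#
#     print(pipe("def add(a, b):")[0]["generated_text"])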
def post_processing(prompt: str, completion: str) -> str:
    """
    Post-processes the generated code completion with HTML styling.

    Args:
        prompt (str): The input code prompt.
        completion (str): The generated code completion.

    Returns:
        str: The HTML-styled code with prompt and completion.
    """
    completion = "<span style='color: #ff5b86;'>" + completion + "</span>"
    prompt = "<span style='color: #7484b7;'>" + prompt + "</span>"
    code_html = f"<br><hr><br><pre style='font-size: 12px'><code>{prompt}{completion}</code></pre><br><hr>"
    return GENERATION_TITLE + code_html
def code_generation(prompt: str) -> str:
    """
    Generates a code completion for the given prompt.

    Args:
        prompt (str): The input code prompt.

    Returns:
        str: The HTML-styled generated code.
    """
    completion = pipe(prompt)[0]['generated_text']
    # The pipeline returns prompt + completion; strip the prompt to keep only new code
    completion = completion[len(prompt):]
    return post_processing(prompt, completion)
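
# Fill-in-the-middle (FIM) sketch, heavily hedged: this app only performs plain
# left-to-right completion. If the DeciCoder-6B tokenizer defines StarCoder-style
# FIM sentinel tokens (an assumption worth checking via tokenizer.get_vocab()),
# an infill prompt could be assembled like this:
#
#     def fim_prompt(prefix: str, suffix: str) -> str:
#         return f"<fim_prefix>{prefix}<fim_suffix>{suffix}<fim_middle>"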
demo = gr.Blocks(
css=".gradio-container {background-color: #FAFBFF; color: #292b47}"
)
with demo:
    with gr.Row():
        _, column_2, _ = gr.Column(scale=1), gr.Column(scale=6), gr.Column(scale=1)
        with column_2:
            gr.Markdown(value=description)
            code = gr.Code(lines=5, language="python", label="Input code", value="def nth_element_in_fibonacci(element):\n    \"\"\"Returns the nth element of the Fibonacci sequence.\"\"\"")
            run = gr.Button(value="👨🏽‍💻 Generate code", size='lg')
            output = gr.HTML(label="💻 Your generated code")
            run.click(code_generation, [code], output)
            gr.HTML(label="Keep in touch", value="<img src='https://huggingface.co/spaces/Deci/DeciCoder-Demo/resolve/main/deci-coder-banner.png' alt='Keep in touch' style='display: block; color: #292b47; margin: auto; max-width: 800px;'>")
demo.launch()