import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig

description = """# 🖥️ DeciCoder-6B: A Fast Code Generation Model 💨
Welcome to DeciCoder! DeciCoder-6B was trained on the Python, Java, JavaScript, Rust, C++, C, and C# subset of the StarCoder Training Dataset, and it's released under the Apache 2.0 license. The model is capable of code completion and instruction following. It surpasses CodeGen 2.5 7B, CodeLlama 7B, and StarCoder 7B in its supported languages on HumanEval, and leads StarCoderBase 15.5B by 3 points in Python."""

GENERATION_TITLE = "💻 Your generated code:\n"
" def instantiate_huggingface_model( model_name, quantization_config=None, device_map="auto", use_cache=True, trust_remote_code=None, pad_token=None, padding_side="left" ): """ Instantiate a HuggingFace model with optional quantization using the BitsAndBytes library. Parameters: - model_name (str): The name of the model to load from HuggingFace's model hub. - quantization_config (BitsAndBytesConfig, optional): Configuration for model quantization. If None, defaults to a pre-defined quantization configuration for 4-bit quantization. - device_map (str, optional): Device placement strategy for model layers ('auto' by default). - use_cache (bool, optional): Whether to cache model outputs (False by default). - trust_remote_code (bool, optional): Whether to trust remote code for custom layers (True by default). - pad_token (str, optional): The pad token to be used by the tokenizer. If None, uses the EOS token. - padding_side (str, optional): The side on which to pad the sequences ('left' by default). Returns: - model (PreTrainedModel): The instantiated model ready for inference or fine-tuning. - tokenizer (PreTrainedTokenizer): The tokenizer associated with the model. The function will throw an exception if model loading fails. """ # If quantization_config is not provided, use the default configuration if quantization_config is None: quantization_config = BitsAndBytesConfig( load_in_8bit=True, low_cpu_mem_usage=True, ) model = AutoModelForCausalLM.from_pretrained( model_name, quantization_config=quantization_config, device_map=device_map, use_cache=use_cache, trust_remote_code=trust_remote_code ) tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=trust_remote_code) if pad_token is not None: tokenizer.pad_token = pad_token else: tokenizer.pad_token = tokenizer.eos_token tokenizer.padding_side = padding_side return model, tokenizer model, tokenizer = instantiate_huggingface_model("Deci-early-access/DeciCoder-6B", trust_remote_code=True) pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device_map="auto", max_length=2048, temperature=1e-3, ) def post_processing(prompt: str, completion: str) -> str: """ Post-processes the generated code completion with HTML styling. Args: prompt (str): The input code prompt. completion (str): The generated code completion. Returns: str: The HTML-styled code with prompt and completion. """ completion = "" + completion + "" prompt = "" + prompt + "" code_html = f"{prompt}{completion}