import os
from dataclasses import dataclass, asdict

import gradio as gr
from ctransformers import AutoModelForCausalLM, AutoConfig


@dataclass
class GenerationConfig:
    """Sampling and runtime options passed straight through to the ctransformers model call."""

    temperature: float
    top_k: int
    top_p: float
    repetition_penalty: float
    max_new_tokens: int
    seed: int
    reset: bool
    stream: bool
    threads: int
    stop: list[str]


def format_prompt(user_prompt: str) -> str:
    """Wrap the user's request in the instruction template the model was fine-tuned on."""
    return f"""### Instruction:
{user_prompt}

### Response:"""


def generate(
    llm: AutoModelForCausalLM,
    generation_config: GenerationConfig,
    user_prompt: str,
):
    """Run model inference; returns a generator of tokens when streaming is enabled."""
    return llm(format_prompt(user_prompt), **asdict(generation_config))


# Fetch the model configuration from the Hub and load the local 4-bit GGML weights.
config = AutoConfig.from_pretrained(
    "teknium/Replit-v2-CodeInstruct-3B", context_length=2048
)
llm = AutoModelForCausalLM.from_pretrained(
    os.path.abspath("replit-code-instruct-glaive.ggmlv1.q4_1.bin"),
    model_type="replit",
    config=config,
)


generation_config = GenerationConfig(
    temperature=0.2,
    top_k=50,
    top_p=0.9,
    repetition_penalty=1.0,
    max_new_tokens=512,
    seed=42,
    reset=True,
    stream=True,
    threads=max(1, (os.cpu_count() or 1) // 6),  # never request zero threads
    stop=["<|endoftext|>"],
)


user_prefix = "[user]: "
assistant_prefix = "[assistant]:"


title = "Replit-v2-CodeInstruct-3b-ggml"
description = (
    "This Space runs the GGML 4-bit quantized version of Replit's CodeInstruct 3B on a CPU."
)

example_1 = "Write a Python function that calculates the factorial of a number entered by the user."
example_2 = "Write a Python script that prints 'you are logged in' only if the user inputs a number between 1 and 10."

examples = [example_1, example_2]


def generate_code(user_input):
    """Collect the streamed tokens into a single string for the Gradio output textbox."""
    response = generate(llm, generation_config, user_input)
    code = ""
    for token in response:
        code += token
    return code


UI = gr.Interface(
    fn=generate_code,
    inputs=gr.Textbox(label="user_prompt", placeholder="Ask your queries here..."),
    outputs=gr.Textbox(label="Assistant"),
    title=title,
    description=description,
    examples=examples,
)

UI.launch()
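
# A possible extension (a sketch, not part of the original app): because stream=True makes
# the model call return a token generator, the UI could show partial output as it is
# produced by turning generate_code into a generator function. Gradio treats generator
# functions as streaming outputs; depending on the Gradio version this may require
# enabling the queue, e.g. UI.queue().launch().
#
# def generate_code_streaming(user_input):
#     response = generate(llm, generation_config, user_input)
#     code = ""
#     for token in response:
#         code += token
#         yield code  # update the output textbox incrementally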