import spaces
import gradio as gr
import torch
import html
from threading import Thread
from transformers import (
    AutoModelForCausalLM,
    PreTrainedTokenizerFast,
    TextIteratorStreamer,
)

# Initialize model and tokenizer
MODEL_ID = "erikbeltran/pydiff"
GGUF_FILE = "unsloth.Q4_K_M.gguf"
try:
    # Use PreTrainedTokenizerFast instead of LlamaTokenizer
    tokenizer = PreTrainedTokenizerFast.from_pretrained(MODEL_ID)

    # Ensure the tokenizer has the necessary special tokens
    special_tokens = {
        # 'pad_token': '[PAD]',
        'eos_token': '<|eot_id|>',
        # 'bos_token': '<s>',
        # 'unk_token': '<unk>',
    }
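    # '<|eot_id|>' already exists in the Llama 3 vocabulary, so this call only
    # remaps the eos_token attribute; if a genuinely new token were added here,
    # model.resize_token_embeddings(len(tokenizer)) would also be required.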
    tokenizer.add_special_tokens(special_tokens)
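
    # Loading a GGUF checkpoint through transformers requires the `gguf`
    # package; the quantized weights are dequantized into regular torch
    # tensors at load time, trading disk size for load-time work.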
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID, gguf_file=GGUF_FILE)

    # Move model to GPU if available
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)
except Exception as e:
    print(f"Error initializing model or tokenizer: {e}")
    raise


def format_diff_response(response):
    """Format the response to look like a diff output."""
    lines = response.split('\n')
    formatted = []
    for line in lines:
        # Escape the code first so <, > and & in the output don't break the HTML
        escaped = html.escape(line)
        if line.startswith('+'):
            formatted.append(f'<span style="color: green">{escaped}</span>')
        elif line.startswith('-'):
            formatted.append(f'<span style="color: red">{escaped}</span>')
        else:
            formatted.append(escaped)
    return '<br>'.join(formatted)


def create_prompt(request, file_content, system_message):
    # Earlier, simpler prompt format, kept for reference:
    # return f"""<system>{system_message}</system>
    # <request>{request}</request>
    # <file>{file_content}</file>"""

    # Llama 3 chat template; the <request>/<file> tags are this fine-tune's own convention.
    return f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 July 2024

{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>

<request>{request}</request>
<file>{file_content}</file><|eot_id|><|start_header_id|>assistant<|end_header_id|>

"""
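
# Note: if the checkpoint ships a chat_template, tokenizer.apply_chat_template
# could build the Llama 3 wrapper above instead of hard-coding it; it is spelled
# out here so the custom <request>/<file> wrapping stays explicit.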


@spaces.GPU
def respond(request, file_content, system_message, max_tokens, temperature, top_p):
    try:
        prompt = create_prompt(request, file_content, system_message)

        # Tokenize input; the template already contains <|begin_of_text|>,
        # so don't let the tokenizer prepend a second BOS token.
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            add_special_tokens=False,
            padding=True,
            truncation=True,
            max_length=2048,
        ).to(device)

        # Generate response with streaming
        response = ""
        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
        generation_kwargs = dict(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            streamer=streamer,
            # No pad token was configured above, so fall back to eos for padding
            pad_token_id=tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            do_sample=True,
        )

        # Start generation in a separate thread
        thread = Thread(target=model.generate, kwargs=generation_kwargs)
        thread.start()

        # Yield formatted responses as they're generated
        for new_text in streamer:
            response += new_text
            yield format_diff_response(response)
    except Exception as e:
        yield f"<span style='color: red'>Error generating response: {e}</span>"


# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Code Review Assistant")

    with gr.Row():
        with gr.Column():
            request_input = gr.Textbox(
                label="Request",
                value="fix the error",
                placeholder="Enter your request (e.g., 'fix the function', 'add error handling')",
                lines=3,
            )
            # Intentionally buggy sample (adds an int to a str) to match the default request
            file_input = gr.Code(
                label="File Content",
                value="""def suma(a, b):
    return a + b

print(suma(5, "3"))
""",
                language="python",
                lines=10,
            )
        with gr.Column():
            output = gr.HTML(label="Diff Output")
    with gr.Accordion("Advanced Settings", open=False):
        system_msg = gr.Textbox(
            value="You are a coding assistant; return the answer to the user in diff format",
            label="System Message",
        )
        max_tokens = gr.Slider(
            minimum=1,
            maximum=2048,
            value=128,
            step=1,
            label="Max Tokens",
        )
        temperature = gr.Slider(
            minimum=0.1,
            maximum=4.0,
            value=0.5,
            step=0.1,
            label="Temperature",
        )
        top_p = gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=1.0,
            step=0.05,
            label="Top-p",
        )

    submit_btn = gr.Button("Submit")
    submit_btn.click(
        fn=respond,
        inputs=[
            request_input,
            file_input,
            system_msg,
            max_tokens,
            temperature,
            top_p,
        ],
        outputs=output,
    )


if __name__ == "__main__":
    demo.launch()
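
# To run outside HF Spaces, the (assumed) dependencies are roughly:
#   pip install spaces gradio transformers gguf torch
# then: python app.py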