Terry Zhuo committed
Commit 371a048 · 1 Parent(s): c32a030
Files changed (2):
  1. app.py +29 -104
  2. utils.py +61 -82
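
Summary of the change: app.py drops its ad-hoc local generation path (the `load_model_and_tokenizer` and `parse_model_response` helpers and the manual `MAX_TURNS` loop) in favor of loading the Qwen/Qwen2.5-Coder-7B-Instruct base model with a PEFT LoRA adapter and delegating the agent loop to `run_interactive_notebook`; utils.py correspondingly replaces the `InferenceClient` streaming path with local `model.generate` calls plus fenced-code-block parsing, and raises `MAX_TURNS` from 4 to 10.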
app.py CHANGED
@@ -3,9 +3,9 @@ import gradio as gr
 from gradio.utils import get_space
 from e2b_code_interpreter import Sandbox
 from pathlib import Path
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from peft import PeftModel
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import json
-import re
 
 if not get_space():
     try:
@@ -20,12 +20,9 @@ from utils import (
     run_interactive_notebook,
     create_base_notebook,
     update_notebook_display,
-    update_notebook_with_cell,
-    update_notebook_with_markdown,
 )
 
 E2B_API_KEY = os.environ["E2B_API_KEY"]
-HF_TOKEN = os.environ["HF_TOKEN"]
 DEFAULT_MAX_TOKENS = 512
 SANDBOXES = {}
 TMP_DIR = './tmp/'
@@ -39,42 +36,6 @@ with open(TMP_DIR+"jupyter-agent.ipynb", 'w', encoding='utf-8') as f:
 with open("ds-system-prompt.txt", "r") as f:
     DEFAULT_SYSTEM_PROMPT = f.read()
 
-# Add this constant at the top with other constants
-MAX_TURNS = 10
-
-# Replace the client initialization with local model loading
-def load_model_and_tokenizer(model_name="bigcomputer/jupycoder-7b-lora-350"):
-    if model_name == "bigcomputer/jupycoder-7b-lora-350":
-        model = AutoModelForCausalLM.from_pretrained(
-            model_name,
-            device_map="auto"
-        )
-        tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-Coder-7B-Instruct")
-    else:
-        model = AutoModelForCausalLM.from_pretrained(
-            model_name,
-            device_map="auto"
-        )
-        tokenizer = AutoTokenizer.from_pretrained(model_name)
-    return model, tokenizer
-
-# Function to extract code and text from model response
-def parse_model_response(response_text):
-    cells = []
-    # Split by code blocks
-    parts = re.split(r'(```python[\s\S]*?```)', response_text)
-
-    for part in parts:
-        if part.strip():
-            if part.startswith('```python'):
-                # Extract code without the markers
-                code = re.sub(r'```python\n|```', '', part).strip()
-                cells.append({"type": "code", "content": code})
-            else:
-                # Regular text becomes markdown
-                cells.append({"type": "markdown", "content": part.strip()})
-
-    return cells
 
 def execute_jupyter_agent(
     system_prompt, user_input, max_new_tokens, model_name, files, message_history, request: gr.Request
@@ -87,9 +48,18 @@ def execute_jupyter_agent(
     os.makedirs(save_dir, exist_ok=True)
     save_dir = os.path.join(save_dir, 'jupyter-agent.ipynb')
 
-    model, tokenizer = load_model_and_tokenizer(model_name)
+    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-Coder-7B-Instruct")
+    model = AutoModelForCausalLM.from_pretrained(
+        "Qwen/Qwen2.5-Coder-7B-Instruct", torch_dtype='auto'
+    ).eval()
+    # Load the LoRA adapter and move the model to GPU
+    model = PeftModel.from_pretrained(
+        model,
+        model_name,
+        device_map="auto",  # Automatically allocate model layers to available devices
+        trust_remote_code=True
+    ).eval()
 
-    # Handle file uploads
     filenames = []
    if files is not None:
         for filepath in files:
@@ -99,73 +69,28 @@ def execute_jupyter_agent(
             sbx.files.write(filepath.name, file)
             filenames.append(filepath.name)
 
-    # Initialize conversation
+    # Initialize message_history if it doesn't exist
     if len(message_history) == 0:
-        message_history.append({
-            "role": "system",
-            "content": system_prompt.format("- " + "\n- ".join(filenames))
-        })
+        message_history.append(
+            {
+                "role": "system",
+                "content": system_prompt.format("- " + "\n- ".join(filenames)),
+            }
+        )
     message_history.append({"role": "user", "content": user_input})
 
-    # Create initial notebook
-    notebook_data = create_base_notebook([])
-    turn_count = 0
-
-    while turn_count < MAX_TURNS:
-        turn_count += 1
-
-        # Generate response
-        input_text = "\n".join([msg["content"] for msg in message_history])
-        inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
-
-        outputs = model.generate(
-            **inputs,
-            max_new_tokens=max_new_tokens,
-            do_sample=True,
-            temperature=0.7,
-        )
-        response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-        # Parse response into cells
-        cells = parse_model_response(response_text)
-
-        # Process each cell
-        has_code = False
-        for cell in cells:
-            if cell["type"] == "code":
-                has_code = True
-                # Execute code cell
-                result = sbx.python.run(cell["content"])
-                # Add code cell and output to notebook
-                notebook_data = update_notebook_with_cell(notebook_data, cell["content"], result)
-                # Add execution result to message history
-                message_history.append({
-                    "role": "assistant",
-                    "content": cell["content"]
-                })
-                message_history.append({
-                    "role": "user",
-                    "content": f"Execution result:\n{result}"
-                })
-            else:
-                # Add markdown cell to notebook
-                notebook_data = update_notebook_with_markdown(notebook_data, cell["content"])
-                message_history.append({
-                    "role": "assistant",
-                    "content": cell["content"]
-                })
-
-        # Update display after each cell
-        notebook_html = update_notebook_display(notebook_data)
-        yield notebook_html, message_history, save_dir
-
-        # If no code was generated or we've reached max turns, stop
-        if not has_code or turn_count >= MAX_TURNS:
-            break
-
-    # Save final notebook
+    print("history:", message_history)
+
+    for notebook_html, notebook_data, messages in run_interactive_notebook(
+        model, tokenizer, message_history, sbx, max_new_tokens=max_new_tokens
+    ):
+        message_history = messages
+
+        yield notebook_html, message_history, TMP_DIR+"jupyter-agent.ipynb"
+
     with open(save_dir, 'w', encoding='utf-8') as f:
         json.dump(notebook_data, f, indent=2)
+    yield notebook_html, message_history, save_dir
 
 def clear(msg_state):
     msg_state = []
@@ -254,4 +179,4 @@ with gr.Blocks() as demo:
     """
     )
 
-demo.launch(ssr_mode=False)
+demo.launch(share=True, ssr_mode=False)
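
For reference, the loading code app.py now uses is the standard PEFT recipe: load the frozen base model, then wrap it with the LoRA adapter. A minimal standalone sketch of that pattern follows; the adapter id is the default `model_name` from the removed helper, and the variable names are illustrative:

from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "Qwen/Qwen2.5-Coder-7B-Instruct"
adapter_id = "bigcomputer/jupycoder-7b-lora-350"  # default model_name in the old helper

# The tokenizer always comes from the base model; a LoRA adapter only adds weight deltas.
tokenizer = AutoTokenizer.from_pretrained(base_id)
base_model = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype="auto")

# PeftModel keeps the base weights frozen and applies the low-rank adapter on top.
model = PeftModel.from_pretrained(base_model, adapter_id).eval()

Passing device_map="auto" at load time, as the diff does, additionally shards the layers across whatever devices are available.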
utils.py CHANGED
@@ -5,18 +5,18 @@ from huggingface_hub import InferenceClient
 from e2b_code_interpreter import Sandbox
 from transformers import AutoTokenizer
 from traitlets.config import Config
+import re
 
 config = Config()
 html_exporter = HTMLExporter(config=config, template_name="classic")
 
+# Constants
+MAX_TURNS = 10
+
 with open("llama3_template.jinja", "r") as f:
     llama_template = f.read()
 
-
-MAX_TURNS = 4
-
-
 def parse_exec_result_nb(execution):
     """Convert an E2B Execution object to Jupyter notebook cell output format"""
     outputs = []
@@ -219,103 +219,82 @@ def update_notebook_display(notebook_data):
     notebook_body = notebook_body.replace(bad_html_bad, "")
     return notebook_body
 
-def run_interactive_notebook(client, model, tokenizer, messages, sbx, max_new_tokens=512):
+def run_interactive_notebook(model, tokenizer, messages, sbx, max_new_tokens=512):
     notebook_data, code_cell_counter = create_base_notebook(messages)
     turns = 0
 
-    #code_cell_counter = 0
     while turns <= MAX_TURNS:
         turns += 1
-        input_tokens = tokenizer.apply_chat_template(
-            messages,
-            chat_template=llama_template,
-            builtin_tools=["code_interpreter"],
-            add_generation_prompt=True
+        # Generate response using the model
+        text = tokenizer.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True
         )
-        model_input = tokenizer.decode(input_tokens)
+        model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
 
-        print(f"Model input:\n{model_input}\n{'='*80}")
-
-        response_stream = client.text_generation(
-            model=model,
-            prompt=model_input,
-            details=True,
-            stream=True,
-            do_sample=True,
-            repetition_penalty=1.1,
-            temperature=0.8,
-            max_new_tokens=max_new_tokens,
+        generated_ids = model.generate(
+            **model_inputs,
+            max_new_tokens=max_new_tokens
         )
+        generated_ids = [
+            output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+        ]
+        response_stream = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
 
-        assistant_response = ""
-        tokens = []
+        # Process the full response at once
+        parts = re.split(r'(```python[\s\S]*?```)', response_stream)
 
-        code_cell = False
-        for i, chunk in enumerate(response_stream):
-            if not chunk.token.special:
-                content = chunk.token.text
-            else:
-                content = ""
-            tokens.append(chunk.token.text)
-            assistant_response += content
-
-            if len(tokens)==1:
-                create_cell=True
-                code_cell = "<|python_tag|>" in tokens[0]
-                if code_cell:
-                    code_cell_counter +=1
-            else:
-                create_cell = False
-
-            # Update notebook in real-time
-            if create_cell:
-                if "<|python_tag|>" in tokens[0]:
-                    notebook_data["cells"].append({
+        for part in parts:
+            if part.strip():
+                if part.startswith('```python'):
+                    # Extract code without the markers
+                    code = re.sub(r'```python\n|```', '', part).strip()
+                    code_cell_counter += 1
+
+                    # Add code cell
+                    notebook_data["cells"].append({
                         "cell_type": "code",
-                        "execution_count": None,
+                        "execution_count": code_cell_counter,
                         "metadata": {},
-                        "source": assistant_response,
+                        "source": code,
                         "outputs": []
                     })
+
+                    # Execute code
+                    exec_result, execution = execute_code(sbx, code)
+                    messages.append({
+                        "role": "assistant",
+                        "content": code,
+                        "tool_calls": [{
+                            "type": "function",
+                            "function": {
+                                "name": "code_interpreter",
+                                "arguments": {"code": code}
+                            }
+                        }]
+                    })
+                    messages.append({
+                        "role": "ipython",
+                        "content": parse_exec_result_llm(execution),
+                        "nbformat": parse_exec_result_nb(execution)
+                    })
+
+                    # Update cell with execution results
+                    notebook_data["cells"][-1]["outputs"] = parse_exec_result_nb(execution)
                 else:
+                    # Add markdown cell for non-code content
                     notebook_data["cells"].append({
                         "cell_type": "markdown",
                         "metadata": {},
-                        "source": assistant_response
+                        "source": part.strip()
                     })
-            else:
-                notebook_data["cells"][-1]["source"] = assistant_response
-            if i%16 == 0:
-                yield update_notebook_display(notebook_data), notebook_data, messages
+                    messages.append({
+                        "role": "assistant",
+                        "content": part.strip()
+                    })
+
+        # Return the final result
         yield update_notebook_display(notebook_data), notebook_data, messages
-
-        # Handle code execution
-        if code_cell:
-            notebook_data["cells"][-1]["execution_count"] = code_cell_counter
-
-            exec_result, execution = execute_code(sbx, assistant_response)
-            messages.append({
-                "role": "assistant",
-                "content": assistant_response,
-                "tool_calls": [{
-                    "type": "function",
-                    "function": {
-                        "name": "code_interpreter",
-                        "arguments": {"code": assistant_response}
-                    }
-                }]
-            })
-            messages.append({"role": "ipython", "content": parse_exec_result_llm(execution), "nbformat": parse_exec_result_nb(execution)})
-
-            # Update the last code cell with execution results
-            notebook_data["cells"][-1]["outputs"] = parse_exec_result_nb(execution)
-            update_notebook_display(notebook_data)
-        else:
-            messages.append({"role": "assistant", "content": assistant_response})
-            if tokens[-1] == "<|eot_id|>":
-                break
+        break
 
     yield update_notebook_display(notebook_data), notebook_data, messages
 
@@ -325,11 +304,11 @@ def update_notebook_with_cell(notebook_data, code, output):
         "cell_type": "code",
         "execution_count": None,
         "metadata": {},
-        "source": code.split('\n'),
+        "source": code,
         "outputs": [{
             "output_type": "stream",
             "name": "stdout",
-            "text": str(output).split('\n')
+            "text": str(output)
         }] if output else []
     }
     notebook_data['cells'].append(cell)
@@ -340,7 +319,7 @@ def update_notebook_with_markdown(notebook_data, markdown_text):
     cell = {
         "cell_type": "markdown",
        "metadata": {},
-        "source": markdown_text.split('\n')
+        "source": markdown_text
     }
    notebook_data['cells'].append(cell)
    return notebook_data
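
The new `run_interactive_notebook` parses the decoded completion by splitting on fenced ```python blocks. A self-contained sketch of that splitting step; the `split_response` helper name and the sample text are illustrative, not part of the commit:

import re

def split_response(response_text):
    # The capturing group makes re.split keep the fenced blocks in the output.
    cells = []
    for part in re.split(r'(```python[\s\S]*?```)', response_text):
        if not part.strip():
            continue
        if part.startswith('```python'):
            # Strip the opening and closing fence markers, keep the code.
            code = re.sub(r'```python\n|```', '', part).strip()
            cells.append(("code", code))
        else:
            cells.append(("markdown", part.strip()))
    return cells

print(split_response("Intro text.\n```python\nprint(1 + 1)\n```\nWrap-up."))
# [('markdown', 'Intro text.'), ('code', 'print(1 + 1)'), ('markdown', 'Wrap-up.')]

One caveat: the regex only matches complete ```python ... ``` pairs, so an unterminated fence at the end of a truncated completion falls through to the markdown branch rather than being executed.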