Spaces:
Runtime error
Runtime error
import nbformat | |
from nbformat.v4 import new_notebook, new_markdown_cell, new_code_cell | |
from nbconvert import HTMLExporter | |
from huggingface_hub import InferenceClient | |
from e2b_code_interpreter import Sandbox | |
from transformers import AutoTokenizer | |
from traitlets.config import Config | |
import re | |
# Shared nbconvert exporter that renders notebook nodes to HTML using the
# "classic" template; created once at import time.
config = Config()
html_exporter = HTMLExporter(config=config, template_name="classic")

# Constants
# Turn limit read by run_interactive_notebook's generation loop.
MAX_TURNS = 10

# Chat template text, read once at import time. The encoding is pinned so the
# decoded text does not depend on the platform's default locale.
with open("llama3_template.jinja", "r", encoding="utf-8") as f:
    llama_template = f.read()
# (result attribute, MIME type) pairs copied verbatim into an output's data
# bundle, in the order they are inserted. ``text`` is handled separately
# because it is wrapped in a list.
_RESULT_MIME_FIELDS = (
    ('html', 'text/html'),
    ('png', 'image/png'),
    ('svg', 'image/svg+xml'),
    ('jpeg', 'image/jpeg'),
    ('pdf', 'application/pdf'),
    ('latex', 'text/latex'),
    ('json', 'application/json'),
    ('javascript', 'application/javascript'),
)


def parse_exec_result_nb(execution):
    """Convert an E2B Execution object to Jupyter notebook cell output format.

    Args:
        execution: Object exposing ``logs.stdout`` and ``logs.stderr``
            (sequences of strings), ``error`` (falsy, or an object with
            ``name``, ``value`` and a newline-separated ``traceback`` string),
            ``results`` (iterable of result objects) and ``execution_count``.

    Returns:
        list[dict]: Output dicts in this order: stdout stream, stderr stream,
        error, then one entry per result that carries at least one
        representation. Results with no representation are dropped.
    """
    outputs = []

    for stream_name in ('stdout', 'stderr'):
        chunks = getattr(execution.logs, stream_name)
        if chunks:
            outputs.append({
                'output_type': 'stream',
                'name': stream_name,
                'text': ''.join(chunks),
            })

    error = execution.error
    if error:
        outputs.append({
            'output_type': 'error',
            'ename': error.name,
            'evalue': error.value,
            'traceback': error.traceback.split('\n'),
        })

    for result in execution.results:
        output = {
            'output_type': 'execute_result' if result.is_main_result else 'display_data',
            'metadata': {},
            'data': {},
        }
        data = output['data']

        if result.text:
            data['text/plain'] = [result.text]  # Array for text/plain
        for attr, mime_type in _RESULT_MIME_FIELDS:
            value = getattr(result, attr)
            if value:
                data[mime_type] = value

        if result.is_main_result:
            # nbformat v4 requires ``execution_count`` on every execute_result
            # (null is allowed), so the key is emitted even when the count is
            # None. display_data outputs must not carry it.
            output['execution_count'] = execution.execution_count

        if data:
            outputs.append(output)

    return outputs
# HTML wrapper for a system message: a collapsible <details> element with a
# rotating arrow in its summary. The single "{}" placeholder is filled with
# str.format in create_base_notebook, which is why the literal CSS braces in
# the <style> section are doubled.
system_template = """\
<details>
<summary style="display: flex; align-items: center;">
<div class="alert alert-block alert-info" style="margin: 0; width: 100%;">
<b>System: <span class="arrow">▶</span></b>
</div>
</summary>
<div class="alert alert-block alert-info">
{}
</div>
</details>
<style>
details > summary .arrow {{
display: inline-block;
transition: transform 0.2s;
}}
details[open] > summary .arrow {{
transform: rotate(90deg);
}}
</style>
"""
# HTML wrapper for a user message; "{}" is filled with str.format in
# create_base_notebook.
user_template = """<div class="alert alert-block alert-success">
<b>User:</b> {}
</div>
"""
# Markdown/HTML banner used as the source of the first cell of every notebook
# built by create_base_notebook.
header_message = """<p align="center">
<img src="https://huggingface.co/spaces/lvwerra/jupyter-agent/resolve/main/jupyter-agent.png" />
</p>
<p style="text-align:center;">Let a LLM agent write and execute code inside a notebook!</p>"""
# CSS snippet that update_notebook_display removes from the exported HTML via
# an exact str.replace match.
# NOTE(review): the replace only has an effect if this literal matches the
# exporter's output character for character, including whitespace - confirm.
bad_html_bad = """input[type="file"] {
display: block;
}"""
def create_base_notebook(messages):
    """Build a notebook dict that mirrors a chat history.

    The notebook always starts with a markdown cell holding ``header_message``.
    An empty history additionally gets one blank code cell. Each message is
    then mapped by role:

    * ``system`` / ``user``: markdown cell rendered through the matching HTML
      template, with newlines replaced by ``<br>``.
    * ``assistant`` with a ``tool_calls`` key: code cell holding the content.
    * ``assistant`` without ``tool_calls``: markdown cell holding the content.
    * ``ipython``: no new cell; its ``nbformat`` value becomes the outputs of
      the most recently added cell, which also receives the next execution
      number.

    Args:
        messages: Sequence of message dicts with at least ``role``, plus
            ``content`` or ``nbformat`` depending on the role.

    Returns:
        tuple: ``(notebook_dict, number_of_ipython_messages_seen)``.

    Raises:
        ValueError: If a message has any other role.
    """
    def markdown_cell(source):
        return {
            "cell_type": "markdown",
            "metadata": {},
            "source": source
        }

    def code_cell(source):
        return {
            "cell_type": "code",
            "execution_count": None,
            "metadata": {},
            "source": source,
            "outputs": []
        }

    cells = [markdown_cell(header_message)]
    if len(messages) == 0:
        # Give an empty conversation a blank code cell to display.
        cells.append(code_cell(""))

    executed_cells = 0
    for message in messages:
        role = message["role"]
        if role in ("system", "user"):
            wrapper = system_template if role == "system" else user_template
            html_body = message["content"].replace('\n', '<br>')
            cells.append(markdown_cell(wrapper.format(html_body)))
        elif role == "assistant":
            if "tool_calls" in message:
                cells.append(code_cell(message["content"]))
            else:
                cells.append(markdown_cell(message["content"]))
        elif role == "ipython":
            # Execution results attach to whichever cell was added last.
            executed_cells += 1
            latest = cells[-1]
            latest["outputs"] = message["nbformat"]
            latest["execution_count"] = executed_cells
        else:
            raise ValueError(message)

    notebook = {
        "metadata": {
            "kernel_info": {"name": "python3"},
            "language_info": {
                "name": "python",
                "version": "3.12",
            },
        },
        "nbformat": 4,
        "nbformat_minor": 0,
        "cells": cells
    }
    return notebook, executed_cells
def execute_code(sbx, code):
    """Run ``code`` through ``sbx.run_code`` and flatten the result to text.

    Every payload passed to the stdout callback is printed with a ``stdout:``
    prefix while the code runs.

    Args:
        sbx: Object exposing ``run_code(code, on_stdout=...)``.
        code: Source text to execute.

    Returns:
        tuple: ``(text, execution)``. ``text`` is the newline-joined stdout
        chunks, then the newline-joined stderr chunks, then the error
        traceback if there is one, concatenated with no separator between the
        three sections. ``execution`` is the object returned by ``run_code``.
    """
    def echo_stdout(data):
        print('stdout:', data)

    execution = sbx.run_code(code, on_stdout=echo_stdout)
    logs = execution.logs
    sections = (
        "\n".join(logs.stdout) if len(logs.stdout) > 0 else "",
        "\n".join(logs.stderr) if len(logs.stderr) > 0 else "",
        execution.error.traceback if execution.error is not None else "",
    )
    return "".join(sections), execution
def parse_exec_result_llm(execution):
    """Flatten an execution result into one plain-text string.

    Args:
        execution: Object exposing ``logs.stdout`` / ``logs.stderr`` (sequences
            of strings) and ``error`` (``None`` or an object with a
            ``traceback`` string).

    Returns:
        str: Newline-joined stdout chunks, then newline-joined stderr chunks,
        then the error traceback if present. The sections are concatenated
        directly, with no separator between them.
    """
    segments = []
    logs = execution.logs
    if len(logs.stdout) > 0:
        segments.append("\n".join(logs.stdout))
    if len(logs.stderr) > 0:
        segments.append("\n".join(logs.stderr))
    if execution.error is not None:
        segments.append(execution.error.traceback)
    return "".join(segments)
def update_notebook_display(notebook_data):
    """Render a notebook dict to an HTML string.

    The dict is converted to a notebook node, exported with the module-level
    ``html_exporter``, and every occurrence of the ``bad_html_bad`` CSS snippet
    is removed from the result.

    Args:
        notebook_data: Notebook as a plain dict.

    Returns:
        str: The exported HTML body.
    """
    node = nbformat.from_dict(notebook_data)
    # The exporter returns (body, resources); only the body is needed.
    html, _resources = html_exporter.from_notebook_node(node)
    return html.replace(bad_html_bad, "")
def run_interactive_notebook(model, tokenizer, messages, sbx, max_new_tokens=512):
    """Drive the generate/execute loop and yield the rendered notebook.

    Each turn renders ``messages`` with the tokenizer's chat template,
    generates a completion and splits it on ```python fenced blocks:

    * A fenced block is run through ``execute_code``. It is recorded as a code
      cell with its outputs, an assistant message carrying a
      ``code_interpreter`` tool call, and an ``ipython`` message holding the
      execution result.
    * Any other non-blank text becomes a markdown cell and a plain assistant
      message.

    Another turn is started only when the current turn executed code, so the
    model can read the execution output before answering. At most
    ``MAX_TURNS`` generations are performed per call.

    Args:
        model: Object exposing ``device`` and ``generate(**inputs,
            max_new_tokens=...)``.
        tokenizer: Object exposing ``apply_chat_template``, ``batch_decode``
            and a call interface that returns inputs with ``input_ids`` and
            ``.to(device)``.
        messages: List of chat message dicts; extended in place.
        sbx: Sandbox handle forwarded to ``execute_code``.
        max_new_tokens: Generation budget for each turn.

    Yields:
        tuple: ``(html, notebook_data, messages)`` after every turn, and once
        more after the loop ends.
    """
    notebook_data, code_cell_counter = create_base_notebook(messages)

    for _ in range(MAX_TURNS):
        # Generate response using the model
        text = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=max_new_tokens
        )
        # Drop the prompt tokens so only the newly generated tail is decoded.
        generated_ids = [
            output_ids[len(input_ids):]
            for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
        ]
        response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

        # Process the full response at once. The capturing group keeps the
        # fenced blocks in the split result, interleaved with the prose.
        executed_code = False
        for part in re.split(r'(```python[\s\S]*?```)', response):
            if not part.strip():
                continue

            if part.startswith('```python'):
                # Extract code without the markers
                code = re.sub(r'```python\n|```', '', part).strip()
                code_cell_counter += 1

                # Execute code
                _, execution = execute_code(sbx, code)
                nb_outputs = parse_exec_result_nb(execution)
                executed_code = True

                # Add code cell together with its execution results
                notebook_data["cells"].append({
                    "cell_type": "code",
                    "execution_count": code_cell_counter,
                    "metadata": {},
                    "source": code,
                    "outputs": nb_outputs
                })

                messages.append({
                    "role": "assistant",
                    "content": code,
                    "tool_calls": [{
                        "type": "function",
                        "function": {
                            "name": "code_interpreter",
                            "arguments": {"code": code}
                        }
                    }]
                })
                messages.append({
                    "role": "ipython",
                    "content": parse_exec_result_llm(execution),
                    "nbformat": nb_outputs
                })
            else:
                # Add markdown cell for non-code content
                prose = part.strip()
                notebook_data["cells"].append({
                    "cell_type": "markdown",
                    "metadata": {},
                    "source": prose
                })
                messages.append({
                    "role": "assistant",
                    "content": prose
                })

        yield update_notebook_display(notebook_data), notebook_data, messages

        # A turn without code is the model's final answer; there is no new
        # execution output to feed back, so stop here.
        if not executed_code:
            break

    # Return the final result
    yield update_notebook_display(notebook_data), notebook_data, messages
def update_notebook_with_cell(notebook_data, code, output):
    """Append a code cell, with optional stdout output, to the notebook.

    Args:
        notebook_data: Notebook dict with a ``cells`` list; modified in place.
        code: Source text for the new cell.
        output: Value shown as the cell's stdout stream after ``str()``
            conversion. Any falsy value produces a cell with no outputs.

    Returns:
        The same ``notebook_data`` object that was passed in.
    """
    outputs = []
    if output:
        outputs.append({
            "output_type": "stream",
            "name": "stdout",
            "text": str(output)
        })

    notebook_data['cells'].append({
        "cell_type": "code",
        "execution_count": None,
        "metadata": {},
        "source": code,
        "outputs": outputs
    })
    return notebook_data
def update_notebook_with_markdown(notebook_data, markdown_text):
    """Append a markdown cell to the notebook.

    Args:
        notebook_data: Notebook dict with a ``cells`` list; modified in place.
        markdown_text: Source for the new markdown cell.

    Returns:
        The same ``notebook_data`` object that was passed in.
    """
    notebook_data['cells'].append(
        {"cell_type": "markdown", "metadata": {}, "source": markdown_text}
    )
    return notebook_data