Terry Zhuo committed
Commit 371a048 · 1 Parent(s): c32a030
Files changed (2):
  1. app.py +29 -104
  2. utils.py +61 -82
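
Summary of the change: app.py drops its ad-hoc local generation path (the `load_model_and_tokenizer` and `parse_model_response` helpers and the manual `MAX_TURNS` loop) in favor of loading the Qwen/Qwen2.5-Coder-7B-Instruct base model with a PEFT LoRA adapter and delegating the agent loop to `run_interactive_notebook`; utils.py correspondingly replaces the `InferenceClient` streaming path with local `model.generate` calls plus fenced-code-block parsing, and raises `MAX_TURNS` from 4 to 10.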
app.py CHANGED
@@ -3,9 +3,9 @@ import gradio as gr
 from gradio.utils import get_space
 from e2b_code_interpreter import Sandbox
 from pathlib import Path
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from peft import PeftModel
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import json
-import re
 
 if not get_space():
     try:
@@ -20,12 +20,9 @@ from utils import (
     run_interactive_notebook,
     create_base_notebook,
     update_notebook_display,
-    update_notebook_with_cell,
-    update_notebook_with_markdown,
 )
 
 E2B_API_KEY = os.environ["E2B_API_KEY"]
-HF_TOKEN = os.environ["HF_TOKEN"]
 DEFAULT_MAX_TOKENS = 512
 SANDBOXES = {}
 TMP_DIR = './tmp/'
@@ -39,42 +36,6 @@ with open(TMP_DIR+"jupyter-agent.ipynb", 'w', encoding='utf-8') as f:
 with open("ds-system-prompt.txt", "r") as f:
     DEFAULT_SYSTEM_PROMPT = f.read()
 
-# Add this constant at the top with other constants
-MAX_TURNS = 10
-
-# Replace the client initialization with local model loading
-def load_model_and_tokenizer(model_name="bigcomputer/jupycoder-7b-lora-350"):
-    if model_name == "bigcomputer/jupycoder-7b-lora-350":
-        model = AutoModelForCausalLM.from_pretrained(
-            model_name,
-            device_map="auto"
-        )
-        tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-Coder-7B-Instruct")
-    else:
-        model = AutoModelForCausalLM.from_pretrained(
-            model_name,
-            device_map="auto"
-        )
-        tokenizer = AutoTokenizer.from_pretrained(model_name)
-    return model, tokenizer
-
-# Function to extract code and text from model response
-def parse_model_response(response_text):
-    cells = []
-    # Split by code blocks
-    parts = re.split(r'(```python[\s\S]*?```)', response_text)
-
-    for part in parts:
-        if part.strip():
-            if part.startswith('```python'):
-                # Extract code without the markers
-                code = re.sub(r'```python\n|```', '', part).strip()
-                cells.append({"type": "code", "content": code})
-            else:
-                # Regular text becomes markdown
-                cells.append({"type": "markdown", "content": part.strip()})
-
-    return cells
 
 def execute_jupyter_agent(
     system_prompt, user_input, max_new_tokens, model_name, files, message_history, request: gr.Request
@@ -87,9 +48,18 @@ def execute_jupyter_agent(
     os.makedirs(save_dir, exist_ok=True)
     save_dir = os.path.join(save_dir, 'jupyter-agent.ipynb')
 
-    model, tokenizer = load_model_and_tokenizer(model_name)
+    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-Coder-7B-Instruct")
+    model = AutoModelForCausalLM.from_pretrained(
+        "Qwen/Qwen2.5-Coder-7B-Instruct", torch_dtype='auto'
+    ).eval()
+    # Load the LoRA adapter and move the model to GPU
+    model = PeftModel.from_pretrained(
+        model,
+        model_name,
+        device_map="auto",  # Automatically allocate model layers to available devices
+        trust_remote_code=True
+    ).eval()
 
-    # Handle file uploads
     filenames = []
    if files is not None:
         for filepath in files:
@@ -99,73 +69,28 @@ def execute_jupyter_agent(
             sbx.files.write(filepath.name, file)
             filenames.append(filepath.name)
 
-    # Initialize conversation
+    # Initialize message_history if it doesn't exist
     if len(message_history) == 0:
-        message_history.append({
-            "role": "system",
-            "content": system_prompt.format("- " + "\n- ".join(filenames))
-        })
+        message_history.append(
+            {
+                "role": "system",
+                "content": system_prompt.format("- " + "\n- ".join(filenames)),
+            }
+        )
     message_history.append({"role": "user", "content": user_input})
 
-    # Create initial notebook
-    notebook_data = create_base_notebook([])
-    turn_count = 0
-
-    while turn_count < MAX_TURNS:
-        turn_count += 1
-
-        # Generate response
-        input_text = "\n".join([msg["content"] for msg in message_history])
-        inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
-
-        outputs = model.generate(
-            **inputs,
-            max_new_tokens=max_new_tokens,
-            do_sample=True,
-            temperature=0.7,
-        )
-        response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-        # Parse response into cells
-        cells = parse_model_response(response_text)
-
-        # Process each cell
-        has_code = False
-        for cell in cells:
-            if cell["type"] == "code":
-                has_code = True
-                # Execute code cell
-                result = sbx.python.run(cell["content"])
-                # Add code cell and output to notebook
-                notebook_data = update_notebook_with_cell(notebook_data, cell["content"], result)
-                # Add execution result to message history
-                message_history.append({
-                    "role": "assistant",
-                    "content": cell["content"]
-                })
-                message_history.append({
-                    "role": "user",
-                    "content": f"Execution result:\n{result}"
-                })
-            else:
-                # Add markdown cell to notebook
-                notebook_data = update_notebook_with_markdown(notebook_data, cell["content"])
-                message_history.append({
-                    "role": "assistant",
-                    "content": cell["content"]
-                })
-
-        # Update display after each cell
-        notebook_html = update_notebook_display(notebook_data)
-        yield notebook_html, message_history, save_dir
-
-        # If no code was generated or we've reached max turns, stop
-        if not has_code or turn_count >= MAX_TURNS:
-            break
-
-    # Save final notebook
+    print("history:", message_history)
+
+    for notebook_html, notebook_data, messages in run_interactive_notebook(
+        model, tokenizer, message_history, sbx, max_new_tokens=max_new_tokens
+    ):
+        message_history = messages
+
+        yield notebook_html, message_history, TMP_DIR+"jupyter-agent.ipynb"
+
     with open(save_dir, 'w', encoding='utf-8') as f:
         json.dump(notebook_data, f, indent=2)
+    yield notebook_html, message_history, save_dir
 
 def clear(msg_state):
     msg_state = []
@@ -254,4 +179,4 @@ with gr.Blocks() as demo:
     """
     )
 
-demo.launch(ssr_mode=False)
+demo.launch(share=True, ssr_mode=False)
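
For reference, the loading code app.py now uses is the standard PEFT recipe: load the frozen base model, then wrap it with the LoRA adapter. A minimal standalone sketch of that pattern follows; the adapter id is the default `model_name` from the removed helper, and the variable names are illustrative:

from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "Qwen/Qwen2.5-Coder-7B-Instruct"
adapter_id = "bigcomputer/jupycoder-7b-lora-350"  # default model_name in the old helper

# The tokenizer always comes from the base model; a LoRA adapter only adds weight deltas.
tokenizer = AutoTokenizer.from_pretrained(base_id)
base_model = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype="auto")

# PeftModel keeps the base weights frozen and applies the low-rank adapter on top.
model = PeftModel.from_pretrained(base_model, adapter_id).eval()

Passing device_map="auto" at load time, as the diff does, additionally shards the layers across whatever devices are available.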
utils.py CHANGED
@@ -5,18 +5,18 @@ from huggingface_hub import InferenceClient
 from e2b_code_interpreter import Sandbox
 from transformers import AutoTokenizer
 from traitlets.config import Config
+import re
 
 config = Config()
 html_exporter = HTMLExporter(config=config, template_name="classic")
 
+# Constants
+MAX_TURNS = 10
+
 with open("llama3_template.jinja", "r") as f:
     llama_template = f.read()
 
-
-MAX_TURNS = 4
-
-
 def parse_exec_result_nb(execution):
     """Convert an E2B Execution object to Jupyter notebook cell output format"""
     outputs = []
@@ -219,103 +219,82 @@ def update_notebook_display(notebook_data):
     notebook_body = notebook_body.replace(bad_html_bad, "")
     return notebook_body
 
-def run_interactive_notebook(client, model, tokenizer, messages, sbx, max_new_tokens=512):
+def run_interactive_notebook(model, tokenizer, messages, sbx, max_new_tokens=512):
     notebook_data, code_cell_counter = create_base_notebook(messages)
     turns = 0
 
-    #code_cell_counter = 0
     while turns <= MAX_TURNS:
         turns += 1
-        input_tokens = tokenizer.apply_chat_template(
-            messages,
-            chat_template=llama_template,
-            builtin_tools=["code_interpreter"],
-            add_generation_prompt=True
+        # Generate response using the model
+        text = tokenizer.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True
         )
-        model_input = tokenizer.decode(input_tokens)
+        model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
 
-        print(f"Model input:\n{model_input}\n{'='*80}")
-
-        response_stream = client.text_generation(
-            model=model,
-            prompt=model_input,
-            details=True,
-            stream=True,
-            do_sample=True,
-            repetition_penalty=1.1,
-            temperature=0.8,
-            max_new_tokens=max_new_tokens,
+        generated_ids = model.generate(
+            **model_inputs,
+            max_new_tokens=max_new_tokens
         )
+        generated_ids = [
+            output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+        ]
+        response_stream = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
 
-        assistant_response = ""
-        tokens = []
+        # Process the full response at once
+        parts = re.split(r'(```python[\s\S]*?```)', response_stream)
 
-        code_cell = False
-        for i, chunk in enumerate(response_stream):
-            if not chunk.token.special:
-                content = chunk.token.text
-            else:
-                content = ""
-            tokens.append(chunk.token.text)
-            assistant_response += content
-
-            if len(tokens)==1:
-                create_cell=True
-                code_cell = "<|python_tag|>" in tokens[0]
-                if code_cell:
-                    code_cell_counter +=1
-            else:
-                create_cell = False
-
-            # Update notebook in real-time
-            if create_cell:
-                if "<|python_tag|>" in tokens[0]:
-                    notebook_data["cells"].append({
+        for part in parts:
+            if part.strip():
+                if part.startswith('```python'):
+                    # Extract code without the markers
+                    code = re.sub(r'```python\n|```', '', part).strip()
+                    code_cell_counter += 1
+
+                    # Add code cell
+                    notebook_data["cells"].append({
                         "cell_type": "code",
-                        "execution_count": None,
+                        "execution_count": code_cell_counter,
                         "metadata": {},
-                        "source": assistant_response,
+                        "source": code,
                         "outputs": []
                     })
+
+                    # Execute code
+                    exec_result, execution = execute_code(sbx, code)
+                    messages.append({
+                        "role": "assistant",
+                        "content": code,
+                        "tool_calls": [{
+                            "type": "function",
+                            "function": {
+                                "name": "code_interpreter",
+                                "arguments": {"code": code}
+                            }
+                        }]
+                    })
+                    messages.append({
+                        "role": "ipython",
+                        "content": parse_exec_result_llm(execution),
+                        "nbformat": parse_exec_result_nb(execution)
+                    })
+
+                    # Update cell with execution results
+                    notebook_data["cells"][-1]["outputs"] = parse_exec_result_nb(execution)
                 else:
+                    # Add markdown cell for non-code content
                     notebook_data["cells"].append({
                         "cell_type": "markdown",
                         "metadata": {},
-                        "source": assistant_response
+                        "source": part.strip()
                     })
-            else:
-                notebook_data["cells"][-1]["source"] = assistant_response
-            if i%16 == 0:
-                yield update_notebook_display(notebook_data), notebook_data, messages
+                    messages.append({
+                        "role": "assistant",
+                        "content": part.strip()
+                    })
+
+        # Return the final result
         yield update_notebook_display(notebook_data), notebook_data, messages
-
-        # Handle code execution
-        if code_cell:
-            notebook_data["cells"][-1]["execution_count"] = code_cell_counter
-
-            exec_result, execution = execute_code(sbx, assistant_response)
-            messages.append({
-                "role": "assistant",
-                "content": assistant_response,
-                "tool_calls": [{
-                    "type": "function",
-                    "function": {
-                        "name": "code_interpreter",
-                        "arguments": {"code": assistant_response}
-                    }
-                }]
-            })
-            messages.append({"role": "ipython", "content": parse_exec_result_llm(execution), "nbformat": parse_exec_result_nb(execution)})
-
-            # Update the last code cell with execution results
-            notebook_data["cells"][-1]["outputs"] = parse_exec_result_nb(execution)
-            update_notebook_display(notebook_data)
-        else:
-            messages.append({"role": "assistant", "content": assistant_response})
-            if tokens[-1] == "<|eot_id|>":
-                break
+        break
 
     yield update_notebook_display(notebook_data), notebook_data, messages
 
@@ -325,11 +304,11 @@ def update_notebook_with_cell(notebook_data, code, output):
         "cell_type": "code",
         "execution_count": None,
         "metadata": {},
-        "source": code.split('\n'),
+        "source": code,
         "outputs": [{
             "output_type": "stream",
             "name": "stdout",
-            "text": str(output).split('\n')
+            "text": str(output)
         }] if output else []
     }
     notebook_data['cells'].append(cell)
@@ -340,7 +319,7 @@ def update_notebook_with_markdown(notebook_data, markdown_text):
     cell = {
         "cell_type": "markdown",
        "metadata": {},
-        "source": markdown_text.split('\n')
+        "source": markdown_text
     }
    notebook_data['cells'].append(cell)
    return notebook_data
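
The new `run_interactive_notebook` parses the decoded completion by splitting on fenced ```python blocks. A self-contained sketch of that splitting step; the `split_response` helper name and the sample text are illustrative, not part of the commit:

import re

def split_response(response_text):
    # The capturing group makes re.split keep the fenced blocks in the output.
    cells = []
    for part in re.split(r'(```python[\s\S]*?```)', response_text):
        if not part.strip():
            continue
        if part.startswith('```python'):
            # Strip the opening and closing fence markers, keep the code.
            code = re.sub(r'```python\n|```', '', part).strip()
            cells.append(("code", code))
        else:
            cells.append(("markdown", part.strip()))
    return cells

print(split_response("Intro text.\n```python\nprint(1 + 1)\n```\nWrap-up."))
# [('markdown', 'Intro text.'), ('code', 'print(1 + 1)'), ('markdown', 'Wrap-up.')]

One caveat: the regex only matches complete ```python ... ``` pairs, so an unterminated fence at the end of a truncated completion falls through to the markdown branch rather than being executed.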