Spaces:

Nirav-Madhani
/

Octo-1.5-Small

Sleeping

App Files Files Community

Nirav Madhani committed 21 days ago

Commit

0558e79

1 Parent(s): ab273a0

Flash server

Browse files

Files changed (5) hide show

Dockerfile +27 -3
app.py +65 -0
init_model.py +9 -0
main.py +13 -6
test_api.py +54 -0

Dockerfile CHANGED Viewed

@@ -1,11 +1,35 @@
-FROM python:3.10
 WORKDIR /app
 RUN git clone https://github.com/octo-models/octo.git
 WORKDIR /app/octo
 RUN pip3 install -e .
 RUN pip3 install -r requirements.txt
 RUN pip3 install --upgrade "jax[cuda11_pip]==0.4.20" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
-RUN pip install scipy==1.10.1
 COPY main.py /app/octo
-CMD ["python", "main.py"]

+FROM python:3.10-slim
+# Install git (needed by the clone step below) and drop the apt package
+# lists in the same layer so they do not end up in the image.
+RUN apt-get update && \
+    apt-get install -y git && \
+    rm -rf /var/lib/apt/lists/*  # Clean up to reduce image size
 WORKDIR /app
+# Clone the octo repository (default branch; no revision is pinned)
 RUN git clone https://github.com/octo-models/octo.git
 WORKDIR /app/octo
+# Install octo itself in editable mode, then the packages it lists in requirements.txt
 RUN pip3 install -e .
 RUN pip3 install -r requirements.txt
+# Pin JAX 0.4.20 using the CUDA 11 wheels from the JAX release index.
+# NOTE(review): app.py and init_model.py both request the CPU platform -
+# confirm the CUDA build is actually required.
 RUN pip3 install --upgrade "jax[cuda11_pip]==0.4.20" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
+RUN pip3 install scipy==1.10.1
+# Install FastAPI and Uvicorn, which serve app.py
+RUN pip3 install fastapi uvicorn
+# Copy and run the model initialization script at build time so the model
+# download is baked into the image instead of happening on first start
+COPY init_model.py /app/octo
+RUN python init_model.py
+# Copy the original standalone script (main.py) and the API module (app.py)
 COPY main.py /app/octo
+COPY app.py /app/octo
+# Document the API port; it matches the --port passed to uvicorn below
+EXPOSE 8000
+# Serve the FastAPI instance `app` from app.py on all interfaces
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]

app.py ADDED Viewed

	@@ -0,0 +1,65 @@

+from octo.model.octo_model import OctoModel
+from PIL import Image
+import numpy as np
+import jax
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+import os
+import io
+import base64
+# Set JAX to use CPU platform (adjust if GPU is needed).
+# JAX picks up the JAX_PLATFORMS environment variable when the jax package is
+# imported, and jax has already been imported above, so the assignment alone
+# may come too late. Applying the same choice through jax.config works as long
+# as no backend has been initialised yet.
+os.environ['JAX_PLATFORMS'] = 'cpu'
+jax.config.update('jax_platforms', 'cpu')
+# Load the model once globally so every request reuses the same instance
+# (assumes the checkpoint is already cached locally)
+model = OctoModel.load_pretrained("hf://rail-berkeley/octo-small-1.5")
+# Initialize FastAPI app
+app = FastAPI(title="Octo Model Inference API")
+# Request body accepted by the /predict endpoint
+class InferenceRequest(BaseModel):
+    # Base64-encoded image string; the /predict handler also accepts a
+    # "data:image...," prefix in front of the payload
+    image_base64: str  # Base64-encoded image string
+    # Language instruction for the model; used when the client omits the field
+    task: str = "pick up the fork"  # Default task
+# Liveness probe: answers with a fixed payload and touches nothing else
+@app.get("/health")
+async def health_check():
+    status_payload = {"status": "healthy"}
+    return status_payload
+# Image decoding helper for the inference endpoint
+def _decode_image(image_base64: str) -> np.ndarray:
+    """Decode a base64 (optionally data-URI) image into a 256x256 RGB array.
+
+    Raises:
+        ValueError: if the payload is not valid base64.
+        OSError: if the decoded bytes are not an image Pillow can open.
+    """
+    # Drop a "data:image/...;base64," prefix; only the first comma separates
+    # the header from the payload.
+    if image_base64.startswith("data:image"):
+        image_base64 = image_base64.partition(",")[2]
+    raw_bytes = base64.b64decode(image_base64)
+    # Force three channels: RGBA, palette or greyscale uploads would otherwise
+    # yield an array whose last dimension is not 3 (or is missing entirely).
+    picture = Image.open(io.BytesIO(raw_bytes)).convert("RGB").resize((256, 256))
+    return np.array(picture)
+
+
+# Inference endpoint
+@app.post("/predict")
+async def predict(request: InferenceRequest):
+    """Run the model on one image/instruction pair and return the actions.
+
+    Returns:
+        {"actions": <nested list>} with the batch dimension removed.
+
+    Raises:
+        HTTPException(400): the image payload cannot be decoded.
+        HTTPException(500): any other failure while running inference.
+    """
+    try:
+        try:
+            img = _decode_image(request.image_base64)
+        except (ValueError, OSError) as e:
+            # Malformed input is the client's fault, not a server error.
+            raise HTTPException(status_code=400, detail=f"Invalid image: {str(e)}") from e
+        # Add batch and time horizon dimensions
+        img = img[np.newaxis, np.newaxis, ...]  # Shape: (1, 1, 256, 256, 3)
+        observation = {
+            "image_primary": img,
+            "timestep_pad_mask": np.array([[True]])
+        }
+        # Create task and predict actions
+        task_obj = model.create_tasks(texts=[request.task])
+        actions = model.sample_actions(
+            observation,
+            task_obj,
+            unnormalization_statistics=model.dataset_statistics["bridge_dataset"]["action"],
+            # Fixed key: identical requests produce identical samples.
+            rng=jax.random.PRNGKey(0)
+        )
+        # Remove the batch dimension and convert to nested lists for JSON
+        actions_list = actions[0].tolist()
+        return {"actions": actions_list}
+    except HTTPException:
+        # Keep the 400 raised above from being re-wrapped as a 500.
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error processing request: {str(e)}") from e

init_model.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import os
+# Set JAX to CPU (consistent with app.py). This has to happen before jax is
+# imported, because JAX picks up JAX_PLATFORMS at import time; the octo import
+# below presumably pulls jax in, so the variable is set first.
+os.environ['JAX_PLATFORMS'] = 'cpu'
+from octo.model.octo_model import OctoModel  # noqa: E402 - must follow the env setup
+# Load the model once so the checkpoint is downloaded and cached
+model = OctoModel.load_pretrained("hf://rail-berkeley/octo-small-1.5")
+print("Model downloaded and cached successfully.")

main.py CHANGED Viewed

@@ -17,10 +17,17 @@ img = np.array(Image.open(requests.get(IMAGE_URL, stream=True).raw).resize((256,
 img = img[np.newaxis,np.newaxis,...]
 observation = {"image_primary": img, "timestep_pad_mask": np.array([[True]])}
 task = model.create_tasks(texts=["pick up the fork"])
-action = model.sample_actions(
-    observation,
-    task,
-    unnormalization_statistics=model.dataset_statistics["bridge_dataset"]["action"],
-    rng=jax.random.PRNGKey(0)
 )
-print(action)

 img = img[np.newaxis,np.newaxis,...]
 observation = {"image_primary": img, "timestep_pad_mask": np.array([[True]])}
 task = model.create_tasks(texts=["pick up the fork"])
+# Sample actions without passing unnormalization_statistics; the result is
+# rescaled by hand below.
+norm_actions = model.sample_actions(observation, task, rng=jax.random.PRNGKey(0))
+norm_actions = norm_actions[0]   # remove batch
+# Undo the normalisation using the bridge_dataset action std and mean.
+# NOTE(review): the call this replaces passed unnormalization_statistics to
+# sample_actions - confirm the manual std/mean rescaling is equivalent.
+actions = (
+    norm_actions * model.dataset_statistics["bridge_dataset"]['action']['std']
+    + model.dataset_statistics["bridge_dataset"]['action']['mean']
 )
+# NOTE(review): this statement rebinds `actions`, discarding the prediction
+# computed just above, and reads `steps` / `step`, which are not defined in the
+# visible part of main.py - confirm they exist earlier in the file, or store
+# this under a different name / remove it.
+actions = np.concatenate(
+        (
+            steps[step+1]['action']['world_vector'],
+            steps[step+1]['action']['rotation_delta'],
+            np.array(steps[step+1]['action']['open_gripper']).astype(np.float32)[None]
+        ), axis=-1
+    )
+print(actions)

test_api.py ADDED Viewed

	@@ -0,0 +1,54 @@

+import requests
+import base64
+from PIL import Image
+import io
+# Where the inference service is expected to listen; change the base if the
+# API runs on a different host/port
+_API_BASE = "http://localhost:8000"
+API_URL = _API_BASE + "/predict"
+# Sample input: the example image URL from main.py and its instruction text
+IMAGE_URL = "https://rail.eecs.berkeley.edu/datasets/bridge_release/raw/bridge_data_v2/datacol2_toykitchen7/drawer_pnp/01/2023-04-19_09-18-15/raw/traj_group0/traj0/images0/im_12.jpg"
+TASK_TEXT = "pick up the fork"
+def test_api(image_url=IMAGE_URL, task=TASK_TEXT, timeout=(10, 300)):
+    """Download an image, send it to the /predict endpoint and print the result.
+
+    Args:
+        image_url: URL of the image to fetch and submit.
+        task: Instruction text sent with the image.
+        timeout: (connect, read) timeout in seconds applied to both HTTP
+            calls, so an unreachable host or a stuck server cannot hang the
+            script forever. The read limit is generous because inference may
+            be slow.
+
+    Errors are reported on stdout rather than raised.
+    """
+    try:
+        # Download image from URL; reading .content (instead of the raw
+        # stream) gives Pillow the fully decoded response body.
+        response = requests.get(image_url, timeout=timeout)
+        response.raise_for_status()  # Check for HTTP errors
+        # JPEG cannot store alpha or palette data, so normalise to RGB first.
+        img = Image.open(io.BytesIO(response.content)).convert("RGB").resize((256, 256))
+        # Convert image to base64
+        buffer = io.BytesIO()
+        img.save(buffer, format="JPEG")
+        base64_string = base64.b64encode(buffer.getvalue()).decode("utf-8")
+        # Prepare payload for API
+        payload = {
+            "image_base64": base64_string,
+            "task": task
+        }
+        # Send POST request to API
+        api_response = requests.post(API_URL, json=payload, timeout=timeout)
+        api_response.raise_for_status()  # Check for API errors
+        # Print the result
+        result = api_response.json()
+        print(f"Task: {task}")
+        print(f"Image URL: {image_url}")
+        print(f"Predicted Actions: {result['actions']}")
+    except requests.exceptions.RequestException as e:
+        print(f"Error fetching image or calling API: {e}")
+    except Exception as e:
+        print(f"Unexpected error: {e}")
+if __name__ == "__main__":
+    # First run: rely on the function defaults (same inputs as main.py)
+    test_api()
+    # Second run (optional): explicit arguments - substitute another valid
+    # image URL and task here if desired
+    print("\nTesting with another URL and task:")
+    test_api(image_url=IMAGE_URL, task=TASK_TEXT)