# Azure Machine Learning Deployment Guide
This guide provides step-by-step instructions for deploying the Image Description application to Azure Machine Learning.
## Prerequisites
- Azure subscription
- Azure CLI installed and configured, with the `ml` extension (see the setup commands below)
- Azure Machine Learning workspace
- The source code from this repository
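If the `ml` extension is not installed yet, a minimal setup sketch (assuming the Azure CLI itself is already installed and `<your-subscription-id>` is replaced with your own subscription) looks like this:
```bash
# Sign in and select the subscription that will host the resources
az login
az account set --subscription "<your-subscription-id>"

# Add the Azure Machine Learning CLI extension used by the `az ml` commands in this guide
az extension add --name ml
```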
## Step 1: Set Up Azure Machine Learning
1. Create a Resource Group (if you don't have one):
```bash
az group create --name image-descriptor-rg --location eastus
```
2. Create an Azure Machine Learning workspace:
```bash
az ml workspace create --name image-descriptor-ws \
  --resource-group image-descriptor-rg \
  --location eastus
```
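Optionally, set CLI defaults so you do not have to repeat the workspace and resource group on every command (the commands in this guide still pass them explicitly):
```bash
# Set default resource group and workspace for subsequent az ml commands
az configure --defaults group=image-descriptor-rg workspace=image-descriptor-ws
```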
## Step 2: Create a Compute Cluster
Create a GPU-enabled compute cluster for training jobs. (The managed online endpoint in Step 6 provisions its own compute from `instance_type`, so this cluster is optional if you only need inference.)
```bash
az ml compute create --name gpu-cluster \
  --workspace-name image-descriptor-ws \
  --resource-group image-descriptor-rg \
  --type AmlCompute \
  --min-instances 0 \
  --max-instances 1 \
  --size Standard_NC6s_v3
```
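If provisioning fails, a common cause is missing quota for the NCSv3 VM family in the chosen region; request a quota increase or pick another GPU size. You can confirm the cluster and its state with:
```bash
# Show the cluster definition and provisioning state
az ml compute show --name gpu-cluster \
  --workspace-name image-descriptor-ws \
  --resource-group image-descriptor-rg
```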
## Step 3: Prepare Environment Configuration
Create an `environment.yml` file to define dependencies:
```yaml
name: image_descriptor_env
channels:
  - pytorch
  - nvidia
  - conda-forge
  - defaults
dependencies:
  - python=3.9
  - pip=23.0
  - pytorch=2.0.0
  - torchvision=0.15.0
  - pytorch-cuda=11.8  # pulls the CUDA-enabled PyTorch build (requires the nvidia channel)
  - pip:
    - transformers>=4.45.0  # Qwen2-VL support was added in transformers 4.45
    - accelerate>=0.25.0
    - bitsandbytes>=0.41.0
    - safetensors>=0.4.0
    - flask>=2.3.2
    - flask-cors>=4.0.0
    - gunicorn>=21.2.0
    - pillow>=10.0.0
    - matplotlib>=3.7.0
    - python-dotenv>=1.0.0
    - azureml-core>=1.48.0
    - azureml-defaults>=1.48.0
    - inference-schema>=1.4.1
```
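Before using this file in a deployment, you can optionally sanity-check that the dependencies resolve by building the environment on a local Linux machine (a sketch assuming conda is installed; the GPU packages may not resolve on other platforms):
```bash
# Build the environment locally to verify the dependency set resolves
conda env create -f environment.yml
conda activate image_descriptor_env
python -c "import torch, transformers; print(torch.__version__, transformers.__version__)"
```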
## Step 4: Create a Model Entry Script
Create a file called `score.py` to handle Azure ML model inference:
```python
import json
import io
import base64
import logging
import torch
from PIL import Image
from transformers import AutoProcessor, BitsAndBytesConfig, Qwen2VLForConditionalGeneration

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Global variables
model = None
processor = None

def init():
    """Initialize the model when the service starts"""
    global model, processor

    logger.info("Loading model...")
    # Instruction-tuned variant so the model follows the description prompts
    model_id = "Qwen/Qwen2-VL-7B-Instruct"

    # The processor bundles the image preprocessor and the tokenizer
    processor = AutoProcessor.from_pretrained(model_id)

    # Load the model with 4-bit quantization to reduce GPU memory requirements
    model = Qwen2VLForConditionalGeneration.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16,
        quantization_config=BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
        ),
        device_map="auto",
    )

    logger.info("Model loaded successfully")
def _generate(image, prompt, max_new_tokens):
    """Generate one description for an image/prompt pair."""
    # Build a chat-formatted prompt that includes the image placeholder token
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": prompt},
            ],
        }
    ]
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = processor(text=[text], images=[image], return_tensors="pt").to(model.device)

    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=False)

    # Decode only the newly generated tokens (drop the prompt portion)
    generated = output[:, inputs["input_ids"].shape[1]:]
    return processor.batch_decode(generated, skip_special_tokens=True)[0].strip()

def run(raw_data):
    """Process an image and generate descriptions

    Args:
        raw_data: A JSON string containing the image as base64 encoded data
            (key 'image_data') or a URL (key 'image_url')

    Returns:
        A JSON string containing the descriptions
    """
    try:
        # Parse input
        data = json.loads(raw_data)

        # Get the image data (from base64 or URL)
        if 'image_data' in data:
            image_bytes = base64.b64decode(data['image_data'])
            image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
            logger.info("Loaded image from base64 data")
        elif 'image_url' in data:
            # Handle image URLs (for Azure Storage or public URLs)
            from urllib.request import urlopen
            with urlopen(data['image_url']) as response:
                image_bytes = response.read()
            image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
            logger.info(f"Loaded image from URL: {data['image_url']}")
        else:
            return json.dumps({"error": "No image data or URL provided"})

        # Basic description
        basic_description = _generate(
            image, "Describe this image briefly.", max_new_tokens=150
        )

        # Detailed description
        detailed_description = _generate(
            image,
            "Analyze this image in detail. Describe the main elements, any text visible, "
            "the colors, and the overall composition.",
            max_new_tokens=300,
        )

        # Technical analysis
        technical_analysis = _generate(
            image,
            "What can you tell me about the technical aspects of this image?",
            max_new_tokens=200,
        )

        # Return the results
        return json.dumps({
            "success": True,
            "basic_description": basic_description,
            "detailed_description": detailed_description,
            "technical_analysis": technical_analysis
        })

    except Exception as e:
        logger.error(f"Error processing image: {str(e)}", exc_info=True)
        return json.dumps({"error": f"Error generating description: {str(e)}"})
```
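Before registering and deploying, it can save a debugging round-trip to exercise `score.py` locally. A minimal smoke test, assuming a local GPU with enough memory and a sample image at `data_temp/page_2.png` (the same file used in Step 7):
```bash
# Run init() and run() once against a local image to catch import or model-loading errors early
python -c "
import base64, json
import score

score.init()
with open('data_temp/page_2.png', 'rb') as f:
    payload = json.dumps({'image_data': base64.b64encode(f.read()).decode('utf-8')})
print(score.run(payload))
"
```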
## Step 5: Register the Model
1. Create a model.yml file:
```yaml
$schema: https://azuremlschemas.azureedge.net/latest/model.schema.json
name: qwen-vl-image-descriptor
version: 1
description: Qwen2-VL model for image description
path: .
```
2. Register the model:
```bash
az ml model create --file model.yml \
  --workspace-name image-descriptor-ws \
  --resource-group image-descriptor-rg
```
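To confirm the registration succeeded:
```bash
# Show the registered model and its version
az ml model show --name qwen-vl-image-descriptor --version 1 \
  --workspace-name image-descriptor-ws \
  --resource-group image-descriptor-rg
```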
## Step 6: Deploy as an Online Endpoint
1. Create an endpoint.yml file (endpoint names must be unique within an Azure region, so change the name if it is already taken):
```yaml
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineEndpoint.schema.json
name: image-descriptor-endpoint
description: Endpoint for image description
auth_mode: key
```
2. Create a deployment.yml file (the `code_configuration` section is what tells Azure ML to use the `score.py` script from Step 4):
```yaml
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
name: qwen-vl-deployment
endpoint_name: image-descriptor-endpoint
model: azureml:qwen-vl-image-descriptor:1
code_configuration:
  code: .
  scoring_script: score.py
environment:
  conda_file: environment.yml
  image: mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.6-cudnn8-ubuntu20.04:latest
instance_type: Standard_NC6s_v3
instance_count: 1
request_settings:
  max_concurrent_requests_per_instance: 1
  request_timeout_ms: 120000
```
3. Create the endpoint:
```bash
az ml online-endpoint create --file endpoint.yml \
  --workspace-name image-descriptor-ws \
  --resource-group image-descriptor-rg
```
4. Create the deployment:
```bash
az ml online-deployment create --file deployment.yml \
  --workspace-name image-descriptor-ws \
  --resource-group image-descriptor-rg
```
5. Allocate 100% traffic to the deployment:
```bash
az ml online-endpoint update --name image-descriptor-endpoint \
  --traffic "qwen-vl-deployment=100" \
  --workspace-name image-descriptor-ws \
  --resource-group image-descriptor-rg
```
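Provisioning a GPU deployment and downloading a 7B model can take a while. If the deployment fails or stays unhealthy, the container logs usually show the cause (for example, errors raised from `init()`):
```bash
# Check endpoint status and the current traffic split
az ml online-endpoint show --name image-descriptor-endpoint \
  --workspace-name image-descriptor-ws \
  --resource-group image-descriptor-rg

# Fetch container logs from the deployment
az ml online-deployment get-logs --name qwen-vl-deployment \
  --endpoint-name image-descriptor-endpoint \
  --workspace-name image-descriptor-ws \
  --resource-group image-descriptor-rg
```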
## Step 7: Test the Endpoint
You can test the endpoint using the Azure ML SDK:
```python
import json
import base64
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# Get a handle to the workspace
credential = DefaultAzureCredential()
ml_client = MLClient(
    credential=credential,
    subscription_id="your-subscription-id",
    resource_group_name="image-descriptor-rg",
    workspace_name="image-descriptor-ws"
)

# Load and encode the image
with open('data_temp/page_2.png', 'rb') as f:
    image_data = f.read()
image_b64 = base64.b64encode(image_data).decode('utf-8')

# Write the request payload to a file (invoke expects a file path, not a JSON string)
with open('request.json', 'w') as f:
    json.dump({'image_data': image_b64}, f)

# Invoke the endpoint
response = ml_client.online_endpoints.invoke(
    endpoint_name="image-descriptor-endpoint",
    request_file="request.json",
    deployment_name="qwen-vl-deployment"
)

# Parse the response
result = json.loads(response)
print(json.dumps(result, indent=2))
```
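You can also invoke the endpoint from the CLI, reusing the `request.json` file written by the script above:
```bash
# Invoke the endpoint with the same request payload
az ml online-endpoint invoke --name image-descriptor-endpoint \
  --deployment-name qwen-vl-deployment \
  --request-file request.json \
  --workspace-name image-descriptor-ws \
  --resource-group image-descriptor-rg

# Retrieve the endpoint key if you want to call the scoring URI directly over REST
az ml online-endpoint get-credentials --name image-descriptor-endpoint \
  --workspace-name image-descriptor-ws \
  --resource-group image-descriptor-rg
```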
## Cost Optimization
To optimize costs:
1. Use a smaller compute size if possible
2. Delete or scale down the deployment when it is not in use (managed online endpoints cannot scale to zero, but the compute cluster from Step 2 already scales to zero via `--min-instances 0`)
3. Set up autoscaling rules
4. Consider reserved instances for long-term deployments
## Monitoring
Monitor your endpoint using:
1. Azure Monitor
2. Application Insights
3. The Azure ML metrics dashboard
4. Alerts configured for anomalies
## Cleanup
To avoid ongoing charges, delete resources when not in use:
```bash
# Delete the endpoint
az ml online-endpoint delete --name image-descriptor-endpoint \
  --workspace-name image-descriptor-ws \
  --resource-group image-descriptor-rg -y

# Delete compute cluster
az ml compute delete --name gpu-cluster \
  --workspace-name image-descriptor-ws \
  --resource-group image-descriptor-rg -y

# Delete workspace (optional)
az ml workspace delete --name image-descriptor-ws \
  --resource-group image-descriptor-rg -y

# Delete resource group (optional)
az group delete --name image-descriptor-rg -y
```