Chi squared method

- README.md +66 -11
- app.py +15 -15
- cdim/diffusion/diffusion_pipeline.py +94 -40
- cdim/eta_scheduler.py +0 -61
- cdim/eta_utils.py +369 -0
- cdim/image_utils.py +208 -0
- cdim/operators/gaussian_blur_operator.py +1 -1
- cdim/operators/random_box_masker.py +28 -0
- cdim/operators/random_pixel_masker.py +29 -0
- inference.py +64 -29
- requirements.txt +7 -10
README.md
CHANGED
@@ -1,11 +1,66 @@

# Linearly Constrained Diffusion Implicit Models

### Authors
[Vivek Jayaram](http://www.vivekjayaram.com/), [John Thickstun](https://johnthickstun.com/), [Ira Kemelmacher-Shlizerman](https://homes.cs.washington.edu/~kemelmi/), and [Steve Seitz](https://homes.cs.washington.edu/~seitz/)

### Links
[[Gradio Demo]](https://huggingface.co/spaces/vivjay30/cdim) [[Project Page]](https://grail.cs.washington.edu/projects/cdim/) [[Paper]](https://arxiv.org/abs/2411.00359)

### Summary
We solve noisy linear inverse problems with diffusion models. The method is fast and addresses many problems such as inpainting, super-resolution, Gaussian deblur, and Poisson noise.

## Getting started

Recommended environment: Python 3.11, CUDA 12, Conda. For lower versions, please adjust the dependencies below.

### 1) Clone the repository

```
git clone https://github.com/vivjay30/cdim
cd cdim
```

### 2) Install dependencies

```
conda create -n cdim python=3.11
conda activate cdim
pip install -r requirements.txt
pip install torch==2.4.1+cu124 torchvision==0.19.1+cu124 --extra-index-url https://download.pytorch.org/whl/cu124
```

## Inference Examples

(The underlying diffusion models will be downloaded automatically on the first run.)

#### CelebHQ Inpainting Example (T'=25 Denoising Steps)

`python inference.py sample_images/celebhq/00001.jpg 25 operator_configs/box_inpainting_config.yaml noise_configs/gaussian_noise_config.yaml google/ddpm-celebahq-256`

#### LSUN Churches Gaussian Deblur Example (T'=25 Denoising Steps)

`python inference.py sample_images/lsun_church.png 25 operator_configs/gaussian_blur_config.yaml noise_configs/gaussian_noise_config.yaml google/ddpm-church-256`

## FFHQ and Imagenet Models

These models are generally not as strong as the Google DDPM models, but are used for comparisons with baseline methods.

From [this link](https://drive.google.com/drive/folders/1jElnRoFv7b31fG0v6pTSQkelbSX3xGZh?usp=sharing), download the checkpoints "ffhq_10m.pt" and "imagenet_256.pt" to models/

#### Imagenet Super Resolution Example

Here we set T'=50 to show the algorithm running slower.

`python inference.py sample_images/imagenet_val_00002.png 50 operator_configs/super_resolution_config.yaml noise_configs/gaussian_noise_config.yaml models/imagenet_model_config.yaml`

#### FFHQ Random Inpainting (Faster)

Here we set T'=10 to show the algorithm running faster.

`python inference.py sample_images/ffhq_00010.png 10 operator_configs/random_inpainting_config.yaml noise_configs/gaussian_noise_config.yaml models/ffhq_model_config.yaml`

#### A Note on Exact Recovery

If you set the measurement noise to 0 in gaussian_noise_config.yaml, the recovered image should match the observation y exactly (e.g. inpainting doesn't change observed pixels). In practice this doesn't happen exactly, because the diffusion schedule sets $\overline{\alpha}_0 = 0.999$ for numeric stability, meaning a tiny amount of noise is injected even at t=0.
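To put the $\overline{\alpha}_0 = 0.999$ figure in perspective, here is a small back-of-the-envelope sketch (an illustration only, not part of the repository README):

```python
# Residual noise implied by alpha_bar_0 = 0.999 (illustration only).
alpha_bar_0 = 0.999
noise_std = (1 - alpha_bar_0) ** 0.5   # ~0.032 in the [-1, 1] image range
pixel_std = noise_std * 127.5          # ~4 levels on the 0-255 scale
print(f"noise std: {noise_std:.4f} (about {pixel_std:.1f} 8-bit levels)")
```

So even with zero measurement noise, reconstructions can deviate from y by a few 8-bit levels.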
app.py
CHANGED
@@ -2,24 +2,21 @@ import gradio as gr
 import spaces
 import torch
 import yaml
-import os
 import numpy as np
 from PIL import Image
 from cdim.noise import get_noise
 from cdim.operators import get_operator
-from cdim.image_utils import save_to_image
-from cdim.dps_model.dps_unet import create_model
 from cdim.diffusion.scheduling_ddim import DDIMScheduler
 from cdim.diffusion.diffusion_pipeline import run_diffusion
-from cdim.eta_scheduler import EtaScheduler
 from diffusers import DiffusionPipeline

-# Global variables
+# Global variables for model and scheduler (initialized inside GPU-decorated function)
 model = None
 ddim_scheduler = None
 model_type = None
 curr_model_name = None

+
 def load_image(image_path):
     """Process input image to tensor format."""
     image = Image.open(image_path)
@@ -27,22 +24,26 @@ def load_image(image_path):
     original_image = torch.from_numpy(original_image).unsqueeze(0).permute(0, 3, 1, 2)
     return (original_image / 127.5 - 1.0).to(torch.float)[:, :3]

+
 def load_yaml(file_path: str) -> dict:
+    """Load configurations from a YAML file."""
     with open(file_path) as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
     return config

+
 def convert_to_np(torch_image):
     return ((torch_image.detach().clamp(-1, 1).cpu().numpy().transpose(1, 2, 0) + 1) * 127.5).astype(np.uint8)

+
 @spaces.GPU
-def process_image(image_choice, noise_sigma, operator_key, T, K):
-    """Combined function to handle both generation and restoration"""
+def process_image(image_choice, noise_sigma, operator_key, T, stopping_sigma):
+    """Combined function to handle both generation and restoration."""
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

     # Initialize model inside GPU-decorated function
     global model, curr_model_name, ddim_scheduler, model_type
+    model_name = "google/ddpm-celebahq-256" if "CelebA" in image_choice else "google/ddpm-church-256"

     if model is None or curr_model_name != model_name:
         model_type = "diffusers"
@@ -85,22 +86,22 @@ def process_image(image_choice, noise_sigma, operator_key, T, K):
     noisy_image = Image.fromarray(convert_to_np(noisy_measurement[0]))

     # Run restoration
-    eta_scheduler = EtaScheduler("gradnorm", operator.name, T, K, 'l2', noise_function, None)
     output_image = run_diffusion(
         model, ddim_scheduler, noisy_measurement, operator, noise_function, device,
+        stopping_sigma, num_inference_steps=T, model_type=model_type
     )

     output_image = Image.fromarray(convert_to_np(output_image[0]))
     return noisy_image, output_image

+
 # Gradio interface
 with gr.Blocks() as demo:
     gr.Markdown("# Noisy Image Restoration with Diffusion Models")

     with gr.Row():
+        T = gr.Slider(10, 200, value=50, step=1, label="Number of Inference Steps (T)")
+        stopping_sigma = gr.Slider(0.1, 5.0, value=0.1, step=0.1, label="Stopping Sigma (c)")
         noise_sigma = gr.Slider(0, 0.6, value=0.05, step=0.01, label="Noise Sigma")

         image_select = gr.Dropdown(
@@ -119,12 +120,11 @@ with gr.Blocks() as demo:
     noisy_image = gr.Image(label="Noisy Image")
     restored_image = gr.Image(label="Restored Image")

-    # Single function call instead of chaining
     run_button.click(
         fn=process_image,
+        inputs=[image_select, noise_sigma, operator_select, T, stopping_sigma],
         outputs=[noisy_image, restored_image]
     )

 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)
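The demo relies on the same [-1, 1] ↔ uint8 convention as `load_image` / `convert_to_np` above. A self-contained round-trip check (illustration only; the random 8×8 image is a placeholder):

```python
# Illustration of the normalization convention used by load_image/convert_to_np.
import numpy as np
import torch

def convert_to_np(torch_image):
    return ((torch_image.detach().clamp(-1, 1).cpu().numpy().transpose(1, 2, 0) + 1) * 127.5).astype(np.uint8)

rgb = np.random.randint(0, 256, (8, 8, 3), dtype=np.uint8)
tensor = torch.from_numpy(rgb).permute(2, 0, 1).float() / 127.5 - 1.0  # load_image's normalization
back = convert_to_np(tensor)
print(np.abs(back.astype(int) - rgb.astype(int)).max())  # round-trip error of at most one level
```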
cdim/diffusion/diffusion_pipeline.py
CHANGED
@@ -1,8 +1,10 @@
 import torch
 from tqdm import tqdm

-from cdim.image_utils import randn_tensor
+from cdim.image_utils import randn_tensor, trace_AAt, estimate_variance, save_to_image, compute_operator_distance
 from cdim.discrete_kl_loss import discrete_kl_loss
+from cdim.eta_utils import calculate_best_step_size, initial_guess_step_size
+

 def compute_kl_gaussian(residuals, sigma):
     # Only 0 centered for now
@@ -23,13 +25,13 @@ def run_diffusion(
     operator,
     noise_function,
     device,
+    stopping_sigma,
     num_inference_steps: int = 1000,
-    K=
+    K=20,
     image_dim=256,
     image_channels=3,
     model_type="diffusers",
+    original_image=None
 ):
     batch_size = noisy_observation.shape[0]
     image_shape = (batch_size, image_channels, image_dim, image_dim)
@@ -38,18 +40,60 @@
     scheduler.set_timesteps(num_inference_steps, device=device)
     t_skip = scheduler.timesteps[0] - scheduler.timesteps[1]

+    data = []
+    TOTAL_UPDATE_STEPS = 0
+    trace = trace_AAt(operator)
     for i, t in tqdm(enumerate(scheduler.timesteps), total=len(scheduler.timesteps), desc="Processing timesteps"):
+        # Using GT image noised up if you want to debug anything
+        # image = original_image * scheduler.alphas_cumprod[t] ** 0.5 + torch.randn_like(original_image) * (1 - scheduler.alphas_cumprod[t]) ** 0.5
+
         # 1. predict noise model_output
         model_output = model(image, t.unsqueeze(0).to(device))
         model_output = model_output.sample if model_type == "diffusers" else model_output[:, :3]

+        # Save image for debugging
+        # save_to_image(image, f"intermediates/{t}_xt.png")
+
         # 2. compute previous image: x_t -> x_t-1
         image = scheduler.step(model_output, t, image).prev_sample
         image.requires_grad_()
         alpha_prod_t_prev = scheduler.alphas_cumprod[t-t_skip] if t-t_skip >= 0 else 1
         beta_prod_t_prev = 1 - alpha_prod_t_prev
+
+        k = 0
+        while k < K:
             if t <= 0: break
+            a = scheduler.alphas_cumprod[t-t_skip]**0.5 - 1
+            # For inpainting, use the number of observed pixels
+            num_elements = operator.get_num_observed() if hasattr(operator, 'get_num_observed') else noisy_observation.numel()
+
+            # mu_{t-delta}(y), eq. 14
+            target_distance = (a**2 * torch.linalg.norm(noisy_observation)**2 + (1 - scheduler.alphas_cumprod[t-t_skip]) * trace).item()
+            target_distance += num_elements * noise_function.sigma**2 * (1 - a**2)
+
+            # ||Ax_{t-delta} - y||^2
+            actual_distance = compute_operator_distance(operator, image, noisy_observation, squared=True).item()
+
+            # sigma^2_{t-delta}(y), eq. 15
+            variance = estimate_variance(
+                operator,
+                noisy_observation,
+                scheduler.alphas_cumprod[t-t_skip],
+                image.shape,
+                trace=trace,
+                sigma_y=noise_function.sigma,
+                n_trace_samples=64,
+                n_y_samples=64,
+                device=image.device)
+
+            # c * sigma_{t-delta}(y)
+            threshold = stopping_sigma * variance**0.5
+            # print(f"Target Distance mean {target_distance} max {target_distance + threshold} actual distance {actual_distance}")
+
+            # R_{t-delta} is within rho_{t-delta}, eq. 16
+            if actual_distance <= target_distance + threshold:
+                break

             with torch.enable_grad():
                 # Calculate x^hat_0
@@ -57,42 +101,52 @@
                 model_output = model_output.sample if model_type == "diffusers" else model_output[:, :3]
                 x_0 = (image - beta_prod_t_prev ** (0.5) * model_output) / alpha_prod_t_prev ** (0.5)

-                    mask = x_0_pixel > 2  # Avoid numeric issues with pixel values near 0
-                    pearson = residuals[mask] / torch.sqrt(x_0_pixel[mask] * noise_function.rate)
-                    pearson_flat = pearson.view(-1)
-                    kl_div = compute_kl_gaussian(pearson_flat, 1.0)
-                    kl_div.backward()
-
-                elif loss_type == "categorical_kl" and noise_function.name == "bimodal":
-                    diff = (operator(x_0) - noisy_observation)
-                    indices = operator(torch.ones(image.shape).to(device))
-                    diff = diff[indices > 0]  # Don't consider masked out pixels in the distribution
-                    empirical_distribution = noise_function.sample_noise_distribution(image).to(device).view(-1)
-                    loss = discrete_kl_loss(diff, empirical_distribution, num_bins=15)
-                    print(f"Categorical KL {loss}")
-                    loss.backward()
-
-                else:
-                    raise ValueError(f"Unsupported combination: loss {loss_type} noise {noise_function.name}")
-
-            step_size = eta_scheduler.get_step_size(str(t.item()), torch.linalg.norm(image.grad))
+                # Save Tweedie's estimate for debugging
+                # save_to_image(x_0, f"intermediates/{t}_x0.png")
+
+                loss = compute_operator_distance(operator, x_0, noisy_observation, squared=True).mean()
+
+                # print(f"L2 loss {compute_operator_distance(operator, x_0, noisy_observation, squared=False)}")
+                data.append((t.item(), compute_operator_distance(operator, image, noisy_observation, squared=False).item()))
+                loss.backward()
+
+            initial_step_size = initial_guess_step_size(t.item(), torch.linalg.norm(image.grad))  # previously eta_scheduler.get_step_size(str(t.item()), torch.linalg.norm(image.grad))
+            with torch.no_grad():
+                # Set debug=True to see detailed step size search information
+                step_size = calculate_best_step_size(image, noisy_observation, operator, image.grad, target_distance, threshold, initial_step_size, debug=False)
+
+            # print(f"Step Size {step_size:.6e} initial guess {initial_step_size:.6e}")
+
             image -= step_size * image.grad
+            new_distance = compute_operator_distance(operator, image, noisy_observation, squared=True).item()
+            # print(f"New distance {new_distance}")
             image = image.detach().requires_grad_()
+            TOTAL_UPDATE_STEPS += 1
+
+            if step_size <= 1e-12: break
+
+            k += 1
+
+            # Check here because threshold is stochastic and can change from iteration to iteration
+            if new_distance <= target_distance + threshold:
+                break
+
+        # print("Step", t.item())
+        # Use num_elements for proper normalization with inpainting
+        num_elements = operator.get_num_observed() if hasattr(operator, 'get_num_observed') else noisy_observation.numel()
+        # print("Distance", 1 / num_elements * compute_operator_distance(operator, image, noisy_observation, squared=True).item())
+
+        # Print MAE if you want to track constraint error
+        if hasattr(operator, 'select'):
+            # Compute MAE over observed pixels only
+            Ax = operator.select(image).flatten()
+            y_selected = operator.select(noisy_observation).flatten()
+            # print("MAE", (torch.abs(Ax - y_selected).mean().item()))
+        else:
+            pass
+            # print("MAE", (torch.abs(operator(image) - noisy_observation).mean().item()))

+    print(f"Total Denoising {len(scheduler.timesteps)}")
+    print(f"Total Projection Steps {TOTAL_UPDATE_STEPS}")
+    print(f"Total NFEs {TOTAL_UPDATE_STEPS + len(scheduler.timesteps)}")
     return image
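The heart of this change is the chi-squared stopping rule: at each reverse step the squared residual $\|Ax_{t-\delta} - y\|^2$ is compared against its expected value $\mu_{t-\delta}(y)$ (eq. 14) plus a band of width $c\,\sigma_{t-\delta}(y)$ (eq. 15), and projection steps are taken only while the residual sits above that band (eq. 16). A self-contained sketch of the same test for the special case $A = I$ (illustration only; the pipeline uses `estimate_variance` for the band width, not the crude stand-in below):

```python
# Sketch of the stopping test for A = I, where tr(AA^T) = n.
import torch

def should_stop(x_t, y, alpha_bar_prev, sigma_y, c):
    n = y.numel()
    a = alpha_bar_prev ** 0.5 - 1
    trace = float(n)  # tr(AA^T) for A = I
    target = (a ** 2) * y.pow(2).sum() + (1 - alpha_bar_prev) * trace \
             + n * sigma_y ** 2 * (1 - a ** 2)            # mu_{t-delta}(y), eq. 14
    actual = (x_t - y).pow(2).sum()                       # ||A x_{t-delta} - y||^2
    # Crude stand-in for c * sigma_{t-delta}(y): std of a chi-squared with n dof,
    # scaled by the diffusion variance (the real code calls estimate_variance).
    rough_sigma = (2 * n) ** 0.5 * (1 - alpha_bar_prev)
    return bool(actual <= target + c * rough_sigma)

x_t = torch.randn(1, 3, 8, 8)
y = torch.randn(1, 3, 8, 8)
print(should_stop(x_t, y, alpha_bar_prev=0.5, sigma_y=0.05, c=1.0))
```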
cdim/eta_scheduler.py
DELETED
@@ -1,61 +0,0 @@

import json

class EtaScheduler:
    def __init__(self, method, task, T, K, loss_type,
                 noise_function, lambda_val=None):
        self.task = task
        self.T = T
        self.K = K
        self.loss_type = loss_type
        self.lambda_val = lambda_val
        self.method = method

        self.precomputed_etas = self._load_precomputed_etas()

        # Couldn't find expected gradnorm
        if not self.precomputed_etas and method == "expected_gradnorm":
            self.method = "gradnorm"
            print("Etas for this configuration not found. Switching to gradnorm.")

        # Precomputed gradients are only for gaussian noise
        if noise_function.name != "gaussian" and method == "expected_gradnorm":
            self.method = "gradnorm"
            print("Precomputed gradients are only for gaussian noise. Switching to gradnorm.")

        # Get the best lambda_val if it's not passed
        if self.lambda_val is None:
            if self.method == "expected_gradnorm":
                self.lambda_val = self.precomputed_etas["lambda"]
            else:
                self.lambda_val = self.best_guess_lambda()
            print(f"Using lambda {self.lambda_val}")

    def _load_precomputed_etas(self):
        steps_key = f"T{self.T}_K{self.K}"
        with open("cdim/etas.json") as f:
            all_etas = json.load(f)

        return all_etas.get(self.task, {}).get(self.loss_type, {}).get(steps_key, {})

    def get_step_size(self, t, grad_norm):
        """Use either precomputed expected gradnorm or gradnorm."""
        if self.method == "expected_gradnorm":
            step_size = self.lambda_val * 1 / self.precomputed_etas["etas"][t]
        else:
            step_size = self.lambda_val * 1 / grad_norm
        return step_size

    def best_guess_lambda(self):
        """Guess a lambda value if not provided. Based on trial and error"""
        total_steps = self.T * self.K

        # L2 tends to over optimize too aggressively, so the default lr is lower
        if self.loss_type == "kl":
            return 350 / total_steps
        elif self.loss_type == "l2":
            return 220 / total_steps
        else:
            raise ValueError(f"Please provide learning rate for loss type {self.loss_type}")
cdim/eta_utils.py
ADDED
@@ -0,0 +1,369 @@

import json
import torch

class EtaScheduler:
    def __init__(self, method, task, T, K, loss_type,
                 noise_function, lambda_val=None):
        self.task = task
        self.T = T
        self.K = K
        self.loss_type = loss_type
        self.lambda_val = lambda_val
        self.method = method

        self.precomputed_etas = self._load_precomputed_etas()

        # Couldn't find expected gradnorm
        if not self.precomputed_etas and method == "expected_gradnorm":
            self.method = "gradnorm"
            print("Etas for this configuration not found. Switching to gradnorm.")

        # Precomputed gradients are only for gaussian noise
        if noise_function.name != "gaussian" and method == "expected_gradnorm":
            self.method = "gradnorm"
            print("Precomputed gradients are only for gaussian noise. Switching to gradnorm.")

        # Get the best lambda_val if it's not passed
        if self.lambda_val is None:
            if self.method == "expected_gradnorm":
                self.lambda_val = self.precomputed_etas["lambda"]
            else:
                self.lambda_val = self.best_guess_lambda()
            print(f"Using lambda {self.lambda_val}")

    def _load_precomputed_etas(self):
        steps_key = f"T{self.T}_K{self.K}"
        with open("cdim/etas.json") as f:
            all_etas = json.load(f)

        return all_etas.get(self.task, {}).get(self.loss_type, {}).get(steps_key, {})

    def get_step_size(self, t, grad_norm):
        """Use either precomputed expected gradnorm or gradnorm."""
        if self.method == "expected_gradnorm":
            step_size = self.lambda_val * 1 / self.precomputed_etas["etas"][t]
        else:
            step_size = self.lambda_val * 1 / grad_norm
        return step_size

    def best_guess_lambda(self):
        """Guess a lambda value if not provided. Based on trial and error"""
        total_steps = self.T * self.K

        # L2 tends to over optimize too aggressively, so the default lr is lower
        if self.loss_type == "kl":
            return 350 / total_steps
        elif self.loss_type == "l2":
            return 220 / total_steps
        else:
            raise ValueError(f"Please provide learning rate for loss type {self.loss_type}")


def initial_guess_step_size(T, grad_norm):
    best_guess_lambda = 220 / T
    return best_guess_lambda / grad_norm


# def calculate_best_step_size(image, y, operator, gradient, target_distance, initial_guess,
#                              max_iters=20, tol=1e-4, bracket_factor=1.4):
#     def compute_distance(eta):
#         x_new = image - eta * gradient
#         diff = operator(x_new) - y
#         return torch.linalg.norm(diff)**2
#
#     def objective(eta):
#         return torch.abs(compute_distance(eta) - target_distance)
#
#     # Try to bracket the root
#     eta_low = initial_guess / bracket_factor
#     eta_high = initial_guess * bracket_factor
#
#     for _ in range(10):
#         import pdb
#         pdb.set_trace()
#         dist_low = compute_distance(eta_low)
#         dist_high = compute_distance(eta_high)
#         if (dist_low - target_distance) * (dist_high - target_distance) < 0:
#             break
#         eta_low /= bracket_factor
#         eta_high *= bracket_factor
#     else:
#         # Fallback: brute-force line search over eta to minimize distance
#         best_eta = None
#         best_val = float('inf')
#         for eta in torch.linspace(0, initial_guess * 5, steps=100, device=image.device):
#             val = objective(eta)
#             # print(f"ETA {eta} distance {compute_distance(eta)}")
#             if val < best_val:
#                 best_val = val
#                 best_eta = eta
#         return best_eta.item()
#
#     # Binary search
#     for _ in range(max_iters):
#         eta_mid = (eta_low + eta_high) / 2
#         dist_mid = compute_distance(eta_mid)
#         error = dist_mid - target_distance
#
#         if abs(error) < tol:
#             return eta_mid
#
#         if (compute_distance(eta_low) - target_distance) * error < 0:
#             eta_high = eta_mid
#         else:
#             eta_low = eta_mid
#
#     return eta_mid


import torch
from cdim.image_utils import compute_operator_distance

def calculate_best_step_size(
    image: torch.Tensor,
    y: torch.Tensor,
    operator,
    gradient: torch.Tensor,
    target_distance: float,
    threshold: float,
    initial_guess: float,
    *,
    tol: float = 1e-4,
    max_iters: int = 50,
    debug: bool = False,
):
    """
    Find the smallest η ≥ 0 that makes ||A(x − η g) − y||² ≈ target_distance + threshold.

    Uses a robust grid search followed by golden section search for fine-grained optimization.

    Note: For inpainting operators with 'select' method, distances are computed
    over only the observed pixels.

    Args:
        debug: If True, prints detailed search information
    """
    target_boundary = target_distance + threshold

    def distance(η: torch.Tensor) -> torch.Tensor:
        return compute_operator_distance(operator, image - η * gradient, y, squared=True)

    def error(η):
        return distance(η) - target_boundary

    # Phase 1: Coarse grid search to find promising regions
    # Search from very small to larger step sizes
    # Allow step sizes larger than 1 if needed
    max_eta = initial_guess * 200.0 if initial_guess > 0 else 10.0

    # Create a logarithmically-spaced grid for better coverage of small values
    # This ensures we search finely near 0 and coarser at larger values
    # Start from 1e-12 to handle cases with very large gradients
    n_coarse = 100  # Increased for better resolution
    eta_grid = torch.cat([
        torch.tensor([0.0]),
        torch.logspace(-12, torch.log10(torch.tensor(max_eta)), n_coarse - 1)
    ]).to(image.device)

    if debug:
        print(f"[Step Size] Searching from {eta_grid[1]:.2e} to {eta_grid[-1]:.2e} ({len(eta_grid)} points)")
        print(f"[Step Size] Sample grid points: {[f'{x:.2e}' for x in eta_grid[1:11].tolist()]}")

    # Evaluate distances at all grid points
    distances = torch.tensor([distance(eta).item() for eta in eta_grid])
    errors = distances - target_boundary

    dist_at_zero = distances[0].item()

    # Strategy: Find the SMALLEST eta that gets us AT OR BELOW target_boundary
    # Only consider non-zero etas
    below_target_mask = distances[1:] <= target_boundary

    if below_target_mask.any():
        # Found etas that reach target - pick the SMALLEST one (most conservative)
        below_indices = torch.where(below_target_mask)[0] + 1  # +1 because we excluded index 0
        best_idx = below_indices[0].item()  # Smallest eta that reaches target
        best_eta = eta_grid[best_idx].item()
        best_distance = distances[best_idx].item()

        if debug:
            print(f"[Step Size] Coarse grid: found eta={best_eta:.2e} that reaches target")
            print(f"[Step Size] Distance: {best_distance:.2f} (target: {target_boundary:.2f}, under by {target_boundary - best_distance:.2f})")
    else:
        # No eta reaches target - find the one that gets closest (minimize distance to target)
        non_zero_distances = distances[1:]
        closest_idx = torch.argmin(torch.abs(non_zero_distances - target_boundary)) + 1
        best_idx = closest_idx
        best_eta = eta_grid[best_idx].item()
        best_distance = distances[best_idx].item()

        if debug:
            print(f"[Step Size] Coarse grid: cannot reach target, best eta={best_eta:.2e}")
            print(f"[Step Size] Distance: {best_distance:.2f} (target: {target_boundary:.2f}, over by {best_distance - target_boundary:.2f})")

    # Check if eta=0 is better (already at target)
    if dist_at_zero <= target_boundary:
        if debug:
            print(f"[Step Size] Distance at eta=0: {dist_at_zero:.2f} - already at/below target")
        return 0.0

    if debug:
        print(f"[Step Size] Distance at eta=0: {dist_at_zero:.2f} (need to step)")

    # Phase 1.5: Fine search around the best point found
    # If best_eta is not at the boundaries, do a fine search around it
    if best_idx > 0 and best_idx < len(eta_grid) - 1:
        eta_low_bound = eta_grid[best_idx - 1].item()
        eta_high_bound = eta_grid[best_idx + 1].item()

        # Create a very fine linear grid between the neighboring points
        fine_grid = torch.linspace(eta_low_bound, eta_high_bound, 50).to(image.device)
        fine_distances = torch.tensor([distance(eta).item() for eta in fine_grid])

        # Find the SMALLEST eta in fine grid that gets us AT OR BELOW target
        fine_below_mask = fine_distances <= target_boundary

        if fine_below_mask.any():
            # Found fine etas that reach target - pick the SMALLEST
            fine_below_indices = torch.where(fine_below_mask)[0]
            fine_best_idx = fine_below_indices[0].item()
            fine_best_eta = fine_grid[fine_best_idx].item()
            fine_best_distance = fine_distances[fine_best_idx].item()

            # Only update if this is better (smaller eta that still reaches target, or gets closer)
            if fine_best_distance <= target_boundary and (best_distance > target_boundary or fine_best_eta < best_eta):
                best_eta = fine_best_eta
                best_distance = fine_best_distance
                best_idx = len(eta_grid) + fine_best_idx

                if debug:
                    print(f"[Step Size] Fine grid: improved to eta={best_eta:.2e}, distance={best_distance:.2f} (under by {target_boundary - best_distance:.2f})")
        else:
            # No fine eta reaches target - find closest
            fine_best_idx = torch.argmin(torch.abs(fine_distances - target_boundary))
            fine_best_eta = fine_grid[fine_best_idx].item()
            fine_best_distance = fine_distances[fine_best_idx].item()

            # Only update if closer to target than current best
            if abs(fine_best_distance - target_boundary) < abs(best_distance - target_boundary):
                best_eta = fine_best_eta
                best_distance = fine_best_distance
                best_idx = len(eta_grid) + fine_best_idx

                if debug:
                    print(f"[Step Size] Fine grid: improved to eta={best_eta:.2e}, distance={best_distance:.2f} (over by {best_distance - target_boundary:.2f})")

        # Always update the grid for potential bracketing
        distances = torch.cat([distances, fine_distances])
        errors = torch.cat([errors, fine_distances - target_boundary])
        eta_grid = torch.cat([eta_grid, fine_grid])

    # If best_eta is 0 and we're already at or below target, return 0
    if best_eta == 0.0 and dist_at_zero <= target_boundary:
        if debug:
            print(f"[Step Size] Already at target, no step needed")
        return 0.0

    # If we've reached target, we can return (no need for golden section)
    if best_distance <= target_boundary:
        if debug:
            print(f"[Step Size] Reached target with eta={best_eta:.2e}, returning")
        return best_eta

    # Phase 2: Check for bracketing around the best point
    # Look for a sign change (crossing the target boundary)
    bracket_found = False
    eta_lo, eta_hi = None, None

    # Check neighbors of best point
    for i in range(len(eta_grid) - 1):
        if errors[i] * errors[i + 1] < 0:  # Sign change
            eta_lo, eta_hi = eta_grid[i].item(), eta_grid[i + 1].item()
            bracket_found = True
            break

    # Phase 3: Refine using golden section search
    # Only refine if we haven't reached target yet and have a valid bracket
    if best_eta > 0 and best_distance > target_boundary and best_idx > 0 and best_idx < len(eta_grid) - 1:
        # Golden section search to find the smallest eta that reaches target_boundary
        phi = (1 + 5**0.5) / 2  # Golden ratio
        resphi = 2 - phi

        # Search in a small window around best_eta
        a = eta_grid[max(0, best_idx - 1)].item()
        b = eta_grid[min(len(eta_grid) - 1, best_idx + 1)].item()

        # Make sure we have a valid interval
        if b - a < 1e-20:
            if debug:
                print(f"[Step Size] Interval too small for refinement, returning eta={best_eta:.2e}")
            return best_eta

        dist_a = distance(torch.tensor(a)).item()
        dist_b = distance(torch.tensor(b)).item()

        for _ in range(max_iters):
            if abs(b - a) < 1e-20:  # Extremely tight tolerance
                break

            # Golden section points
            x1 = a + resphi * (b - a)
            x2 = b - resphi * (b - a)

            dist_x1 = distance(torch.tensor(x1)).item()
            dist_x2 = distance(torch.tensor(x2)).item()

            # Priority: prefer points that reach target (dist <= target_boundary)
            # Among those, prefer smaller eta
            # If neither reaches, prefer closer to target

            x1_reaches = dist_x1 <= target_boundary
            x2_reaches = dist_x2 <= target_boundary

            if x1_reaches and not x2_reaches:
                # x1 reaches target, x2 doesn't -> prefer x1's half
                b = x2
                dist_b = dist_x2
                if x1 < best_eta or not (best_distance <= target_boundary):
                    best_eta = x1
                    best_distance = dist_x1
            elif x2_reaches and not x1_reaches:
                # x2 reaches target, x1 doesn't -> prefer x2's half
                a = x1
                dist_a = dist_x1
                if x2 < best_eta or not (best_distance <= target_boundary):
                    best_eta = x2
                    best_distance = dist_x2
            elif x1_reaches and x2_reaches:
                # Both reach target -> prefer smaller eta (which is x1)
                b = x2
                dist_b = dist_x2
                best_eta = x1
                best_distance = dist_x1
            else:
                # Neither reaches target -> prefer closer to target
                if abs(dist_x1 - target_boundary) < abs(dist_x2 - target_boundary):
                    b = x2
                    dist_b = dist_x2
                    if abs(dist_x1 - target_boundary) < abs(best_distance - target_boundary):
                        best_eta = x1
                        best_distance = dist_x1
                else:
                    a = x1
                    dist_a = dist_x1
                    if abs(dist_x2 - target_boundary) < abs(best_distance - target_boundary):
                        best_eta = x2
                        best_distance = dist_x2

        if debug:
            if best_distance <= target_boundary:
                print(f"[Step Size] Final: eta={best_eta:.2e}, distance={best_distance:.2f} (under by {target_boundary - best_distance:.2f})")
            else:
                print(f"[Step Size] Final: eta={best_eta:.2e}, distance={best_distance:.2f} (over by {best_distance - target_boundary:.2f})")
    else:
        if debug:
            print(f"[Step Size] No refinement needed, returning best: eta={best_eta:.2e}")

    return best_eta
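A hedged, self-contained sketch of how `initial_guess_step_size` and `calculate_best_step_size` fit together; the identity operator, tensor shapes, and the demo target value are placeholders, not repository defaults:

```python
# Illustration only: exercising the step-size search with a toy identity operator.
import torch
from cdim.image_utils import compute_operator_distance
from cdim.eta_utils import calculate_best_step_size, initial_guess_step_size

operator = lambda x: x                       # stand-in for a linear operator A
x = torch.randn(1, 3, 8, 8, requires_grad=True)
y = torch.randn(1, 3, 8, 8)

loss = compute_operator_distance(operator, x, y, squared=True)
loss.backward()

eta0 = float(initial_guess_step_size(25, torch.linalg.norm(x.grad)))
with torch.no_grad():
    eta = calculate_best_step_size(
        x, y, operator, x.grad,
        target_distance=0.5 * loss.item(),   # arbitrary target chosen for the demo
        threshold=0.0, initial_guess=eta0, debug=True)
print(eta0, eta)
```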
cdim/image_utils.py
CHANGED
@@ -1,7 +1,9 @@
 from typing import List, Optional, Tuple, Union

 import torch
+from torch import Tensor
 from torchvision.transforms import ToPILImage
+from typing import Callable

 def save_to_image(tensor, filename):
     """
@@ -66,3 +68,209 @@ def randn_tensor(
     latents = torch.randn(shape, generator=generator, device=rand_device, dtype=dtype, layout=layout).to(device)

     return latents
+
+
+@torch.no_grad()
+def estimate_variance(
+    operator: Callable[[Tensor], Tensor],
+    y: Tensor,                        # Ax_0 + noise (shape (m,))
+    alphabar_t: float,
+    in_shape: tuple[int, ...],        # e.g. (1, 3, 256, 256)
+    trace: float,                     # tr(AA^T)
+    sigma_y: float,
+    n_trace_samples: int = 64,
+    n_y_samples: int = 64,
+    device: torch.device | str = "cuda",
+    dtype: torch.dtype = torch.float32,
+) -> float:
+    """
+    Monte-Carlo estimator of Var(||A x_t – y||^2) without access to A^T.
+
+    Note: For inpainting operators with a 'select' method, this computes variance
+    over only the observed pixels.
+    """
+    use_select = hasattr(operator, 'select')
+
+    # For inpainting, select only observed pixels from y
+    if use_select:
+        y_selected = operator.select(y).flatten()
+        y = y_selected.to(device=device, dtype=dtype)
+    else:
+        y = y.to(device=device, dtype=dtype).flatten()
+    m = y.numel()
+
+    # ---------------- tr((AA^T)^2)
+    t1_acc = torch.zeros((), device=device, dtype=dtype)
+    for _ in range(n_trace_samples):
+        v = torch.randn(in_shape, device=device, dtype=dtype)
+        w = torch.randn(in_shape, device=device, dtype=dtype)
+        if use_select:
+            Av = operator.select(v).flatten()
+            Aw = operator.select(w).flatten()
+        else:
+            Av = operator(v).flatten()
+            Aw = operator(w).flatten()
+        s = torch.dot(Av, Aw)
+        t1_acc += s * s
+    T1 = t1_acc / n_trace_samples
+
+    # ---------------- y^T AA^T y
+    t2_acc = torch.zeros((), device=device, dtype=dtype)
+    for _ in range(n_y_samples):
+        v = torch.randn(in_shape, device=device, dtype=dtype)
+        if use_select:
+            Av = operator.select(v).flatten()
+        else:
+            Av = operator(v).flatten()
+        s = torch.dot(y, Av)
+        t2_acc += s * s
+    T2 = t2_acc / n_y_samples
+
+    # ---------------- assemble variance
+    alpha_bar = torch.as_tensor(alphabar_t, dtype=dtype, device=device)
+    sigma2 = torch.as_tensor(sigma_y**2, dtype=dtype, device=device)
+
+    a2 = (torch.sqrt(alpha_bar) - 1.0).pow(2)  # (1-√ᾱ)^2
+    b = 1.0 - alpha_bar                        # (1-ᾱ)
+
+    var = 2 * (b*b * T1 + 2 * b * sigma2 * trace + m * sigma2.pow(2)) \
+        + 4 * a2 * (b * (T2 - sigma2 * trace) + sigma2 * (y.pow(2).sum() - m * sigma2))
+    return var.item()
+
+
+def compute_operator_distance(
+    operator: Callable[[Tensor], Tensor],
+    x: Tensor,
+    y: Tensor,
+    squared: bool = True
+) -> Tensor:
+    """
+    Compute ||Ax - y||^2 (or ||Ax - y|| if squared=False).
+
+    For inpainting operators with a 'select' method, this computes the distance
+    over only the observed pixels. Otherwise uses the standard operator call.
+
+    Args:
+        operator: The forward operator A
+        x: Input tensor (e.g., image)
+        y: Measurement tensor (for inpainting, this should be the full masked measurement)
+        squared: If True, returns squared L2 norm. If False, returns L2 norm.
+
+    Returns:
+        Scalar tensor representing the distance
+    """
+    if hasattr(operator, 'select'):
+        # Use select method for inpainting operators
+        # Both x and y need to be selected to extract only observed pixels
+        Ax = operator.select(x).flatten()
+        y_selected = operator.select(y).flatten()
+    else:
+        # Standard operator application
+        Ax = operator(x).flatten()
+        y_selected = y.flatten()
+
+    diff = Ax - y_selected
+    if squared:
+        return (diff ** 2).sum()
+    else:
+        return torch.sqrt((diff ** 2).sum())
+
+
+def trace_AAt(
+    operator: Callable[[torch.Tensor], torch.Tensor],
+    input_shape = (1, 3, 256, 256),
+    num_samples: int = 256,
+    device: str = "cuda"  # or "cpu"
+) -> float:
+    """
+    Unbiased Monte-Carlo estimate of tr(A Aᵀ) for a black-box linear operator.
+
+    operator    : function that maps a (1,C,H,W) tensor → down-sampled tensor
+    input_shape : shape expected by the operator
+    num_samples : more samples → lower variance (error ≈ O(1/√num_samples))
+
+    Note: For inpainting operators with a 'select' method, this computes the trace
+    over only the observed pixels, not the full tensor with zeros.
+    """
+    total = 0.0
+    use_select = hasattr(operator, 'select')
+
+    for _ in range(num_samples):
+        # Rademacher noise (±1). Use torch.randn for Gaussian instead.
+        z = torch.empty(input_shape, device=device).bernoulli_().mul_(2).sub_(1)
+        if use_select:
+            Az = operator.select(z).flatten()   # only observed pixels
+        else:
+            Az = operator(z).flatten()          # output can have any shape
+        total += torch.dot(Az, Az).item()       # ||Az||²
+    return total / num_samples
+
+
+# def trace_AAt_squared(
+#     operator: Callable[[torch.Tensor], torch.Tensor],
+#     input_shape: tuple = (1, 3, 256, 256),
+#     num_samples: int = 32,
+#     device: str = "cuda") -> float:
+#     """
+#     Estimates tr((A Aᵀ)^2) using Hutchinson's method and autograd for Aᵀ.
+#     """
+#     total = 0.0
+#     for _ in range(num_samples):
+#         # Sample z ~ N(0, I) (same shape as operator's *output*)
+#         z = torch.randn(operator(torch.zeros(input_shape, device=device)).shape, device=device)
+#
+#         # Compute Aᵀz via gradient: ∇_w [⟨operator(w), z⟩] = Aᵀz
+#         w = torch.randn(input_shape, device=device, requires_grad=True)
+#         Az = operator(w).flatten()
+#         loss = torch.dot(Az, z.flatten())  # ⟨Az, z⟩ = ⟨w, Aᵀz⟩
+#         A_adj_z = torch.autograd.grad(loss, w, retain_graph=False)[0]
+#
+#         # Compute AAᵀz = operator(Aᵀz)
+#         AA_adj_z = operator(A_adj_z.detach()).flatten()
+#         total += torch.dot(AA_adj_z, AA_adj_z).item()  # ||AAᵀz||²
+#     return total / num_samples
+
+
+# def compute_yAAy(
+#     operator: Callable[[torch.Tensor], torch.Tensor],
+#     y: torch.Tensor,
+#     input_shape: tuple = (1, 3, 256, 256),
+#     device: str = "cuda") -> float:
+#     """
+#     Computes yᵀ (A Aᵀ) y using autograd to get Aᵀy.
+#     """
+#     # Compute Aᵀy via gradient: ∇_w [⟨operator(w), y⟩] = Aᵀy
+#     w = torch.randn(input_shape, device=device, requires_grad=True)
+#     Az = operator(w).flatten()
+#     loss = torch.dot(Az, y.flatten())
+#     A_adj_y = torch.autograd.grad(loss, w, retain_graph=False)[0]
+#
+#     # Compute A Aᵀ y = operator(Aᵀy)
+#     AA_adj_y = operator(A_adj_y.detach()).flatten()
+#     return torch.dot(AA_adj_y, y.flatten()).item()
+
+
+# def variance_Axt_minus_y_sq(
+#     operator: Callable[[torch.Tensor], torch.Tensor],
+#     y: torch.Tensor,
+#     alphabar_t: float,
+#     input_shape: tuple = (1, 3, 256, 256),
+#     num_samples_trace: int = 32,
+#     device: str = "cuda"
+# ) -> float:
+#     """
+#     Computes Var(||A𝐱ₜ - y||²) = 2(1-ᾱₜ)² tr((AAᵀ)²) + 4(1-ᾱₜ)(√ᾱₜ -1)² yᵀAAᵀy.
+#     """
+#     # Term 1: 2(1-ᾱₜ)^2 * tr((AAᵀ)^2)
+#     tr_AAt_sq = trace_AAt_squared(operator, input_shape, num_samples_trace, device)
+#     term1 = 2 * (1 - alphabar_t)**2 * tr_AAt_sq
+#
+#     # Term 2: 4(1-ᾱₜ)(√ᾱₜ -1)^2 * yᵀAAᵀy
+#     yAAy = compute_yAAy(operator, y, input_shape, device)
+#     term2 = 4 * (1 - alphabar_t) * (torch.sqrt(torch.tensor(alphabar_t)) - 1)**2 * yAAy
+#
+#     return term1 + term2
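As a quick sanity check, a self-contained toy (illustration only; the average-pooling operator, shapes, and sample counts are placeholders) exercising the new Monte-Carlo helpers on CPU:

```python
# Illustration only: trace_AAt and compute_operator_distance with a toy operator.
import torch
import torch.nn.functional as F
from cdim.image_utils import trace_AAt, compute_operator_distance

operator = lambda x: F.avg_pool2d(x, 4)          # stand-in linear operator A
x = torch.randn(1, 3, 64, 64)
y = operator(torch.randn(1, 3, 64, 64))

tr = trace_AAt(operator, input_shape=(1, 3, 64, 64), num_samples=128, device="cpu")
d = compute_operator_distance(operator, x, y, squared=True)
# For 4x4 average pooling, each row of A has 16 entries of 1/16, so tr(AA^T) = 3*16*16/16 = 48.
print(f"estimated tr(AA^T) ~ {tr:.1f} (exact: 48.0), ||Ax - y||^2 = {d.item():.1f}")
```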
cdim/operators/gaussian_blur_operator.py
CHANGED
@@ -4,7 +4,7 @@ from cdim.operators.blur_kernel import BlurKernel

 @register_operator(name='gaussian_blur')
 class GaussianBlurOperator:
-    def __init__(self, kernel_size, intensity, device='
+    def __init__(self, kernel_size, intensity, device='cuda'):
         self.device = device
         self.kernel_size = kernel_size
         self.conv = BlurKernel(blur_type='gaussian',
cdim/operators/random_box_masker.py
CHANGED

@@ -54,3 +54,31 @@ class RandomBoxMasker:
 
         # Apply the mask to the input tensor
         return tensor * self.mask
+
+    def select(self, tensor):
+        """
+        Extract only the observed pixels from the tensor (pixels outside the box).
+
+        Args:
+            tensor (torch.Tensor): Input tensor of shape (b, channels, height, width)
+
+        Returns:
+            torch.Tensor: Flattened tensor containing only observed pixels (b, num_observed)
+        """
+        b, c, h, w = tensor.shape
+        assert c == self.channels and h == self.height and w == self.width, \
+            f"Input tensor must be of shape (b, {self.channels}, {self.height}, {self.width})"
+
+        # Move the mask to the same device as the input tensor if necessary
+        if tensor.device != self.mask.device:
+            self.mask = self.mask.to(tensor.device)
+
+        # Extract only observed pixels (where mask is 1, outside the box)
+        observed = (tensor * self.mask).flatten(1)  # (b, c*h*w)
+        # Keep only non-zero elements
+        mask_flat = self.mask.flatten(1)  # (1, c*h*w)
+        return observed[:, mask_flat[0] > 0]  # (b, num_observed)
+
+    def get_num_observed(self):
+        """Return the number of observed elements."""
+        return int(self.mask.sum().item())
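To make the new `select()` and `get_num_observed()` helpers concrete, here is a minimal standalone sketch of the same indexing logic, using an explicit binary box mask instead of a `RandomBoxMasker` instance (shapes and box location are illustrative, not taken from the repository):

```python
import torch

b, c, h, w = 2, 3, 8, 8
mask = torch.ones(1, c, h, w)
mask[:, :, 2:6, 2:6] = 0                    # hide a 4x4 box; everything else is observed

x = torch.randn(b, c, h, w)
observed = (x * mask).flatten(1)            # (b, c*h*w)
mask_flat = mask.flatten(1)                 # (1, c*h*w)
selected = observed[:, mask_flat[0] > 0]    # keep only the observed entries

print(selected.shape)                       # torch.Size([2, 144])
print(int(mask.sum().item()))               # 144, what get_num_observed() would report
```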
cdim/operators/random_pixel_masker.py
CHANGED

@@ -56,3 +56,32 @@ class RandomPixelMasker:
 
         # Apply the mask to the input tensor
         return tensor * self.mask
+
+    def select(self, tensor):
+        """
+        Extract only the observed pixels from the tensor.
+
+        Args:
+            tensor (torch.Tensor): Input tensor of shape (b, channels, height, width)
+
+        Returns:
+            torch.Tensor: Flattened tensor containing only observed pixels (b, num_observed)
+        """
+        b, c, h, w = tensor.shape
+        assert c == self.channels and h == self.height and w == self.width, \
+            f"Input tensor must be of shape (b, {self.channels}, {self.height}, {self.width})"
+
+        # Move the mask to the same device as the input tensor if necessary
+        if tensor.device != self.mask.device:
+            self.mask = self.mask.to(tensor.device)
+
+        # Extract only observed pixels (where mask is 1)
+        # mask is (1, c, h, w), we want to select pixels across all channels
+        observed = (tensor * self.mask).flatten(1)  # (b, c*h*w)
+        # Keep only non-zero elements
+        mask_flat = self.mask.flatten(1)  # (1, c*h*w)
+        return observed[:, mask_flat[0] > 0]  # (b, num_observed)
+
+    def get_num_observed(self):
+        """Return the number of observed elements."""
+        return int(self.mask.sum().item())
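Both maskers now expose the same `select()` / `get_num_observed()` interface, which supplies the degrees-of-freedom count a chi-squared style residual test needs. As a hypothetical illustration, not repository code: for i.i.d. Gaussian residuals with standard deviation `sigma_y`, the sum of `m` squared residuals has mean `m * sigma_y**2` and standard deviation `sqrt(2m) * sigma_y**2`, so a "stop within k standard deviations" check could look like this (names and thresholds are assumptions):

```python
import torch

def within_k_std(residual_sq_sum: float, m: int, sigma_y: float, k: float) -> bool:
    # Sum of m squared N(0, sigma_y^2) residuals is sigma_y^2 * chi^2_m
    mean = m * sigma_y ** 2
    std = (2 * m) ** 0.5 * sigma_y ** 2
    return abs(residual_sq_sum - mean) <= k * std

# m would come from masker.get_num_observed(); k is a tolerance in standard deviations
torch.manual_seed(0)
residual = 0.05 * torch.randn(1000)
print(within_k_std(float((residual ** 2).sum()), m=1000, sigma_y=0.05, k=3.0))
```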
inference.py
CHANGED

@@ -2,6 +2,7 @@ import argparse
 import os
 import yaml
 import time
+from pathlib import Path
 
 from PIL import Image
 import numpy as np

@@ -15,7 +16,6 @@ from cdim.image_utils import save_to_image
 from cdim.dps_model.dps_unet import create_model
 from cdim.diffusion.scheduling_ddim import DDIMScheduler
 from cdim.diffusion.diffusion_pipeline import run_diffusion
-from cdim.eta_scheduler import EtaScheduler
 
 
 def load_image(path):

@@ -36,13 +36,39 @@ def load_yaml(file_path: str) -> dict:
     return config
 
 
+def process_image(image_path, output_dir, model, ddim_scheduler, operator, noise_function,
+                  device, args, model_type):
+    """
+    Process a single image with the given model and parameters.
+    """
+    original_image = load_image(image_path).to(device)
+
+    # Get the base filename without extension
+    base_name = Path(image_path).stem
+
+    noisy_measurement = noise_function(operator(original_image))
+    save_to_image(noisy_measurement, os.path.join(output_dir, f"{base_name}_noisy_measurement.png"))
+
+    t0 = time.time()
+    output_image = run_diffusion(
+        model, ddim_scheduler,
+        noisy_measurement, operator, noise_function, device,
+        args.stopping_sigma,
+        num_inference_steps=args.T,
+        K=args.K,
+        model_type=model_type,
+        original_image=original_image)
+    print(f"Processing time for {base_name}: {time.time() - t0:.2f}s")
+
+    save_to_image(output_image, os.path.join(output_dir, f"{base_name}_output.png"))
+
+
 def main(args):
     device_str = f"cuda" if args.cuda and torch.cuda.is_available() else 'cpu'
     print(f"Using device {device_str}")
     device = torch.device(device_str)
 
     os.makedirs(args.output_dir, exist_ok=True)
-    original_image = load_image(args.input_image).to(device)
 
     # Load the noise function
     noise_config = load_yaml(args.noise_config)

@@ -78,43 +104,52 @@ def main(args):
         steps_offset=0,
     )
 
-    [old lines 81-96 removed: content not shown in the source]
-    save_to_image(output_image, os.path.join(args.output_dir, "output.png"))
+    # Process input (either a single image or all images in a directory)
+    input_path = Path(args.input)
+
+    if input_path.is_file():
+        # Process a single image
+        print(f"Processing single image: {input_path.name}")
+        process_image(
+            str(input_path), args.output_dir, model, ddim_scheduler,
+            operator, noise_function, device, args, model_type
+        )
+    elif input_path.is_dir():
+        # Process all images in the directory
+        image_files = [
+            f for f in input_path.iterdir()
+            if not f.name.startswith('.') and f.suffix.lower() in ['.png', '.jpg', '.jpeg']
+        ]
+        image_files = sorted(image_files)
+
+        print(f"Found {len(image_files)} images to process")
+
+        for image_file in image_files:
+            print(f"Processing {image_file.name}...")
+            # Recreate the operator for each image (comment this out to reuse the same operator)
+            operator = get_operator(**operator_config)
+            process_image(
+                str(image_file), args.output_dir, model, ddim_scheduler,
+                operator, noise_function, device, args, model_type
+            )
+    else:
+        raise ValueError(f"Input path '{input_path}' is neither a file nor a directory")
 
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
-    parser.add_argument("
+    parser.add_argument("input", type=str, help="Path to input image or folder containing input images")
     parser.add_argument("T", type=int)
-    parser.add_argument("K", type=int)
     parser.add_argument("operator_config", type=str)
     parser.add_argument("noise_config", type=str)
     parser.add_argument("model_config", type=str)
-    parser.add_argument("--
-                        choices=['gradnorm', 'expected_gradnorm'],
-                        default='expected_gradnorm')
+    parser.add_argument("--stopping-sigma", type=float, default=0.1, help="How many std deviations away to stop")
     parser.add_argument("--lambda-val", type=float,
                         default=None, help="Constant to scale learning rate. Leave empty to use a heuristic best guess.")
-    parser.add_argument("--output-dir", default="
-    parser.add_argument("--loss", type=str,
-                        choices=['l2', 'kl', 'categorical_kl'], default='l2',
-                        help="Algorithm to use. Options: 'l2', 'kl', 'categorical_kl'. Default is 'l2'."
-    )
+    parser.add_argument("--output-dir", default="output", type=str)
     parser.add_argument("--cuda", default=True, action=argparse.BooleanOptionalAction)
+    parser.add_argument("--K", type=int, default=20,
+                        help="Cap the number of steps K at any iteration. Helps avoid edge cases or cap NFEs.")
 
     main(parser.parse_args())
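With the new positional `input` argument, `inference.py` accepts either a single image or a folder of images. An illustrative invocation (all paths are placeholders; the option values shown are the new defaults):

```
python inference.py path/to/images/ 25 path/to/operator_config.yaml path/to/noise_config.yaml path/to/model_config --stopping-sigma 0.1 --K 20 --output-dir output
```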
requirements.txt
CHANGED

@@ -1,11 +1,8 @@
-[old lines 1-3: content not shown in the source]
-numpy
+diffusers==0.30.3
+gradio==5.3.0
+numpy==2.1.2
 Pillow
-PyYAML
-scipy
-[old lines 8-9: content not shown in the source]
-torchvision
-tqdm
+PyYAML==6.0.2
+scipy==1.14.1
+tqdm==4.66.5
+accelerate