Spaces:

danhtran2mind
/

gray2color-landscape-from-scratch

Sleeping

App Files Community

danhtran2mind committed on Aug 1

Commit

e89a371

verified ·

1 Parent(s): 437b632

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -18

app.py CHANGED Viewed

@@ -5,14 +5,18 @@ import numpy as np
 import tensorflow as tf
 import requests
 from skimage.color import lab2rgb
-from tensorflow.keras.optimizers import Adam
 from models.autoencoder_gray2color import SpatialAttention
 from models.unet_gray2color import SelfAttentionLayer
 # Set float32 policy
 tf.keras.mixed_precision.set_global_policy('float32')
-WIDTH, HEIGHT = 512, 512
 # Define model paths
 load_model_paths = [
@@ -51,11 +55,10 @@ for path in load_model_paths:
     models[model_name] = tf.keras.models.load_model(
         path,
         custom_objects=custom_objects[model_name],
-        compile=False  # Skip optimizer state
     )
-    # Recompile the model
     models[model_name].compile(
-        optimizer=Adam(learning_rate=7e-5),
         loss=tf.keras.losses.MeanSquaredError()
     )
     print(f"{model_name} model loaded.")
@@ -65,30 +68,32 @@ print("All models loaded.")
 def process_image(input_img, model_name):
     # Store original input dimensions
     original_width, original_height = input_img.size
     # Convert PIL Image to grayscale and resize to model input size
-    img = input_img.convert("L")  # Convert to grayscale
-    img = img.resize((WIDTH, HEIGHT))  # Resize to 512x512
-    img_array = tf.keras.preprocessing.image.img_to_array(img) / 255.0  # Normalize to [0, 1]
-    img_array = img_array[None, ..., 0:1]  # Add batch dimension, shape: (1, 512, 512, 1)
     # Select model
     selected_model = models[model_name.lower()]
     # Run inference
-    output_array = selected_model.predict(img_array)  # Shape: (1, 512, 512, 2) for a*b*
-    # Extract L* (grayscale input) and a*b* (model output)
     L_channel = img_array[0, :, :, 0] * 100.0  # Denormalize L* to [0, 100]
     ab_channels = output_array[0] * 128.0  # Denormalize a*b* to [-128, 128]
-    # Combine L*, a*, b* into a 3-channel L*a*b* image
-    lab_image = np.stack([L_channel, ab_channels[:, :, 0], ab_channels[:, :, 1]], axis=-1)  # Shape: (512, 512, 3)
-    # Convert L*a*b* to RGB
-    rgb_array = lab2rgb(lab_image)  # Convert to RGB, output in [0, 1]
-    rgb_array = np.clip(rgb_array, 0, 1) * 255.0  # Scale to [0, 255]
-    rgb_image = Image.fromarray(rgb_array.astype(np.uint8), mode="RGB")  # Create RGB PIL image
-    # Resize output image to match input resolution
     rgb_image = rgb_image.resize((original_width, original_height), Image.Resampling.LANCZOS)
     return rgb_image

 import tensorflow as tf
 import requests
 from skimage.color import lab2rgb
 from models.autoencoder_gray2color import SpatialAttention
 from models.unet_gray2color import SelfAttentionLayer
 # Set float32 policy
 tf.keras.mixed_precision.set_global_policy('float32')
+# Model-specific input shapes
+MODEL_INPUT_SHAPES = {
+    "autoencoder": (512, 512),
+    "unet": (1024, 1024),
+    "transformer": (1024, 1024)
+}
 # Define model paths
 load_model_paths = [
     models[model_name] = tf.keras.models.load_model(
         path,
         custom_objects=custom_objects[model_name],
+        compile=False
     )
     models[model_name].compile(
+        optimizer=tf.keras.optimizers.Adam(learning_rate=7e-5),
         loss=tf.keras.losses.MeanSquaredError()
     )
     print(f"{model_name} model loaded.")
 def process_image(input_img, model_name):
     # Store original input dimensions
     original_width, original_height = input_img.size
+    # Get model-specific input shape
+    width, height = MODEL_INPUT_SHAPES[model_name.lower()]
     # Convert PIL Image to grayscale and resize to model input size
+    img = input_img.convert("L")
+    img = img.resize((width, height))
+    img_array = tf.keras.preprocessing.image.img_to_array(img) / 255.0
+    img_array = img_array[None, ..., 0:1]  # Shape: (1, height, width, 1)
     # Select model
     selected_model = models[model_name.lower()]
     # Run inference
+    output_array = selected_model.predict(img_array)  # Shape: (1, height, width, 2)
+    # Extract L* and a*b*
     L_channel = img_array[0, :, :, 0] * 100.0  # Denormalize L* to [0, 100]
     ab_channels = output_array[0] * 128.0  # Denormalize a*b* to [-128, 128]
+    # Combine L*, a*, b*
+    lab_image = np.stack([L_channel, ab_channels[:, :, 0], ab_channels[:, :, 1]], axis=-1)
+    # Convert to RGB
+    rgb_array = lab2rgb(lab_image)
+    rgb_array = np.clip(rgb_array, 0, 1) * 255.0
+    rgb_image = Image.fromarray(rgb_array.astype(np.uint8), mode="RGB")
+    # Resize output to original resolution
     rgb_image = rgb_image.resize((original_width, original_height), Image.Resampling.LANCZOS)
     return rgb_image