Spaces:

danhtran2mind
/

grayscale2color-landscape

Sleeping

App Files Files Community

danhtran2mind committed 29 days ago

Commit

055cdae

verified ·

1 Parent(s): 3ebaa82

Update models/unet_gray2color.py

Browse files

Files changed (1) hide show

models/unet_gray2color.py +17 -45

models/unet_gray2color.py CHANGED Viewed

@@ -1,13 +1,12 @@
 import numpy as np
 import tensorflow as tf
 from tensorflow.keras.layers import (
-    Input, Dense, Conv2D, MaxPooling2D, UpSampling2D, Concatenate,
-    BatchNormalization, LayerNormalization, Dropout, MultiHeadAttention, Add, Reshape, Layer
 )
 from tensorflow.keras.models import Model
 from tensorflow.keras.optimizers import Adam
 from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
-from tensorflow.keras.mixed_precision import set_global_policy
 import cv2
 import glob
 import os
@@ -15,6 +14,9 @@ from skimage.color import rgb2lab, lab2rgb
 from skimage.metrics import peak_signal_noise_ratio
 import matplotlib.pyplot as plt
 # Custom self-attention layer with serialization support
 @tf.keras.utils.register_keras_serializable()
 class SelfAttentionLayer(Layer):
@@ -27,7 +29,6 @@ class SelfAttentionLayer(Layer):
     def build(self, input_shape):
         # input_shape: (batch_size, height, width, channels)
-        # For self-attention, query, key, and value have the same shape
         batch_size, height, width, channels = input_shape
         attention_shape = (batch_size, height * width, channels)  # Shape after reshape
         # Build MultiHeadAttention with query, key, and value shapes
@@ -40,7 +41,7 @@ class SelfAttentionLayer(Layer):
         b, h, w, c = tf.shape(x)[0], x.shape[1], x.shape[2], x.shape[3]
         attention_input = tf.reshape(x, [b, h * w, c])
         attention_output = self.mha(attention_input, attention_input)
-        # Cast attention_output to match x's dtype to avoid type mismatch
         attention_output = tf.cast(attention_output, dtype=x.dtype)
         attention_output = tf.reshape(attention_output, [b, h, w, c])
         return self.ln(x + attention_output)
@@ -55,59 +56,35 @@ class SelfAttentionLayer(Layer):
             'key_dim': self.key_dim
         })
         return config
-# class SelfAttentionLayer(Layer):
-#     def __init__(self, num_heads, key_dim, **kwargs):
-#         super(SelfAttentionLayer, self).__init__(**kwargs)
-#         self.num_heads = num_heads
-#         self.key_dim = key_dim
-#         self.mha = MultiHeadAttention(num_heads=num_heads, key_dim=key_dim)
-#         self.ln = LayerNormalization()
-#     def call(self, x):
-#         b, h, w, c = tf.shape(x)[0], x.shape[1], x.shape[2], x.shape[3]
-#         attention_input = tf.reshape(x, [b, h * w, c])
-#         attention_output = self.mha(attention_input, attention_input)
-#         attention_output = tf.reshape(attention_output, [b, h, w, c])
-#         return self.ln(x + attention_output)
-#     def get_config(self):
-#         config = super(SelfAttentionLayer, self).get_config()
-#         config.update({
-#             'num_heads': self.num_heads,
-#             'key_dim': self.key_dim
-#         })
-#         return config
 def attention_unet_model(input_shape=(256, 256, 1)):
     inputs = Input(input_shape)
     # Encoder with reduced filters
     c1 = Conv2D(16, (3, 3), activation='relu', padding='same')(inputs)
     c1 = BatchNormalization()(c1)
     c1 = Conv2D(16, (3, 3), activation='relu', padding='same')(c1)
     c1 = BatchNormalization()(c1)
     p1 = MaxPooling2D((2, 2))(c1)
     c2 = Conv2D(32, (3, 3), activation='relu', padding='same')(p1)
     c2 = BatchNormalization()(c2)
     c2 = Conv2D(32, (3, 3), activation='relu', padding='same')(c2)
     c2 = BatchNormalization()(c2)
     p2 = MaxPooling2D((2, 2))(c2)
     c3 = Conv2D(64, (3, 3), activation='relu', padding='same')(p2)
     c3 = BatchNormalization()(c3)
     c3 = Conv2D(64, (3, 3), activation='relu', padding='same')(c3)
     c3 = BatchNormalization()(c3)
     p3 = MaxPooling2D((2, 2))(c3)
     # Bottleneck with reduced filters and attention
     c4 = Conv2D(128, (3, 3), activation='relu', padding='same')(p3)
     c4 = BatchNormalization()(c4)
     c4 = Conv2D(128, (3, 3), activation='relu', padding='same')(c4)
     c4 = BatchNormalization()(c4)
-    c4 = SelfAttentionLayer(num_heads=2, key_dim=32)(c4)  # Reduced heads and key_dim
     # Attention gate
     def attention_gate(g, s, num_filters):
         g_conv = Conv2D(num_filters, (1, 1), padding='same')(g)
@@ -116,7 +93,7 @@ def attention_unet_model(input_shape=(256, 256, 1)):
         attn = tf.keras.layers.Activation('relu')(attn)
         attn = Conv2D(1, (1, 1), padding='same', activation='sigmoid')(attn)
         return s * attn
     # Decoder with reduced filters
     u5 = UpSampling2D((2, 2))(c4)
     a5 = attention_gate(u5, c3, 64)
@@ -125,7 +102,7 @@ def attention_unet_model(input_shape=(256, 256, 1)):
     c5 = BatchNormalization()(c5)
     c5 = Conv2D(64, (3, 3), activation='relu', padding='same')(c5)
     c5 = BatchNormalization()(c5)
     u6 = UpSampling2D((2, 2))(c5)
     a6 = attention_gate(u6, c2, 32)
     u6 = Concatenate()([u6, a6])
@@ -133,7 +110,7 @@ def attention_unet_model(input_shape=(256, 256, 1)):
     c6 = BatchNormalization()(c6)
     c6 = Conv2D(32, (3, 3), activation='relu', padding='same')(c6)
     c6 = BatchNormalization()(c6)
     u7 = UpSampling2D((2, 2))(c6)
     a7 = attention_gate(u7, c1, 16)
     u7 = Concatenate()([u7, a7])
@@ -141,21 +118,16 @@ def attention_unet_model(input_shape=(256, 256, 1)):
     c7 = BatchNormalization()(c7)
     c7 = Conv2D(16, (3, 3), activation='relu', padding='same')(c7)
     c7 = BatchNormalization()(c7)
     # Output layer
     outputs = Conv2D(2, (1, 1), activation='tanh', padding='same')(c7)
     model = Model(inputs, outputs)
     return model
-# # Instantiate and compile the model
-# model = attention_unet_model(input_shape=(HEIGHT, WIDTH, 1))
-# model.summary()
 if __name__ == "__main__":
     # Define constants
-    HEIGHT, WIDTH = 1024, 1024
     # Compile model
     model = attention_unet_model(input_shape=(HEIGHT, WIDTH, 1))
     model.summary()
-    model.compile(optimizer=Adam(learning_rate=7e-5), loss=tf.keras.losses.MeanSquaredError())

 import numpy as np
 import tensorflow as tf
 from tensorflow.keras.layers import (
+    Input, Dense, Conv2D, MaxPooling2D, UpSampling2D, Concatenate, BatchNormalization,
+    LayerNormalization, Dropout, MultiHeadAttention, Add, Reshape, Layer
 )
 from tensorflow.keras.models import Model
 from tensorflow.keras.optimizers import Adam
 from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
 import cv2
 import glob
 import os
 from skimage.metrics import peak_signal_noise_ratio
 import matplotlib.pyplot as plt
+# Disable mixed precision to avoid dtype mismatches
+tf.keras.mixed_precision.set_global_policy('float32')
 # Custom self-attention layer with serialization support
 @tf.keras.utils.register_keras_serializable()
 class SelfAttentionLayer(Layer):
     def build(self, input_shape):
         # input_shape: (batch_size, height, width, channels)
         batch_size, height, width, channels = input_shape
         attention_shape = (batch_size, height * width, channels)  # Shape after reshape
         # Build MultiHeadAttention with query, key, and value shapes
         b, h, w, c = tf.shape(x)[0], x.shape[1], x.shape[2], x.shape[3]
         attention_input = tf.reshape(x, [b, h * w, c])
         attention_output = self.mha(attention_input, attention_input)
+        # Cast attention_output to match x's dtype
         attention_output = tf.cast(attention_output, dtype=x.dtype)
         attention_output = tf.reshape(attention_output, [b, h, w, c])
         return self.ln(x + attention_output)
             'key_dim': self.key_dim
         })
         return config
 def attention_unet_model(input_shape=(256, 256, 1)):
     inputs = Input(input_shape)
     # Encoder with reduced filters
     c1 = Conv2D(16, (3, 3), activation='relu', padding='same')(inputs)
     c1 = BatchNormalization()(c1)
     c1 = Conv2D(16, (3, 3), activation='relu', padding='same')(c1)
     c1 = BatchNormalization()(c1)
     p1 = MaxPooling2D((2, 2))(c1)
     c2 = Conv2D(32, (3, 3), activation='relu', padding='same')(p1)
     c2 = BatchNormalization()(c2)
     c2 = Conv2D(32, (3, 3), activation='relu', padding='same')(c2)
     c2 = BatchNormalization()(c2)
     p2 = MaxPooling2D((2, 2))(c2)
     c3 = Conv2D(64, (3, 3), activation='relu', padding='same')(p2)
     c3 = BatchNormalization()(c3)
     c3 = Conv2D(64, (3, 3), activation='relu', padding='same')(c3)
     c3 = BatchNormalization()(c3)
     p3 = MaxPooling2D((2, 2))(c3)
     # Bottleneck with reduced filters and attention
     c4 = Conv2D(128, (3, 3), activation='relu', padding='same')(p3)
     c4 = BatchNormalization()(c4)
     c4 = Conv2D(128, (3, 3), activation='relu', padding='same')(c4)
     c4 = BatchNormalization()(c4)
+    c4 = SelfAttentionLayer(num_heads=2, key_dim=32)(c4)
     # Attention gate
     def attention_gate(g, s, num_filters):
         g_conv = Conv2D(num_filters, (1, 1), padding='same')(g)
         attn = tf.keras.layers.Activation('relu')(attn)
         attn = Conv2D(1, (1, 1), padding='same', activation='sigmoid')(attn)
         return s * attn
     # Decoder with reduced filters
     u5 = UpSampling2D((2, 2))(c4)
     a5 = attention_gate(u5, c3, 64)
     c5 = BatchNormalization()(c5)
     c5 = Conv2D(64, (3, 3), activation='relu', padding='same')(c5)
     c5 = BatchNormalization()(c5)
     u6 = UpSampling2D((2, 2))(c5)
     a6 = attention_gate(u6, c2, 32)
     u6 = Concatenate()([u6, a6])
     c6 = BatchNormalization()(c6)
     c6 = Conv2D(32, (3, 3), activation='relu', padding='same')(c6)
     c6 = BatchNormalization()(c6)
     u7 = UpSampling2D((2, 2))(c6)
     a7 = attention_gate(u7, c1, 16)
     u7 = Concatenate()([u7, a7])
     c7 = BatchNormalization()(c7)
     c7 = Conv2D(16, (3, 3), activation='relu', padding='same')(c7)
     c7 = BatchNormalization()(c7)
     # Output layer
     outputs = Conv2D(2, (1, 1), activation='tanh', padding='same')(c7)
     model = Model(inputs, outputs)
     return model
 if __name__ == "__main__":
     # Define constants
+    HEIGHT, WIDTH = 256, 256  # Match function definition
     # Compile model
     model = attention_unet_model(input_shape=(HEIGHT, WIDTH, 1))
     model.summary()
+    model.compile(optimizer=Adam(learning_rate=7e-5), loss=tf.keras.losses.MeanSquaredError())