text-guided-image-colorization

Running

App Files Files Community

LogicGoInfotechSpaces committed on Nov 14

Commit

c9d2859

1 Parent(s): ec7bfd1

Fix colorization: Add ResNet generator architecture and fix minimum image size to prevent kernel errors

Browse files

Files changed (1) hide show

app/pytorch_colorizer.py +116 -20

app/pytorch_colorizer.py CHANGED Viewed

@@ -16,6 +16,74 @@ from huggingface_hub import hf_hub_download
 logger = logging.getLogger(__name__)
 class UNetGenerator(nn.Module):
     """
     U-Net Generator for Image Colorization
@@ -143,34 +211,51 @@ class PyTorchColorizer:
                 # Log state dict keys to understand model structure
                 if isinstance(state_dict, dict):
-                    keys = list(state_dict.keys())[:10]  # First 10 keys
                     logger.info(f"Model state_dict keys (sample): {keys}")
                     logger.info(f"Total state_dict keys: {len(state_dict.keys())}")
             except Exception as e:
                 logger.error(f"Failed to load model file: {e}")
                 raise
             # Try different model architectures with state_dict
             model_configs = [
-                {"input_nc": 1, "output_nc": 3, "num_downs": 8, "ngf": 64},
-                {"input_nc": 1, "output_nc": 3, "num_downs": 7, "ngf": 64},
-                {"input_nc": 1, "output_nc": 3, "num_downs": 8, "ngf": 32},
-                {"input_nc": 1, "output_nc": 3, "num_downs": 6, "ngf": 64},
             ]
             loaded = False
             for config in model_configs:
                 try:
-                    model = UNetGenerator(**config)
                     # Try strict loading first
                     try:
                         model.load_state_dict(state_dict, strict=True)
-                        logger.info(f"✅ Successfully loaded model with strict matching: {config}")
                     except:
                         # If strict fails, try non-strict
                         model.load_state_dict(state_dict, strict=False)
-                        logger.info(f"✅ Successfully loaded model with non-strict matching: {config}")
                     model.eval()
                     model.to(self.device)
@@ -178,25 +263,25 @@ class PyTorchColorizer:
                     loaded = True
                     break
                 except Exception as e:
-                    logger.debug(f"Failed to load with config {config}: {e}")
                     continue
             if not loaded:
-                # Last resort: try with default config and non-strict loading
                 try:
-                    logger.warning("Attempting to load model with default config and non-strict matching")
-                    model = UNetGenerator(input_nc=1, output_nc=3, num_downs=8, ngf=64)
                     model.load_state_dict(state_dict, strict=False)
                     model.eval()
                     model.to(self.device)
                     self.model = model
-                    logger.info("✅ Model loaded with fallback method")
                 except Exception as e:
                     logger.error(f"Failed to load model: {e}")
                     raise RuntimeError(
-                        f"Could not load PyTorch model. Tried multiple architectures. "
                         f"Last error: {e}. "
-                        f"The model architecture may not match the expected U-Net structure."
                     )
         except Exception as e:
@@ -222,16 +307,27 @@ class PyTorchColorizer:
         if image.mode != "L":
             image = image.convert("L")
-        # Try to maintain aspect ratio and use a better resize
-        # Many GAN models work better with 256x256 or 512x512
-        target_size = 256
-        if max(original_size) > 512:
-            # Scale down proportionally but keep max dimension reasonable
             scale = target_size / max(original_size)
             new_size = (int(original_size[0] * scale), int(original_size[1] * scale))
         else:
             new_size = original_size
         # Transform to tensor
         # GAN colorization models typically expect normalized input
         transform = transforms.Compose([

 logger = logging.getLogger(__name__)
+class ResNetBlock(nn.Module):
+    """Residual block: two reflection-padded 3x3 convolutions plus an identity skip.
+
+    Each convolution is preceded by ``ReflectionPad2d(1)`` and followed by
+    ``InstanceNorm2d``; only the first one is followed by a ReLU. Both
+    convolutions map ``dim`` channels to ``dim`` channels, so the result of
+    the branch can be added element-wise to the block's input.
+
+    Args:
+        dim: Number of input (and output) channels of the block.
+    """
+    def __init__(self, dim):
+        super(ResNetBlock, self).__init__()
+        self.conv_block = self.build_conv_block(dim)  # residual branch; forward() adds its output to the input
+    def build_conv_block(self, dim):
+        conv_block = []
+        conv_block += [nn.ReflectionPad2d(1)]  # pad 1 px per side so the unpadded 3x3 conv below keeps H x W
+        conv_block += [nn.Conv2d(dim, dim, kernel_size=3, padding=0, bias=True)]
+        conv_block += [nn.InstanceNorm2d(dim)]
+        conv_block += [nn.ReLU(True)]  # positional True is ReLU's `inplace` flag
+        conv_block += [nn.ReflectionPad2d(1)]  # same pad + 3x3 conv pairing as above
+        conv_block += [nn.Conv2d(dim, dim, kernel_size=3, padding=0, bias=True)]
+        conv_block += [nn.InstanceNorm2d(dim)]  # no activation after the second norm
+        return nn.Sequential(*conv_block)  # list order fixes the sub-module indices (conv_block.0 .. conv_block.6)
+    def forward(self, x):
+        out = x + self.conv_block(x)  # identity skip connection
+        return out
+class ResNetGenerator(nn.Module):
+    """
+    ResNet-style generator for image colorization.
+
+    All layers live in one ``nn.Sequential`` stored as ``self.model``, in this order:
+      1. ReflectionPad2d(3) + 7x7 conv (input_nc -> ngf) + InstanceNorm2d + ReLU
+      2. two stride-2 3x3 convs, each doubling the channel count (ngf -> 4 * ngf)
+      3. ``n_blocks`` ResNetBlock modules at 4 * ngf channels
+      4. two stride-2 transposed 3x3 convs, each halving the channel count (4 * ngf -> ngf)
+      5. ReflectionPad2d(3) + 7x7 conv (ngf -> output_nc) + Tanh
+
+    NOTE(review): this docstring previously said the architecture matches a
+    'layers.X.X' key structure. The container attribute here is named ``model``,
+    so this module's own state_dict keys start with ``model.`` and not ``layers.``;
+    confirm against the checkpoint's key names before relying on a match.
+
+    Args:
+        input_nc: Number of channels in the input tensor (default 1).
+        output_nc: Number of channels in the output tensor (default 3).
+        ngf: Number of filters produced by the first convolution (default 64).
+        n_blocks: Number of ResNetBlock modules in the middle of the network (default 9).
+    """
+    def __init__(self, input_nc=1, output_nc=3, ngf=64, n_blocks=9):
+        super(ResNetGenerator, self).__init__()
+        model = []
+        # Initial convolution block
+        model += [nn.ReflectionPad2d(3)]  # pad 3 px per side so the unpadded 7x7 conv below keeps H x W
+        model += [nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0, bias=True)]
+        model += [nn.InstanceNorm2d(ngf)]
+        model += [nn.ReLU(True)]  # positional True is ReLU's `inplace` flag
+        # Downsampling
+        n_downsampling = 2
+        for i in range(n_downsampling):
+            mult = 2 ** i  # channel multiplier entering this stage: 1, then 2
+            model += [nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2, padding=1, bias=True)]
+            model += [nn.InstanceNorm2d(ngf * mult * 2)]
+            model += [nn.ReLU(True)]
+        # ResNet blocks
+        mult = 2 ** n_downsampling  # channel multiplier after downsampling (4 for n_downsampling = 2)
+        for i in range(n_blocks):
+            model += [ResNetBlock(ngf * mult)]
+        # Upsampling
+        for i in range(n_downsampling):
+            mult = 2 ** (n_downsampling - i)  # channel multiplier entering this stage: 4, then 2
+            model += [nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2), kernel_size=3, stride=2, padding=1, output_padding=1, bias=True)]
+            model += [nn.InstanceNorm2d(int(ngf * mult / 2))]
+            model += [nn.ReLU(True)]
+        # Output layer
+        model += [nn.ReflectionPad2d(3)]  # same pad + 7x7 conv pairing as the initial block
+        model += [nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)]
+        model += [nn.Tanh()]  # final activation; Tanh bounds the output to [-1, 1]
+        self.model = nn.Sequential(*model)  # attribute name becomes the state_dict key prefix ("model.<index>...")
+    def forward(self, input):
+        return self.model(input)  # run the input through the whole sequential stack
 class UNetGenerator(nn.Module):
     """
     U-Net Generator for Image Colorization
                 # Log state dict keys to understand model structure
                 if isinstance(state_dict, dict):
+                    keys = list(state_dict.keys())[:20]  # First 20 keys
                     logger.info(f"Model state_dict keys (sample): {keys}")
                     logger.info(f"Total state_dict keys: {len(state_dict.keys())}")
+                    # Try to infer architecture from key names
+                    if any('down' in k.lower() or 'up' in k.lower() for k in keys):
+                        logger.info("Detected U-Net style architecture")
+                    if any('resnet' in k.lower() for k in keys):
+                        logger.info("Detected ResNet style architecture")
             except Exception as e:
                 logger.error(f"Failed to load model file: {e}")
                 raise
             # Try different model architectures with state_dict
+            # Based on state_dict keys showing "layers" structure, try ResNet first
             model_configs = [
+                # ResNet Generator (matches "layers" structure)
+                {"type": "resnet", "input_nc": 1, "output_nc": 3, "ngf": 64, "n_blocks": 9},
+                {"type": "resnet", "input_nc": 1, "output_nc": 3, "ngf": 32, "n_blocks": 6},
+                {"type": "resnet", "input_nc": 1, "output_nc": 3, "ngf": 64, "n_blocks": 6},
+                # U-Net Generator (fallback)
+                {"type": "unet", "input_nc": 1, "output_nc": 3, "num_downs": 8, "ngf": 64},
+                {"type": "unet", "input_nc": 1, "output_nc": 3, "num_downs": 7, "ngf": 64},
+                {"type": "unet", "input_nc": 1, "output_nc": 3, "num_downs": 8, "ngf": 32},
             ]
             loaded = False
             for config in model_configs:
                 try:
+                    config_copy = config.copy()  # Don't modify original
+                    model_type = config_copy.pop("type")
+                    if model_type == "resnet":
+                        model = ResNetGenerator(**config_copy)
+                    else:
+                        model = UNetGenerator(**config_copy)
                     # Try strict loading first
                     try:
                         model.load_state_dict(state_dict, strict=True)
+                        logger.info(f"✅ Successfully loaded {model_type} model with strict matching: {config_copy}")
                     except:
                         # If strict fails, try non-strict
                         model.load_state_dict(state_dict, strict=False)
+                        logger.info(f"✅ Successfully loaded {model_type} model with non-strict matching: {config_copy}")
                     model.eval()
                     model.to(self.device)
                     loaded = True
                     break
                 except Exception as e:
+                    logger.debug(f"Failed to load {config.get('type', 'unknown')} model with config {config}: {e}")
                     continue
             if not loaded:
+                # Last resort: try with default ResNet config and non-strict loading
                 try:
+                    logger.warning("Attempting to load model with default ResNet config and non-strict matching")
+                    model = ResNetGenerator(input_nc=1, output_nc=3, ngf=64, n_blocks=9)
                     model.load_state_dict(state_dict, strict=False)
                     model.eval()
                     model.to(self.device)
                     self.model = model
+                    logger.info("✅ Model loaded with fallback ResNet method")
                 except Exception as e:
                     logger.error(f"Failed to load model: {e}")
                     raise RuntimeError(
+                        f"Could not load PyTorch model. Tried multiple architectures (ResNet and U-Net). "
                         f"Last error: {e}. "
+                        f"The model architecture may not match the expected structures."
                     )
         except Exception as e:
         if image.mode != "L":
             image = image.convert("L")
+        # Ensure minimum size - models need at least 64x64, preferably 256x256
+        # Many GAN models work better with 256x256
+        min_size = 64  # Minimum size to avoid kernel errors
+        target_size = 256  # Preferred size for GAN models
+        # Calculate new size maintaining aspect ratio
+        if max(original_size) < min_size:
+            # If image is too small, scale it up
+            scale = min_size / max(original_size)
+            new_size = (int(original_size[0] * scale), int(original_size[1] * scale))
+        elif max(original_size) > 512:
+            # If image is too large, scale it down
             scale = target_size / max(original_size)
             new_size = (int(original_size[0] * scale), int(original_size[1] * scale))
         else:
+            # Use original size if it's in a reasonable range
             new_size = original_size
+        # Ensure minimum dimensions
+        new_size = (max(new_size[0], min_size), max(new_size[1], min_size))
         # Transform to tensor
         # GAN colorization models typically expect normalized input
         transform = transforms.Compose([