text-guided-image-colorization

Running

App Files Files Community

LogicGoInfotechSpaces committed on Nov 14

Commit

845bd8d

1 Parent(s): ae29148

Fix: Try ResNet first, improve size validation, and skip configs with too many mismatches

Browse files

Files changed (1) hide show

app/pytorch_colorizer.py +27 -16

app/pytorch_colorizer.py CHANGED Viewed

@@ -231,15 +231,16 @@ class PyTorchColorizer:
             # Try different model architectures with state_dict
             # Based on state_dict keys showing "layers" structure, try ResNet first
             model_configs = [
-                # ResNet Generator (matches "layers" structure)
                 {"type": "resnet", "input_nc": 1, "output_nc": 3, "ngf": 64, "n_blocks": 9},
-                {"type": "resnet", "input_nc": 1, "output_nc": 3, "ngf": 32, "n_blocks": 6},
                 {"type": "resnet", "input_nc": 1, "output_nc": 3, "ngf": 64, "n_blocks": 6},
-                # U-Net Generator (fallback)
                 {"type": "unet", "input_nc": 1, "output_nc": 3, "num_downs": 8, "ngf": 64},
                 {"type": "unet", "input_nc": 1, "output_nc": 3, "num_downs": 7, "ngf": 64},
-                {"type": "unet", "input_nc": 1, "output_nc": 3, "num_downs": 8, "ngf": 32},
             ]
             loaded = False
@@ -259,8 +260,9 @@ class PyTorchColorizer:
                             logger.info(f"✅ Successfully loaded {model_type} model with perfect matching: {config_copy}")
                         else:
                             logger.warning(f"⚠️ Loaded {model_type} model with mismatches - Missing: {len(missing_keys)}, Unexpected: {len(unexpected_keys)}")
-                            if len(missing_keys) > len(state_dict) * 0.5:  # If more than 50% missing, skip
-                                logger.warning(f"Skipping this config - too many missing keys ({len(missing_keys)}/{len(state_dict)})")
                                 continue
                     except Exception as e:
                         logger.debug(f"Failed to load state_dict: {e}")
@@ -318,24 +320,33 @@ class PyTorchColorizer:
         # Ensure minimum size - models need at least 64x64, preferably 256x256
         # Many GAN models work better with 256x256
-        min_size = 64  # Minimum size to avoid kernel errors
         target_size = 256  # Preferred size for GAN models
         # Calculate new size maintaining aspect ratio
-        if max(original_size) < min_size:
             # If image is too small, scale it up
-            scale = min_size / max(original_size)
-            new_size = (int(original_size[0] * scale), int(original_size[1] * scale))
-        elif max(original_size) > 512:
             # If image is too large, scale it down
-            scale = target_size / max(original_size)
-            new_size = (int(original_size[0] * scale), int(original_size[1] * scale))
         else:
-            # Use original size if it's in a reasonable range
-            new_size = original_size
-        # Ensure minimum dimensions
         new_size = (max(new_size[0], min_size), max(new_size[1], min_size))
         # Transform to tensor
         # GAN colorization models typically expect normalized input

             # Try different model architectures with state_dict
             # Based on state_dict keys showing "layers" structure, try ResNet first
+            # The keys like 'layers.0.4.0.conv1.weight' suggest ResNet blocks in a Sequential
             model_configs = [
+                # ResNet Generator (matches "layers" structure) - try these first
                 {"type": "resnet", "input_nc": 1, "output_nc": 3, "ngf": 64, "n_blocks": 9},
                 {"type": "resnet", "input_nc": 1, "output_nc": 3, "ngf": 64, "n_blocks": 6},
+                {"type": "resnet", "input_nc": 1, "output_nc": 3, "ngf": 32, "n_blocks": 9},
+                {"type": "resnet", "input_nc": 1, "output_nc": 3, "ngf": 32, "n_blocks": 6},
+                # U-Net Generator (fallback only if ResNet fails)
                 {"type": "unet", "input_nc": 1, "output_nc": 3, "num_downs": 8, "ngf": 64},
                 {"type": "unet", "input_nc": 1, "output_nc": 3, "num_downs": 7, "ngf": 64},
             ]
             loaded = False
                             logger.info(f"✅ Successfully loaded {model_type} model with perfect matching: {config_copy}")
                         else:
                             logger.warning(f"⚠️ Loaded {model_type} model with mismatches - Missing: {len(missing_keys)}, Unexpected: {len(unexpected_keys)}")
+                            # If more than 30% missing or if unexpected keys > 50% of state_dict, skip
+                            if len(missing_keys) > len(state_dict) * 0.3 or len(unexpected_keys) > len(state_dict) * 0.5:
+                                logger.warning(f"Skipping this config - too many mismatches (Missing: {len(missing_keys)}/{len(state_dict)}, Unexpected: {len(unexpected_keys)}/{len(state_dict)})")
                                 continue
                     except Exception as e:
                         logger.debug(f"Failed to load state_dict: {e}")
         # Ensure minimum size - models need at least 64x64, preferably 256x256
         # Many GAN models work better with 256x256
+        min_size = 64  # Minimum size to avoid kernel errors (must be >= 4 for kernel size)
         target_size = 256  # Preferred size for GAN models
         # Calculate new size maintaining aspect ratio
+        width, height = original_size
+        max_dim = max(width, height)
+        min_dim = min(width, height)
+        if max_dim < min_size:
             # If image is too small, scale it up
+            scale = min_size / max_dim
+            new_width = max(int(width * scale), min_size)
+            new_height = max(int(height * scale), min_size)
+            new_size = (new_width, new_height)
+        elif max_dim > 512:
             # If image is too large, scale it down
+            scale = target_size / max_dim
+            new_width = max(int(width * scale), min_size)
+            new_height = max(int(height * scale), min_size)
+            new_size = (new_width, new_height)
         else:
+            # Use original size but ensure minimum dimensions
+            new_size = (max(width, min_size), max(height, min_size))
+        # Double-check minimum dimensions are met
         new_size = (max(new_size[0], min_size), max(new_size[1], min_size))
+        logger.debug(f"Resizing image from {original_size} to {new_size}")
         # Transform to tensor
         # GAN colorization models typically expect normalized input