text-guided-image-colorization

Running

App Files Files Community

LogicGoInfotechSpaces committed on Nov 14

Commit

ae9bbd0

1 Parent(s): 807fb92

Refactor to use Hugging Face Inference API with fal-ai provider

- Replace local model loading with InferenceClient API
- Remove heavy SDXL/ControlNet/BLIP model dependencies
- Use FLUX.1-Kontext-dev model via API
- Keep FastAPI and Firebase authentication
- Significantly reduce memory usage (no local models)

Browse files

Files changed (2) hide show

app/config.py +3 -1
app/main_sdxl.py +69 -171

app/config.py CHANGED Viewed

@@ -44,8 +44,10 @@ class Settings(BaseSettings):
         "FASTAI_OUTPUT_CAPTION",
         "Colorized using GAN-Colorization-Model"
     )
-    INFERENCE_PROVIDER: str = os.getenv("INFERENCE_PROVIDER", "hf-inference")
     INFERENCE_TIMEOUT: int = int(os.getenv("INFERENCE_TIMEOUT", "180"))
     # Storage settings
     UPLOAD_DIR: str = os.getenv("UPLOAD_DIR", "uploads")

         "FASTAI_OUTPUT_CAPTION",
         "Colorized using GAN-Colorization-Model"
     )
+    INFERENCE_PROVIDER: str = os.getenv("INFERENCE_PROVIDER", "fal-ai")
+    INFERENCE_MODEL: str = os.getenv("INFERENCE_MODEL", "black-forest-labs/FLUX.1-Kontext-dev")
     INFERENCE_TIMEOUT: int = int(os.getenv("INFERENCE_TIMEOUT", "180"))
+    HF_TOKEN: str = os.getenv("HF_TOKEN", "")
     # Storage settings
     UPLOAD_DIR: str = os.getenv("UPLOAD_DIR", "uploads")

app/main_sdxl.py CHANGED Viewed

@@ -1,17 +1,8 @@
 """
-FastAPI application for Text-Guided Image Colorization using SDXL + ControlNet
-Based on fffiloni/text-guided-image-colorization
 """
 import os
-# Set environment variables BEFORE any imports
-os.environ["OMP_NUM_THREADS"] = "1"
-os.environ["HF_HOME"] = "/tmp/hf_cache"
-os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
-os.environ["HF_HUB_CACHE"] = "/tmp/hf_cache"
-os.environ["HUGGINGFACE_HUB_CACHE"] = "/tmp/hf_cache"
-os.environ["XDG_CACHE_HOME"] = "/tmp/hf_cache"
-os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib_config"
 import io
 import uuid
 import logging
@@ -25,23 +16,11 @@ from fastapi.staticfiles import StaticFiles
 import firebase_admin
 from firebase_admin import credentials, app_check, auth as firebase_auth
 from PIL import Image
-import torch
 import uvicorn
 import gradio as gr
-# SDXL + ControlNet imports
-from accelerate import Accelerator
-from diffusers import (
-    AutoencoderKL,
-    StableDiffusionXLControlNetPipeline,
-    ControlNetModel,
-    UNet2DConditionModel,
-)
-from transformers import (
-    BlipProcessor, BlipForConditionalGeneration,
-)
-from safetensors.torch import load_file
-from huggingface_hub import hf_hub_download, snapshot_download
 from app.config import settings
@@ -102,12 +81,8 @@ RESULT_DIR = Path("/tmp/colorize_results")
 app.mount("/results", StaticFiles(directory=str(RESULT_DIR)), name="results")
 app.mount("/uploads", StaticFiles(directory=str(UPLOAD_DIR)), name="uploads")
-# Global model variables
-pipe = None
-caption_model = None
-processor = None
-device = None
-weight_dtype = None
 model_load_error: Optional[str] = None
 # ========== Utility Functions ==========
@@ -177,110 +152,29 @@ def remove_unlikely_words(prompt: str) -> str:
 @app.on_event("startup")
 async def startup_event():
-    """Load SDXL + ControlNet models on startup"""
-    global pipe, caption_model, processor, device, weight_dtype, model_load_error
     try:
-        logger.info("🔄 Loading SDXL + ControlNet colorization models...")
-        # Use writable directory for model downloads
-        controlnet_dir = "/tmp/sdxl_light_caption_output"
-        try:
-            os.makedirs(controlnet_dir, exist_ok=True)
-            # Test write permissions
-            test_file = os.path.join(controlnet_dir, ".test_write")
-            with open(test_file, "w") as f:
-                f.write("test")
-            os.remove(test_file)
-            logger.info(f"Using directory: {controlnet_dir}")
-        except PermissionError as e:
-            logger.error(f"Permission denied for directory {controlnet_dir}: {e}")
-            raise
-        except Exception as e:
-            logger.error(f"Failed to create directory {controlnet_dir}: {e}")
-            raise
-        # Download controlnet model snapshot
-        controlnet_path = os.path.join(controlnet_dir, "checkpoint-30000", "controlnet")
-        if os.path.exists(controlnet_path):
-            logger.info(f"ControlNet model already exists at {controlnet_path}")
-        else:
-            try:
-                logger.info("Downloading ControlNet model...")
-                snapshot_download(
-                    repo_id='nickpai/sdxl_light_caption_output',
-                    local_dir=controlnet_dir
-                )
-                logger.info("ControlNet model downloaded successfully")
-            except Exception as e:
-                logger.error(f"Could not download controlnet snapshot: {e}")
-                if not os.path.exists(controlnet_path):
-                    raise
-        # Device and precision setup
-        accelerator = Accelerator(mixed_precision="fp16")
-        weight_dtype = torch.float16 if accelerator.mixed_precision == "fp16" else torch.float32
-        device = accelerator.device
-        logger.info(f"Using device: {device}, dtype: {weight_dtype}")
-        # Pretrained paths
-        base_model_path = settings.BASE_MODEL_ID
-        safetensors_ckpt = settings.LIGHTNING_WEIGHTS
-        # controlnet_path already defined above
-        # Load diffusion components
-        logger.info("Loading VAE...")
-        vae = AutoencoderKL.from_pretrained(base_model_path, subfolder="vae")
-        # Enable VAE slicing for memory efficiency
-        vae.enable_slicing()
-        vae.enable_tiling()
-        logger.info("Loading UNet...")
-        unet = UNet2DConditionModel.from_config(base_model_path, subfolder="unet")
-        unet.load_state_dict(load_file(hf_hub_download("ByteDance/SDXL-Lightning", safetensors_ckpt)))
-        # Enable attention slicing for memory efficiency
-        unet.set_attention_slice("max")
-        logger.info("Loading ControlNet...")
-        controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=weight_dtype)
-        # Enable attention slicing for ControlNet
-        controlnet.set_attention_slice("max")
-        logger.info("Creating pipeline...")
-        pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
-            base_model_path, vae=vae, unet=unet, controlnet=controlnet, torch_dtype=weight_dtype
         )
-        pipe.safety_checker = None
-        # Enable sequential CPU offloading to reduce memory usage
-        logger.info("Enabling CPU offloading for memory efficiency...")
-        pipe.enable_sequential_cpu_offload()
-        # Alternative: use model CPU offload (moves entire model to CPU when not in use)
-        # pipe.enable_model_cpu_offload()
-        logger.info("Memory optimizations enabled")
-        # Load BLIP captioning model (use base to save memory)
-        logger.info("Loading BLIP captioning model (using base model for memory efficiency)...")
-        caption_model_name = "blip-image-captioning-base"
-        try:
-            processor = BlipProcessor.from_pretrained(f"Salesforce/{caption_model_name}")
-            caption_model = BlipForConditionalGeneration.from_pretrained(
-                f"Salesforce/{caption_model_name}", torch_dtype=weight_dtype
-            )
-            # Keep BLIP on CPU and move to device only during inference
-            caption_model.eval()
-        except Exception as e:
-            logger.error(f"Failed to load BLIP model: {e}")
-            raise
-        logger.info("✅ All models loaded successfully!")
         model_load_error = None
     except Exception as e:
         error_msg = str(e)
-        logger.error(f"❌ Failed to load models: {error_msg}")
         model_load_error = error_msg
         # Don't raise - allow health check to work
@@ -288,11 +182,9 @@ async def startup_event():
 @app.on_event("shutdown")
 async def shutdown_event():
     """Cleanup on shutdown"""
-    global pipe, caption_model
-    if pipe:
-        del pipe
-    if caption_model:
-        del caption_model
     logger.info("Application shutdown")
@@ -356,9 +248,9 @@ async def health_check():
     """Health check endpoint"""
     response = {
         "status": "healthy",
-        "model_loaded": pipe is not None and caption_model is not None,
-        "model_type": "sdxl_controlnet",
-        "device": str(device) if device else None
     }
     if model_load_error:
         response["model_error"] = model_load_error
@@ -373,7 +265,7 @@ def colorize_image_sdxl(
     num_inference_steps: int = 8
 ) -> Tuple[Image.Image, str]:
     """
-    Colorize a grayscale or low-color image using SDXL + ControlNet.
     Args:
         image: PIL Image to colorize
@@ -385,46 +277,52 @@ def colorize_image_sdxl(
     Returns:
         Tuple of (colorized PIL Image, caption string)
     """
-    if pipe is None or caption_model is None:
-        raise RuntimeError("Models not loaded")
-    torch.manual_seed(seed)
     original_size = image.size
-    control_image = image.convert("L").convert("RGB").resize((512, 512))
-    # Image captioning - keep BLIP on CPU to save memory
-    input_text = settings.CAPTION_PREFIX
-    # Use CPU for BLIP to save GPU memory
-    blip_device = torch.device("cpu")
-    inputs = processor(control_image, input_text, return_tensors="pt").to(blip_device)
-    with torch.no_grad():
-        caption_ids = caption_model.generate(**inputs, max_length=50, num_beams=3)
-    caption = processor.decode(caption_ids[0], skip_special_tokens=True)
-    caption = remove_unlikely_words(caption)
-    # Construct final prompt
-    if positive_prompt:
-        final_prompt = f"{positive_prompt}, {caption}"
     else:
-        final_prompt = caption
-    # Inference with memory-efficient settings
-    with torch.no_grad():
-        result = pipe(
             prompt=final_prompt,
-            negative_prompt=negative_prompt or settings.NEGATIVE_PROMPT,
-            num_inference_steps=num_inference_steps,
-            generator=torch.manual_seed(seed),
-            image=control_image,
-            guidance_scale=7.5,  # Lower guidance scale uses less memory
         )
-    # Clear cache after inference
-    if torch.cuda.is_available():
-        torch.cuda.empty_cache()
-    colorized = apply_color(control_image, result.images[0]).resize(original_size)
-    return colorized, caption
 @app.post("/colorize")
@@ -440,8 +338,8 @@ async def colorize_api(
     Upload a grayscale image -> returns colorized image.
     Uses SDXL + ControlNet with automatic captioning.
     """
-    if pipe is None or caption_model is None:
-        raise HTTPException(status_code=503, detail="Colorization models not loaded")
     if not file.content_type or not file.content_type.startswith("image/"):
         raise HTTPException(status_code=400, detail="File must be an image")
@@ -505,8 +403,8 @@ def gradio_colorize(image, positive_prompt=None, negative_prompt=None, seed=123)
     if image is None:
         return None, ""
     try:
-        if pipe is None or caption_model is None:
-            return None, "Models not loaded"
         colorized, caption = colorize_image_sdxl(
             image,
             positive_prompt=positive_prompt,
@@ -520,7 +418,7 @@ def gradio_colorize(image, positive_prompt=None, negative_prompt=None, seed=123)
 title = "🎨 Text-Guided Image Colorization"
-description = "Upload a grayscale image and generate a color version guided by automatic captioning using SDXL + ControlNet."
 iface = gr.Interface(
     fn=gradio_colorize,

 """
+FastAPI application for Text-Guided Image Colorization using Hugging Face Inference API
+Uses fal-ai provider for memory-efficient inference
 """
 import os
 import io
 import uuid
 import logging
 import firebase_admin
 from firebase_admin import credentials, app_check, auth as firebase_auth
 from PIL import Image
 import uvicorn
 import gradio as gr
+# Hugging Face Inference API
+from huggingface_hub import InferenceClient
 from app.config import settings
 app.mount("/results", StaticFiles(directory=str(RESULT_DIR)), name="results")
 app.mount("/uploads", StaticFiles(directory=str(UPLOAD_DIR)), name="uploads")
+# Global Inference API client
+inference_client = None
 model_load_error: Optional[str] = None
 # ========== Utility Functions ==========
 @app.on_event("startup")
 async def startup_event():
+    """Initialize Hugging Face Inference API client"""
+    global inference_client, model_load_error
     try:
+        logger.info("🔄 Initializing Hugging Face Inference API client...")
+        # Get HF token from environment or settings
+        hf_token = os.getenv("HF_TOKEN") or settings.HF_TOKEN
+        if not hf_token:
+            raise ValueError("HF_TOKEN environment variable is required for Inference API")
+        # Initialize InferenceClient with fal-ai provider
+        inference_client = InferenceClient(
+            provider="fal-ai",
+            api_key=hf_token,
         )
+        logger.info("✅ Inference API client initialized successfully!")
         model_load_error = None
     except Exception as e:
         error_msg = str(e)
+        logger.error(f"❌ Failed to initialize Inference API client: {error_msg}")
         model_load_error = error_msg
         # Don't raise - allow health check to work
 @app.on_event("shutdown")
 async def shutdown_event():
     """Cleanup on shutdown"""
+    global inference_client
+    if inference_client:
+        inference_client = None
     logger.info("Application shutdown")
     """Health check endpoint"""
     response = {
         "status": "healthy",
+        "model_loaded": inference_client is not None,
+        "model_type": "hf_inference_api",
+        "provider": "fal-ai"
     }
     if model_load_error:
         response["model_error"] = model_load_error
     num_inference_steps: int = 8
 ) -> Tuple[Image.Image, str]:
     """
+    Colorize a grayscale or low-color image using Hugging Face Inference API.
     Args:
         image: PIL Image to colorize
     Returns:
         Tuple of (colorized PIL Image, caption string)
     """
+    if inference_client is None:
+        raise RuntimeError("Inference API client not initialized")
     original_size = image.size
+    # Resize to 512x512 for inference (FLUX models work well at this size)
+    control_image = image.convert("RGB").resize((512, 512))
+    # Convert image to bytes for API
+    img_bytes = io.BytesIO()
+    control_image.save(img_bytes, format="PNG")
+    img_bytes.seek(0)
+    input_image = img_bytes.read()
+    # Construct prompt
+    base_prompt = positive_prompt or "colorize this image with vibrant natural colors, high quality"
+    if negative_prompt:
+        # Note: Some models may not support negative_prompt directly
+        final_prompt = f"{base_prompt}. Avoid: {negative_prompt}"
     else:
+        final_prompt = base_prompt
+    # Use Inference API for image-to-image generation
+    model_name = settings.INFERENCE_MODEL
+    logger.info(f"Calling Inference API with model {model_name}, prompt: {final_prompt}")
+    try:
+        result_image = inference_client.image_to_image(
+            input_image,
             prompt=final_prompt,
+            model=model_name,
         )
+        # Resize back to original size
+        if isinstance(result_image, Image.Image):
+            colorized = result_image.resize(original_size)
+        else:
+            # If it's bytes, convert to PIL Image
+            colorized = Image.open(io.BytesIO(result_image)).resize(original_size)
+        # Generate a simple caption from the prompt
+        caption = final_prompt[:100]  # Truncate for display
+        return colorized, caption
+    except Exception as e:
+        logger.error(f"Inference API error: {e}")
+        raise RuntimeError(f"Failed to colorize image: {str(e)}")
 @app.post("/colorize")
     Upload a grayscale image -> returns colorized image.
     Uses SDXL + ControlNet with automatic captioning.
     """
+    if inference_client is None:
+        raise HTTPException(status_code=503, detail="Inference API client not initialized")
     if not file.content_type or not file.content_type.startswith("image/"):
         raise HTTPException(status_code=400, detail="File must be an image")
     if image is None:
         return None, ""
     try:
+        if inference_client is None:
+            return None, "Inference API client not initialized"
         colorized, caption = colorize_image_sdxl(
             image,
             positive_prompt=positive_prompt,
 title = "🎨 Text-Guided Image Colorization"
+description = "Upload a grayscale image and generate a color version using Hugging Face Inference API (fal-ai provider)."
 iface = gr.Interface(
     fn=gradio_colorize,