"""Z-Image-Turbo v2.3 - Multilingual Support""" import os import logging import torch import spaces import gradio as gr import requests import io import base64 from typing import Tuple, Optional, Dict from PIL import Image from diffusers import DiffusionPipeline, ZImageImg2ImgPipeline from openai import OpenAI # Configure logging (replaces debug print statements) logging.basicConfig(level=logging.INFO, format='[%(levelname)s] %(message)s') logger = logging.getLogger(__name__) # ============================================================================= # MULTILINGUAL SUPPORT # ============================================================================= LANGUAGES = ["English", "Español", "Português (BR)", "العربية", "हिंदी"] TRANSLATIONS: Dict[str, Dict[str, str]] = { "English": { # Header "title": "Z Image Turbo + GLM-4.6V", "subtitle": "AI Image Generation & Transformation powered by DeepSeek Reasoning", "like_msg": "If you liked it, please ❤️ like it. Thank you!", # Tabs "tab_generate": "Generate", "tab_assistant": "AI Assistant", "tab_transform": "Transform", # Generate tab "prompt": "Prompt", "prompt_placeholder": "Describe your image in detail...", "polish_checkbox": "Prompt+ by deepseek-reasoner", "style": "Style", "aspect_ratio": "Aspect Ratio", "advanced_settings": "Advanced Settings", "steps": "Steps", "seed": "Seed", "random_seed": "Random Seed", "generate_btn": "Generate", "generated_image": "Generated Image", "enhanced_prompt": "Enhanced Prompt", "seed_used": "Seed Used", "share": "Share", # AI Assistant tab "ai_description": "**AI-Powered Prompt Generator** - Upload an image, analyze it with GLM-4.6V, then generate optimized prompts.", "upload_image": "Upload Image", "analyze_btn": "Analyze Image", "image_description": "Image Description", "changes_request": "What changes do you want?", "changes_placeholder": "e.g., 'watercolor style' or 'dramatic sunset lighting'", "target_style": "Target Style", "generate_prompt_btn": "Generate Prompt", 
"generated_prompt": "Generated Prompt", "send_to_transform": "Send to Transform Tab", "how_to_use": "How to Use", "how_to_use_content": """1. **Upload** an image and click "Analyze Image" 2. **Describe** the changes you want 3. **Generate** an optimized prompt 4. **Send** to Transform tab to apply changes""", # Transform tab "transform_description": "**Transform your image** - Upload and describe the transformation. Lower strength = subtle, higher = dramatic.", "transformation_prompt": "Transformation Prompt", "transform_placeholder": "e.g., 'oil painting style, vibrant colors'", "strength": "Strength", "transform_btn": "Transform", "transformed_image": "Transformed Image", "example_prompts": "Example Prompts", # Footer "models": "Models", "by": "by", }, "Español": { "title": "Z Image Turbo + GLM-4.6V", "subtitle": "Generación y Transformación de Imágenes con IA impulsado por DeepSeek Reasoning", "like_msg": "Si te gustó, por favor dale me gusta. ¡Gracias!", "tab_generate": "Generar", "tab_assistant": "Asistente IA", "tab_transform": "Transformar", "prompt": "Prompt", "prompt_placeholder": "Describe tu imagen en detalle...", "polish_checkbox": "Prompt+ por deepseek-reasoner", "style": "Estilo", "aspect_ratio": "Relación de Aspecto", "advanced_settings": "Configuración Avanzada", "steps": "Pasos", "seed": "Semilla", "random_seed": "Semilla Aleatoria", "generate_btn": "Generar", "generated_image": "Imagen Generada", "enhanced_prompt": "Prompt Mejorado", "seed_used": "Semilla Usada", "share": "Compartir", "ai_description": "**Generador de Prompts con IA** - Sube una imagen, analízala con GLM-4.6V, y genera prompts optimizados.", "upload_image": "Subir Imagen", "analyze_btn": "Analizar Imagen", "image_description": "Descripción de la Imagen", "changes_request": "¿Qué cambios quieres?", "changes_placeholder": "ej., 'estilo acuarela' o 'iluminación de atardecer dramático'", "target_style": "Estilo Objetivo", "generate_prompt_btn": "Generar Prompt", "generated_prompt": 
"Prompt Generado", "send_to_transform": "Enviar a Transformar", "how_to_use": "Cómo Usar", "how_to_use_content": """1. **Sube** una imagen y haz clic en "Analizar Imagen" 2. **Describe** los cambios que quieres 3. **Genera** un prompt optimizado 4. **Envía** a la pestaña Transformar para aplicar cambios""", "transform_description": "**Transforma tu imagen** - Sube y describe la transformación. Menor fuerza = sutil, mayor = dramático.", "transformation_prompt": "Prompt de Transformación", "transform_placeholder": "ej., 'estilo pintura al óleo, colores vibrantes'", "strength": "Fuerza", "transform_btn": "Transformar", "transformed_image": "Imagen Transformada", "example_prompts": "Prompts de Ejemplo", "models": "Modelos", "by": "por", }, "Português (BR)": { "title": "Z Image Turbo + GLM-4.6V", "subtitle": "Geração e Transformação de Imagens com IA alimentado por DeepSeek Reasoning", "like_msg": "Se você gostou, por favor curta. Obrigado!", "tab_generate": "Gerar", "tab_assistant": "Assistente IA", "tab_transform": "Transformar", "prompt": "Prompt", "prompt_placeholder": "Descreva sua imagem em detalhes...", "polish_checkbox": "Prompt+ por deepseek-reasoner", "style": "Estilo", "aspect_ratio": "Proporção", "advanced_settings": "Configurações Avançadas", "steps": "Passos", "seed": "Semente", "random_seed": "Semente Aleatória", "generate_btn": "Gerar", "generated_image": "Imagem Gerada", "enhanced_prompt": "Prompt Aprimorado", "seed_used": "Semente Usada", "share": "Compartilhar", "ai_description": "**Gerador de Prompts com IA** - Envie uma imagem, analise com GLM-4.6V, e gere prompts otimizados.", "upload_image": "Enviar Imagem", "analyze_btn": "Analisar Imagem", "image_description": "Descrição da Imagem", "changes_request": "Quais mudanças você quer?", "changes_placeholder": "ex., 'estilo aquarela' ou 'iluminação dramática de pôr do sol'", "target_style": "Estilo Alvo", "generate_prompt_btn": "Gerar Prompt", "generated_prompt": "Prompt Gerado", "send_to_transform": 
"Enviar para Transformar", "how_to_use": "Como Usar", "how_to_use_content": """1. **Envie** uma imagem e clique em "Analisar Imagem" 2. **Descreva** as mudanças que você quer 3. **Gere** um prompt otimizado 4. **Envie** para a aba Transformar para aplicar mudanças""", "transform_description": "**Transforme sua imagem** - Envie e descreva a transformação. Menor força = sutil, maior = dramático.", "transformation_prompt": "Prompt de Transformação", "transform_placeholder": "ex., 'estilo pintura a óleo, cores vibrantes'", "strength": "Força", "transform_btn": "Transformar", "transformed_image": "Imagem Transformada", "example_prompts": "Prompts de Exemplo", "models": "Modelos", "by": "por", }, "العربية": { "title": "Z Image Turbo + GLM-4.6V", "subtitle": "توليد وتحويل الصور بالذكاء الاصطناعي مدعوم من DeepSeek Reasoning", "like_msg": "إذا أعجبك، يرجى الإعجاب. شكراً لك!", "tab_generate": "توليد", "tab_assistant": "مساعد الذكاء الاصطناعي", "tab_transform": "تحويل", "prompt": "الوصف", "prompt_placeholder": "صف صورتك بالتفصيل...", "polish_checkbox": "تحسين+ بواسطة deepseek-reasoner", "style": "النمط", "aspect_ratio": "نسبة العرض", "advanced_settings": "إعدادات متقدمة", "steps": "الخطوات", "seed": "البذرة", "random_seed": "بذرة عشوائية", "generate_btn": "توليد", "generated_image": "الصورة المولدة", "enhanced_prompt": "الوصف المحسن", "seed_used": "البذرة المستخدمة", "share": "مشاركة", "ai_description": "**مولد الأوصاف بالذكاء الاصطناعي** - ارفع صورة، حللها باستخدام GLM-4.6V، ثم أنشئ أوصافاً محسنة.", "upload_image": "رفع صورة", "analyze_btn": "تحليل الصورة", "image_description": "وصف الصورة", "changes_request": "ما التغييرات التي تريدها؟", "changes_placeholder": "مثال: 'نمط ألوان مائية' أو 'إضاءة غروب درامية'", "target_style": "النمط المستهدف", "generate_prompt_btn": "توليد الوصف", "generated_prompt": "الوصف المولد", "send_to_transform": "إرسال إلى التحويل", "how_to_use": "كيفية الاستخدام", "how_to_use_content": """1. **ارفع** صورة وانقر على "تحليل الصورة" 2. 
**صف** التغييرات التي تريدها 3. **أنشئ** وصفاً محسناً 4. **أرسل** إلى تبويب التحويل لتطبيق التغييرات""", "transform_description": "**حوّل صورتك** - ارفع وصف التحويل. قوة أقل = تغيير طفيف، قوة أكبر = تغيير جذري.", "transformation_prompt": "وصف التحويل", "transform_placeholder": "مثال: 'نمط لوحة زيتية، ألوان نابضة'", "strength": "القوة", "transform_btn": "تحويل", "transformed_image": "الصورة المحولة", "example_prompts": "أمثلة الأوصاف", "models": "النماذج", "by": "بواسطة", }, "हिंदी": { "title": "Z Image Turbo + GLM-4.6V", "subtitle": "DeepSeek Reasoning द्वारा संचालित AI छवि निर्माण और रूपांतरण", "like_msg": "अगर आपको पसंद आया, तो कृपया लाइक करें। धन्यवाद!", "tab_generate": "बनाएं", "tab_assistant": "AI सहायक", "tab_transform": "रूपांतरित करें", "prompt": "प्रॉम्प्ट", "prompt_placeholder": "अपनी छवि का विस्तार से वर्णन करें...", "polish_checkbox": "Prompt+ by deepseek-reasoner", "style": "शैली", "aspect_ratio": "पक्षानुपात", "advanced_settings": "उन्नत सेटिंग्स", "steps": "चरण", "seed": "बीज", "random_seed": "यादृच्छिक बीज", "generate_btn": "बनाएं", "generated_image": "बनाई गई छवि", "enhanced_prompt": "उन्नत प्रॉम्प्ट", "seed_used": "प्रयुक्त बीज", "share": "साझा करें", "ai_description": "**AI-संचालित प्रॉम्प्ट जनरेटर** - एक छवि अपलोड करें, GLM-4.6V से विश्लेषण करें, फिर अनुकूलित प्रॉम्प्ट बनाएं।", "upload_image": "छवि अपलोड करें", "analyze_btn": "छवि विश्लेषण करें", "image_description": "छवि विवरण", "changes_request": "आप क्या बदलाव चाहते हैं?", "changes_placeholder": "उदा., 'वॉटरकलर शैली' या 'नाटकीय सूर्यास्त प्रकाश'", "target_style": "लक्ष्य शैली", "generate_prompt_btn": "प्रॉम्प्ट बनाएं", "generated_prompt": "बनाया गया प्रॉम्प्ट", "send_to_transform": "रूपांतरण टैब पर भेजें", "how_to_use": "कैसे उपयोग करें", "how_to_use_content": """1. **अपलोड** करें एक छवि और "छवि विश्लेषण करें" पर क्लिक करें 2. **वर्णन** करें जो बदलाव आप चाहते हैं 3. **बनाएं** एक अनुकूलित प्रॉम्प्ट 4. 
**भेजें** रूपांतरण टैब पर बदलाव लागू करने के लिए""", "transform_description": "**अपनी छवि रूपांतरित करें** - अपलोड करें और रूपांतरण का वर्णन करें। कम शक्ति = सूक्ष्म, अधिक = नाटकीय।", "transformation_prompt": "रूपांतरण प्रॉम्प्ट", "transform_placeholder": "उदा., 'तेल चित्रकला शैली, जीवंत रंग'", "strength": "शक्ति", "transform_btn": "रूपांतरित करें", "transformed_image": "रूपांतरित छवि", "example_prompts": "उदाहरण प्रॉम्प्ट", "models": "मॉडल", "by": "द्वारा", }, } def get_text(lang: str, key: str) -> str: """Get translated text for a key.""" return TRANSLATIONS.get(lang, TRANSLATIONS["English"]).get(key, key) def change_language(lang_name: str): """Update all component labels when language changes.""" t = TRANSLATIONS.get(lang_name, TRANSLATIONS["English"]) return [ # Generate tab gr.update(label=t["prompt"], placeholder=t["prompt_placeholder"]), gr.update(label=t["polish_checkbox"]), gr.update(label=t["style"]), gr.update(label=t["aspect_ratio"]), gr.update(label=t["steps"]), gr.update(label=t["seed"]), gr.update(label=t["random_seed"]), gr.update(value=t["generate_btn"]), gr.update(label=t["generated_image"]), gr.update(label=t["enhanced_prompt"]), gr.update(label=t["seed_used"]), gr.update(value=t["share"]), # AI Assistant tab gr.update(value=t["ai_description"]), gr.update(label=t["upload_image"]), gr.update(value=t["analyze_btn"]), gr.update(label=t["image_description"]), gr.update(label=t["changes_request"], placeholder=t["changes_placeholder"]), gr.update(label=t["target_style"]), gr.update(value=t["generate_prompt_btn"]), gr.update(label=t["generated_prompt"]), gr.update(value=t["send_to_transform"]), gr.update(value=t["how_to_use_content"]), # Transform tab gr.update(value=t["transform_description"]), gr.update(label=t["upload_image"]), gr.update(label=t["transformation_prompt"], placeholder=t["transform_placeholder"]), gr.update(label=t["polish_checkbox"]), gr.update(label=t["style"]), gr.update(label=t["strength"]), gr.update(label=t["steps"]), 
gr.update(label=t["seed"]), gr.update(label=t["random_seed"]), gr.update(value=t["transform_btn"]), gr.update(label=t["transformed_image"]), gr.update(label=t["enhanced_prompt"]), gr.update(label=t["seed_used"]), gr.update(value=t["share"]), ] # ============================================================================= # Constants (replaces magic numbers) MIN_IMAGE_DIM = 512 MAX_IMAGE_DIM = 2048 IMAGE_ALIGNMENT = 16 API_TIMEOUT = 90.0 API_MAX_RETRIES = 2 MAX_DESCRIPTION_LENGTH = 1200 # For GLM prompt generation # Enable optimized backends (SDPA uses FlashAttention when available) torch.backends.cuda.enable_flash_sdp(True) torch.backends.cuda.enable_mem_efficient_sdp(True) torch.backends.cudnn.benchmark = True # Enable TF32 for better performance on Ampere+ GPUs torch.backends.cuda.matmul.allow_tf32 = True torch.backends.cudnn.allow_tf32 = True # Singleton clients with timeout and retry _deepseek_client: Optional[OpenAI] = None _glm_client: Optional[OpenAI] = None def get_deepseek_client() -> Optional[OpenAI]: """Get DeepSeek API client (singleton with timeout).""" global _deepseek_client if _deepseek_client is None: api_key = os.environ.get("DEEPSEEK_API_KEY") if not api_key: logger.warning("DEEPSEEK_API_KEY not configured") return None _deepseek_client = OpenAI( base_url="https://api.deepseek.com", api_key=api_key, timeout=API_TIMEOUT, max_retries=API_MAX_RETRIES, ) return _deepseek_client def polish_prompt(original_prompt: str, mode: str = "generate") -> str: """Expand short prompts into detailed, high-quality prompts using deepseek-reasoner.""" if not original_prompt or not original_prompt.strip(): if mode == "transform": return "high quality, enhanced details, professional finish" return "Ultra HD, 4K, cinematic composition, highly detailed" client = get_deepseek_client() if not client: return original_prompt if mode == "transform": system_prompt = """ROLE: Expert prompt engineer for AI image-to-image transformation. 
TASK: Rewrite the user's input into a precise, technical prompt describing the target visual result. STRICT RULES: - MAXIMUM 80 WORDS (strict limit, count carefully) - Focus on: artistic style, color palette, lighting, texture, rendering technique, mood - Describe HOW the image should look, not what to change - No action words like "transform", "convert", "change" - Present tense, as if describing the final image OUTPUT FORMAT: Only the final prompt text. No thinking, no explanation, no preamble, no word count.""" else: system_prompt = """ROLE: Expert prompt engineer for AI image generation. TASK: Expand the user's input into a detailed, expressive prompt for stunning image generation. STRICT RULES: - MAXIMUM 100 WORDS (strict limit, count carefully) - Be descriptive about: subject, lighting, atmosphere, style, composition, details - Use vivid, specific language - Include artistic style references when appropriate OUTPUT FORMAT: Only the final prompt text. No thinking, no explanation, no preamble, no word count.""" try: response = client.chat.completions.create( model="deepseek-reasoner", max_tokens=350, messages=[ {"role": "system", "content": system_prompt}, {"role": "user", "content": original_prompt} ], ) msg = response.choices[0].message content = msg.content if msg.content else "" # If content is empty, try to extract final answer from reasoning_content if not content and hasattr(msg, 'reasoning_content') and msg.reasoning_content: text = msg.reasoning_content.strip() paragraphs = [p.strip() for p in text.split('\n\n') if p.strip()] if paragraphs: content = paragraphs[-1] if content: content = content.strip().replace("\n", " ") if "" in content: content = content.split("")[-1].strip() if content.startswith('"') and content.endswith('"'): content = content[1:-1] max_words = 80 if mode == "transform" else 100 words = content.split() if len(words) > max_words: content = " ".join(words[:max_words]) return content return original_prompt except Exception as e: 
logger.error(f"polish_prompt failed: {type(e).__name__}: {str(e)}") return original_prompt # GLM-4V Vision AI functions (runs on CPU - API calls) def get_glm_client() -> Optional[OpenAI]: """Get GLM API client (singleton with timeout).""" global _glm_client if _glm_client is None: api_key = os.environ.get("GLM_API_KEY") if not api_key: return None _glm_client = OpenAI( base_url="https://api.z.ai/api/paas/v4", api_key=api_key, timeout=API_TIMEOUT, max_retries=API_MAX_RETRIES, ) return _glm_client def encode_image_base64(image: Optional[Image.Image]) -> Optional[str]: """Convert PIL image to base64 with proper memory cleanup.""" if image is None: return None buf = io.BytesIO() try: image.save(buf, format='JPEG', quality=90) # JPEG is faster for API calls buf.seek(0) return base64.b64encode(buf.getvalue()).decode('utf-8') finally: buf.close() def clean_glm_response(text: str) -> str: """Remove GLM special tokens and clean up text.""" if not text: return "" text = text.replace('<|begin_of_box|>', '').replace('<|end_of_box|>', '') text = text.strip() return text def is_thinking_text(text: str) -> bool: """Check if text looks like GLM thinking/reasoning rather than actual content.""" if not text: return True text_lower = text.lower().strip() # Reject if starts with planning/markdown headers planning_starts = ( '**plan', '## plan', '# plan', 'plan:', '**step', '## step', '# step', '**analysis', '**approach', '**strategy', 'here is my', 'here\'s my', ) if any(text_lower.startswith(pat) for pat in planning_starts): return True # Reject if starts with clear meta-language thinking_starts = ( 'let me ', 'i need to', 'i should ', 'i will ', "i'll ", 'got it', 'okay, ', 'okay ', 'alright, ', 'alright ', 'the user ', 'the request ', 'based on ', 'following the ', 'now i ', 'my prompt ', 'for this task', 'considering ', 'understood', 'i understand', 'sure, ', 'sure ', '1. 
', '1) ', # Numbered lists = planning ) if any(text_lower.startswith(pat) for pat in thinking_starts): return True # Check for planning phrases ANYWHERE in text (these are NEVER in good prompts) planning_phrases = ( 'i need to describe', 'i should ', 'i\'ll describe', 'i\'ll keep', 'i will describe', 'i will keep', 'this includes', 'the key change', 'key part of the scene', 'is a defining feature', 'is crucial', 'is important', 'should remain', 'should be', '**main subject:**', '**weapon:**', '**setting:**', '**mood:**', '**colors', '**lighting', '**plan:**', ) if any(phrase in text_lower for phrase in planning_phrases): return True return False def analyze_image_with_glm(image: Optional[Image.Image]) -> str: """Analyze image using GLM-4V and return description. FIXED: Removed double filtering, lowered thresholds, added debug logging. """ if image is None: return "Please upload an image first." client = get_glm_client() if not client: return "GLM API key not configured. Please add GLM_API_KEY to space secrets." try: base64_image = encode_image_base64(image) response = client.chat.completions.create( model="glm-4.6v-flash", messages=[ { "role": "user", "content": [ { "type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"} }, { "type": "text", "text": """Write a DETAILED image description. LENGTH: 400-500 TOKENS. This is important - DO NOT stop early, write a FULL detailed description until you reach 500 tokens. START DIRECTLY with the main subject. NO meta-language, NO preamble. WRONG starts: "This image shows...", "I can see...", "The image depicts...", "Here is..." CORRECT starts: "A woman in red dress...", "Golden sunset over mountains...", "Vintage car parked..." 
DESCRIBE IN DETAIL (use ALL 250-350 tokens):
- Main subject: appearance, clothing, pose, expression, features
- Setting: environment, location, architecture, objects nearby
- Colors: specific hues, color palette, dominant colors
- Lighting: source, quality, shadows, highlights, time of day
- Textures: materials (silk, metal, wood, fabric, skin)
- Atmosphere: mood, emotion, feeling, energy
- Background: secondary elements, depth, perspective
- Small details: accessories, decorations, patterns
OUTPUT FORMAT: One continuous paragraph, 400-500 tokens. No bullet points, no sections. Keep writing until you reach 500 tokens.
Write the complete detailed description now:"""
                        }
                    ]
                }
            ],
            max_tokens=1000,
        )
        msg = response.choices[0].message
        raw_content = msg.content if msg.content else ""
        # Debug logging
        logger.debug(f"GLM Analyze: raw content length={len(raw_content)}")
        if raw_content:
            logger.debug(f"GLM Analyze preview: {raw_content[:200]}...")
        # For image descriptions, use the FULL content (don't split by paragraphs)
        # Only apply minimal cleaning
        result = clean_glm_response(raw_content)
        # Remove common bad starts but keep the rest
        bad_starts = ('here is', 'here\'s', 'the image shows', 'this image', 'i can see')
        result_lower = result.lower()
        for bad in bad_starts:
            if result_lower.startswith(bad):
                # Find the first period or comma and start after it
                # (only within the first 50 chars, to avoid gutting real content)
                for i, c in enumerate(result):
                    if c in '.,:' and i < 50:
                        result = result[i+1:].strip()
                        break
                break
        # Strip quotes (straight and curly)
        result = result.strip('"\'""')
        # If content is too short, try reasoning_content
        if len(result) < 100:
            if hasattr(msg, 'reasoning_content') and msg.reasoning_content:
                reasoning = clean_glm_response(msg.reasoning_content)
                # Take the longest paragraph from reasoning as fallback
                paragraphs = [p.strip() for p in reasoning.split('\n\n') if len(p.strip()) > 50]
                if paragraphs:
                    longest = max(paragraphs, key=len)
                    if len(longest) > len(result):
                        result = longest.strip('"\'""')
                        logger.debug(f"GLM Analyze: using reasoning content ({len(result)} chars)")
        # Accept anything >= 50 chars; shorter results are reported as errors.
        if result and len(result) >= 50:
            logger.info(f"GLM Analyze: success ({len(result)} chars)")
            return result
        error_details = f"content_len={len(raw_content)}"
        logger.warning(f"GLM Analyze: result too short ({error_details})")
        return f"Description too short ({error_details}). Please try again."
    except Exception as e:
        logger.error(f"GLM Analyze exception: {type(e).__name__}: {str(e)}")
        return f"Error analyzing image: {str(e)}"


def generate_prompt_with_glm(image_description: str, user_request: str, style: str) -> str:
    """Generate transformation prompt using GLM based on image description and user request.

    FIXED: Removed double filtering, lowered thresholds, added debug logging.
    """
    # Guard against the sentinel/error strings analyze_image_with_glm returns
    # instead of a real description.
    if not image_description or image_description.startswith("Please") or image_description.startswith("Error") or image_description.startswith("GLM API") or image_description.startswith("Could not"):
        return "Please analyze the image first."
    if not user_request or not user_request.strip():
        return "Please describe what changes you want."
    client = get_glm_client()
    if not client:
        return "GLM API key not configured. Please add GLM_API_KEY to space secrets."
    style_hint = f" Style: {style}." if style and style != "None" else ""
    # Cap the description so the request stays within a reasonable token budget.
    desc = image_description[:MAX_DESCRIPTION_LENGTH] if len(image_description) > MAX_DESCRIPTION_LENGTH else image_description
    try:
        response = client.chat.completions.create(
            model="glm-4.6v-flash",
            messages=[
                {
                    "role": "user",
                    "content": f"""TASK: Write an image prompt describing the FINAL transformed scene.
ORIGINAL: {desc}
CHANGE: {user_request}{style_hint}
CRITICAL OUTPUT RULES:
- Output ONLY the final prompt text (80-120 words)
- Start directly with the main subject (e.g., "A cyberpunk samurai...")
- NO planning, NO thinking, NO explanations, NO numbered lists
- NO phrases like "I will", "I should", "The key change is"
- ONE paragraph describing the final image as if it already exists
OUTPUT THE PROMPT NOW (nothing else):"""
                }
            ],
            max_tokens=1000,
        )
        msg = response.choices[0].message
        raw_content = msg.content if msg.content else ""
        # Debug logging
        logger.debug(f"GLM Prompt: raw content length={len(raw_content)}")
        if raw_content:
            logger.debug(f"GLM Prompt preview: {raw_content[:200]}...")
        # Use FULL content (don't split by paragraphs)
        result = clean_glm_response(raw_content)
        # Remove thinking starts but keep the rest
        result_lower = result.lower()
        bad_starts = ('here is', 'here\'s', 'sure,', 'sure ', 'okay,', 'okay ')
        for bad in bad_starts:
            if result_lower.startswith(bad):
                for i, c in enumerate(result):
                    if c in '.,:' and i < 30:
                        result = result[i+1:].strip()
                        break
                break
        # Check if it's thinking text
        if is_thinking_text(result):
            # Try reasoning_content: pick the longest paragraph that does not
            # itself look like thinking text.
            if hasattr(msg, 'reasoning_content') and msg.reasoning_content:
                reasoning = clean_glm_response(msg.reasoning_content)
                paragraphs = [p.strip() for p in reasoning.split('\n\n') if len(p.strip()) > 50 and not is_thinking_text(p)]
                if paragraphs:
                    result = max(paragraphs, key=len)
                    logger.debug(f"GLM Prompt: using reasoning ({len(result)} chars)")
        result = result.strip('"\'""')
        if result and len(result) >= 50:
            logger.info(f"GLM Prompt: success ({len(result)} chars)")
            return result
        error_details = f"content_len={len(raw_content)}"
        logger.warning(f"GLM Prompt: failed ({error_details})")
        return f"Could not generate prompt ({error_details}). Please try again."
    except Exception as e:
        logger.error(f"GLM Prompt exception: {type(e).__name__}: {str(e)}")
        return f"Error: {str(e)}"


logger.info("Loading Z-Image-Turbo pipeline...")
pipe_t2i = DiffusionPipeline.from_pretrained(
    "Tongyi-MAI/Z-Image-Turbo",
    torch_dtype=torch.bfloat16,  # Set dtype at load time for efficiency
)
pipe_t2i.to("cuda")

# Note: ZImagePipeline custom pipeline doesn't support VAE slicing/tiling optimization
# The img2img pipeline shares all submodules with the t2i pipeline, so only
# one copy of the weights lives on the GPU.
pipe_i2i = ZImageImg2ImgPipeline(
    transformer=pipe_t2i.transformer,
    vae=pipe_t2i.vae,
    text_encoder=pipe_t2i.text_encoder,
    tokenizer=pipe_t2i.tokenizer,
    scheduler=pipe_t2i.scheduler,
)
logger.info("Pipelines ready! (TF32 + SDPA optimizations enabled)")

STYLES = ["None", "Photorealistic", "Cinematic", "Anime", "Digital Art", "Oil Painting", "Watercolor", "3D Render", "Fantasy", "Sci-Fi"]

# Suffix appended verbatim to the polished prompt for each style in STYLES.
STYLE_SUFFIXES = {
    "None": "",
    "Photorealistic": ", photorealistic, ultra detailed, 8k, professional photography",
    "Cinematic": ", cinematic lighting, movie scene, dramatic atmosphere, film grain",
    "Anime": ", anime style, vibrant colors, cel shaded, studio ghibli inspired",
    "Digital Art": ", digital art, artstation trending, concept art, highly detailed",
    "Oil Painting": ", oil painting style, classical art, brush strokes visible",
    "Watercolor": ", watercolor painting, soft edges, artistic, delicate colors",
    "3D Render": ", 3D render, octane render, unreal engine 5, ray tracing",
    "Fantasy": ", fantasy art, magical, ethereal glow, mystical atmosphere",
    "Sci-Fi": ", science fiction, futuristic, advanced technology, neon accents",
}

# Dropdown choices; each label must have a matching entry in RATIO_DIMS.
RATIOS = [
    "1:1 Square (1024x1024)",
    "16:9 Landscape (1344x768)",
    "9:16 Portrait (768x1344)",
    "4:3 Standard (1152x896)",
    "3:4 Vertical (896x1152)",
    "21:9 Cinematic (1536x640)",
    "3:2 Photo (1216x832)",
    "2:3 Photo Portrait (832x1216)",
    "1:1 XL (1536x1536)",
    "16:9 XL (1920x1088)",
    "9:16 XL (1088x1920)",
    "4:3 XL (1536x1152)",
    "3:4 XL (1152x1536)",
    "1:1 MAX (2048x2048)",
    "16:9 MAX (2048x1152)",
    "9:16 MAX (1152x2048)",
    "4:3 MAX (2048x1536)",
    "3:4 MAX (1536x2048)",
]

# Ratio label -> (width, height) in pixels; all dims are multiples of 16.
RATIO_DIMS = {
    "1:1 Square (1024x1024)": (1024, 1024),
    "16:9 Landscape (1344x768)": (1344, 768),
    "9:16 Portrait (768x1344)": (768, 1344),
    "4:3 Standard (1152x896)": (1152, 896),
    "3:4 Vertical (896x1152)": (896, 1152),
    "21:9 Cinematic (1536x640)": (1536, 640),
    "3:2 Photo (1216x832)": (1216, 832),
    "2:3 Photo Portrait (832x1216)": (832, 1216),
    "1:1 XL (1536x1536)": (1536, 1536),
    "16:9 XL (1920x1088)": (1920, 1088),
    "9:16 XL (1088x1920)": (1088, 1920),
    "4:3 XL (1536x1152)": (1536, 1152),
    "3:4 XL (1152x1536)": (1152, 1536),
    "1:1 MAX (2048x2048)": (2048, 2048),
    "16:9 MAX (2048x1152)": (2048, 1152),
    "9:16 MAX (1152x2048)": (1152, 2048),
    "4:3 MAX (2048x1536)": (2048, 1536),
    "3:4 MAX (1536x2048)": (1536, 2048),
}

# Each row: [prompt, style, ratio label, steps, seed, randomize]
EXAMPLES_GENERATE = [
    ["Ancient dragon perched on a crumbling Gothic cathedral at dusk, stormy purple sky with lightning bolts in the distance", "Fantasy", "1:1 Square (1024x1024)", 9, 42, True],
    ["Bioluminescent jellyfish drifting through a dark alien ocean, twin moons glowing above the water surface", "Digital Art", "9:16 Portrait (768x1344)", 9, 42, True],
    ["Elderly craftsman with weathered hands repairing an intricate brass clockwork mechanism, warm workshop light", "Photorealistic", "4:3 Standard (1152x896)", 9, 42, True],
    ["Cyberpunk samurai warrior standing in a neon-lit rainy alley, glowing armor circuits reflected in puddles", "Sci-Fi", "3:4 Vertical (896x1152)", 9, 42, True],
    ["Victorian lady in burgundy silk gown standing in a grand European ballroom, crystal chandeliers above marble floors, warm golden sunlight streaming through tall arched windows", "Oil Painting", "4:3 XL (1536x1152)", 9, 42, True],
    ["Abandoned Tokyo streets slowly reclaimed by wild nature, pink cherry blossoms covering cracked pavement, a small deer grazing near rusty cars, thick morning fog rolling between overgrown skyscrapers, green vines climbing broken windows, peaceful post-apocalyptic silence", "Cinematic", "16:9 XL (1920x1088)", 9, 42, True],
    ["Alien desert planet with giant floating rock islands in the sky connected by rope bridges, camel caravans walking on the sandy ground far below, dramatic sunset with orange purple and gold colors across the sky, epic wide landscape view", "Fantasy", "16:9 MAX (2048x1152)", 9, 42, True],
    ["Cozy witch cottage interior on a stormy autumn night, iron cauldrons bubbling with green smoke, wooden shelves packed with glowing potion bottles and ancient spell books, a sleepy black cat curled by the stone fireplace, bundles of dried herbs and garlic hanging from dark oak ceiling beams, warm amber candlelight flickering throughout the room", "Digital Art", "1:1 MAX (2048x2048)", 9, 42, True],
]

# Each row: [prompt, style, strength, steps, seed, randomize]
EXAMPLES_TRANSFORM = [
    ["Transform into ultra realistic photograph with sharp details and natural lighting", "Photorealistic", 0.7, 9, 42, True],
    ["Dramatic movie scene with cinematic lighting and film grain texture", "Cinematic", 0.65, 9, 42, True],
    ["Japanese anime style with vibrant colors and cel shading", "Anime", 0.75, 9, 42, True],
    ["Digital concept art style, trending on artstation", "Digital Art", 0.6, 9, 42, True],
    ["Classical oil painting with visible brush strokes and rich colors", "Oil Painting", 0.7, 9, 42, True],
    ["Soft watercolor painting with delicate washes and gentle edges", "Watercolor", 0.65, 9, 42, True],
    ["High quality 3D render with ray tracing and realistic materials", "3D Render", 0.7, 9, 42, True],
    ["Magical fantasy art with ethereal glow and mystical atmosphere", "Fantasy", 0.65, 9, 42, True],
    ["Futuristic sci-fi style with neon accents and advanced technology", "Sci-Fi", 0.7, 9, 42, True],
    ["Enhanced version with improved details and quality", "None", 0.4, 9, 42, True],
]


def upload_to_hf_cdn(image: Optional[Image.Image]) -> str:
    """Upload image to HuggingFace CDN with proper memory cleanup.

    Returns the CDN URL on success, or a human-readable error string
    (never raises) so the result can be shown directly in the UI.
    """
    if image is None:
        return "No image to share"
    buf = io.BytesIO()
    try:
        image.save(buf, format='PNG')
        buf.seek(0)
        response = requests.post(
            "https://huggingface.co/uploads",
headers={"Content-Type": "image/png"}, data=buf.getvalue(), timeout=30, ) if response.status_code == 200: return response.text.strip() return f"Upload failed: {response.status_code}" except requests.Timeout: return "Upload timed out. Please try again." except Exception as e: logger.error(f"upload_to_hf_cdn failed: {type(e).__name__}: {str(e)}") return "Upload error. Please try again." finally: buf.close() def do_polish_prompt(prompt: str, style: str, do_polish: bool, mode: str = "generate") -> Tuple[str, str]: """Polish prompt before generation (runs on CPU, before GPU allocation).""" if not prompt or not prompt.strip(): return "", "" base_prompt = prompt.strip() if do_polish: polished = polish_prompt(base_prompt, mode=mode) else: polished = base_prompt final_prompt = polished + STYLE_SUFFIXES.get(style, "") return final_prompt, polished def do_polish_transform_prompt(prompt: str, style: str, do_polish: bool) -> Tuple[str, str]: """Polish prompt for transformation (style-focused).""" if not do_polish: base = prompt.strip() if prompt else "high quality image" final = base + STYLE_SUFFIXES.get(style, "") return final, "" return do_polish_prompt(prompt, style, True, mode="transform") @spaces.GPU(duration=60) def generate(full_prompt: str, polished_display: str, ratio: str, steps: int, seed: int, randomize: bool, progress=gr.Progress(track_tqdm=True)) -> Tuple[Optional[Image.Image], int]: """Generate image from text prompt.""" if randomize: seed = torch.randint(0, 2**32 - 1, (1,)).item() seed = int(seed) if not full_prompt.strip(): return None, seed try: w, h = RATIO_DIMS.get(ratio, (1024, 1024)) generator = torch.Generator("cuda").manual_seed(seed) image = pipe_t2i( prompt=full_prompt, height=h, width=w, num_inference_steps=int(steps), guidance_scale=0.0, generator=generator, ).images[0] return image, seed except Exception as e: logger.error(f"Generation failed: {type(e).__name__}: {str(e)}") return None, seed @spaces.GPU(duration=45) def transform(input_image: 
Optional[Image.Image], full_prompt: str, polished_display: str, strength: float, steps: int, seed: int, randomize: bool, progress=gr.Progress(track_tqdm=True)) -> Tuple[Optional[Image.Image], int]: """Transform image using prompt guidance.""" if input_image is None: return None, 0 if randomize: seed = torch.randint(0, 2**32 - 1, (1,)).item() seed = int(seed) if not full_prompt.strip(): full_prompt = "high quality image, enhanced details" try: input_image = input_image.convert("RGB") w, h = input_image.size w = (w // IMAGE_ALIGNMENT) * IMAGE_ALIGNMENT h = (h // IMAGE_ALIGNMENT) * IMAGE_ALIGNMENT w = max(MIN_IMAGE_DIM, min(MAX_IMAGE_DIM, w)) h = max(MIN_IMAGE_DIM, min(MAX_IMAGE_DIM, h)) input_image = input_image.resize((w, h), Image.Resampling.BILINEAR) strength = float(strength) effective_steps = max(4, int(steps / strength)) if strength > 0 else int(steps) generator = torch.Generator("cuda").manual_seed(seed) image = pipe_i2i( prompt=full_prompt, image=input_image, strength=strength, num_inference_steps=effective_steps, guidance_scale=0.0, generator=generator, ).images[0] return image, seed except Exception as e: logger.error(f"Transform failed: {type(e).__name__}: {str(e)}") return None, seed css = r""" /* Google Fonts for multilingual support */ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=Noto+Sans+Arabic:wght@400;500;600;700&family=Noto+Sans+Devanagari:wght@400;500;600;700&display=swap'); :root { --bg-primary: #0c0c0e; --bg-secondary: #141416; --bg-tertiary: #1c1c20; --surface: #232328; --surface-hover: #2a2a30; --accent-primary: #818cf8; --accent-secondary: #a78bfa; --accent-hover: #6366f1; --accent-gradient: linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%); --accent-glow: rgba(99, 102, 241, 0.4); --text-primary: #f4f4f5; --text-secondary: #a1a1aa; --text-muted: #71717a; --border-subtle: rgba(255, 255, 255, 0.08); --border-default: rgba(255, 255, 255, 0.12); --success: #10b981; --warning: #f59e0b; --error: #ef4444; 
--shadow-sm: 0 1px 2px rgba(0,0,0,0.3); --shadow-md: 0 4px 6px -1px rgba(0,0,0,0.4); --shadow-lg: 0 10px 15px -3px rgba(0,0,0,0.5); --shadow-glow: 0 0 20px var(--accent-glow); --radius-sm: 8px; --radius-md: 12px; --radius-lg: 16px; --transition: 0.2s ease; /* Font stacks */ --font-latin: 'Inter', -apple-system, BlinkMacSystemFont, system-ui, sans-serif; --font-arabic: 'Noto Sans Arabic', 'Tahoma', sans-serif; --font-hindi: 'Noto Sans Devanagari', 'Mangal', sans-serif; } /* Arabic font */ .lang-ar, .lang-ar * { font-family: var(--font-arabic) !important; } /* Hindi font */ .lang-hi, .lang-hi * { font-family: var(--font-hindi) !important; } /* RTL Support for Arabic */ [dir="rtl"], .rtl { direction: rtl; text-align: right; } [dir="rtl"] .tab-nav { flex-direction: row-reverse; } [dir="rtl"] .gr-row, [dir="rtl"] [class*="row"] { flex-direction: row-reverse; } [dir="rtl"] input, [dir="rtl"] textarea { text-align: right; direction: rtl; } [dir="rtl"] input[type="number"] { direction: ltr; text-align: left; } [dir="rtl"] label, [dir="rtl"] .gr-label { text-align: right; } [dir="rtl"] .gr-checkbox { flex-direction: row-reverse; } [dir="rtl"] .gr-slider { direction: ltr; } [dir="rtl"] .gr-markdown ul, [dir="rtl"] .gr-markdown ol { padding-left: 0; padding-right: 1.5em; } /* Language selector in header */ .lang-selector-row { display: flex; justify-content: flex-end; margin-bottom: 8px; } [dir="rtl"] .lang-selector-row { justify-content: flex-start; } .gradio-container { background: var(--bg-primary) !important; min-height: 100vh; color: var(--text-primary); } .tabs { background: transparent !important; padding: 8px 0; } .tab-nav { background: var(--bg-secondary) !important; border: 1px solid var(--border-subtle) !important; border-radius: var(--radius-lg); padding: 6px; gap: 6px; margin-bottom: 20px; display: flex; justify-content: center; flex-wrap: wrap; } .tab-nav > button { background: transparent !important; color: var(--text-secondary) !important; border: none 
!important; border-radius: var(--radius-md); padding: 12px 24px; font-weight: 500; font-size: 0.95rem; cursor: pointer; transition: all var(--transition); } .tab-nav > button:hover { background: var(--bg-tertiary) !important; color: var(--text-primary) !important; } .tab-nav > button.selected, .tab-nav > button[aria-selected="true"], [role="tab"][aria-selected="true"] { background: var(--accent-gradient) !important; color: white !important; font-weight: 600; box-shadow: var(--shadow-glow); } button.primary, .primary { background: var(--accent-gradient) !important; border: none !important; border-radius: var(--radius-md); font-weight: 600; padding: 12px 24px; color: white !important; cursor: pointer; transition: all var(--transition); box-shadow: var(--shadow-md); } button.primary:hover, .primary:hover { box-shadow: var(--shadow-glow), var(--shadow-lg); filter: brightness(1.1); } button.secondary, .secondary { background: var(--surface) !important; color: var(--text-primary) !important; border: 1px solid var(--border-default) !important; border-radius: var(--radius-sm); cursor: pointer; transition: all var(--transition); } button.secondary:hover, .secondary:hover { background: var(--surface-hover) !important; border-color: var(--accent-primary) !important; } .block { background: var(--bg-secondary) !important; border: 1px solid var(--border-subtle) !important; border-radius: var(--radius-lg) !important; box-shadow: var(--shadow-sm); padding: 20px; margin: 8px 0; transition: all var(--transition); } .tabitem { background: transparent !important; padding: 16px 0; } input, textarea, .gr-input, .gr-textbox textarea { background: var(--bg-tertiary) !important; border: 1px solid var(--border-default) !important; border-radius: var(--radius-sm) !important; color: var(--text-primary) !important; transition: all var(--transition); } input:focus, textarea:focus { border-color: var(--accent-primary) !important; box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.2) !important; outline: 
none !important; } .gr-dropdown, select { background: var(--bg-tertiary) !important; border: 1px solid var(--border-default) !important; border-radius: var(--radius-sm) !important; color: var(--text-primary) !important; } .gr-slider input[type="range"] { accent-color: var(--accent-primary); } .gr-checkbox input[type="checkbox"] { accent-color: var(--accent-primary); } label, .gr-label { color: var(--text-secondary) !important; font-weight: 500; } .gr-image, .image-container { background: var(--bg-tertiary) !important; border: 2px dashed var(--border-default) !important; border-radius: var(--radius-lg) !important; transition: all var(--transition); } .gr-image:hover { border-color: var(--accent-primary) !important; } .gr-image img { border-radius: var(--radius-md); } /* Examples table - Dark theme (stable selectors only) */ .examples, .gr-examples, [class*="example"], [class*="Example"], div[class*="example"], div[class*="sample"], .sample-table, [data-testid="examples"], [data-testid*="example"] { background: var(--bg-secondary) !important; border-radius: var(--radius-lg) !important; } /* Table itself */ .examples table, .gr-examples table, [class*="example"] table, [data-testid="examples"] table { background: var(--bg-secondary) !important; border-collapse: collapse !important; width: 100% !important; } /* All rows */ .examples tr, .gr-examples tr, [class*="example"] tr, [data-testid="examples"] tr { background: var(--bg-secondary) !important; border-bottom: 1px solid var(--border-default) !important; } /* Row hover */ .examples tr:hover, .gr-examples tr:hover, [class*="example"] tr:hover, [data-testid="examples"] tr:hover { background: var(--surface) !important; } /* Table cells */ .examples td, .gr-examples td, [class*="example"] td, [data-testid="examples"] td { color: var(--text-secondary) !important; background: transparent !important; } /* First column (prompts) - emphasized */ .examples td:first-child, [class*="example"] td:first-child, 
[data-testid="examples"] td:first-child { color: var(--text-primary) !important; font-weight: 500 !important; } /* Headers */ .examples th, .gr-examples th, [class*="example"] th, [data-testid="examples"] th { background: var(--surface) !important; color: var(--text-primary) !important; font-weight: 600 !important; border-bottom: 1px solid var(--border-default) !important; } /* Wrapper divs */ .examples > div, [class*="example"] > div { background: var(--bg-secondary) !important; } h1, h2, h3, h4 { color: var(--text-primary) !important; } h1 { font-size: clamp(1.5rem, 4vw, 2.2rem); font-weight: 700; } .markdown-text, .gr-markdown { color: var(--text-secondary) !important; } .gr-markdown a { color: var(--accent-primary) !important; } .gr-group { background: var(--surface) !important; border: 1px solid var(--border-subtle) !important; border-radius: var(--radius-lg) !important; padding: 16px !important; } .gr-accordion { background: var(--bg-secondary) !important; border: 1px solid var(--border-subtle) !important; border-radius: var(--radius-md) !important; } .footer-no-box { background: transparent !important; border: none !important; box-shadow: none !important; padding: 0; } .gradio-container > footer { background: var(--bg-secondary) !important; border-top: 1px solid var(--border-subtle) !important; padding: 12px 20px; } .gradio-container > footer span, .gradio-container > footer p { color: var(--text-muted) !important; } .gradio-container > footer a { color: var(--accent-primary) !important; } .progress-bar { background: var(--bg-tertiary) !important; border-radius: 4px; } .progress-bar > div { background: var(--accent-gradient) !important; border-radius: 4px; } @media (prefers-reduced-motion: reduce) { *, *::before, *::after { animation-duration: 0.01ms !important; transition-duration: 0.01ms !important; } } @media (max-width: 768px) { .tab-nav { padding: 4px; gap: 4px; } .tab-nav > button { padding: 10px 16px; font-size: 0.85rem; } .block { padding: 12px; 
margin: 6px 0; } button.primary { padding: 10px 16px; width: 100%; } h1 { font-size: 1.4rem !important; } } /* Accessibility - keyboard focus indicators */ button:focus-visible, input:focus-visible, textarea:focus-visible, select:focus-visible, [role="button"]:focus-visible { outline: 2px solid var(--accent-primary) !important; outline-offset: 2px !important; } .gr-image:focus-visible, [role="tab"]:focus-visible { outline: 2px solid var(--accent-primary) !important; outline-offset: 2px !important; } ::-webkit-scrollbar { width: 8px; height: 8px; } ::-webkit-scrollbar-track { background: var(--bg-secondary); } ::-webkit-scrollbar-thumb { background: var(--bg-tertiary); border-radius: 4px; } ::-webkit-scrollbar-thumb:hover { background: var(--surface); } /* Tab navigation text */ .tab-nav button, .tab-nav > button, button[role="tab"], .tabs button { color: var(--text-primary) !important; } /* Labels and spans */ label, .gr-label, .label-wrap, .label-wrap span, .gr-box label, .gr-form label, .gr-group label { color: var(--text-secondary) !important; } .gr-block span, .gr-box span, .gr-form span, .gr-group span, .block span { color: var(--text-secondary) !important; } /* Table overrides */ table thead, table thead tr, table thead th, [class*="examples"] thead th { background: var(--surface) !important; color: var(--text-primary) !important; } table tbody td, [class*="examples"] td { color: var(--text-secondary) !important; } /* Accordion and markdown */ .gr-accordion summary, .gr-accordion button, details summary, summary span { color: var(--text-primary) !important; } .gr-markdown, .gr-markdown p, .gr-markdown li, .markdown-text, .prose { color: var(--text-secondary) !important; } /* Input placeholders and buttons */ input::placeholder, textarea::placeholder { color: var(--text-muted) !important; } button.secondary, .secondary { color: var(--text-primary) !important; } /* Dropdown menus - dark theme */ .gr-dropdown ul, .gr-dropdown li, [data-testid="dropdown"] ul, 
.svelte-select-list, .dropdown-menu, select option, [role="listbox"], [role="listbox"] [role="option"] { background: var(--bg-tertiary) !important; color: var(--text-primary) !important; } /* Dropdown hover/selected states */ .gr-dropdown li:hover, select option:hover, [role="option"]:hover, [role="option"][aria-selected="true"] { background: var(--surface) !important; } /* Portal dropdowns (rendered outside .gradio-container) */ [data-testid="dropdown-list"], [role="listbox"]:not(.gradio-container [role="listbox"]) { background-color: var(--bg-tertiary) !important; color: var(--text-primary) !important; border: 1px solid var(--border-default) !important; border-radius: var(--radius-sm) !important; } /* Slider and checkbox labels */ .gr-slider span, .gr-slider output, .range-wrap span, input[type="range"] + span { color: var(--text-primary) !important; } .gr-checkbox label, .gr-checkbox span, input[type="checkbox"] + span { color: var(--text-secondary) !important; } /* Image upload text */ .gr-image span, .gr-image p, .upload-text, [data-testid="image"] span { color: var(--text-secondary) !important; } .gr-image svg, .upload-icon { fill: var(--text-muted) !important; } /* Error/warning states */ .gr-error, [class*="error"] { background: rgba(239,68,68,0.15) !important; color: var(--error) !important; border-color: var(--error) !important; } .gr-info, [class*="info-msg"] { background: rgba(129,140,248,0.15) !important; color: var(--accent-primary) !important; } /* Copy buttons and icons */ .gr-textbox button, button svg, .copy-button { color: var(--text-secondary) !important; fill: var(--text-secondary) !important; } .gr-textbox button:hover { color: var(--text-primary) !important; } /* Tooltips */ [role="tooltip"], .gr-tooltip, .tooltip { background: var(--surface) !important; color: var(--text-primary) !important; border: 1px solid var(--border-default) !important; } /* Progress/loading text */ .progress-text, .loading-text, [class*="loading"] span, 
[class*="progress"] span { color: var(--text-secondary) !important; }
/* Number input spinners */
input[type="number"]::-webkit-inner-spin-button,
input[type="number"]::-webkit-outer-spin-button { filter: invert(0.8); }
"""

# Create custom dark theme
# NOTE: every *_dark value is mirrored to its light counterpart, so clients in
# light mode still see the same dark palette.
dark_theme = gr.themes.Base(
    primary_hue=gr.themes.colors.indigo,
    secondary_hue=gr.themes.colors.purple,
    neutral_hue=gr.themes.colors.zinc,
).set(
    # Backgrounds
    body_background_fill="#0c0c0e",
    body_background_fill_dark="#0c0c0e",
    background_fill_primary="#141416",
    background_fill_primary_dark="#141416",
    background_fill_secondary="#1c1c20",
    background_fill_secondary_dark="#1c1c20",
    # Borders
    border_color_primary="rgba(255,255,255,0.12)",
    border_color_primary_dark="rgba(255,255,255,0.12)",
    # Text
    body_text_color="#e5e5e5",
    body_text_color_dark="#e5e5e5",
    body_text_color_subdued="#a1a1aa",
    body_text_color_subdued_dark="#a1a1aa",
    # Blocks
    block_background_fill="#141416",
    block_background_fill_dark="#141416",
    block_border_color="rgba(255,255,255,0.08)",
    block_border_color_dark="rgba(255,255,255,0.08)",
    block_label_background_fill="#1c1c20",
    block_label_background_fill_dark="#1c1c20",
    block_label_text_color="#a1a1aa",
    block_label_text_color_dark="#a1a1aa",
    # Inputs
    input_background_fill="#1c1c20",
    input_background_fill_dark="#1c1c20",
    input_border_color="rgba(255,255,255,0.12)",
    input_border_color_dark="rgba(255,255,255,0.12)",
    # Buttons
    button_primary_background_fill="linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%)",
    button_primary_background_fill_dark="linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%)",
    button_primary_text_color="white",
    button_primary_text_color_dark="white",
    button_secondary_background_fill="#232328",
    button_secondary_background_fill_dark="#232328",
    button_secondary_text_color="#e5e5e5",
    button_secondary_text_color_dark="#e5e5e5",
    # Table/Examples - CRITICAL for fixing white background
    table_even_background_fill="#1a1a1e",
    table_even_background_fill_dark="#1a1a1e",
    table_odd_background_fill="#1a1a1e",
table_odd_background_fill_dark="#1a1a1e",
    table_row_focus="#252528",
    table_row_focus_dark="#252528",
)

# Top-level UI: one Blocks app with a language dropdown above three tabs.
with gr.Blocks(title="Z Image Turbo", css=css, theme=dark_theme) as demo:
    # Language selector at top
    with gr.Row(elem_classes="lang-selector-row"):
        lang_selector = gr.Dropdown(
            choices=LANGUAGES,
            value="English",
            label="🌐 Language",
            scale=0,
            min_width=160,
            interactive=True
        )
    # Static header banner (raw HTML).
    gr.HTML("""

Z-Image Turbo + GLM-4.6V / DeepSeek-3.2 Thinking

Image Gen & Edit with GLM-4.6V + DeepSeek-3.2

If you liked it, please ❤️ like it. Thank you!

""")

    with gr.Tabs():
        # TAB 1: Generate Image
        with gr.Tab("Generate"):
            with gr.Row():
                with gr.Column(scale=2):
                    gen_prompt = gr.Textbox(label="Prompt", placeholder="Describe your image in detail...", lines=4)
                    gen_polish = gr.Checkbox(label="Prompt+ by deepseek-reasoner", value=False)
                    with gr.Row():
                        gen_style = gr.Dropdown(choices=STYLES, value="None", label="Style")
                        gen_ratio = gr.Dropdown(choices=RATIOS, value="1:1 Square (1024x1024)", label="Aspect Ratio")
                    with gr.Accordion("Advanced Settings", open=False):
                        gen_steps = gr.Slider(minimum=4, maximum=16, value=9, step=1, label="Steps")
                        with gr.Row():
                            gen_seed = gr.Number(label="Seed", value=42, precision=0)
                            gen_randomize = gr.Checkbox(label="Random Seed", value=True)
                    gen_btn = gr.Button("Generate", variant="primary", size="lg")
                with gr.Column(scale=3):
                    gen_output = gr.Image(label="Generated Image", type="pil", interactive=False, height=512)
                    gen_polished_prompt = gr.Textbox(label="Enhanced Prompt", interactive=False, visible=True, lines=4)
                    gen_seed_out = gr.Number(label="Seed Used", interactive=False)
                    with gr.Row():
                        gen_share_btn = gr.Button("Share", variant="secondary")
                        gen_share_link = gr.Textbox(label="", interactive=False, show_copy_button=True, show_label=False)
            # Hidden state carrying the fully assembled prompt between the two chained steps.
            gen_full_prompt = gr.State("")
            gr.Examples(examples=EXAMPLES_GENERATE, inputs=[gen_prompt, gen_style, gen_ratio, gen_steps, gen_seed, gen_randomize])
            # Two-step pipeline: CPU-side prompt polishing first, then GPU generation.
            gen_btn.click(
                fn=do_polish_prompt,
                inputs=[gen_prompt, gen_style, gen_polish],
                outputs=[gen_full_prompt, gen_polished_prompt]
            ).then(
                fn=generate,
                inputs=[gen_full_prompt, gen_polished_prompt, gen_ratio, gen_steps, gen_seed, gen_randomize],
                outputs=[gen_output, gen_seed_out]
            )
            # Pressing Enter in the prompt box mirrors the Generate button.
            gen_prompt.submit(
                fn=do_polish_prompt,
                inputs=[gen_prompt, gen_style, gen_polish],
                outputs=[gen_full_prompt, gen_polished_prompt]
            ).then(
                fn=generate,
                inputs=[gen_full_prompt, gen_polished_prompt, gen_ratio, gen_steps, gen_seed, gen_randomize],
                outputs=[gen_output, gen_seed_out]
            )
            gen_share_btn.click(fn=upload_to_hf_cdn,
inputs=[gen_output], outputs=[gen_share_link])

        # TAB 2: AI Vision Assistant
        with gr.Tab("AI Assistant"):
            ai_desc_md = gr.Markdown("**AI-Powered Prompt Generator** - Upload an image, analyze it with GLM-4.6V, then generate optimized prompts.")
            with gr.Row():
                with gr.Column(scale=1):
                    ai_image = gr.Image(label="Upload Image", type="pil", height=300)
                    ai_analyze_btn = gr.Button("Analyze Image", variant="primary")
                    ai_description = gr.Textbox(label="Image Description", lines=5, interactive=False)
                with gr.Column(scale=1):
                    ai_request = gr.Textbox(label="What changes do you want?", placeholder="e.g., 'watercolor style' or 'dramatic sunset lighting'", lines=2)
                    ai_style = gr.Dropdown(choices=STYLES, value="None", label="Target Style")
                    ai_generate_btn = gr.Button("Generate Prompt", variant="primary")
                    ai_generated_prompt = gr.Textbox(label="Generated Prompt", lines=6, interactive=False)
                    ai_send_btn = gr.Button("Send to Transform Tab", variant="primary")
            with gr.Accordion("How to Use", open=False):
                ai_howto_md = gr.Markdown("""
1. **Upload** an image and click "Analyze Image"
2. **Describe** the changes you want
3. **Generate** an optimized prompt
4. **Send** to Transform tab to apply changes
""")
            # GLM-4.6V vision helpers (defined earlier in the file).
            ai_analyze_btn.click(
                fn=analyze_image_with_glm,
                inputs=[ai_image],
                outputs=[ai_description]
            )
            ai_generate_btn.click(
                fn=generate_prompt_with_glm,
                inputs=[ai_description, ai_request, ai_style],
                outputs=[ai_generated_prompt]
            )

        # TAB 3: Transform Image
        with gr.Tab("Transform"):
            trans_desc_md = gr.Markdown("**Transform your image** - Upload and describe the transformation. 
Lower strength = subtle, higher = dramatic.")
            with gr.Row():
                with gr.Column(scale=2):
                    trans_input = gr.Image(label="Upload Image", type="pil", height=300)
                    trans_prompt = gr.Textbox(label="Transformation Prompt", placeholder="e.g., 'oil painting style, vibrant colors'", lines=3)
                    trans_polish = gr.Checkbox(label="Prompt+ by deepseek-reasoner", value=False)
                    with gr.Row():
                        trans_style = gr.Dropdown(choices=STYLES, value="None", label="Style")
                        trans_strength = gr.Slider(minimum=0.1, maximum=1.0, value=0.6, step=0.05, label="Strength")
                    with gr.Accordion("Advanced Settings", open=False):
                        trans_steps = gr.Slider(minimum=4, maximum=16, value=9, step=1, label="Steps")
                        with gr.Row():
                            trans_seed = gr.Number(label="Seed", value=42, precision=0)
                            trans_randomize = gr.Checkbox(label="Random Seed", value=True)
                    trans_btn = gr.Button("Transform", variant="primary", size="lg")
                with gr.Column(scale=3):
                    trans_output = gr.Image(label="Transformed Image", type="pil", interactive=False, height=512)
                    trans_polished_prompt = gr.Textbox(label="Enhanced Prompt", interactive=False, visible=True, lines=4)
                    trans_seed_out = gr.Number(label="Seed Used", interactive=False)
                    with gr.Row():
                        trans_share_btn = gr.Button("Share", variant="secondary")
                        trans_share_link = gr.Textbox(label="", interactive=False, show_copy_button=True, show_label=False)
            # Hidden state carrying the assembled prompt between the chained steps.
            trans_full_prompt = gr.State("")
            with gr.Accordion("Example Prompts", open=False):
                gr.Examples(examples=EXAMPLES_TRANSFORM, inputs=[trans_prompt, trans_style, trans_strength, trans_steps, trans_seed, trans_randomize])
            # Same two-step pattern as Generate: polish on CPU, then GPU transform.
            trans_btn.click(
                fn=do_polish_transform_prompt,
                inputs=[trans_prompt, trans_style, trans_polish],
                outputs=[trans_full_prompt, trans_polished_prompt]
            ).then(
                fn=transform,
                inputs=[trans_input, trans_full_prompt, trans_polished_prompt, trans_strength, trans_steps, trans_seed, trans_randomize],
                outputs=[trans_output, trans_seed_out]
            )
            trans_prompt.submit(
                fn=do_polish_transform_prompt,
                inputs=[trans_prompt, trans_style, trans_polish],
outputs=[trans_full_prompt, trans_polished_prompt]
            ).then(
                fn=transform,
                inputs=[trans_input, trans_full_prompt, trans_polished_prompt, trans_strength, trans_steps, trans_seed, trans_randomize],
                outputs=[trans_output, trans_seed_out]
            )
            trans_share_btn.click(fn=upload_to_hf_cdn, inputs=[trans_output], outputs=[trans_share_link])

    # Cross-tab handler
    # Copies the AI-generated prompt and the uploaded image into the Transform tab.
    ai_send_btn.click(
        fn=lambda prompt, img: (prompt, img),
        inputs=[ai_generated_prompt, ai_image],
        outputs=[trans_prompt, trans_input]
    )

    # Language selector - update all UI labels when language changes
    lang_selector.change(
        fn=change_language,
        inputs=[lang_selector],
        outputs=[
            # Generate tab (12 components)
            gen_prompt, gen_polish, gen_style, gen_ratio, gen_steps, gen_seed, gen_randomize, gen_btn,
            gen_output, gen_polished_prompt, gen_seed_out, gen_share_btn,
            # AI Assistant tab (10 components)
            ai_desc_md, ai_image, ai_analyze_btn, ai_description, ai_request, ai_style,
            ai_generate_btn, ai_generated_prompt, ai_send_btn, ai_howto_md,
            # Transform tab (14 components)
            trans_desc_md, trans_input, trans_prompt, trans_polish, trans_style, trans_strength,
            trans_steps, trans_seed, trans_randomize, trans_btn, trans_output, trans_polished_prompt,
            trans_seed_out, trans_share_btn,
        ]
    )

    # Footer banner (raw HTML).
    gr.HTML(
        """
Models: Z-Image-Turbo | GLM-4.6V | by @lulavc
""", elem_classes="footer-no-box" ) demo.launch()