import os
import json
import re
import requests
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
# Check for CUDA availability for PyTorch
if torch.cuda.is_available():
device, dtype = "cuda", torch.bfloat16
else:
device, dtype = "cpu", torch.float32
# Load Moondream3 Preview for image analysis
moondream3_model_id = "moondream/moondream3-preview"
tokenizer_moondream3 = AutoTokenizer.from_pretrained(moondream3_model_id)
moondream3 = AutoModelForCausalLM.from_pretrained(
moondream3_model_id,
trust_remote_code=True,
torch_dtype=dtype,
device_map={"": device}
).eval()
moondream3.compile() # Optional: speeds up inference
# Initialize DeepSeek-V2 for chat completion
deepseek_model_name = "deepseek-ai/DeepSeek-V2"
tokenizer_deepseek = AutoTokenizer.from_pretrained(deepseek_model_name, trust_remote_code=True)
deepseek_model = AutoModelForCausalLM.from_pretrained(
    deepseek_model_name,
    trust_remote_code=True,
    torch_dtype=dtype,
    device_map="auto"
)
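# Shared text-generation pipeline; the sampling settings favor descriptive
# but varied output for the report-writing prompts below.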
chat_pipe = pipeline(
"text-generation",
model=deepseek_model,
tokenizer=tokenizer_deepseek,
max_new_tokens=512,
temperature=0.7,
top_p=0.9,
repetition_penalty=1.1,
do_sample=True,
)
def deepseek_chat(user_message, is_json=False):
    """Single-turn chat completion using DeepSeek-V2; is_json flags that the
    caller expects JSON back (the prompt itself requests it)."""
    # Build the prompt with the model's own chat template instead of
    # hand-rolled special tokens, which DeepSeek-V2 does not define.
    messages = [{"role": "user", "content": user_message}]
    prompt = tokenizer_deepseek.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    # return_full_text=False strips the prompt, so the output is the reply.
    response = chat_pipe(prompt, return_full_text=False)[0]["generated_text"]
    return response.strip()
# Shared schema for Moondream3 feature extraction: one question per design
# attribute, answered for every image.
FEATURE_HEADERS = ["Image", "Layout", "Decor", "Atmosphere", "Lighting", "Color scheme", "Furniture style"]
FEATURE_PROMPTS = [
    "Describe the spatial arrangement of furniture, walls, and other elements in this image.",
    "What type, style, and arrangement of decorative elements are present in this image?",
    "What mood, ambiance, and overall feeling does this image evoke?",
    "What type, intensity, placement, and direction of lighting is present in this image?",
    "What are the dominant colors, color palette, and color harmony in this image?",
    "What type, shape, material, and arrangement of furniture is present in this image?"
]
# Extract design features from the inspiration images using Moondream3
def extract_features(image_tuples):
    # gr.Gallery with type="pil" yields (PIL.Image, caption) tuples.
    images = [img[0].convert("RGB") for img in image_tuples if img[0] is not None]
    answers = []
    for prompt in FEATURE_PROMPTS:
        image_answers = moondream3.batch_answer(
            images=images,
            prompts=[prompt] * len(images),
            tokenizer=tokenizer_moondream3,
        )
        answers.append(image_answers)
    data = []
    # Iterate over the filtered image list so indices line up with answers.
    for i in range(len(images)):
        image_name = f"image{i+1}"
        image_answers = [answer[i] for answer in answers]
        print(f"{image_name}_answers \n {image_answers} \n")
        data.append([image_name] + image_answers)
    return {'headers': FEATURE_HEADERS, 'data': data}
# Describe the client's room from a single image using Moondream3
def describe_room(image):
    # Ask every feature prompt about the same room photo.
    images = [image.convert("RGB")] * len(FEATURE_PROMPTS)
    answers = moondream3.batch_answer(
        images=images,
        prompts=FEATURE_PROMPTS,
        tokenizer=tokenizer_moondream3,
    )
    image_name = "ClientRoom"
    print(f"{image_name}_answers \n {answers} \n")
    return {'headers': FEATURE_HEADERS, 'data': [[image_name] + answers]}
def merge_features(inspiration_features):
    preference_map_extraction = f"""
    You are one of the world's most knowledgeable minds in both theoretical and applied interior design.
    - You are detailed and meticulous.
    - You can distil a large, potentially unstructured, potentially multimodal range of input data into a highly accurate, all-encompassing representation of the source's interior design preferences by mapping the input onto a model of fundamental interior design components.
    - You can produce professionally structured, fully detailed, well-thought-out interior design proposals, from initial conceptualization and planning to a complete, finished design for a real-world space.
    - More generally, you can answer any question or assist with any task in the realm of applied and theoretical interior design.
    Your task: analyze the interior design information given between <<< and >>> and merge the analysis results into a comprehensive design style preference map for the user who uploaded the images. Return the result as JSON.
    <<<
    {inspiration_features}
    >>>
    """
    print(f"\npreference_map_extraction prompt\n{preference_map_extraction}\n")
    prefmap = deepseek_chat(preference_map_extraction, is_json=True)
    print(f"\nmerge_features chat_response\n{prefmap}\n")
    return prefmap
def create_design_concept_report(room_description, inspiration_features):
    design_report_prompt = f"""
    Generate a detailed interior design plan proposal report, structured as markdown.
    - The report should present three design concepts for the client's space, based on the description of the client's room (the target of the project) and the design preference map generated from the inspirational images they uploaded.
    - The report should have an introduction, and each concept section should open with a placeholder for a mood board image, followed by sections on Style Preference, Color Scheme, Furniture Style, Lighting, Atmosphere, Decor, and Layout.
    - Finally, the report should close with a summary of the design plan.
    Detailed information about the client's room, based on the photo they uploaded:
    {room_description}
    Design preference map generated from the inspirational design images they uploaded:
    {inspiration_features}
    """
    print(f"\ndesign_report_prompt\n{design_report_prompt}\n")
    designreport = deepseek_chat(design_report_prompt)
    print(f"\ndesign concept chat_response\n{designreport}\n")
    return designreport
def queryllm(payload):
    """POST a raw payload to the hosted text-generation endpoint.
    Currently unused; kept as a helper for offloading generation to an API."""
    response = requests.post(textgen_API_URL, headers=headers, json=payload)
    print(response)
    return response.json()
def generate_mood_board_image(prompt):
    """Render a mood board via the text-to-image endpoint; returns raw image bytes."""
    payload = {"inputs": prompt}
    response = requests.post(texttoimage_API_URL, headers=headers, json=payload)
    return response.content
def getmoodboardprompts(designreport):
    mood_board_descriptions_prompt = f"""
    ### interior design report plan
    {designreport}
    ###
    For each interior design concept described in the report plan above, generate a text prompt that can be sent to a text-to-image model to produce a design project mood board.
    Each prompt should clearly describe what should go onto the mood board for its concept, and the output should be structured as JSON. For example:
    {{
    "Concept1": "Create a mood board for a modern cozy retreat bedroom with a warm and inviting atmosphere. Include a white and brown color palette, modern and contemporary furniture with clean lines, a cozy and functional bed, nightstands with elegant designs, a bench at the foot of the bed with storage, sheer curtains on the window, floor lamps and table lamps with layered lighting effects, potted plants, a vase with branches and twigs, a bowl, a clock, and books on the nightstands.",
    "Concept2": "Create a mood board for another concept..."
    }}
    Only output the JSON, nothing else, no explanations or commentary.
    """
    print(f"\nmood_board_descriptions_prompt:\n{mood_board_descriptions_prompt}\n")
    mood_board_descriptions = deepseek_chat(mood_board_descriptions_prompt)
    print(f"\nmood_board_descriptions chat_response\n{mood_board_descriptions}\n")
    # LLMs often wrap JSON in code fences or add stray text, so extract the
    # first {...} block before parsing.
    match = re.search(r"\{.*\}", mood_board_descriptions, re.DOTALL)
    return json.loads(match.group(0) if match else mood_board_descriptions)
def generate_moodboards(mb_prompts):
    """Render each concept's mood board prompt to an image file on disk."""
    moodboard_images = {}
    for concept, prompt in mb_prompts.items():
        image_data = generate_mood_board_image(prompt)
        file_path = f"moodboard_{concept}.jpg"
        with open(file_path, "wb") as f:
            f.write(image_data)
        moodboard_images[concept] = file_path
    return moodboard_images
def add_moodboards_to_report(moodboard_images, report):
    add_moodboards_prompt = f"""
    mood board images
    <<<
    {moodboard_images}
    >>>
    report
    <<<
    {report}
    >>>
    Insert the path of each mood board image into its respective placeholder in the report. Respond with the revised report only, no explanations or commentary.
    """
    print(f"\nadd_moodboards_prompt\n{add_moodboards_prompt}\n")
    revised_report = deepseek_chat(add_moodboards_prompt)
    print(f"\nrevised_report\n{revised_report}\n")
    return revised_report
# End-to-end pipeline, run once per Gradio submission: Moondream3 analyzes
# the inspiration images and the client's room, DeepSeek-V2 merges the
# analyses into a preference map and drafts the design report, the
# text-to-image endpoint renders one mood board per concept, and the mood
# board paths are inserted back into the report.
def process_images(design_images, room_image):
design_descriptions = extract_features(design_images)
room_description = describe_room(room_image)
preference_map = merge_features(design_descriptions)
print(f"\npreference_map\n{preference_map}\n")
design_report = create_design_concept_report(room_description, preference_map)
print(f"\ndesign_report\n{design_report}\n")
mb_prompts = getmoodboardprompts(design_report)
print(f"\nmb_prompts\n{mb_prompts}\n")
moodboard_images = generate_moodboards(mb_prompts)
print(f"\nmoodboard_images\n{moodboard_images}\n")
revised_report = add_moodboards_to_report(moodboard_images, design_report)
print("revised_report")
print(revised_report)
print("preference map")
print(preference_map)
return revised_report, preference_map
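# Gradio UI: a gallery of inspiration images plus a single room photo in,
# a markdown design report and a JSON preference map out.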
gallery = gr.components.Gallery(label="Upload Images of Preferred Design Styles", type="pil")
image_input = gr.components.Image(label="Upload Image of Your Room", type="pil")
report_output = gr.components.Markdown(label="Design Concept Report with Mood Boards")
json_output = gr.components.JSON(label="Design Preference Map")
interface = gr.Interface(
fn=process_images,
inputs=[gallery, image_input],
outputs=[report_output, json_output],
title="Interior Design Assistant",
description="Upload images of your preferred interior design styles and a photo of your room to receive a custom design concept report and preference map."
)
if __name__ == "__main__":
    interface.launch()