File size: 11,259 Bytes
9414970
6a8ca1f
 
9414970
 
 
 
 
 
0aec354
9414970
 
 
 
 
 
 
 
 
 
 
7ab08cb
9414970
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7ab08cb
9414970
 
 
 
 
 
 
 
 
 
 
 
 
aae971d
9414970
 
 
 
 
 
 
 
b8e5afc
9414970
db2ea29
9414970
b8e5afc
 
9414970
 
 
 
 
 
 
 
 
b8e5afc
9414970
 
3f71d24
9414970
 
 
 
6a8ca1f
9414970
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7ab08cb
9414970
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7ab08cb
9414970
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7ab08cb
9414970
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7ab08cb
9414970
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
import os
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from PIL import Image
import requests
import json
import base64
from io import BytesIO

# Select the compute target once at import time: a CUDA GPU runs in bfloat16,
# while the CPU fallback stays in full float32 precision.
device, dtype = (
    ("cuda", torch.bfloat16)
    if torch.cuda.is_available()
    else ("cpu", torch.float32)
)

# Load Moondream3 Preview for image analysis
# The tokenizer and the model are both loaded at import time from the same
# model id; the model is placed entirely on `device` (the empty-string key in
# device_map covers the whole module tree) using the `dtype` selected above.
moondream3_model_id = "moondream/moondream3-preview"
tokenizer_moondream3 = AutoTokenizer.from_pretrained(moondream3_model_id)
# trust_remote_code=True allows the modeling code shipped with the checkpoint
# to be executed locally, so the model id should point at a trusted source.
# NOTE(review): the rest of this file calls `moondream3.batch_answer(...)` with
# this tokenizer — confirm the moondream3-preview remote code exposes that
# method and ships a tokenizer loadable through AutoTokenizer.
moondream3 = AutoModelForCausalLM.from_pretrained(
    moondream3_model_id,
    trust_remote_code=True,
    torch_dtype=dtype,
    device_map={"": device}
).eval()
moondream3.compile()  # Optional: speeds up inference

# Initialize DeepSeek-V2 for chat completion
# The tokenizer and model are loaded at import time and wrapped in a
# text-generation pipeline that `deepseek_chat` uses for every request.
deepseek_model_name = "deepseek-ai/DeepSeek-V2"
tokenizer_deepseek = AutoTokenizer.from_pretrained(deepseek_model_name)
# NOTE(review): torch_dtype is hard-coded to bfloat16 here, whereas the
# Moondream load uses the `dtype` chosen from CUDA availability (float32 on
# CPU) — confirm bfloat16 is intended on CPU-only hosts.
# NOTE(review): no trust_remote_code is passed for this checkpoint — confirm
# the installed transformers version can load it without custom code.
deepseek_model = AutoModelForCausalLM.from_pretrained(
    deepseek_model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto"
)
# Sampling configuration shared by all chat calls: up to 512 new tokens,
# nucleus sampling (top_p=0.9) at temperature 0.7 with a mild repetition
# penalty. do_sample=True means replies are not deterministic.
chat_pipe = pipeline(
    "text-generation",
    model=deepseek_model,
    tokenizer=tokenizer_deepseek,
    max_new_tokens=512,
    temperature=0.7,
    top_p=0.9,
    repetition_penalty=1.1,
    do_sample=True,
)

def deepseek_chat(user_message, is_json=False):
    """Run one single-turn chat completion through the DeepSeek-V2 pipeline.

    Args:
        user_message: Text inserted as the user's utterance in the prompt.
        is_json: Accepted for call-site compatibility; it is currently not
            used and has no effect on the prompt or on the returned text.

    Returns:
        The generated text with surrounding whitespace removed. If the model
        echoes the assistant marker, only the text after its last occurrence
        is returned.
    """
    # NOTE(review): confirm these utterance markers match the chat format the
    # loaded tokenizer/model was trained with.
    assistant_marker = "<|BeginOfUtterance|>Assistant:"
    prompt = f"<|BeginOfUtterance|>User: {user_message}<|EndOfUtterance|>{assistant_marker}"

    # return_full_text=False asks the pipeline for the continuation only.
    generations = chat_pipe(prompt, return_full_text=False)
    completion = generations[0]["generated_text"]

    # Keep whatever follows the last assistant marker (or the whole text when
    # the marker is absent).
    _, _, reply = completion.rpartition(assistant_marker)
    return reply.strip()

# Extract features from images using Moondream3
def extract_features(image_tuples):
    """Describe every inspiration image along six interior-design dimensions.

    Args:
        image_tuples: Sequence of gallery entries whose first element is a
            PIL image (or None for an entry without a usable image). None or
            an empty sequence is accepted and yields an empty table.

    Returns:
        A dict with two keys: 'headers' (the column names) and 'data' (one
        row per usable image: the image name followed by the six answers, in
        header order). Images are named ``image<N>`` where N is the 1-based
        position of the entry in ``image_tuples``.
    """
    headers = ["Image", "Layout", "Decor", "Atmosphere", "Lighting", "Color scheme", "Furniture style"]
    prompts = [
        "Describe the spatial arrangement of furniture, walls, and other elements in this image.",
        "What type, style, and arrangement of decorative elements are present in this image?",
        "What mood, ambiance, and overall feeling does this image evoke?",
        "What type, intensity, placement, and direction of lighting is present in this image?",
        "What are the dominant colors, color palette, and color harmony in this image?",
        "What type, shape, material, and arrangement of furniture is present in this image?"
    ]

    # Pair each usable image with its upload position. Rows are later built
    # from this filtered list, so entries without an image can no longer push
    # the answer index out of range. The RGB conversion is done once here
    # instead of once per prompt.
    indexed_images = [
        (position, entry[0].convert("RGB"))
        for position, entry in enumerate(image_tuples or [], start=1)
        if entry[0] is not None
    ]
    if not indexed_images:
        # Nothing to analyse: skip the model entirely.
        return {'headers': headers, 'data': []}

    rgb_images = [image for _, image in indexed_images]

    # One batched call per prompt; answers[p][k] is the answer to prompt p for
    # the k-th usable image.
    answers = [
        moondream3.batch_answer(
            images=rgb_images,
            prompts=[prompt] * len(rgb_images),
            tokenizer=tokenizer_moondream3,
        )
        for prompt in prompts
    ]

    data = []
    for column, (position, _) in enumerate(indexed_images):
        image_name = f"image{position}"
        image_answers = [per_prompt[column] for per_prompt in answers]
        print(f"image{position}_answers \n {image_answers} \n")
        data.append([image_name] + image_answers)

    return {'headers': headers, 'data': data}

# Describe room from image using Moondream3
def describe_room(image):
    """Describe the client's room along six interior-design dimensions.

    Args:
        image: PIL image of the room; it is converted to RGB before use.

    Returns:
        A dict with 'headers' (the column names) and 'data' holding a single
        row: the fixed name "ClientRoom" followed by the model's answer to
        each question, in header order.
    """
    questions = [
        "Describe the spatial arrangement of furniture, walls, and other elements in this image.",
        "What type, style, and arrangement of decorative elements are present in this image?",
        "What mood, ambiance, and overall feeling does this image evoke?",
        "What type, intensity, placement, and direction of lighting is present in this image?",
        "What are the dominant colors, color palette, and color harmony in this image?",
        "What type, shape, material, and arrangement of furniture is present in this image?"
    ]

    # The same converted image is paired with every question so that all six
    # are answered in a single batched call.
    rgb_room = image.convert("RGB")
    replies = moondream3.batch_answer(
        images=[rgb_room] * len(questions),
        prompts=questions,
        tokenizer=tokenizer_moondream3,
    )
    print(f"ClientRoom_answers \n {replies} \n")

    return {
        'headers': ["Image", "Layout", "Decor", "Atmosphere", "Lighting", "Color scheme", "Furniture style"],
        'data': [["ClientRoom"] + replies],
    }

def merge_features(inspiration_features):
    """Ask the chat model to merge per-image features into one preference map.

    Args:
        inspiration_features: Feature table of the inspiration images; it is
            interpolated into the prompt via its string representation.

    Returns:
        The chat model's reply as returned by ``deepseek_chat``. The prompt
        requests JSON, but the reply is passed through without being parsed
        or validated.
    """
    extraction_prompt = f"""
You are one of the worlds most knowledgeable minds in the field of both theoretical and applied interior design.
- You are detailed
- You are meticulous
- You can distil a large potentially unstructured potentially multimodal range of input data sources into a highly accurate all encompassing representation of the interior design concept preferences of the input source by mapping input data using a model of fundamental interior design component definitions
- You can come up with professionally structured, fully detailed, well thought out and all encompassing applied interior design proposals from initial conceptualization and planning to a complete and finished interior design of real world space
- Generally you can help answer any question or assist in any task asked of you relating to anything in the realm of applied and theoretical design and interior design

Your task is to analyze the interior design style given information after <<<>>> and merge the analysis results together to generate a comprehensive design style preference map representation for the user who uploaded some images. Return as JSON

<<<
{inspiration_features}
>>>
"""
    # Both the outgoing prompt and the reply are echoed to stdout for tracing.
    print(f"\npreference_map_extraction prompt\n{extraction_prompt}\n")
    merged_preferences = deepseek_chat(extraction_prompt, is_json=True)
    print(f"\n merge_features chat_response\n{merged_preferences}\n")
    return merged_preferences

def create_design_concept_report(room_description, inspiration_features):
    """Ask the chat model for a markdown design-plan report.

    Args:
        room_description: Description of the client's room; interpolated into
            the prompt via its string representation.
        inspiration_features: Design preference map derived from the
            inspiration images; interpolated the same way.

    Returns:
        The chat model's reply as returned by ``deepseek_chat``.
    """
    report_request = f"""
    Generate a detailed interior design plan proposal report structured as markdown
    - The report should include three design plan concepts for the clients space based on the clients interior design component preference representation generated from the inspirational images they uploaded clients room that is the target of the project and the design preference map generated from the inspirational design images they uploaded
    - The report should have an introduction, sections on Style Preference, Color Scheme, Furniture Style, Lighting, Atmosphere, Decor, and Layout for each concept, as well as a placeholder for a mood board image starting each concept section.
    - Finally, the report should have a summary to conclude the design plan.

    Very detailed information about the clients room based on the photo they uploaded:
    {room_description}

    Design preference map generated from the inspirational design images they uploaded:
    {inspiration_features}
"""
    # Echo the request and the reply to stdout for tracing.
    print(f"\ndesign_report_prompt\n{report_request}\n")
    report_markdown = deepseek_chat(report_request)
    print(f"\ndesign concept chat_response\n{report_markdown}\n")
    return report_markdown

def queryllm(payload, timeout=120):
    """POST a JSON payload to the text-generation endpoint and return its JSON reply.

    Args:
        payload: JSON-serialisable request body.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` waits indefinitely on an unresponsive host.

    Returns:
        The decoded JSON body of the response, whatever its HTTP status.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    # NOTE(review): `textgen_API_URL` and `headers` are not defined in this
    # file as reviewed — they must be provided at module level before this
    # function is called, otherwise it raises NameError.
    response = requests.post(
        textgen_API_URL,
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    print(response)
    return response.json()

def generate_mood_board_image(prompt, timeout=120):
    """Request a mood-board image for ``prompt`` from the text-to-image endpoint.

    Args:
        prompt: Text description sent as the ``inputs`` field of the request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` waits indefinitely on an unresponsive host.

    Returns:
        The raw response body as bytes.

    Raises:
        requests.exceptions.HTTPError: If the endpoint answers with a 4xx/5xx
            status, so an error body is never handed back as image bytes.
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    # NOTE(review): `texttoimage_API_URL` and `headers` are not defined in
    # this file as reviewed — they must be provided at module level before
    # this function is called, otherwise it raises NameError.
    payload = {"inputs": prompt}
    response = requests.post(
        texttoimage_API_URL,
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    # Fail loudly on HTTP errors; the caller writes the returned bytes
    # straight to a .jpg file.
    response.raise_for_status()
    return response.content

def _parse_first_json_object(text):
    """Return the first JSON object embedded in ``text``.

    Chat models often wrap JSON in code fences or add commentary around it.
    Each ``{`` is tried as a possible start of a JSON object and the first one
    that decodes wins. When no object can be decoded, the whole text is
    handed to ``json.loads`` so the usual ``json.JSONDecodeError`` is raised.
    """
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            # raw_decode tolerates trailing text after the JSON document.
            parsed, _ = decoder.raw_decode(text[start:])
        except json.JSONDecodeError:
            start = text.find("{", start + 1)
        else:
            return parsed
    return json.loads(text)


def getmoodboardprompts(designreport):
    """Ask the chat model for one text-to-image prompt per design concept.

    Args:
        designreport: The design report text; interpolated into the prompt.

    Returns:
        The JSON value decoded from the model's reply — per the prompt, a
        mapping from concept name to mood-board prompt. Text surrounding the
        JSON object (code fences, commentary) is ignored.

    Raises:
        json.JSONDecodeError: If the reply contains no decodable JSON.
    """
    mood_board_descriptions_prompt = f"""
    ### interior design report plan
    {designreport}
    ###

    Generate a text prompt for each of the interior design concepts described in the interior design report plan that can be sent to a text-to-image model and receive a design project mood board.
    The prompt should clearly describe what should go onto the moodboard for each design concept and be structured JSON. For example:
    {{
        "Concept1": "Create a mood board for a modern cozy retreat bedroom with a warm and inviting atmosphere. Include a white and brown color palette, modern and contemporary furniture with clean lines, a cozy and functional bed, nightstands with elegant designs, a bench at the foot of the bed with storage, sheer curtains on the window, floor lamps and table lamps with layered lighting effects, potted plants, a vase with branches and twigs, a bowl, a clock, and books on the nightstands.",
        "Concept2": "Create a mood board for another concept..."
    }}
    Only output the JSON, nothing else, no explanations or commentary.
    """
    print(f"\nmood_board_descriptions_prompt:\n{mood_board_descriptions_prompt}\n")
    mood_board_descriptions = deepseek_chat(mood_board_descriptions_prompt)
    print(f"\nmood_board_descriptions_prompt chat_response\n{mood_board_descriptions}\n")
    return _parse_first_json_object(mood_board_descriptions)

def generate_moodboards(mb_prompts):
    """Generate one mood-board image per concept and save each to disk.

    Args:
        mb_prompts: Mapping from concept name to text-to-image prompt.

    Returns:
        A dict mapping each original concept name to the path of the file
        written for it (``moodboard_<name>.jpg`` in the working directory).
        An empty mapping yields an empty dict.
    """
    import re  # local import: only needed for filename sanitising

    moodboard_images = {}
    for concept, prompt in mb_prompts.items():
        image_data = generate_mood_board_image(prompt)
        # Concept names come from model output, so they may contain path
        # separators, spaces or other characters that are unsafe in a file
        # name. Collapse anything outside [A-Za-z0-9_-] to an underscore;
        # plain names such as "Concept1" are left unchanged.
        safe_name = re.sub(r"[^A-Za-z0-9_-]+", "_", str(concept))
        file_path = f"moodboard_{safe_name}.jpg"
        with open(file_path, "wb") as image_file:
            image_file.write(image_data)
        moodboard_images[concept] = file_path
    return moodboard_images

def add_moodboards_to_report(moodboard_images, report):
    """Ask the chat model to place mood-board image paths into the report.

    Args:
        moodboard_images: Mapping of concept name to image path; interpolated
            into the prompt via its string representation.
        report: The design report text containing mood-board placeholders.

    Returns:
        The chat model's reply as returned by ``deepseek_chat`` — expected to
        be the revised report.
    """
    insertion_request = f"""
    mood board images
    <<<
    {moodboard_images}
    >>>

    report
    <<<
    {report}
    >>>

    Insert paths for each mood board image into the respective placeholder for each in the report and respond with the revised report with moodboard images inserted only, no explanations or commentary
    """
    # Echo the request and the reply to stdout for tracing.
    print(f"\nadd_moodboards_prompt\n{insertion_request}\n")
    updated_report = deepseek_chat(insertion_request)
    print(f"\nrevised_report\n{updated_report}\n")
    return updated_report

# Gradio Interface
def process_images(design_images, room_image):
    """Run the full pipeline behind the Gradio interface.

    Steps: describe the inspiration images and the room, merge the
    inspiration features into a preference map, generate the design report,
    derive mood-board prompts, render the mood boards, and ask the chat model
    to insert the image paths into the report.

    Args:
        design_images: Gallery entries with the inspiration images.
        room_image: PIL image of the client's room.

    Returns:
        A ``(revised_report, preference_map)`` tuple of strings. The
        preference map is always a valid JSON document: when the model's
        reply is not parseable JSON it is wrapped as
        ``{"raw_response": "<reply>"}`` so the JSON output component can
        still display it.
    """
    design_descriptions = extract_features(design_images)
    room_description = describe_room(room_image)

    preference_map = merge_features(design_descriptions)
    print(f"\npreference_map\n{preference_map}\n")

    design_report = create_design_concept_report(room_description, preference_map)
    print(f"\ndesign_report\n{design_report}\n")

    mb_prompts = getmoodboardprompts(design_report)
    print(f"\nmb_prompts\n{mb_prompts}\n")

    moodboard_images = generate_moodboards(mb_prompts)
    print(f"\nmoodboard_images\n{moodboard_images}\n")

    revised_report = add_moodboards_to_report(moodboard_images, design_report)
    print("revised_report")
    print(revised_report)
    print("preference map")
    print(preference_map)

    # The preference map is free-form model output, but the JSON output
    # component expects a valid JSON string. Keep the reply untouched when it
    # parses; otherwise wrap it so the UI shows the text instead of failing.
    try:
        json.loads(preference_map)
    except (TypeError, ValueError):
        preference_map = json.dumps({"raw_response": preference_map})
    return revised_report, preference_map

# Input widgets: a gallery of inspiration images and one photo of the room,
# both delivered to the callback as PIL images.
gallery = gr.Gallery(type="pil", label="Upload Images of Preferred Design Styles")
image_input = gr.Image(type="pil", label="Upload Image of Your Room")

# Output widgets: the markdown report and the JSON preference map.
report_output = gr.Markdown(label="Design Concept Report with Mood Boards")
json_output = gr.JSON(label="Design Preference Map")

# Wire the widgets to the pipeline entry point.
interface = gr.Interface(
    process_images,
    [gallery, image_input],
    [report_output, json_output],
    description="Upload images of your preferred interior design styles and a photo of your room to receive a custom design concept report and preference map.",
    title="Interior Design Assistant",
)

# Start the web server as soon as the module is executed.
interface.launch()