import os
import json
import re
import requests
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
# Check for CUDA availability for PyTorch
if torch.cuda.is_available():
device, dtype = "cuda", torch.bfloat16
else:
device, dtype = "cpu", torch.float32
# Load Moondream3 Preview for image analysis
moondream3_model_id = "moondream/moondream3-preview"
tokenizer_moondream3 = AutoTokenizer.from_pretrained(moondream3_model_id)
moondream3 = AutoModelForCausalLM.from_pretrained(
moondream3_model_id,
trust_remote_code=True,
torch_dtype=dtype,
device_map={"": device}
).eval()
moondream3.compile() # Optional: speeds up inference
# Initialize DeepSeek-V2 for chat completion
deepseek_model_name = "deepseek-ai/DeepSeek-V2"
tokenizer_deepseek = AutoTokenizer.from_pretrained(deepseek_model_name, trust_remote_code=True)
deepseek_model = AutoModelForCausalLM.from_pretrained(
    deepseek_model_name,
    trust_remote_code=True,
    torch_dtype=dtype,
    device_map="auto"
)
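# Shared text-generation pipeline; the sampling settings favor descriptive
# but varied output for the report-writing prompts below.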
chat_pipe = pipeline(
"text-generation",
model=deepseek_model,
tokenizer=tokenizer_deepseek,
max_new_tokens=512,
temperature=0.7,
top_p=0.9,
repetition_penalty=1.1,
do_sample=True,
)
def deepseek_chat(user_message, is_json=False):
    """Single-turn chat completion using DeepSeek-V2; is_json flags that the
    caller expects JSON back (the prompt itself requests it)."""
    # Build the prompt with the model's own chat template instead of
    # hand-rolled special tokens, which DeepSeek-V2 does not define.
    messages = [{"role": "user", "content": user_message}]
    prompt = tokenizer_deepseek.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    # return_full_text=False strips the prompt, so the output is the reply.
    response = chat_pipe(prompt, return_full_text=False)[0]["generated_text"]
    return response.strip()
# Shared schema for Moondream3 feature extraction: one question per design
# attribute, answered for every image.
FEATURE_HEADERS = ["Image", "Layout", "Decor", "Atmosphere", "Lighting", "Color scheme", "Furniture style"]
FEATURE_PROMPTS = [
    "Describe the spatial arrangement of furniture, walls, and other elements in this image.",
    "What type, style, and arrangement of decorative elements are present in this image?",
    "What mood, ambiance, and overall feeling does this image evoke?",
    "What type, intensity, placement, and direction of lighting is present in this image?",
    "What are the dominant colors, color palette, and color harmony in this image?",
    "What type, shape, material, and arrangement of furniture is present in this image?"
]
# Extract design features from the inspiration images using Moondream3
def extract_features(image_tuples):
    # gr.Gallery with type="pil" yields (PIL.Image, caption) tuples.
    images = [img[0].convert("RGB") for img in image_tuples if img[0] is not None]
    answers = []
    for prompt in FEATURE_PROMPTS:
        image_answers = moondream3.batch_answer(
            images=images,
            prompts=[prompt] * len(images),
            tokenizer=tokenizer_moondream3,
        )
        answers.append(image_answers)
    data = []
    # Iterate over the filtered image list so indices line up with answers.
    for i in range(len(images)):
        image_name = f"image{i+1}"
        image_answers = [answer[i] for answer in answers]
        print(f"{image_name}_answers \n {image_answers} \n")
        data.append([image_name] + image_answers)
    return {'headers': FEATURE_HEADERS, 'data': data}
# Describe the client's room from a single image using Moondream3
def describe_room(image):
    # Ask every feature prompt about the same room photo.
    images = [image.convert("RGB")] * len(FEATURE_PROMPTS)
    answers = moondream3.batch_answer(
        images=images,
        prompts=FEATURE_PROMPTS,
        tokenizer=tokenizer_moondream3,
    )
    image_name = "ClientRoom"
    print(f"{image_name}_answers \n {answers} \n")
    return {'headers': FEATURE_HEADERS, 'data': [[image_name] + answers]}
def merge_features(inspiration_features):
    preference_map_extraction = f"""
    You are one of the world's most knowledgeable minds in both theoretical and applied interior design.
    - You are detailed and meticulous.
    - You can distil a large, potentially unstructured, potentially multimodal range of input data into a highly accurate, all-encompassing representation of the source's interior design preferences by mapping the input onto a model of fundamental interior design components.
    - You can produce professionally structured, fully detailed, well-thought-out interior design proposals, from initial conceptualization and planning to a complete, finished design for a real-world space.
    - More generally, you can answer any question or assist with any task in the realm of applied and theoretical interior design.
    Your task: analyze the interior design information given between <<< and >>> and merge the analysis results into a comprehensive design style preference map for the user who uploaded the images. Return the result as JSON.
    <<<
    {inspiration_features}
    >>>
    """
    print(f"\npreference_map_extraction prompt\n{preference_map_extraction}\n")
    prefmap = deepseek_chat(preference_map_extraction, is_json=True)
    print(f"\nmerge_features chat_response\n{prefmap}\n")
    return prefmap
def create_design_concept_report(room_description, inspiration_features):
    design_report_prompt = f"""
    Generate a detailed interior design plan proposal report, structured as markdown.
    - The report should present three design concepts for the client's space, based on the description of the client's room (the target of the project) and the design preference map generated from the inspirational images they uploaded.
    - The report should have an introduction, and each concept section should open with a placeholder for a mood board image, followed by sections on Style Preference, Color Scheme, Furniture Style, Lighting, Atmosphere, Decor, and Layout.
    - Finally, the report should close with a summary of the design plan.
    Detailed information about the client's room, based on the photo they uploaded:
    {room_description}
    Design preference map generated from the inspirational design images they uploaded:
    {inspiration_features}
    """
    print(f"\ndesign_report_prompt\n{design_report_prompt}\n")
    designreport = deepseek_chat(design_report_prompt)
    print(f"\ndesign concept chat_response\n{designreport}\n")
    return designreport
def queryllm(payload):
    """POST a raw payload to the hosted text-generation endpoint.
    Currently unused; kept as a helper for offloading generation to an API."""
    response = requests.post(textgen_API_URL, headers=headers, json=payload)
    print(response)
    return response.json()
def generate_mood_board_image(prompt):
    """Render a mood board via the text-to-image endpoint; returns raw image bytes."""
    payload = {"inputs": prompt}
    response = requests.post(texttoimage_API_URL, headers=headers, json=payload)
    return response.content
def getmoodboardprompts(designreport):
    mood_board_descriptions_prompt = f"""
    ### interior design report plan
    {designreport}
    ###
    For each interior design concept described in the report plan above, generate a text prompt that can be sent to a text-to-image model to produce a design project mood board.
    Each prompt should clearly describe what should go onto the mood board for its concept, and the output should be structured as JSON. For example:
    {{
    "Concept1": "Create a mood board for a modern cozy retreat bedroom with a warm and inviting atmosphere. Include a white and brown color palette, modern and contemporary furniture with clean lines, a cozy and functional bed, nightstands with elegant designs, a bench at the foot of the bed with storage, sheer curtains on the window, floor lamps and table lamps with layered lighting effects, potted plants, a vase with branches and twigs, a bowl, a clock, and books on the nightstands.",
    "Concept2": "Create a mood board for another concept..."
    }}
    Only output the JSON, nothing else, no explanations or commentary.
    """
    print(f"\nmood_board_descriptions_prompt:\n{mood_board_descriptions_prompt}\n")
    mood_board_descriptions = deepseek_chat(mood_board_descriptions_prompt)
    print(f"\nmood_board_descriptions chat_response\n{mood_board_descriptions}\n")
    # LLMs often wrap JSON in code fences or add stray text, so extract the
    # first {...} block before parsing.
    match = re.search(r"\{.*\}", mood_board_descriptions, re.DOTALL)
    return json.loads(match.group(0) if match else mood_board_descriptions)
def generate_moodboards(mb_prompts):
    """Render each concept's mood board prompt to an image file on disk."""
    moodboard_images = {}
    for concept, prompt in mb_prompts.items():
        image_data = generate_mood_board_image(prompt)
        file_path = f"moodboard_{concept}.jpg"
        with open(file_path, "wb") as f:
            f.write(image_data)
        moodboard_images[concept] = file_path
    return moodboard_images
def add_moodboards_to_report(moodboard_images, report):
    add_moodboards_prompt = f"""
    mood board images
    <<<
    {moodboard_images}
    >>>
    report
    <<<
    {report}
    >>>
    Insert the path of each mood board image into its respective placeholder in the report. Respond with the revised report only, no explanations or commentary.
    """
    print(f"\nadd_moodboards_prompt\n{add_moodboards_prompt}\n")
    revised_report = deepseek_chat(add_moodboards_prompt)
    print(f"\nrevised_report\n{revised_report}\n")
    return revised_report
# End-to-end pipeline, run once per Gradio submission: Moondream3 analyzes
# the inspiration images and the client's room, DeepSeek-V2 merges the
# analyses into a preference map and drafts the design report, the
# text-to-image endpoint renders one mood board per concept, and the mood
# board paths are inserted back into the report.
def process_images(design_images, room_image):
design_descriptions = extract_features(design_images)
room_description = describe_room(room_image)
preference_map = merge_features(design_descriptions)
print(f"\npreference_map\n{preference_map}\n")
design_report = create_design_concept_report(room_description, preference_map)
print(f"\ndesign_report\n{design_report}\n")
mb_prompts = getmoodboardprompts(design_report)
print(f"\nmb_prompts\n{mb_prompts}\n")
moodboard_images = generate_moodboards(mb_prompts)
print(f"\nmoodboard_images\n{moodboard_images}\n")
revised_report = add_moodboards_to_report(moodboard_images, design_report)
print("revised_report")
print(revised_report)
print("preference map")
print(preference_map)
return revised_report, preference_map
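# Gradio UI: a gallery of inspiration images plus a single room photo in,
# a markdown design report and a JSON preference map out.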
gallery = gr.components.Gallery(label="Upload Images of Preferred Design Styles", type="pil")
image_input = gr.components.Image(label="Upload Image of Your Room", type="pil")
report_output = gr.components.Markdown(label="Design Concept Report with Mood Boards")
json_output = gr.components.JSON(label="Design Preference Map")
interface = gr.Interface(
fn=process_images,
inputs=[gallery, image_input],
outputs=[report_output, json_output],
title="Interior Design Assistant",
description="Upload images of your preferred interior design styles and a photo of your room to receive a custom design concept report and preference map."
)
if __name__ == "__main__":
    interface.launch()