Spaces:

Csplk
/

moondream2-batch-processing

Paused

App Files Files Community

Csplk committed 22 days ago

Commit

7ab08cb

verified ·

1 Parent(s): 347e03a

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -13

app.py CHANGED Viewed

@@ -6,20 +6,60 @@ from threading import Thread
 from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
 from PIL import ImageDraw
 from torchvision.transforms.v2 import Resize
-#import subprocess
-#subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
-#subprocess.run('cp -r moondream/torch clients/python/moondream/torch')
-#subprocess.run('pip install moondream[gpu]')
 #model_id = "vikhyatk/moondream2"
 #revision = "2025-01-09"
 #def load_moondream():
-#    """Load Moondream model and tokenizer."""
-#    model = AutoModelForCausalLM.from_pretrained(
 #        "vikhyatk/moondream2", trust_remote_code=True, device_map={"": "cuda"}
 #    )
 #    tokenizer = AutoTokenizer.from_pretrained("vikhyatk/moondream2")
@@ -33,14 +73,13 @@ from torchvision.transforms.v2 import Resize
 #moondream.eval()
-"""Load Moondream model and tokenizer."""
-moondream = AutoModelForCausalLM.from_pretrained(
-  "vikhyatk/moondream2",
-  revision="2025-01-09",
-  trust_remote_code=True,
-  device_map={"": "cuda"},
 )
-tokenizer = AutoTokenizer.from_pretrained("vikhyatk/moondream2")
 @spaces.GPU(durtion="150")
 def answer_questions(image_tuples, prompt_text):
@@ -68,6 +107,18 @@ def answer_questions(image_tuples, prompt_text):
     #print("result\n{}\n\nQ_and_A\n{}\n\n".format(result, Q_and_A))
     return Q_and_A, result
 with gr.Blocks() as demo:
     gr.Markdown("# moondream2 unofficial batch processing demo")
     gr.Markdown("1. Select images\n2. Enter one or more prompts separated by commas. Ex: Describe this image, What is in this image?\n\n")

 from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
 from PIL import ImageDraw
 from torchvision.transforms.v2 import Resize
+from transformers import AutoModelForCausalLM
+moondream = AutoModelForCausalLM.from_pretrained(
+    "moondream/moondream3-preview",
+    trust_remote_code=True,
+    dtype=torch.bfloat16,
+    device_map={"": "cuda"},
+)
+moondream.compile()
+# Encode image once
+image = Image.open("complex_scene.jpg")
+encoded = moondream.encode_image(image)
+# Reuse the encoding for multiple queries
+questions = [
+    "How many people are in this image?",
+    "What time of day was this taken?",
+    "What's the weather like?"
+]
+for q in questions:
+    result = moondream.query(image=encoded, question=q, reasoning=False)
+    print(f"Q: {q}")
+    print(f"A: {result['answer']}\n")
+# Also works with other skills
+caption = moondream.caption(encoded, length="normal")
+objects = moondream.detect(encoded, "poop")
+pointe = moondream.point(encoded, "grass")
+print(f"caption: {e}, objects:{g}, point:{h}")
+# Segment an object
+result = moondream.segment(image, "cat")
+svg_path = result["path"]
+bbox = result["bbox"]
+print(f"SVG Path: {svg_path[:100]}...")
+print(f"Bounding box: {bbox}")
+# With spatial hint (point) to guide segmentation
+result = model.segment(image, "cat", spatial_refs=[[0.5, 0.3]])
+# With spatial hint (bounding box)
+result = model.segment(image, "cat", spatial_refs=[[0.2, 0.1, 0.8, 0.9]])
+"""
 #model_id = "vikhyatk/moondream2"
 #revision = "2025-01-09"
 #def load_moondream():
+#    Load Moondream model and tokenizer.
+ #    model = AutoModelForCausalLM.from_pretrained(
 #        "vikhyatk/moondream2", trust_remote_code=True, device_map={"": "cuda"}
 #    )
 #    tokenizer = AutoTokenizer.from_pretrained("vikhyatk/moondream2")
 #moondream.eval()
+model = AutoModelForCausalLM.from_pretrained(
+    "vikhyatk/moondream2",
+    trust_remote_code=True,
+    dtype=torch.bfloat16,
+    device_map="cuda", # "cuda" on Nvidia GPUs
 )
+"""
 @spaces.GPU(durtion="150")
 def answer_questions(image_tuples, prompt_text):
     #print("result\n{}\n\nQ_and_A\n{}\n\n".format(result, Q_and_A))
     return Q_and_A, result
+"""
+Load Moondream model and tokenizer.
+moondream = AutoModelForCausalLM.from_pretrained(
+  "vikhyatk/moondream2",
+  revision="2025-01-09",
+  trust_remote_code=True,
+  device_map={"": "cuda"},
+)
+tokenizer = AutoTokenizer.from_pretrained("vikhyatk/moondream2")
+"""
 with gr.Blocks() as demo:
     gr.Markdown("# moondream2 unofficial batch processing demo")
     gr.Markdown("1. Select images\n2. Enter one or more prompts separated by commas. Ex: Describe this image, What is in this image?\n\n")