Alina Lozovskaya committed on
Commit
24c038d
·
1 Parent(s): 4457c15

Moved visual init to vision.py

Browse files
src/reachy_mini_conversation_demo/vision.py CHANGED
@@ -2,6 +2,7 @@ import base64
2
  import logging
3
  import os
4
  import time
 
5
  import asyncio
6
  from typing import Dict, Any
7
  import threading
@@ -11,7 +12,7 @@ import cv2
11
  import numpy as np
12
  import torch
13
  from transformers import AutoModelForImageTextToText, AutoProcessor
14
-
15
 
16
  logger = logging.getLogger(__name__)
17
 
@@ -300,3 +301,68 @@ class VisionManager:
300
  "device": self.processor.device,
301
  },
302
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import logging
3
  import os
4
  import time
5
+ import sys
6
  import asyncio
7
  from typing import Dict, Any
8
  import threading
 
12
  import numpy as np
13
  import torch
14
  from transformers import AutoModelForImageTextToText, AutoProcessor
15
+ from huggingface_hub import snapshot_download
16
 
17
  logger = logging.getLogger(__name__)
18
 
 
301
  "device": self.processor.device,
302
  },
303
  }
304
+
305
+
306
+
307
def init_camera(camera_index=0, simulation=True):
    """Open and return the camera used for vision processing.

    Args:
        camera_index: Device index passed to OpenCV. It is only used on
            macOS when ``simulation`` is false; the simulation path always
            opens device 0.
        simulation: When true, open the default built-in camera (index 0).

    Returns:
        The ``cv2.VideoCapture`` created here, or whatever ``find_camera()``
        returns on non-macOS hosts outside simulation.
    """
    on_macos = sys.platform == "darwin"

    if simulation:
        # Default built-in camera in SIM.
        # TODO: please, test on Linux and Windows
        # TODO simulation in find_camera
        # AVFoundation is requested explicitly on macOS; 0 is passed as the
        # API preference on every other platform.
        backend = cv2.CAP_AVFOUNDATION if on_macos else 0
        return cv2.VideoCapture(0, backend)

    # TODO handle macos in find_camera
    if on_macos:
        return cv2.VideoCapture(camera_index, cv2.CAP_AVFOUNDATION)

    # NOTE(review): find_camera is not defined in the visible part of this
    # file; presumably it performs camera discovery -- confirm it is in scope.
    return find_camera()
326
+
327
+
328
def init_vision(camera: cv2.VideoCapture) -> "VisionManager | None":
    """Prefetch the vision model and build a ``VisionManager`` for ``camera``.

    The Hugging Face cache directory is taken from ``HF_HOME`` (falling back
    to ``~/.cache/huggingface``), created if needed and written back to the
    ``HF_HOME`` environment variable. The model snapshot is then downloaded
    before the manager is constructed.

    Args:
        camera: The capture device handed to ``VisionManager``.

    Returns:
        The configured ``VisionManager``, or ``None`` when the cache
        directory could not be prepared (a warning is logged in that case).
    """
    model_id = "HuggingFaceTB/SmolVLM2-2.2B-Instruct"

    # expanduser() resolves the home directory even when $HOME is not set
    # (e.g. on Windows), where expandvars() alone would leave "$HOME" as a
    # literal path component.
    cache_dir = os.path.expanduser(
        os.path.expandvars(os.getenv("HF_HOME", "~/.cache/huggingface"))
    )

    try:
        os.makedirs(cache_dir, exist_ok=True)
        os.environ["HF_HOME"] = cache_dir
    except (OSError, ValueError) as e:
        # Best effort: without a usable cache dir, vision is not initialised.
        logger.warning("Failed to prepare HF cache dir %s: %s", cache_dir, e)
        return None
    logger.info("HF_HOME set to %s", cache_dir)

    # NOTE(review): the snapshot is stored directly under cache_dir; confirm
    # that the model loader reads from the same location, otherwise the
    # weights may be downloaded a second time.
    snapshot_download(
        repo_id=model_id,
        repo_type="model",
        cache_dir=cache_dir,
    )
    logger.info("Prefetched model_id=%s into cache_dir=%s", model_id, cache_dir)

    # Configure the vision-language model (VLM) processing.
    vision_config = VisionConfig(
        model_path=model_id,
        vision_interval=5.0,
        max_new_tokens=64,
        temperature=0.7,
        jpeg_quality=85,
        max_retries=3,
        retry_delay=1.0,
        device_preference="auto",
    )

    vision_manager = VisionManager(camera, vision_config)

    device_info = vision_manager.processor.get_model_info()
    # Lazy %-style arguments instead of an f-string that reuses double quotes
    # inside its replacement fields, which only parses on Python 3.12+.
    logger.info(
        "Vision processing enabled: %s on %s memory: %s",
        device_info["model_path"],
        device_info["device"],
        device_info.get("gpu_memory", "N/A"),
    )

    return vision_manager