import os
import sys
import torch
from openai import OpenAI
from transformers import (
    LlavaNextProcessor, LlavaNextForConditionalGeneration,
    Qwen2VLForConditionalGeneration, Qwen2VLProcessor
)
# Initialize the device and dtype used when loading local VLM weights.
# Note: float16 has limited operator support on CPU in PyTorch, so
# torch.bfloat16 or torch.float32 may be safer if inference really runs on CPU.
device = "cpu"
torch_dtype = torch.float16
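# Refactor sketch (not in the original source): every entry below repeats the
# same "use the local checkpoint if present, otherwise pull from the Hugging
# Face Hub" fallback. A helper along these lines would remove that duplication;
# the name `load_processor_and_model` is illustrative only.
#
# def load_processor_and_model(processor_cls, model_cls, local_path, hub_id):
#     source = local_path if os.path.exists(local_path) else hub_id
#     processor = processor_cls.from_pretrained(source)
#     model = model_cls.from_pretrained(
#         source, torch_dtype=torch_dtype, device_map=device
#     ).to(device)
#     return processor, model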
vlms_list = [
    # Alternative local VLM backends, kept for reference; uncomment an entry to enable it.
    # {
    #     "type": "llava-next",
    #     "name": "llava-v1.6-mistral-7b-hf",
    #     "local_path": "models/vlms/llava-v1.6-mistral-7b-hf",
    #     "processor": LlavaNextProcessor.from_pretrained(
    #         "models/vlms/llava-v1.6-mistral-7b-hf"
    #     ) if os.path.exists("models/vlms/llava-v1.6-mistral-7b-hf") else LlavaNextProcessor.from_pretrained(
    #         "llava-hf/llava-v1.6-mistral-7b-hf"
    #     ),
    #     "model": LlavaNextForConditionalGeneration.from_pretrained(
    #         "models/vlms/llava-v1.6-mistral-7b-hf", torch_dtype=torch_dtype, device_map=device
    #     ).to("cpu") if os.path.exists("models/vlms/llava-v1.6-mistral-7b-hf") else
    #     LlavaNextForConditionalGeneration.from_pretrained(
    #         "llava-hf/llava-v1.6-mistral-7b-hf", torch_dtype=torch_dtype, device_map=device
    #     ).to("cpu"),
    # },
    # {
    #     "type": "llava-next",
    #     "name": "llama3-llava-next-8b-hf (Preload)",
    #     "local_path": "models/vlms/llama3-llava-next-8b-hf",
    #     "processor": LlavaNextProcessor.from_pretrained(
    #         "models/vlms/llama3-llava-next-8b-hf"
    #     ) if os.path.exists("models/vlms/llama3-llava-next-8b-hf") else LlavaNextProcessor.from_pretrained(
    #         "llava-hf/llama3-llava-next-8b-hf"
    #     ),
    #     "model": LlavaNextForConditionalGeneration.from_pretrained(
    #         "models/vlms/llama3-llava-next-8b-hf", torch_dtype=torch_dtype, device_map=device
    #     ).to("cpu") if os.path.exists("models/vlms/llama3-llava-next-8b-hf") else
    #     LlavaNextForConditionalGeneration.from_pretrained(
    #         "llava-hf/llama3-llava-next-8b-hf", torch_dtype=torch_dtype, device_map=device
    #     ).to("cpu"),
    # },
    # {
    #     "type": "llava-next",
    #     "name": "llava-v1.6-vicuna-13b-hf",
    #     "local_path": "models/vlms/llava-v1.6-vicuna-13b-hf",
    #     "processor": LlavaNextProcessor.from_pretrained(
    #         "models/vlms/llava-v1.6-vicuna-13b-hf"
    #     ) if os.path.exists("models/vlms/llava-v1.6-vicuna-13b-hf") else LlavaNextProcessor.from_pretrained(
    #         "llava-hf/llava-v1.6-vicuna-13b-hf"
    #     ),
    #     "model": LlavaNextForConditionalGeneration.from_pretrained(
    #         "models/vlms/llava-v1.6-vicuna-13b-hf", torch_dtype=torch_dtype, device_map=device
    #     ).to("cpu") if os.path.exists("models/vlms/llava-v1.6-vicuna-13b-hf") else
    #     LlavaNextForConditionalGeneration.from_pretrained(
    #         "llava-hf/llava-v1.6-vicuna-13b-hf", torch_dtype=torch_dtype, device_map=device
    #     ).to("cpu"),
    # },
    # {
    #     "type": "llava-next",
    #     "name": "llava-v1.6-34b-hf",
    #     "local_path": "models/vlms/llava-v1.6-34b-hf",
    #     "processor": LlavaNextProcessor.from_pretrained(
    #         "models/vlms/llava-v1.6-34b-hf"
    #     ) if os.path.exists("models/vlms/llava-v1.6-34b-hf") else LlavaNextProcessor.from_pretrained(
    #         "llava-hf/llava-v1.6-34b-hf"
    #     ),
    #     "model": LlavaNextForConditionalGeneration.from_pretrained(
    #         "models/vlms/llava-v1.6-34b-hf", torch_dtype=torch_dtype, device_map=device
    #     ).to("cpu") if os.path.exists("models/vlms/llava-v1.6-34b-hf") else
    #     LlavaNextForConditionalGeneration.from_pretrained(
    #         "llava-hf/llava-v1.6-34b-hf", torch_dtype=torch_dtype, device_map=device
    #     ).to("cpu"),
    # },
    # {
    #     "type": "qwen2-vl",
    #     "name": "Qwen2-VL-2B-Instruct",
    #     "local_path": "models/vlms/Qwen2-VL-2B-Instruct",
    #     "processor": Qwen2VLProcessor.from_pretrained(
    #         "models/vlms/Qwen2-VL-2B-Instruct"
    #     ) if os.path.exists("models/vlms/Qwen2-VL-2B-Instruct") else Qwen2VLProcessor.from_pretrained(
    #         "Qwen/Qwen2-VL-2B-Instruct"
    #     ),
    #     "model": Qwen2VLForConditionalGeneration.from_pretrained(
    #         "models/vlms/Qwen2-VL-2B-Instruct", torch_dtype=torch_dtype, device_map=device
    #     ).to("cpu") if os.path.exists("models/vlms/Qwen2-VL-2B-Instruct") else
    #     Qwen2VLForConditionalGeneration.from_pretrained(
    #         "Qwen/Qwen2-VL-2B-Instruct", torch_dtype=torch_dtype, device_map=device
    #     ).to("cpu"),
    # },
    {
        "type": "qwen2-vl",
        "name": "Qwen2-VL-7B-Instruct (Default)",
        "local_path": "models/vlms/Qwen2-VL-7B-Instruct",
        # Prefer the local checkpoint; fall back to downloading from the Hub.
        "processor": Qwen2VLProcessor.from_pretrained(
            "models/vlms/Qwen2-VL-7B-Instruct"
        ) if os.path.exists("models/vlms/Qwen2-VL-7B-Instruct") else Qwen2VLProcessor.from_pretrained(
            "Qwen/Qwen2-VL-7B-Instruct"
        ),
        # `.to(device)` is redundant with device_map=device; kept to make the
        # final placement explicit.
        "model": Qwen2VLForConditionalGeneration.from_pretrained(
            "models/vlms/Qwen2-VL-7B-Instruct", torch_dtype=torch_dtype, device_map=device
        ).to(device) if os.path.exists("models/vlms/Qwen2-VL-7B-Instruct") else
        Qwen2VLForConditionalGeneration.from_pretrained(
            "Qwen/Qwen2-VL-7B-Instruct", torch_dtype=torch_dtype, device_map=device
        ).to(device),
    },
    {
        "type": "openai",
        "name": "GPT4-o (Highly Recommended)",
        # API-backed entry: there are no local weights, so local_path,
        # processor, and model stay empty; requests are presumably routed
        # through the OpenAI client imported above.
        "local_path": "",
        "processor": "",
        "model": ""
    },
]
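# Dispatch sketch for the "openai" entry (hypothetical; the real dispatch code
# is outside this snippet). Assumes OPENAI_API_KEY is set in the environment.
# client = OpenAI()
# response = client.chat.completions.create(
#     model="gpt-4o",
#     messages=[{"role": "user", "content": "Describe this image."}],
# )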
# Map each display name to its (type, local_path, processor, model) tuple for lookup.
vlms_template = {k["name"]: (k["type"], k["local_path"], k["processor"], k["model"]) for k in vlms_list}
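# Usage sketch (hypothetical; the consuming code is outside this snippet):
# vlm_type, local_path, processor, model = vlms_template["Qwen2-VL-7B-Instruct (Default)"]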