update initialize_model_once and create_llm_pipeline to handle error Failed to create pipeline: 'model'
app.py
CHANGED
@@ -224,21 +224,37 @@ def initialize_model_once(model_key):
         print(f"Error loading model {model_name}: {str(e)}")
         print(traceback.format_exc())
         raise RuntimeError(f"Failed to load model {model_name}: {str(e)}")
-
+
+    # Final verification that model loaded correctly
+    if MODEL_CACHE["model"] is None:
+        print(f"WARNING: Model {model_name} appears to be None after loading")
+        # Try to free memory before returning
+        torch.cuda.empty_cache() if torch.cuda.is_available() else None
+        gc.collect()
+
     return MODEL_CACHE["tokenizer"], MODEL_CACHE["model"], MODEL_CACHE.get("is_gguf", False)
 
 def create_llm_pipeline(model_key):
-    """Create a new pipeline using the specified model"""
+    """Create a new pipeline using the specified model with better error handling"""
     try:
         print(f"Creating pipeline for model: {model_key}")
         tokenizer, model, is_gguf = initialize_model_once(model_key)
 
+        # Additional check to ensure model was properly loaded
+        if model is None:
+            print(f"Model is None for {model_key}, falling back to alternate model")
+            fallback_model = get_fallback_model(model_key)
+            if fallback_model != model_key:
+                print(f"Attempting to use fallback model: {fallback_model}")
+                tokenizer, model, is_gguf = initialize_model_once(fallback_model)
+                if model is None:
+                    raise ValueError(f"Both original and fallback models failed to load")
+            else:
+                raise ValueError(f"Model is None and no fallback available")
+
         # Get the model info for reference
-
+        model_info = MODEL_CONFIG.get(model_key, MODEL_CONFIG.get(fallback_model, {}))
 
-        if model is None:
-            raise ValueError(f"Model is None for {model_key}")
-
         # For GGUF models from llama-cpp-python
         if is_gguf:
             # Create adapter to use GGUF model like HF pipeline