Error handling for the Phi-4, DeepSeek Lite, and Flan T5 models; add fallback model logic
app.py
CHANGED
@@ -170,6 +170,22 @@ def initialize_model_once(model_key):
         )
         MODEL_CACHE["is_gguf"] = False
 
+    # Special handling for models that cause memory issues
+    elif model_key in ["Phi-4 Mini Instruct", "DeepSeek Lite Chat"]:
+        # Reduce memory footprint
+        os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:64"
+
+        # For CPU-only environments, load with 8-bit quantization
+        MODEL_CACHE["tokenizer"] = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+        MODEL_CACHE["model"] = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            load_in_8bit=True,  # Use 8-bit instead of 4-bit
+            device_map="auto" if torch.cuda.is_available() else None,
+            low_cpu_mem_usage=True,
+            trust_remote_code=True
+        )
+        MODEL_CACHE["is_gguf"] = False
+
     # Handle standard HF models
     else:
         # Only use quantization if CUDA is available
@@ -247,7 +263,7 @@ def create_llm_pipeline(model_key):
                 max_new_tokens=256,  # Increased for more comprehensive answers
                 temperature=0.3,
                 top_p=0.9,
-                return_full_text
+                # Remove return_full_text parameter for T5 models
             )
         else:
             print("Creating causal LM pipeline")
@@ -271,22 +287,47 @@ def create_llm_pipeline(model_key):
         print(traceback.format_exc())
         raise RuntimeError(f"Failed to create pipeline: {str(e)}")
 
+def get_fallback_model(current_model):
+    """Get appropriate fallback model for problematic models"""
+    fallback_map = {
+        "Phi-4 Mini Instruct": "TinyLlama Chat",
+        "DeepSeek Lite Chat": "DeepSeek Coder Instruct",
+        "Flan T5 Small": "Llama 2 Chat"
+    }
+    return fallback_map.get(current_model, "TinyLlama Chat")
+
+# Modified handle_model_loading_error function
 def handle_model_loading_error(model_key, session_id):
-    """Handle model loading errors by providing alternative model suggestions"""
+    """Handle model loading errors by providing alternative model suggestions or fallbacks"""
+    # Get the appropriate fallback model
+    fallback_model = get_fallback_model(model_key)
+
+    # Try to load the fallback model automatically
+    if fallback_model != model_key:
+        print(f"Automatically trying fallback model: {fallback_model} for {model_key}")
+
+        try:
+            # Try to initialize the fallback model
+            tokenizer, model, is_gguf = initialize_model_once(fallback_model)
+            return tokenizer, model, is_gguf, f"Model {model_key} couldn't be loaded. Automatically switched to {fallback_model}."
+        except Exception as e:
+            print(f"Fallback model {fallback_model} also failed: {str(e)}")
+            # If fallback fails, continue with regular suggestion logic
+
+    # Regular suggestion logic for when fallbacks don't work or aren't applicable
     suggested_models = [
         "DeepSeek Coder Instruct",  # 1.3B model
-        "Phi-4 Mini Instruct",  # Light model
         "TinyLlama Chat",  # 1.1B model
-        "
+        "Qwen2.5 Coder Instruct"  # Another option
     ]
 
-    # Remove
-
-
+    # Remove problematic models and current model from suggestions
+    problem_models = ["Phi-4 Mini Instruct", "DeepSeek Lite Chat", "Flan T5 Small"]
+    suggested_models = [m for m in suggested_models if m not in problem_models and m != model_key]
 
     suggestions = ", ".join(suggested_models[:3])  # Only show top 3 suggestions
-    return None, f"Unable to load model {model_key}. Please try another model such as: {suggestions}"
-
+    return None, None, None, f"Unable to load model {model_key}. Please try another model such as: {suggestions}"
+
 def create_conversational_chain(db, file_path, model_key):
     llm = create_llm_pipeline(model_key)
 
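To see how the new fallback chain is meant to behave end to end, here is a minimal, self-contained sketch. The fallback map and the try-then-fall-back pattern mirror get_fallback_model() and handle_model_loading_error() above; initialize_model_once() is stubbed here, and load_with_fallback() is a hypothetical helper for illustration, not a function in app.py.

# Sketch only: stubbed loader and hypothetical load_with_fallback(); the map mirrors the commit.
FALLBACK_MAP = {
    "Phi-4 Mini Instruct": "TinyLlama Chat",
    "DeepSeek Lite Chat": "DeepSeek Coder Instruct",
    "Flan T5 Small": "Llama 2 Chat",
}

def get_fallback_model(current_model: str) -> str:
    """Return the preferred fallback for a problematic model (default: TinyLlama Chat)."""
    return FALLBACK_MAP.get(current_model, "TinyLlama Chat")

def initialize_model_once(model_key: str):
    """Stub loader: pretend the problematic models fail and everything else loads."""
    if model_key in FALLBACK_MAP:
        raise RuntimeError(f"{model_key} exceeded available memory")
    return f"<tokenizer:{model_key}>", f"<model:{model_key}>", False  # tokenizer, model, is_gguf

def load_with_fallback(model_key: str):
    """Try the requested model first, then its mapped fallback (hypothetical helper)."""
    try:
        return initialize_model_once(model_key), model_key
    except Exception as primary_error:
        fallback = get_fallback_model(model_key)
        print(f"{model_key} failed ({primary_error}); trying fallback {fallback}")
        return initialize_model_once(fallback), fallback

if __name__ == "__main__":
    (_tokenizer, _model, _is_gguf), used = load_with_fallback("Phi-4 Mini Instruct")
    print(f"Loaded with: {used}")  # expected: Loaded with: TinyLlama Chat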
@@ -359,6 +400,15 @@ def create_conversational_chain(db, file_path, model_key):
 
         # Clean the result
         cleaned_result = raw_result.strip()
+
+        # Add special handling for T5 models
+        if MODEL_CONFIG.get(model_key, {}).get("is_t5", False):
+            # T5 models sometimes return lists instead of strings
+            if isinstance(raw_result, list) and len(raw_result) > 0:
+                if isinstance(raw_result[0], dict) and "generated_text" in raw_result[0]:
+                    raw_result = raw_result[0]["generated_text"]
+                else:
+                    raw_result = str(raw_result[0])
 
         # If result is empty after cleaning, use a fallback
         if not cleaned_result:
@@ -615,8 +665,9 @@ def create_gradio_interface():
             outputs=[model_info]
         )
 
-        #
+        # Modified handle_process_file function
         def handle_process_file(file, model_key, sess_id):
+            """Process uploaded file with fallback model handling"""
             if file is None:
                 return None, None, False, "Please upload a CSV file first."
 
@@ -628,6 +679,19 @@ def create_gradio_interface():
                 import traceback
                 print(f"Error processing file with {model_key}: {str(e)}")
                 print(traceback.format_exc())
+
+                # Try with fallback model if original fails
+                fallback = get_fallback_model(model_key)
+                if fallback != model_key:
+                    try:
+                        print(f"Trying fallback model: {fallback}")
+                        chatbot = ChatBot(sess_id, fallback)
+                        result = chatbot.process_file(file)
+                        message = f"Original model {model_key} failed. Using {fallback} instead.\n\n{result}"
+                        return chatbot, True, [(None, message)]
+                    except Exception as fallback_error:
+                        print(f"Fallback model also failed: {str(fallback_error)}")
+
                 error_msg = f"Error with model {model_key}: {str(e)}\n\nPlease try another model."
                 return None, False, [(None, error_msg)]
 
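The T5 handling added in the create_conversational_chain hunk boils down to coercing pipeline output to a plain string before cleaning it. A minimal sketch of that normalization, assuming a hypothetical normalize_generation_output() helper (app.py does this inline rather than in a separate function):

# Sketch only: normalize_generation_output() is illustrative, not part of app.py.
def normalize_generation_output(raw_result):
    """Coerce pipeline output to a plain string (T5 pipelines may return a list of dicts)."""
    if isinstance(raw_result, list) and len(raw_result) > 0:
        first = raw_result[0]
        if isinstance(first, dict) and "generated_text" in first:
            return first["generated_text"]
        return str(first)
    return raw_result

print(normalize_generation_output([{"generated_text": "42 rows"}]))  # -> 42 rows
print(normalize_generation_output("already a string"))               # -> already a string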