{
  "title": "Stacking Mastery: 100 MCQs",
  "description": "A comprehensive set of 100 multiple-choice questions on Stacking ensemble learning, covering basic concepts, implementation, and theoretical understanding.",
  "questions": [
    { "id": 1, "questionText": "What is the main idea of Stacking in ensemble learning?", "options": ["Train models in parallel and average results", "Train sequential models to reduce bias", "Use only one strong learner", "Combine predictions of multiple models using a meta-learner"], "correctAnswerIndex": 3, "explanation": "Stacking involves combining different base learners' predictions with a meta-learner to improve overall performance." },
    { "id": 2, "questionText": "Which component in Stacking combines the outputs of base learners?", "options": ["Residual estimator", "Bootstrap sample", "Decision stump", "Meta-learner"], "correctAnswerIndex": 3, "explanation": "The meta-learner takes predictions of base learners as input and produces the final output." },
    { "id": 3, "questionText": "Stacking differs from Bagging because it:", "options": ["Uses a meta-learner to combine predictions", "Only reduces variance", "Trains models independently", "Uses bootstrapped samples only"], "correctAnswerIndex": 0, "explanation": "Stacking focuses on learning the best combination of base learners via a meta-model." },
    { "id": 4, "questionText": "Which of the following is a typical base learner in Stacking?", "options": ["Meta-learner", "Feature selector", "Residual predictor", "Decision tree"], "correctAnswerIndex": 3, "explanation": "Decision trees, logistic regression, or other models can serve as base learners." },
    { "id": 5, "questionText": "Which of these is a common meta-learner?", "options": ["Decision stump", "Bootstrap sample", "Logistic regression", "PCA"], "correctAnswerIndex": 2, "explanation": "Logistic regression or linear regression is often used as a simple meta-learner to combine predictions." },
    { "id": 6, "questionText": "Stacking is most useful when base learners are:", "options": ["Highly correlated", "Identical models", "Extremely simple only", "Diverse in type or error patterns"], "correctAnswerIndex": 3, "explanation": "Diversity among base learners allows the meta-learner to exploit complementary strengths." },
    { "id": 7, "questionText": "What is the main advantage of Stacking?", "options": ["Reduces training time", "Improves predictive performance by combining multiple models", "Always reduces bias to zero", "Eliminates the need for parameter tuning"], "correctAnswerIndex": 1, "explanation": "By learning from multiple base models, Stacking often achieves higher accuracy than any single model." },
    { "id": 8, "questionText": "In Stacking, which data is used to train the meta-learner?", "options": ["Original training data only", "Residuals of base learners", "Randomly generated features", "Predictions of base learners on validation or out-of-fold data"], "correctAnswerIndex": 3, "explanation": "Using out-of-fold predictions prevents overfitting when training the meta-learner." },
    { "id": 9, "questionText": "Which is a difference between Stacking and Boosting?", "options": ["Stacking reduces variance only", "Boosting uses meta-learners, Stacking does not", "Stacking combines models in parallel, Boosting sequentially", "Boosting uses multiple meta-learners"], "correctAnswerIndex": 2, "explanation": "Boosting trains models sequentially to correct errors, while Stacking trains models independently and combines their predictions." },
    { "id": 10, "questionText": "Why is cross-validation often used in Stacking?", "options": ["To select meta-learner automatically", "To increase learning rate", "To train base learners faster", "To generate out-of-fold predictions for training the meta-learner"], "correctAnswerIndex": 3, "explanation": "Cross-validation provides unbiased predictions of base learners on data not seen during training, which is used to train the meta-learner." },
    { "id": 11, "questionText": "Stacking is also known as:", "options": ["Random forest ensemble", "Boosted regression", "Stacked generalization", "Sequential bagging"], "correctAnswerIndex": 2, "explanation": "Stacking was introduced as 'stacked generalization' by Wolpert to combine multiple models." },
    { "id": 12, "questionText": "Which problem does Stacking address that single models might struggle with?", "options": ["Reducing dataset size", "Combining strengths of different algorithms for better generalization", "Faster training", "Feature scaling"], "correctAnswerIndex": 1, "explanation": "Stacking leverages different models to capture various patterns and reduce generalization error." },
    { "id": 13, "questionText": "In a classification task, what type of output is passed to the meta-learner?", "options": ["Random noise", "Residuals only", "Predicted probabilities or labels from base learners", "Original features only"], "correctAnswerIndex": 2, "explanation": "The meta-learner uses predictions (labels or probabilities) from base learners to make final predictions." },
    { "id": 14, "questionText": "Which is true about the diversity of base learners in Stacking?", "options": ["All base learners should be identical", "Greater diversity usually improves ensemble performance", "Meta-learner must be a tree", "Only deep trees are used"], "correctAnswerIndex": 1, "explanation": "Different algorithms or parameter settings increase diversity and help the ensemble learn better." },
    { "id": 15, "questionText": "Which dataset is used to prevent overfitting of the meta-learner?", "options": ["Random subset of test data", "Entire training set predictions", "Out-of-fold predictions from training set", "Residual errors only"], "correctAnswerIndex": 2, "explanation": "Out-of-fold predictions give unbiased estimates for the meta-learner to learn safely." },
    { "id": 16, "questionText": "Stacking can be applied to:", "options": ["Unsupervised tasks only", "Both classification and regression tasks", "Only classification", "Only regression"], "correctAnswerIndex": 1, "explanation": "Stacking is versatile and can combine base learners for both regression and classification tasks." },
    { "id": 17, "questionText": "Which is NOT a typical base learner in Stacking?", "options": ["Logistic regression", "Decision tree", "KNN", "Random noise generator"], "correctAnswerIndex": 3, "explanation": "Random noise is not a meaningful base learner and cannot contribute to ensemble learning." },
    { "id": 18, "questionText": "Meta-learner complexity should be:", "options": ["Always very deep", "Same as base learner complexity", "Simple enough to avoid overfitting on base predictions", "Randomly selected"], "correctAnswerIndex": 2, "explanation": "A simple meta-learner generalizes better by learning patterns from base predictions without overfitting." },
    { "id": 19, "questionText": "Which of the following can be used as meta-learner?", "options": ["Random features only", "Bootstrap samples", "Noise vector", "Linear regression, logistic regression, or tree"], "correctAnswerIndex": 3, "explanation": "Various models can serve as meta-learner depending on the problem type." },
    { "id": 20, "questionText": "Stacking usually improves performance when base learners:", "options": ["Have complementary strengths and weaknesses", "Are identical in type", "Have zero diversity", "Are only weak learners"], "correctAnswerIndex": 0, "explanation": "Combining models with different strengths allows the meta-learner to correct errors and improve predictions." },
    { "id": 21, "questionText": "Which is a common mistake when implementing Stacking?", "options": ["Using simple meta-learner", "Using cross-validation for base predictions", "Training meta-learner on same data base learners saw", "Using different base learners"], "correctAnswerIndex": 2, "explanation": "Training meta-learner on same data can cause overfitting; out-of-fold predictions prevent this." },
    { "id": 22, "questionText": "Stacking differs from Voting because:", "options": ["It reduces variance only", "It averages predictions blindly", "It learns weights using a meta-learner rather than using fixed rules", "It uses bootstrap samples only"], "correctAnswerIndex": 2, "explanation": "Unlike Voting, Stacking trains a model to optimally combine base learners’ predictions." },
    { "id": 23, "questionText": "Which scenario benefits most from Stacking?", "options": ["Identical models only", "Single model with high accuracy", "When multiple different models have complementary predictive power", "Very small datasets"], "correctAnswerIndex": 2, "explanation": "Stacking leverages diverse models to produce better generalization than any individual model." },
    { "id": 24, "questionText": "Which metric should you use to evaluate Stacking?", "options": ["Depends on the problem (accuracy, RMSE, F1, etc.)", "Always F1-score", "Always RMSE", "Always accuracy"], "correctAnswerIndex": 0, "explanation": "Evaluation metric depends on the type of task (classification or regression)." },
    { "id": 25, "questionText": "In K-fold Stacking, each fold provides predictions to:", "options": ["Train the meta-learner without overfitting", "Generate residuals", "Train base learners only", "Randomly select features"], "correctAnswerIndex": 0, "explanation": "K-fold cross-validation provides unbiased predictions from base learners for the meta-learner." },
    { "id": 26, "questionText": "Stacking can reduce generalization error by:", "options": ["Randomly averaging predictions", "Ignoring base learners", "Combining strengths of multiple models", "Using only a single strong model"], "correctAnswerIndex": 2, "explanation": "Meta-learner exploits complementary strengths of base learners to improve predictions." },
    { "id": 27, "questionText": "Which is true for regression tasks using Stacking?", "options": ["Meta-learner predicts labels only", "Only classification is possible", "Residuals are ignored", "Base learners predict continuous values, meta-learner combines them"], "correctAnswerIndex": 3, "explanation": "For regression, the meta-learner learns to combine continuous predictions from base learners." },
    { "id": 28, "questionText": "Which prevents overfitting in Stacking?", "options": ["Ignoring diversity of base learners", "Deep meta-learner only", "Using out-of-fold predictions for meta-learner training", "Training meta-learner on entire dataset predictions"], "correctAnswerIndex": 2, "explanation": "Out-of-fold predictions prevent the meta-learner from memorizing base learners’ predictions." },
    { "id": 29, "questionText": "Scenario: Combining Random Forest, SVM, and KNN with a linear meta-learner. This is:", "options": ["Boosting", "Bagging", "Stacking", "Voting"], "correctAnswerIndex": 2, "explanation": "Different base learners are combined via a meta-learner, which defines Stacking." },
    { "id": 30, "questionText": "Which is the main requirement for base learners in Stacking?", "options": ["They must be deep trees only", "They should be diverse and not perfectly correlated", "They should always be linear models", "They must have identical predictions"], "correctAnswerIndex": 1, "explanation": "Diversity ensures that the meta-learner can learn from complementary strengths of different models." },
    { "id": 31, "questionText": "In Stacking, why is it important that base learners are diverse?", "options": ["Identical base learners are always better", "Diversity increases bias", "Diverse base learners capture different aspects of the data, improving meta-learner performance", "Diversity reduces computation"], "correctAnswerIndex": 2, "explanation": "Diversity among base learners ensures complementary strengths, which the meta-learner can exploit for better predictions." },
    { "id": 32, "questionText": "Which technique is commonly used to generate unbiased predictions for meta-learner training?", "options": ["K-fold cross-validation (out-of-fold predictions)", "Random feature selection", "Using test data", "Bootstrap sampling only"], "correctAnswerIndex": 0, "explanation": "K-fold cross-validation produces predictions from unseen data folds to prevent overfitting when training the meta-learner." },
    { "id": 33, "questionText": "Scenario: You use three base learners with high correlation. What is likely to happen?", "options": ["The meta-learner ignores correlation automatically", "Performance will drastically improve", "Overfitting is impossible", "The meta-learner gains little benefit due to redundant information"], "correctAnswerIndex": 3, "explanation": "Highly correlated base learners do not provide complementary information, reducing the benefit of Stacking." },
    { "id": 34, "questionText": "Which type of meta-learner is commonly used for regression tasks?", "options": ["Decision stump", "Logistic regression", "Linear regression or ridge regression", "Random noise generator"], "correctAnswerIndex": 2, "explanation": "Linear or regularized regression models are simple and effective for combining continuous outputs of base learners." },
    { "id": 35, "questionText": "Which type of meta-learner is commonly used for classification tasks?", "options": ["K-means clustering", "Random noise generator", "Logistic regression", "Linear regression"], "correctAnswerIndex": 2, "explanation": "Logistic regression can combine probability outputs from base learners and produce final class probabilities." },
    { "id": 36, "questionText": "Stacking can be applied to:", "options": ["Classification and regression", "Unsupervised tasks only", "Only classification", "Only regression"], "correctAnswerIndex": 0, "explanation": "Stacking is versatile and works for both classification and regression problems." },
    { "id": 37, "questionText": "Scenario: Base learners perform poorly individually but differently. Stacking may:", "options": ["Always fail", "Reduce bias only", "Increase correlation among predictions", "Improve overall performance by combining diverse predictions"], "correctAnswerIndex": 3, "explanation": "Even weak base learners can be combined effectively by the meta-learner if they make different errors." },
    { "id": 38, "questionText": "Why should meta-learner complexity be limited?", "options": ["To prevent overfitting on base learners’ predictions", "To reduce dataset size", "To increase training time", "Because base learners are always simple"], "correctAnswerIndex": 0, "explanation": "A simple meta-learner generalizes better on predictions from base learners without memorizing noise." },
    { "id": 39, "questionText": "Scenario: Using Random Forest, SVM, and KNN as base learners with Logistic Regression as meta-learner. Which is true?", "options": ["Diverse base learners + simple meta-learner is a common Stacking setup", "Base learners must be identical", "Meta-learner should be very deep", "Only regression problems are supported"], "correctAnswerIndex": 0, "explanation": "Combining different algorithms with a simple meta-learner is a standard approach in Stacking." },
    { "id": 40, "questionText": "Scenario: Your meta-learner overfits the base learners’ predictions. Which solution is suitable?", "options": ["Use simpler meta-learner or regularization", "Add more base learners without change", "Increase base learner complexity", "Ignore cross-validation"], "correctAnswerIndex": 0, "explanation": "Regularizing or simplifying the meta-learner reduces overfitting on base predictions." },
    { "id": 41, "questionText": "Which cross-validation strategy is used to generate predictions for meta-learner training?", "options": ["Random sampling", "No CV is needed", "K-fold cross-validation", "Leave-one-out only"], "correctAnswerIndex": 2, "explanation": "K-fold CV produces out-of-fold predictions to prevent overfitting of the meta-learner." },
    { "id": 42, "questionText": "Stacking differs from Voting because:", "options": ["It learns combination weights via a meta-learner", "It reduces variance only", "It uses identical base learners", "It averages predictions blindly"], "correctAnswerIndex": 0, "explanation": "Voting combines base learners using fixed rules, while Stacking learns how to combine predictions optimally." },
    { "id": 43, "questionText": "Scenario: Your dataset is small. Stacking may:", "options": ["Always improve accuracy", "Overfit due to limited training data for meta-learner", "Reduce computation time automatically", "Ignore base learners"], "correctAnswerIndex": 1, "explanation": "Meta-learner may overfit if there isn’t enough data for unbiased predictions from base learners." },
    { "id": 44, "questionText": "Which situation is ideal for using Stacking?", "options": ["Highly correlated base learners", "No training data available", "Single strong model is sufficient", "Multiple different models have complementary strengths"], "correctAnswerIndex": 3, "explanation": "Stacking benefits when base learners make different types of errors, allowing meta-learner to combine them effectively." },
    { "id": 45, "questionText": "Why are out-of-fold predictions used instead of training predictions for the meta-learner?", "options": ["To add noise intentionally", "To prevent meta-learner from overfitting", "To reduce computation", "To increase correlation"], "correctAnswerIndex": 1, "explanation": "Using predictions on unseen folds ensures the meta-learner sees unbiased predictions and generalizes better." },
    { "id": 46, "questionText": "Scenario: All base learners are trees with same depth. How to improve stacking?", "options": ["Use only meta-learner", "Add more identical trees", "Reduce training data", "Increase diversity via different algorithms or hyperparameters"], "correctAnswerIndex": 3, "explanation": "Diverse learners are key for stacking; otherwise, meta-learner gains little new information." },
    { "id": 47, "questionText": "Which of the following helps prevent overfitting in stacking?", "options": ["Adding noise to predictions", "Deep meta-learner only", "High learning rate only", "Cross-validation, simpler meta-learner, regularization"], "correctAnswerIndex": 3, "explanation": "Using CV and regularization ensures meta-learner does not memorize base learners’ predictions." },
    { "id": 48, "questionText": "Which task is stacking suitable for?", "options": ["Structured regression, classification, and hybrid tasks", "Only unsupervised learning", "Only image generation", "Only dimensionality reduction"], "correctAnswerIndex": 0, "explanation": "Stacking is versatile and can be applied to any supervised task." },
    { "id": 49, "questionText": "Scenario: You want to combine a Random Forest and a KNN for classification. What is a suitable meta-learner?", "options": ["Logistic regression", "K-means clustering", "Principal Component Analysis", "Another Random Forest only"], "correctAnswerIndex": 0, "explanation": "A simple model like logistic regression can effectively combine predictions from heterogeneous base learners." },
    { "id": 50, "questionText": "Why is meta-learner training data usually smaller than base learner training data?", "options": ["It sees random features only", "It uses the entire dataset again", "It only sees residuals", "It uses out-of-fold predictions from base learners"], "correctAnswerIndex": 3, "explanation": "Meta-learner sees predictions on validation folds, not full training data, to avoid overfitting." },
    { "id": 51, "questionText": "Scenario: Base learners predict different class probabilities for a sample. What does the meta-learner do?", "options": ["Selects the first base learner only", "Combines these predictions to make the final decision", "Averages features instead of predictions", "Ignores all predictions"], "correctAnswerIndex": 1, "explanation": "The meta-learner uses outputs from base learners as inputs to produce a more accurate final prediction." },
    { "id": 52, "questionText": "Which of these is a benefit of using Stacking over individual models?", "options": ["Reduces dataset size automatically", "Improved predictive performance by combining strengths of multiple models", "Always faster training", "No need for cross-validation"], "correctAnswerIndex": 1, "explanation": "Stacking leverages diverse models to capture different patterns and reduce overall error." },
    { "id": 53, "questionText": "Scenario: Stacking with highly correlated base learners results in:", "options": ["Limited improvement due to redundant predictions", "No need for a meta-learner", "Automatic error correction", "Maximum improvement always"], "correctAnswerIndex": 0, "explanation": "If base learners make similar errors, the meta-learner gains little new information." },
    { "id": 54, "questionText": "Which factor is crucial for effective Stacking?", "options": ["Training base learners on same features only", "Identical predictions from all base learners", "Diversity among base learners", "Using a deep meta-learner only"], "correctAnswerIndex": 2, "explanation": "Different algorithms or parameters ensure base learners capture complementary information." },
    { "id": 55, "questionText": "Scenario: Small dataset, multiple base learners. Meta-learner shows overfitting. Recommended solution?", "options": ["Increase number of trees only", "Ignore cross-validation", "Increase meta-learner complexity", "Use simpler meta-learner or regularization, possibly reduce number of base learners"], "correctAnswerIndex": 3, "explanation": "Simpler meta-learner and regularization prevent overfitting when training data is limited." },
    { "id": 56, "questionText": "Why is stacking preferred over simple averaging or voting in some cases?", "options": ["It always uses deep learning", "It learns optimal weights for combining predictions instead of using fixed rules", "It eliminates need for base learners", "It reduces computation time"], "correctAnswerIndex": 1, "explanation": "The meta-learner can adaptively combine base predictions based on data patterns, improving accuracy." },
    { "id": 57, "questionText": "Scenario: Base learners are decision trees with shallow depth. Meta-learner is logistic regression. Likely effect?", "options": ["Meta-learner can capture complementary signals and improve performance", "Performance will always drop", "Trees become irrelevant", "Only overfitting occurs"], "correctAnswerIndex": 0, "explanation": "Even weak or shallow learners can provide useful signals for the meta-learner." },
    { "id": 58, "questionText": "Which is a common mistake in Stacking implementation?", "options": ["Using simple meta-learner", "Training meta-learner on base learners’ training predictions (not out-of-fold predictions)", "Using diverse base learners", "Cross-validation for base predictions"], "correctAnswerIndex": 1, "explanation": "Using training predictions directly can cause overfitting; out-of-fold predictions are needed." },
    { "id": 59, "questionText": "Scenario: Stacking regression with three base learners. Which output type does the meta-learner use?", "options": ["Predicted classes only", "Random noise vector", "Residuals only", "Predicted continuous values from base learners"], "correctAnswerIndex": 3, "explanation": "Meta-learner combines predicted continuous outputs from base learners to produce final regression output." },
    { "id": 60, "questionText": "Scenario: You have Random Forest, XGBoost, and SVM as base learners. Which meta-learner is simple and effective?", "options": ["PCA", "Deep neural network only", "Logistic regression or linear regression", "Random noise generator"], "correctAnswerIndex": 2, "explanation": "Simple regression models can effectively combine heterogeneous predictions without overfitting." },
    { "id": 61, "questionText": "Scenario: Meta-learner predicts perfectly on training data but poorly on test data. Cause?", "options": ["Dataset too large", "Meta-learner too simple", "Overfitting due to using training predictions instead of out-of-fold predictions", "Base learners are too diverse"], "correctAnswerIndex": 2, "explanation": "Training on base learners’ predictions from the same data leads to memorization and poor generalization." },
    { "id": 62, "questionText": "Which of these is NOT a recommended strategy in Stacking?", "options": ["Using out-of-fold predictions", "Using cross-validation for base learners", "Regularizing the meta-learner", "Using meta-learner trained on base learners’ training predictions"], "correctAnswerIndex": 3, "explanation": "Meta-learner must be trained on unbiased predictions; using training predictions causes overfitting." },
    { "id": 63, "questionText": "Scenario: Base learners have high variance individually. Stacking can:", "options": ["Always increase bias", "Reduce overall variance by combining their predictions", "Ignore base learner predictions", "Eliminate need for cross-validation"], "correctAnswerIndex": 1, "explanation": "Meta-learner can combine different noisy predictions to reduce overall variance and improve stability." },
    { "id": 64, "questionText": "Scenario: Base learners are homogeneous (e.g., all logistic regressions). Likely effect?", "options": ["Meta-learner ignored", "Maximum benefit always", "Overfitting impossible", "Limited improvement from Stacking due to redundancy"], "correctAnswerIndex": 3, "explanation": "Stacking works best when base learners are diverse; homogeneous models provide little new information." },
    { "id": 65, "questionText": "Which approach improves stacking with limited data?", "options": ["More complex meta-learner only", "Ignore base learner diversity", "Regularization, simpler meta-learner, careful cross-validation", "Train meta-learner on training predictions"], "correctAnswerIndex": 2, "explanation": "These strategies reduce overfitting and improve generalization when data is scarce." },
    { "id": 66, "questionText": "Scenario: Meta-learner underfits base predictions. Recommended fix?", "options": ["Use training predictions instead of out-of-fold", "Reduce base learner diversity", "Use a slightly more complex meta-learner or additional features", "Ignore predictions"], "correctAnswerIndex": 2, "explanation": "A slightly more flexible meta-learner can better capture relationships between base learners’ predictions." },
    { "id": 67, "questionText": "Scenario: Combining Random Forest and Gradient Boosting as base learners. Which advantage does stacking provide?", "options": ["Eliminates bias automatically", "Leverages complementary strengths of ensemble methods for better prediction", "Reduces variance to zero", "Replaces base learners completely"], "correctAnswerIndex": 1, "explanation": "Stacking allows different ensembles to complement each other, improving overall performance." },
    { "id": 68, "questionText": "Scenario: Using stacking in classification, base learners predict probabilities. Meta-learner input?", "options": ["Random noise vector", "Predicted probabilities from base learners", "Original features only", "Residual errors only"], "correctAnswerIndex": 1, "explanation": "Meta-learner uses predicted probabilities from base learners as inputs to produce final classification." },
    { "id": 69, "questionText": "Which scenario would reduce the benefit of stacking?", "options": ["Base learners are diverse", "Base learners are highly correlated", "Out-of-fold predictions are used", "Meta-learner is regularized"], "correctAnswerIndex": 1, "explanation": "High correlation among base learners provides redundant information, limiting stacking’s advantage." },
    { "id": 70, "questionText": "Scenario: Stacking regression task shows overfitting. First check:", "options": ["Whether meta-learner was trained on out-of-fold predictions", "Base learner type only", "Number of features only", "Dataset size only"], "correctAnswerIndex": 0, "explanation": "Using training predictions instead of out-of-fold predictions is a common cause of overfitting in stacking." },
    { "id": 71, "questionText": "Scenario: In a Kaggle competition, you combine multiple tree-based and linear models. Your meta-learner performs worse than individual base learners. Likely cause?", "options": ["Base learners are too diverse", "Dataset is too large", "Meta-learner overfitted due to training on base learners’ training predictions", "Meta-learner is too simple"], "correctAnswerIndex": 2, "explanation": "Training the meta-learner on the same data as base learners can cause memorization and poor generalization." },
| { | |
| "id": 72, | |
| "questionText": "Scenario: You notice highly correlated predictions from base learners. Which action is appropriate?", | |
| "options": [ | |
| "Ignore the correlation", | |
| "Increase number of trees in all learners", | |
| "Introduce more diverse base learners", | |
| "Use the same algorithm with different hyperparameters only" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "High correlation reduces the benefit of stacking. Introducing diverse models captures complementary patterns." | |
| }, | |
| { | |
| "id": 73, | |
| "questionText": "Scenario: Base learners are neural networks with slightly different architectures. Meta-learner is linear regression. What is expected?", | |
| "options": [ | |
| "Meta-learner can combine complementary predictions to improve accuracy", | |
| "Performance always decreases", | |
| "Meta-learner will ignore base learners", | |
| "Stacking will fail because linear models cannot handle neural networks" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Linear meta-learner can learn optimal weights for combining diverse neural network outputs." | |
| }, | |
| { | |
| "id": 74, | |
| "questionText": "Scenario: Using stacking for regression, meta-learner outputs extreme values. Cause?", | |
| "options": [ | |
| "Base learners’ predictions are poorly scaled or meta-learner is too complex", | |
| "Base learners are too diverse", | |
| "Meta-learner underfitted", | |
| "Dataset is too small" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Improper scaling or an overly complex meta-learner can lead to extreme predictions." | |
| }, | |
| { | |
| "id": 75, | |
| "questionText": "Scenario: You stack three models and notice high variance in meta-learner. Solution?", | |
| "options": [ | |
| "Add more identical base learners", | |
| "Regularize meta-learner or reduce complexity", | |
| "Ignore variance", | |
| "Use training predictions instead of out-of-fold" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Regularization prevents meta-learner from overfitting to noisy base learner predictions." | |
| }, | |
| { | |
| "id": 76, | |
| "questionText": "Scenario: Base learners perform poorly individually but differently. Stacking improves results. Why?", | |
| "options": [ | |
| "Base learners are ignored", | |
| "Meta-learner leverages complementary errors to produce better overall predictions", | |
| "Stacking magically improves all models", | |
| "Random averaging occurs" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Even weak but diverse models can be combined effectively by the meta-learner." | |
| }, | |
| { | |
| "id": 77, | |
| "questionText": "Scenario: Meta-learner is too powerful (e.g., deep neural network). What is the likely outcome?", | |
| "options": [ | |
| "Improved generalization automatically", | |
| "Overfitting to base learners’ predictions", | |
| "Dataset size decreases", | |
| "Base learners become irrelevant" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Overly complex meta-learner may memorize base predictions instead of learning patterns, leading to poor generalization." | |
| }, | |
| { | |
| "id": 78, | |
| "questionText": "Scenario: Small dataset with many base learners. Meta-learner underfits. Solution?", | |
| "options": [ | |
| "Reduce base learner complexity or number", | |
| "Train on test data", | |
| "Ignore diversity", | |
| "Increase meta-learner complexity" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Too many base learners can overwhelm meta-learner on small datasets. Reducing base learners or their complexity helps." | |
| }, | |
| { | |
| "id": 79, | |
| "questionText": "Scenario: Regression stacking task shows systematic bias. Solution?", | |
| "options": [ | |
| "Adjust meta-learner to correct bias or apply residual correction", | |
| "Use training predictions instead of out-of-fold", | |
| "Increase number of base learners only", | |
| "Ignore base learners" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Meta-learner can be tuned or trained on residuals to correct systematic bias." | |
| }, | |
| { | |
| "id": 80, | |
| "questionText": "Scenario: Ensemble includes Random Forest, XGBoost, and KNN. Test accuracy decreases after stacking. First check?", | |
| "options": [ | |
| "Whether meta-learner was trained on proper out-of-fold predictions", | |
| "Number of trees only", | |
| "Feature selection only", | |
| "Dataset size only" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Improper meta-learner training is the most common cause of poor stacking performance." | |
| }, | |
| { | |
| "id": 81, | |
| "questionText": "Scenario: You want to combine multiple image classifiers via stacking. Which approach is suitable?", | |
| "options": [ | |
| "Use softmax probabilities from base classifiers as meta-learner input", | |
| "Use raw pixel inputs", | |
| "Ignore base classifiers", | |
| "Average features randomly" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Meta-learner combines probability predictions rather than raw data for effective stacking." | |
| }, | |
| { | |
| "id": 82, | |
| "questionText": "Scenario: In a stacking setup, meta-learner shows perfect training accuracy. Likely issue?", | |
| "options": [ | |
| "Overfitting due to using base learners’ training predictions", | |
| "Base learners are too diverse", | |
| "Meta-learner too simple", | |
| "Dataset too small" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Perfect training accuracy is a sign of overfitting; out-of-fold predictions prevent this." | |
| }, | |
| { | |
| "id": 83, | |
| "questionText": "Scenario: Base learners are all SVMs with different kernels. Meta-learner is logistic regression. Likely outcome?", | |
| "options": [ | |
| "Improved generalization due to diversity in kernel functions", | |
| "No improvement, identical predictions", | |
| "Overfitting impossible", | |
| "Meta-learner ignored" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Different kernels capture complementary patterns, allowing meta-learner to improve predictions." | |
| }, | |
| { | |
| "id": 84, | |
| "questionText": "Scenario: Base learners have high variance errors. Stacking improves predictions. Why?", | |
| "options": [ | |
| "Meta-learner combines predictions to reduce variance and improve stability", | |
| "Stacking magically reduces errors", | |
| "Base learners are ignored", | |
| "Random averaging occurs" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Meta-learner can smooth out high variance by learning the optimal combination of predictions." | |
| }, | |
| { | |
| "id": 85, | |
| "questionText": "Scenario: Regression stacking task shows systematic bias. Solution?", | |
| "options": [ | |
| "Adjust meta-learner to correct bias or apply residual correction", | |
| "Ignore base learners", | |
| "Increase number of base learners only", | |
| "Use training predictions instead of out-of-fold" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Meta-learner can be tuned or trained on residuals to correct systematic bias." | |
| }, | |
| { | |
| "id": 86, | |
| "questionText": "Scenario: Base learners predict probabilities for multi-class classification. Meta-learner input?", | |
| "options": [ | |
| "Concatenated class probabilities from all base learners", | |
| "Raw features only", | |
| "Residuals only", | |
| "Random noise vector" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Meta-learner uses predicted probabilities from all classes to make the final decision." | |
| }, | |
| { | |
| "id": 87, | |
| "questionText": "Scenario: Meta-learner underfits in a classification stacking task. Recommended action?", | |
| "options": [ | |
| "Increase meta-learner capacity slightly or add engineered features", | |
| "Reduce base learner diversity", | |
| "Ignore base learners", | |
| "Train meta-learner on training predictions" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "A slightly more complex meta-learner can capture relationships between base learners’ outputs." | |
| }, | |
| { | |
| "id": 88, | |
| "questionText": "Scenario: Small dataset, multiple base learners. Meta-learner overfits. Best solution?", | |
| "options": [ | |
| "Use simpler meta-learner and regularization", | |
| "Add more base learners", | |
| "Ignore cross-validation", | |
| "Train meta-learner on training predictions" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Simpler meta-learner with regularization prevents overfitting on limited out-of-fold predictions." | |
| }, | |
| { | |
| "id": 89, | |
| "questionText": "Scenario: Base learners include gradient boosting, random forest, and logistic regression. Stacking improves performance. Why?", | |
| "options": [ | |
| "Meta-learner exploits complementary predictions of heterogeneous models", | |
| "Stacking magically improves results", | |
| "Base learners are ignored", | |
| "Dataset size increases" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Diverse models capture different patterns, which meta-learner combines for better generalization." | |
| }, | |
| { | |
| "id": 90, | |
| "questionText": "Scenario: You want to stack deep learning models for regression. Best approach?", | |
| "options": [ | |
| "Use predicted outputs or features from penultimate layers as meta-learner input", | |
| "Raw images only", | |
| "Ignore base learners", | |
| "Average base model weights" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Using predictions or embeddings from deep models is standard for stacking to combine outputs effectively." | |
| }, | |
| { | |
| "id": 91, | |
| "questionText": "Scenario: Base learners are overfitting slightly. Meta-learner underfits. Recommendation?", | |
| "options": [ | |
| "Reduce base learner overfitting and slightly increase meta-learner capacity", | |
| "Ignore base learners", | |
| "Train meta-learner on test data", | |
| "Increase dataset size only" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Balancing base and meta-learner capacities improves overall stacking performance." | |
| }, | |
| { | |
| "id": 92, | |
| "questionText": "Scenario: Stacking regression, meta-learner predicts negative values where base predictions are positive. Fix?", | |
| "options": [ | |
| "Check scaling and bias adjustments in meta-learner", | |
| "Ignore predictions", | |
| "Reduce base learners", | |
| "Use training predictions instead of out-of-fold" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Meta-learner may require proper scaling or offset to combine base predictions correctly." | |
| }, | |
| { | |
| "id": 93, | |
| "questionText": "Scenario: Meta-learner training time is extremely high. Possible solution?", | |
| "options": [ | |
| "Reduce number of base learners or use simpler meta-learner", | |
| "Increase base learner complexity", | |
| "Ignore training time", | |
| "Use training predictions directly" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Simplifying the meta-learner or reducing base learners can significantly lower computation time." | |
| }, | |
| { | |
| "id": 94, | |
| "questionText": "Scenario: Stacking for imbalanced classification. Recommended approach?", | |
| "options": [ | |
| "Use probability outputs and apply class weighting or sampling strategies", | |
| "Ignore imbalance", | |
| "Train meta-learner on majority class only", | |
| "Use raw features directly" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Meta-learner can be trained with balanced inputs to handle imbalanced datasets effectively." | |
| }, | |
| { | |
| "id": 95, | |
| "questionText": "Scenario: Multiple base learners provide continuous outputs with different scales. What is recommended?", | |
| "options": [ | |
| "Normalize or standardize outputs before feeding into meta-learner", | |
| "Ignore scale differences", | |
| "Train meta-learner on raw values", | |
| "Use only one base learner" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Meta-learner performs better when inputs are on comparable scales." | |
| }, | |
| { | |
| "id": 96, | |
| "questionText": "Scenario: Stacking with three classifiers, meta-learner predicts incorrectly on edge cases. Solution?", | |
| "options": [ | |
| "Use more diverse base learners or add engineered features", | |
| "Reduce base learner diversity", | |
| "Ignore predictions", | |
| "Train on training predictions only" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Meta-learner can improve predictions on edge cases if base learners provide complementary information." | |
| }, | |
| { | |
| "id": 97, | |
| "questionText": "Scenario: You stack tree-based models with logistic regression meta-learner. Test RMSE is higher than best base learner. Likely cause?", | |
| "options": [ | |
| "Meta-learner overfitted or base predictions too correlated", | |
| "Stacking always reduces RMSE", | |
| "Dataset too large", | |
| "Meta-learner too simple" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Correlation among base learners or overfitting in meta-learner can degrade performance." | |
| }, | |
| { | |
| "id": 98, | |
| "questionText": "Scenario: Combining heterogeneous models via stacking for regression. Key considerations?", | |
| "options": [ | |
| "Diversity, proper meta-learner training, scaling of outputs", | |
| "Use identical base learners only", | |
| "Ignore cross-validation", | |
| "Increase number of base learners blindly" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Effective stacking requires diverse base learners, out-of-fold meta-learner training, and proper scaling." | |
| }, | |
| { | |
| "id": 99, | |
| "questionText": "Scenario: Meta-learner underfits in a classification stacking task. Recommended action?", | |
| "options": [ | |
| "Increase meta-learner capacity slightly or add engineered features", | |
| "Reduce base learner diversity", | |
| "Ignore base learners", | |
| "Train meta-learner on training predictions" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "A slightly more complex meta-learner can capture relationships between base learners’ outputs." | |
| }, | |
| { | |
| "id": 100, | |
| "questionText": "Scenario: Stacking regression ensemble shows overfitting. Which step should be prioritized?", | |
| "options": [ | |
| "Verify meta-learner uses out-of-fold predictions and apply regularization", | |
| "Add more base learners", | |
| "Ignore overfitting", | |
| "Train meta-learner on full training predictions" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Out-of-fold predictions and regularization are essential to prevent overfitting in stacking ensembles." | |
| } | |
| ] | |
| } | |