{
  "title": "Logistic Regression Mastery: 100 MCQs",
  "description": "A comprehensive set of 100 multiple-choice questions designed to teach and test your understanding of Logistic Regression, from basic concepts to advanced topics like regularization, odds ratio, decision boundaries, and real-world scenario applications.",
  "questions": [
    {
      "id": 1,
      "questionText": "What is the main purpose of Logistic Regression?",
      "options": [
        "To cluster data points",
        "To predict binary or categorical outcomes",
        "To reduce dimensionality",
        "To predict a continuous outcome"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Logistic Regression models the probability of binary or categorical outcomes, not continuous values."
    },
    {
      "id": 2,
      "questionText": "Which function does Logistic Regression use to map predicted values to probabilities?",
      "options": [
        "Sigmoid function",
        "ReLU function",
        "Step function",
        "Linear function"
      ],
      "correctAnswerIndex": 0,
      "explanation": "The sigmoid function maps any real-valued number into the range 0 to 1, representing probabilities."
    },
    {
      "id": 3,
      "questionText": "In Logistic Regression, what do the odds represent?",
      "options": [
        "The number of features in the model",
        "The error of the model",
        "The predicted probability itself",
        "The ratio of probability of success to failure"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Odds = probability of success / probability of failure, i.e., p / (1 − p); exponentiating a coefficient gives an odds ratio."
    },
    {
      "id": 4,
      "questionText": "What type of relationship does Logistic Regression model between input and output?",
      "options": [
        "Non-linear relationship between input and output",
        "Linear relationship between input and output",
        "Linear relationship between the inputs and the log-odds",
        "Polynomial relationship"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Logistic Regression models the log-odds (logit) as a linear combination of inputs."
    },
    {
      "id": 5,
      "questionText": "Which loss function is used in Logistic Regression?",
      "options": [
        "Hinge Loss",
        "Absolute Error",
        "Mean Squared Error",
        "Cross-Entropy / Log Loss"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Logistic Regression uses log loss (cross-entropy) to penalize incorrect probabilistic predictions."
    },
    {
      "id": 6,
      "questionText": "Scenario: A dataset has highly imbalanced classes. What is a good approach in Logistic Regression?",
      "options": [
        "Remove majority class",
        "Use class weights or resampling techniques",
        "Ignore imbalance and train directly",
        "Use Linear Regression instead"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Class imbalance can bias predictions; weighting or resampling helps model performance."
    },
    {
      "id": 7,
      "questionText": "Which metric is most appropriate for evaluating Logistic Regression?",
      "options": [
        "Mean Absolute Error",
        "Accuracy, Precision, Recall, F1-score",
        "R-squared",
        "Explained Variance"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Classification metrics like accuracy, precision, recall, and F1-score are used for Logistic Regression."
    },
    {
      "id": 8,
      "questionText": "Scenario: Logistic Regression model shows overfitting. Recommended solution?",
      "options": [
        "Reduce dataset size",
        "Apply regularization (L1 or L2)",
        "Increase learning rate",
        "Remove all features"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Regularization penalizes large coefficients, reducing overfitting."
    },
    {
      "id": 9,
      "questionText": "Which regularization is used in Logistic Regression to encourage sparsity in coefficients?",
      "options": [
        "L2 Regularization (Ridge)",
        "No regularization",
        "L1 Regularization (Lasso)",
        "ElasticNet only"
      ],
      "correctAnswerIndex": 2,
      "explanation": "L1 regularization encourages some coefficients to become exactly zero, promoting sparsity."
    },
    {
      "id": 10,
      "questionText": "Scenario: Logistic Regression is applied with two highly correlated features. Observation?",
      "options": [
        "Features ignored automatically",
        "Model always underfits",
        "Multicollinearity can inflate coefficient variance",
        "Coefficients are exact"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Highly correlated features lead to unstable coefficients due to multicollinearity."
    },
    {
      "id": 11,
      "questionText": "Which threshold is commonly used to convert probabilities into class predictions?",
      "options": [
        "1.0",
        "Negative values",
        "0.25",
        "0.5"
      ],
      "correctAnswerIndex": 3,
      "explanation": "By default, probabilities ≥0.5 are classified as 1, below 0.5 as 0."
    },
    {
      "id": 12,
      "questionText": "Scenario: Predicted probability = 0.8. What is predicted class (threshold=0.5)?",
      "options": [
        "Depends on dataset",
        "Undetermined",
        "Class 1",
        "Class 0"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Probability >0.5 is classified as Class 1 by default."
    },
    {
      "id": 13,
      "questionText": "Scenario: Logistic Regression applied to a dataset with 3 classes. Which extension is required?",
      "options": [
        "Binary Logistic Regression",
        "Ridge Regression",
        "Linear Regression",
        "Multinomial Logistic Regression (Softmax)"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Multinomial logistic regression generalizes to multi-class problems using the softmax function."
    },
    {
      "id": 14,
      "questionText": "What does the coefficient in Logistic Regression represent?",
      "options": [
        "Predicted probability",
        "Change in log-odds per unit change in feature",
        "Mean of feature",
        "Error term"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Each coefficient measures the impact of its feature on the log-odds of the outcome."
    },
    {
      "id": 15,
      "questionText": "Scenario: Logistic Regression is applied with L2 regularization. Observation?",
      "options": [
        "Coefficients become exactly zero",
        "Model ignores features",
        "Training fails",
        "Coefficients shrink, helps reduce overfitting"
      ],
      "correctAnswerIndex": 3,
      "explanation": "L2 penalizes large coefficients, reducing variance without forcing zeros."
    },
    {
      "id": 16,
      "questionText": "Which function converts log-odds to probability in Logistic Regression?",
      "options": [
        "ReLU",
        "Linear",
        "Tanh",
        "Sigmoid"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Sigmoid maps log-odds to probability between 0 and 1."
    },
    {
      "id": 17,
      "questionText": "Scenario: Dataset has 100 features, 10,000 samples. Regularization needed?",
      "options": [
        "No, model will generalize automatically",
        "Yes, to prevent overfitting and reduce coefficient variance",
        "Remove samples instead",
        "Use only linear regression"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Regularization is important when features are many relative to samples to improve generalization."
    },
    {
      "id": 18,
      "questionText": "Scenario: Logistic Regression shows poor recall for minority class. Solution?",
      "options": [
        "Remove majority class",
        "Ignore minority class",
        "Increase learning rate",
        "Adjust decision threshold or use class weights"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Threshold adjustment or class weighting helps improve minority class prediction."
    },
    {
      "id": 19,
      "questionText": "Which optimization method is commonly used to train Logistic Regression?",
      "options": [
        "Random Forest",
        "Gradient Descent / Newton-Raphson",
        "PCA",
        "K-Means"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Gradient-based optimization (like gradient descent or Newton-Raphson) is used to minimize log-loss."
    },
    {
      "id": 20,
      "questionText": "Scenario: Logistic Regression applied with perfect separation. Observation?",
      "options": [
        "Model ignores features",
        "Coefficients can go to infinity; regularization needed",
        "Model works fine without issues",
        "Training error is high"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Perfect separation leads to extremely large coefficients; L1/L2 regularization stabilizes estimates."
    },
    {
      "id": 21,
      "questionText": "Scenario: Logistic Regression applied to highly imbalanced dataset. Metric to monitor?",
      "options": [
        "R-squared",
        "Precision, Recall, F1-score",
        "Explained Variance",
        "Mean Absolute Error"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Classification metrics like precision, recall, and F1 are more appropriate than regression metrics."
    },
    {
      "id": 22,
      "questionText": "Scenario: Model predicts 0.49 for minority class with threshold=0.5. Observation?",
      "options": [
        "Prediction invalid",
        "Class predicted as 0; threshold can be adjusted",
        "Model underfits",
        "Class predicted as 1"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Probability <0.5 leads to class 0; threshold adjustment can improve minority class recall."
    },
    {
      "id": 23,
      "questionText": "Scenario: Logistic Regression with correlated inputs. Potential issue?",
      "options": [
        "Model ignores correlated features automatically",
        "Multicollinearity inflates variance of coefficients",
        "Training fails",
        "Model underfits"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Multicollinearity leads to unstable coefficient estimates."
    },
    {
      "id": 24,
      "questionText": "Scenario: Logistic Regression used for spam email detection. What is the output?",
      "options": [
        "Continuous score unrelated to probability",
        "Distance from origin",
        "Probability of spam",
        "Exact class label only"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Logistic Regression outputs the probability of the positive class (spam)."
    },
    {
      "id": 25,
      "questionText": "Scenario: Logistic Regression applied with L1 regularization. Observation?",
      "options": [
        "Training fails",
        "Model ignores features",
        "Some coefficients may become exactly zero, performing implicit feature selection",
        "All coefficients increase"
      ],
      "correctAnswerIndex": 2,
      "explanation": "L1 regularization shrinks some coefficients to zero, effectively performing feature selection."
    },
    {
      "id": 26,
      "questionText": "Scenario: Logistic Regression applied to dataset with outliers. Observation?",
      "options": [
        "Model underfits",
        "Training fails",
        "Outliers have no effect",
        "Coefficients may be skewed by outliers"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Outliers can distort the logistic regression coefficients, affecting predictions."
    },
    {
      "id": 27,
      "questionText": "Scenario: Logistic Regression with L2 regularization on small dataset. Observation?",
      "options": [
        "Model ignores features",
        "Coefficients become exactly zero",
        "Coefficients shrink, improving generalization",
        "Training fails"
      ],
      "correctAnswerIndex": 2,
      "explanation": "L2 regularization penalizes large coefficients, stabilizing them for small datasets."
    },
    {
      "id": 28,
      "questionText": "Scenario: Logistic Regression applied with highly correlated features. Observation?",
      "options": [
        "Model underfits",
        "Model ignores correlated features automatically",
        "Training fails",
        "Multicollinearity inflates variance of coefficients"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Highly correlated features lead to unstable coefficient estimates, increasing variance."
    },
    {
      "id": 29,
      "questionText": "Scenario: Logistic Regression used for credit default prediction. Output?",
      "options": [
        "Distance from origin",
        "Exact class label only",
        "Continuous score unrelated to probability",
        "Probability of default"
      ],
      "correctAnswerIndex": 3,
      "explanation": "The model outputs probabilities, which can then be converted to class labels using a threshold."
    },
    {
      "id": 30,
      "questionText": "Scenario: Logistic Regression trained with balanced class weights. Observation?",
      "options": [
        "All probabilities are 0.5",
        "Model ignores minority class",
        "Minority class predictions improve",
        "Training fails"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Class weights balance the loss function, improving minority class prediction."
    },
    {
      "id": 31,
      "questionText": "Scenario: Logistic Regression applied with feature scaling. Observation?",
      "options": [
        "Scaling changes predicted classes",
        "Scaling reduces number of features",
        "Scaling is required to make model work",
        "Scaling helps optimization but does not affect model predictions"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Feature scaling speeds up convergence; for an unregularized model it does not change the final probabilities (with L1/L2 penalties, scaling can affect the fit)."
    },
    {
      "id": 32,
      "questionText": "Scenario: Logistic Regression applied with perfect separation. Observation?",
      "options": [
        "Coefficients may become infinite",
        "Training fails automatically",
        "Model ignores features",
        "Model underfits"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Perfect separation leads to very large coefficients; regularization stabilizes estimates."
    },
    {
      "id": 33,
      "questionText": "Scenario: Logistic Regression applied with threshold=0.7. Observation?",
      "options": [
        "Predictions become stricter for positive class",
        "Model underfits",
        "Threshold does not affect predictions",
        "Predictions become more lenient"
      ],
      "correctAnswerIndex": 0,
      "explanation": "A higher threshold means a higher probability is required to classify as positive, reducing false positives but increasing false negatives."
    },
    {
      "id": 34,
      "questionText": "Scenario: Logistic Regression applied with L1 regularization. Observation?",
      "options": [
        "Training fails",
        "All coefficients increase",
        "Some coefficients shrink to zero",
        "Model ignores features"
      ],
      "correctAnswerIndex": 2,
      "explanation": "L1 encourages sparsity; some features are removed automatically."
    },
    {
      "id": 35,
      "questionText": "Scenario: Logistic Regression applied to multiclass problem. Observation?",
      "options": [
        "Binary logistic regression works fine",
        "Model ignores extra classes",
        "Use multinomial logistic regression with softmax",
        "Training fails"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Binary logistic regression cannot handle more than two classes without modification."
    },
    {
      "id": 36,
      "questionText": "Scenario: Logistic Regression applied to imbalanced dataset. Observation?",
      "options": [
        "Minority class predictions improve automatically",
        "Majority class dominates predictions",
        "Training fails",
        "Model ignores majority class"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Without adjustments, logistic regression may predict majority class most of the time."
    },
    {
      "id": 37,
      "questionText": "Scenario: Logistic Regression applied with gradient descent optimizer. Observation?",
      "options": [
        "Coefficients are updated iteratively to minimize log-loss",
        "Training fails",
        "Model overfits automatically",
        "Predictions remain constant"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Gradient descent iteratively updates weights to minimize cross-entropy loss."
    },
    {
      "id": 38,
      "questionText": "Scenario: Logistic Regression applied to dataset with categorical features. Observation?",
      "options": [
        "Model ignores categorical features",
        "Categorical features must be encoded (e.g., one-hot)",
        "Training fails",
        "Model handles categories directly"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Logistic Regression requires numeric input; categorical variables must be encoded."
    },
    {
      "id": 39,
      "questionText": "Scenario: Logistic Regression applied with very few samples. Observation?",
      "options": [
        "Training fails",
        "Model ignores features",
        "Regularization is critical to prevent overfitting",
        "Model always underfits"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Small datasets can lead to high variance; regularization helps stabilize coefficients."
    },
    {
      "id": 40,
      "questionText": "Scenario: Logistic Regression applied to text classification using TF-IDF features. Observation?",
      "options": [
        "Training error is zero",
        "Model fails automatically",
        "Model can handle high-dimensional sparse data with regularization",
        "Model ignores sparse features"
      ],
      "correctAnswerIndex": 2,
      "explanation": "With regularization, logistic regression works well on high-dimensional sparse data like TF-IDF vectors."
    },
    {
      "id": 41,
      "questionText": "Scenario: Logistic Regression applied to dataset with missing values. Observation?",
      "options": [
        "Training fails",
        "Imputation required before training",
        "Model ignores missing values automatically",
        "Model underfits"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Logistic Regression cannot handle missing values directly; preprocessing like imputation is required."
    },
    {
      "id": 42,
      "questionText": "Scenario: Logistic Regression applied with a very high regularization strength. Observation?",
      "options": [
        "Training fails",
        "Model ignores features",
        "Coefficients increase automatically",
        "Coefficients shrink drastically, model may underfit"
      ],
      "correctAnswerIndex": 3,
      "explanation": "High regularization penalizes coefficients heavily, potentially underfitting the data."
    },
    {
      "id": 43,
      "questionText": "Scenario: Logistic Regression applied with a learning rate that is too high. Observation?",
      "options": [
        "Optimization may diverge",
        "Training fails silently",
        "Predictions remain perfect",
        "Model always converges"
      ],
      "correctAnswerIndex": 0,
      "explanation": "A learning rate that is too high can make gradient descent overshoot, preventing convergence."
    },
    {
      "id": 44,
      "questionText": "Scenario: Logistic Regression applied with L1 and L2 regularization combined. Observation?",
      "options": [
        "Training fails",
        "ElasticNet combines both, balancing sparsity and coefficient shrinkage",
        "All coefficients go to zero",
        "Model ignores features"
      ],
      "correctAnswerIndex": 1,
      "explanation": "ElasticNet uses a weighted combination of L1 and L2 to balance sparsity and shrinkage."
    },
    {
      "id": 45,
      "questionText": "Scenario: Logistic Regression applied with non-linear patterns in features. Observation?",
      "options": [
        "Model ignores non-linear features",
        "Training fails",
        "Model captures non-linearities automatically",
        "Linear decision boundary may underfit; feature engineering or polynomial expansion needed"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Logistic Regression assumes a linear relationship between the log-odds and the features; non-linearities require feature transformations."
    },
    {
      "id": 46,
      "questionText": "Scenario: Logistic Regression applied to probability output. Which method can calibrate probabilities?",
      "options": [
        "StandardScaler",
        "PCA",
        "Platt Scaling or Isotonic Regression",
        "Ridge Regression"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Platt scaling or isotonic regression adjusts predicted probabilities for better calibration."
    },
    {
      "id": 47,
      "questionText": "Scenario: Logistic Regression applied with many irrelevant features. Observation?",
      "options": [
        "Training fails",
        "Regularization can reduce effect of irrelevant features",
        "Model ignores irrelevant features automatically",
        "Model overfits regardless"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Regularization reduces coefficients of uninformative features, improving generalization."
    },
    {
      "id": 48,
      "questionText": "Scenario: Logistic Regression applied to dataset with 3 classes. Threshold method?",
      "options": [
        "Model fails",
        "Softmax probabilities used for multiclass prediction",
        "Binary logistic regression applies",
        "Single threshold 0.5 used for all classes"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Multinomial logistic regression uses softmax to handle multiple classes."
    },
    {
      "id": 49,
      "questionText": "Scenario: Logistic Regression applied to dataset with skewed classes. Observation?",
      "options": [
        "Minority class ignored automatically",
        "Class weighting or resampling improves minority prediction",
        "Model fails",
        "All predictions become majority class"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Class weighting or resampling is needed to handle skewed datasets effectively."
    },
    {
      "id": 50,
      "questionText": "Scenario: Logistic Regression applied with early stopping during optimization. Observation?",
      "options": [
        "Coefficients go to zero",
        "Model always underfits",
        "Prevents overfitting and reduces training time",
        "Training fails"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Early stopping halts training when improvement slows, helping avoid overfitting."
    },
    {
      "id": 51,
      "questionText": "Scenario: Logistic Regression applied to dataset with nonlinear boundaries. Observation?",
      "options": [
        "Model perfectly separates classes",
        "Training fails",
        "Model ignores features",
        "Linear decision boundary may underfit; feature engineering needed"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Logistic Regression fits a linear decision boundary (the log-odds are linear in the features); nonlinear class boundaries require transformations."
    },
    {
      "id": 52,
      "questionText": "Scenario: Logistic Regression applied to high-dimensional sparse data like text. Observation?",
      "options": [
        "Training fails automatically",
        "Model ignores sparse features",
        "Model always underfits",
        "Regularization is essential to prevent overfitting"
      ],
      "correctAnswerIndex": 3,
      "explanation": "L1 or L2 regularization stabilizes coefficients in high-dimensional sparse datasets."
    },
    {
      "id": 53,
      "questionText": "Scenario: Logistic Regression applied to highly imbalanced dataset. Best practice?",
      "options": [
        "Increase learning rate",
        "Use class weighting or resampling techniques",
        "Ignore imbalance and train directly",
        "Remove minority class"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Adjusting for class imbalance helps improve minority class predictions."
    },
    {
      "id": 54,
      "questionText": "Scenario: Logistic Regression model shows large coefficients for correlated features. Observation?",
      "options": [
        "Model ignores correlated features automatically",
        "Coefficients are perfect",
        "Training fails",
        "Multicollinearity inflates variance of coefficients"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Highly correlated inputs can lead to unstable coefficient estimates."
    },
    {
      "id": 55,
      "questionText": "Scenario: Logistic Regression applied with threshold=0.3. Observation?",
      "options": [
        "Threshold has no effect",
        "Predictions become stricter",
        "Model underfits",
        "Predictions become more lenient for positive class"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Lowering the threshold increases positive predictions, improving recall but possibly reducing precision."
    },
    {
      "id": 56,
      "questionText": "Scenario: Logistic Regression applied with very small L2 regularization. Observation?",
      "options": [
        "Training fails",
        "Model underfits automatically",
        "Coefficients shrink to zero",
        "Coefficients may be large, risk of overfitting"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Small regularization may allow large coefficients, increasing variance."
    },
    {
      "id": 57,
      "questionText": "Scenario: Logistic Regression applied with L1 regularization. Observation?",
      "options": [
        "Some coefficients shrink to zero, performing feature selection",
        "Training fails",
        "All coefficients increase",
        "Model ignores features"
      ],
      "correctAnswerIndex": 0,
      "explanation": "L1 regularization promotes sparsity, setting some coefficients exactly to zero."
    },
    {
      "id": 58,
      "questionText": "Scenario: Logistic Regression applied to multiclass problem. Observation?",
      "options": [
        "Training fails",
        "Binary logistic regression works fine",
        "Use multinomial logistic regression with softmax",
        "Model ignores extra classes"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Binary logistic regression cannot handle more than two classes without modification."
    },
    {
      "id": 59,
      "questionText": "Scenario: Logistic Regression applied to dataset with missing values. Observation?",
      "options": [
        "Training fails automatically",
        "Model ignores missing values automatically",
        "Model underfits",
        "Imputation required before training"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Logistic Regression cannot handle missing values directly; preprocessing is needed."
    },
    {
      "id": 60,
      "questionText": "Scenario: Logistic Regression applied to text classification with TF-IDF features. Observation?",
      "options": [
        "Training error is zero",
        "Model fails automatically",
        "All sparse features are ignored",
        "Regularization prevents overfitting in high-dimensional sparse features"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Regularization stabilizes coefficients and improves generalization on sparse datasets."
    },
    {
      "id": 61,
      "questionText": "Scenario: Logistic Regression applied with gradient descent and a large learning rate. Observation?",
      "options": [
        "Model converges perfectly",
        "Optimization may diverge",
        "Model ignores features",
        "Predictions remain constant"
      ],
      "correctAnswerIndex": 1,
      "explanation": "A learning rate that is too high can cause gradient descent to overshoot and fail to converge."
    },
    {
      "id": 62,
      "questionText": "Scenario: Logistic Regression applied with perfect separation in classes. Observation?",
      "options": [
        "Model underfits",
        "Training fails automatically",
        "Coefficients may become extremely large",
        "Model ignores features"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Perfect separation leads to very large coefficients; regularization helps stabilize the model."
    },
    {
      "id": 63,
      "questionText": "Scenario: Logistic Regression applied with early stopping. Observation?",
      "options": [
        "Training fails",
        "Coefficients go to zero",
        "Prevents overfitting and reduces training time",
        "Model always underfits"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Early stopping halts training when loss improvement slows, improving generalization."
    },
    {
      "id": 64,
      "questionText": "Scenario: Logistic Regression applied to dataset with skewed target. Observation?",
      "options": [
        "Use class weights or resampling to balance predictions",
        "Minority class ignored automatically",
        "All predictions become majority class",
        "Model fails"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Adjusting for skewed targets helps prevent biased predictions toward majority class."
    },
    {
      "id": 65,
      "questionText": "Scenario: Logistic Regression applied with categorical features. Observation?",
      "options": [
        "Model ignores categorical features",
        "Training fails",
        "Categorical features must be encoded numerically",
        "Model handles categories automatically"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Logistic Regression requires numeric input, so categories need encoding (e.g., one-hot)."
    },
    {
      "id": 66,
      "questionText": "Scenario: Logistic Regression applied with too many irrelevant features. Observation?",
      "options": [
        "Model ignores irrelevant features automatically",
        "Model overfits regardless",
        "Training fails",
        "Regularization reduces effect of irrelevant features"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Regularization helps suppress coefficients of uninformative features."
    },
    {
      "id": 67,
      "questionText": "Scenario: Logistic Regression applied with L1 and L2 combined. Observation?",
      "options": [
        "ElasticNet balances sparsity and shrinkage",
        "All coefficients become zero",
        "Model ignores features",
        "Training fails"
      ],
      "correctAnswerIndex": 0,
      "explanation": "ElasticNet combines L1 and L2 penalties to balance feature selection and coefficient shrinkage."
    },
    {
      "id": 68,
      "questionText": "Scenario: Logistic Regression applied with adjusted threshold for minority class. Observation?",
      "options": [
        "Precision decreases automatically",
        "Recall of minority class improves",
        "All predictions become majority class",
        "Model fails"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Lowering the threshold increases positive predictions, improving recall for the minority class."
    },
    {
      "id": 69,
      "questionText": "Scenario: Logistic Regression applied with small dataset. Observation?",
      "options": [
        "Model underfits automatically",
        "Training fails",
        "Regularization stabilizes coefficients and reduces variance",
        "Model ignores features"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Small datasets are prone to overfitting; regularization improves generalization."
    },
    {
      "id": 70,
      "questionText": "Scenario: Logistic Regression applied with non-linear feature transformations. Observation?",
      "options": [
        "Training fails",
        "Model ignores non-linear features",
        "Non-linear terms help model complex relationships",
        "Predictions remain linear"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Polynomial or interaction terms allow Logistic Regression to capture non-linear relationships."
    },
    {
      "id": 71,
      "questionText": "Scenario: Logistic Regression mistakenly applied to a continuous target. Observation?",
      "options": [
        "Model ignores continuous targets",
        "Model works fine",
        "Training fails",
        "Logistic Regression is inappropriate; should use Linear Regression"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Logistic Regression predicts probabilities for categorical outcomes, not continuous values."
    },
    {
      "id": 72,
      "questionText": "Scenario: Logistic Regression applied with L2 regularization too strong. Observation?",
      "options": [
        "Training fails",
        "Model ignores features",
        "Coefficients increase automatically",
        "Model may underfit due to overly shrunk coefficients"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Excessive regularization reduces coefficient magnitude, potentially underfitting."
    },
    {
      "id": 73,
      "questionText": "Scenario: Logistic Regression applied to imbalanced multiclass problem. Observation?",
      "options": [
        "Training fails",
        "Class weighting or resampling recommended for each class",
        "All predictions go to majority class",
        "Model ignores minority classes"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Balanced weighting improves prediction performance for minority classes."
    },
    {
      "id": 74,
      "questionText": "Scenario: Logistic Regression applied with a very high learning rate. Observation?",
      "options": [
        "Gradient descent may diverge",
        "Model ignores features",
        "Predictions remain constant",
        "Model converges perfectly"
      ],
      "correctAnswerIndex": 0,
      "explanation": "A learning rate that is too high causes optimization to overshoot, preventing convergence."
    },
    {
      "id": 75,
      "questionText": "Scenario: Logistic Regression applied with probability calibration methods. Observation?",
      "options": [
        "Training fails",
        "Calibration has no effect",
        "Platt scaling or isotonic regression improves predicted probabilities",
        "Model ignores calibration"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Probability calibration aligns predicted probabilities with true outcomes, improving reliability."
    },
    {
      "id": 76,
      "questionText": "Scenario: Logistic Regression applied with small training data and no regularization. Observation?",
      "options": [
        "Model may overfit due to high variance",
        "Training fails",
        "Model underfits automatically",
        "Model ignores features"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Small datasets can cause overfitting; regularization helps stabilize coefficients."
    },
    {
      "id": 77,
      "questionText": "Scenario: Logistic Regression applied with a feature highly correlated with target. Observation?",
      "options": [
        "Model ignores the feature",
        "Training fails",
        "Model coefficient will likely be significant",
        "Regularization removes feature automatically"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Highly predictive features typically get larger coefficients, unless heavily regularized."
    },
    {
      "id": 78,
      "questionText": "Scenario: Logistic Regression applied with overcomplete features (more features than samples). Observation?",
      "options": [
        "Training fails automatically",
        "Regularization is essential to prevent overfitting",
        "Model always underfits",
        "All features ignored"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Too many features relative to samples increase overfitting risk; regularization stabilizes the model."
    },
    {
      "id": 79,
      "questionText": "Scenario: Logistic Regression applied with extreme class imbalance. Observation?",
      "options": [
        "Minority class predictions improve automatically",
        "Predictions dominated by majority class without class weighting",
        "Training fails",
        "All probabilities become 0.5"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Without adjustments, the model predicts the majority class most of the time."
    },
    {
      "id": 80,
      "questionText": "Scenario: Logistic Regression applied to multiclass problem using one-vs-rest. Observation?",
      "options": [
        "Binary logistic regression fails automatically",
        "Training fails",
        "Each class is treated as positive against all others",
        "Only majority class is predicted"
      ],
      "correctAnswerIndex": 2,
      "explanation": "One-vs-rest handles multiclass by training separate classifiers for each class."
    },
    {
      "id": 81,
      "questionText": "Scenario: Logistic Regression applied with very high regularization. Observation?",
      "options": [
        "Predictions become perfect",
        "Coefficients shrink too much; model may underfit",
        "Coefficients increase automatically",
        "Training fails"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Strong regularization reduces coefficient magnitude excessively, potentially underfitting."
    },
    {
      "id": 82,
      "questionText": "Scenario: Logistic Regression applied with a categorical feature incorrectly encoded as ordinal. Observation?",
      "options": [
        "Model ignores feature automatically",
        "Training fails",
        "Model may misinterpret ordering; predictions may be biased",
        "Predictions remain correct"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Ordinal encoding imposes an artificial order; one-hot encoding is better for nominal features."
    },
    {
      "id": 83,
      "questionText": "Scenario: Logistic Regression applied with overlapping class distributions. Observation?",
      "options": [
        "Training fails",
        "Model may have misclassifications; probabilities indicate uncertainty",
        "Model ignores overlapping features",
        "All predictions are correct"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Overlap leads to inherent classification errors; logistic regression outputs probability estimates reflecting uncertainty."
    },
    {
      "id": 84,
      "questionText": "Scenario: Logistic Regression applied with threshold set very high (0.9). Observation?",
      "options": [
        "Few positives predicted; recall decreases",
        "Training fails",
        "All predictions become positive",
        "Model underfits automatically"
      ],
      "correctAnswerIndex": 0,
      "explanation": "High threshold reduces positive predictions, improving precision but lowering recall."
    },
    {
      "id": 85,
      "questionText": "Scenario: Logistic Regression applied with L1 regularization on sparse dataset. Observation?",
      "options": [
        "All coefficients increase",
        "Model ignores sparse features",
        "Training fails automatically",
        "Some coefficients shrink to zero, performing feature selection"
      ],
      "correctAnswerIndex": 3,
      "explanation": "L1 encourages sparsity, zeroing out uninformative features."
    },
    {
      "id": 86,
      "questionText": "Scenario: Logistic Regression trained without feature scaling. Observation?",
      "options": [
        "Model fails automatically",
        "Optimization may be slower but predictions unaffected",
        "Predictions become invalid",
        "Coefficients ignored"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Unscaled features can slow gradient-based optimization; for an unregularized model the final probabilities are unchanged."
    },
    {
      "id": 87,
      "questionText": "Scenario: Logistic Regression applied with a learning rate that is too low. Observation?",
      "options": [
        "Predictions remain constant",
        "Training fails",
        "Model underfits automatically",
        "Convergence is slow but eventual solution correct"
      ],
      "correctAnswerIndex": 3,
      "explanation": "A small learning rate slows gradient descent but does not prevent eventual convergence."
    },
    {
      "id": 88,
      "questionText": "Scenario: Logistic Regression applied to dataset with multicollinearity. Observation?",
      "options": [
        "Coefficients unstable; variance inflated",
        "Training fails",
        "Model ignores correlated features automatically",
        "Predictions unaffected"
      ],
      "correctAnswerIndex": 0,
      "explanation": "High correlation among features inflates coefficient variance, making estimates unstable."
    },
    {
      "id": 89,
      "questionText": "Scenario: Logistic Regression applied with probability calibration. Observation?",
      "options": [
        "Platt scaling or isotonic regression improves probability estimates",
        "Calibration has no effect",
        "Training fails",
        "Model ignores calibration"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Probability calibration aligns predicted probabilities with actual outcomes."
    },
    {
      "id": 90,
      "questionText": "Scenario: Logistic Regression applied with interaction terms added. Observation?",
      "options": [
        "Training fails",
        "Model can capture combined effect of features",
        "Model ignores interactions",
        "Predictions become random"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Interaction terms allow logistic regression to model dependencies between features."
    },
    {
      "id": 91,
      "questionText": "Scenario: Logistic Regression applied to dataset with outliers. Observation?",
      "options": [
        "Training fails",
        "Predictions unaffected",
        "Model ignores outliers automatically",
        "Outliers may distort coefficients; regularization helps"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Outliers can skew estimates; regularization stabilizes coefficients."
    },
    {
      "id": 92,
      "questionText": "Scenario: Logistic Regression applied with small sample size and large number of features. Observation?",
      "options": [
        "Model underfits automatically",
        "High risk of overfitting; regularization essential",
        "Predictions remain perfect",
        "Training fails"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Many features relative to samples increase variance; regularization prevents overfitting."
    },
    {
      "id": 93,
      "questionText": "Scenario: Logistic Regression applied with multiclass softmax. Observation?",
      "options": [
        "Model ignores extra classes",
        "Training fails",
        "Binary thresholding works automatically",
        "Softmax outputs probabilities for each class"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Softmax generalizes logistic regression to multiple classes, outputting probabilities."
    },
    {
      "id": 94,
      "questionText": "Scenario: Logistic Regression applied with polynomial features. Observation?",
      "options": [
        "Predictions remain linear",
        "Model ignores polynomial terms",
        "Training fails",
        "Can model non-linear relationships between features"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Polynomial terms allow logistic regression to capture non-linear effects."
    },
    {
      "id": 95,
      "questionText": "Scenario: Logistic Regression overfits the training set. Recommended action?",
      "options": [
        "Training fails automatically",
        "Model ignores training data",
        "Predictions perfect on test set",
        "Apply regularization or reduce features"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Regularization or feature selection reduces overfitting and improves generalization."
    },
    {
      "id": 96,
      "questionText": "Scenario: Logistic Regression applied to dataset with skewed class distribution. Observation?",
      "options": [
        "Training fails",
        "Predictions always majority class",
        "Use class weights or resampling",
        "Model ignores minority class automatically"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Adjusting for imbalance improves minority class prediction performance."
    },
    {
      "id": 97,
      "questionText": "Scenario: Logistic Regression applied with continuous predictors on very different scales. Observation?",
      "options": [
        "Model fails automatically",
        "Training error zero",
        "Predictions invalid",
        "Scaling helps optimization; predictions unchanged"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Scaling speeds convergence; for an unregularized model it does not affect the final predictions."
    },
    {
      "id": 98,
      "questionText": "Scenario: Logistic Regression applied with threshold adjustment. Observation?",
      "options": [
        "Threshold has no effect",
        "Changing threshold trades off precision and recall",
        "Training fails",
        "Predictions remain constant"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Adjusting the threshold changes the classification cutoff, affecting false positives and negatives."
    },
    {
      "id": 99,
      "questionText": "Scenario: Logistic Regression applied with noisy data. Observation?",
      "options": [
        "Noise is ignored automatically",
        "Model may misclassify; regularization improves stability",
        "Predictions perfect",
        "Training fails"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Noise affects coefficient estimation; regularization improves generalization."
    },
    {
      "id": 100,
      "questionText": "Scenario: Logistic Regression applied with missing categorical features. Observation?",
      "options": [
        "Model ignores missing categories automatically",
        "Training fails",
        "Predictions unaffected",
        "Imputation or encoding needed before training"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Missing categorical data must be imputed or encoded for logistic regression to work."
    }
  ],
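  "codeAppendix": [
    {
      "note": "Added companion sketch, not part of the original 100 questions: a minimal NumPy illustration of how the sigmoid maps log-odds to probabilities (Q2, Q16), how a coefficient shifts the log-odds linearly (Q4, Q14), and how odds relate to probability (Q3). The intercept and coefficient values are hypothetical, chosen only for illustration.",
      "language": "python",
      "code": [
        "import numpy as np",
        "",
        "def sigmoid(z):",
        "    return 1.0 / (1.0 + np.exp(-z))",
        "",
        "b0, b1 = -1.0, 0.8           # hypothetical intercept and coefficient",
        "x = 2.0                      # a single feature value",
        "log_odds = b0 + b1 * x       # the logit is linear in x",
        "p = sigmoid(log_odds)        # probability of the positive class",
        "odds = p / (1 - p)           # odds = P(success) / P(failure)",
        "print(p, np.isclose(np.log(odds), log_odds))  # log(odds) recovers the logit"
      ]
    },
    {
      "note": "Added companion sketch, not part of the original 100 questions: a hedged scikit-learn example (assuming scikit-learn is installed; the dataset is synthetic) tying together class weighting for imbalance (Q6, Q30, Q53), L1-induced sparsity (Q9, Q25), probability outputs (Q24, Q29), and threshold tuning for the precision/recall trade-off (Q33, Q55, Q98).",
      "language": "python",
      "code": [
        "import numpy as np",
        "from sklearn.datasets import make_classification",
        "from sklearn.linear_model import LogisticRegression",
        "from sklearn.metrics import precision_score, recall_score",
        "from sklearn.model_selection import train_test_split",
        "",
        "# Synthetic, imbalanced binary dataset (roughly 90/10 class split)",
        "X, y = make_classification(n_samples=2000, n_features=20, weights=[0.9, 0.1], random_state=0)",
        "X_tr, X_te, y_tr, y_te = train_test_split(X, y, stratify=y, random_state=0)",
        "",
        "# class_weight='balanced' reweights the log-loss toward the minority class;",
        "# penalty='l1' with a compatible solver drives some coefficients to exactly zero",
        "clf = LogisticRegression(penalty='l1', solver='liblinear', class_weight='balanced')",
        "clf.fit(X_tr, y_tr)",
        "",
        "proba = clf.predict_proba(X_te)[:, 1]  # P(class 1): sigmoid of the fitted log-odds",
        "for threshold in (0.3, 0.5, 0.7):      # raising the threshold trades recall for precision",
        "    pred = (proba >= threshold).astype(int)",
        "    print(threshold, precision_score(y_te, pred), recall_score(y_te, pred))",
        "print('zero coefficients:', int((clf.coef_ == 0).sum()))  # L1 sparsity"
      ]
    }
  ]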
}