{
"title": "Gaussian Mixture Models (GMM) Mastery: 100 MCQs",
"description": "A comprehensive set of 100 MCQs covering Gaussian Mixture Models (GMM) from fundamental intuition to EM algorithm, applications, soft clustering, covariance types, and real-world scenarios.",
"questions": [
{
"id": 1,
"questionText": "What is the primary goal of a Gaussian Mixture Model (GMM)?",
"options": [
"To reduce data dimensionality",
"To perform supervised classification",
"To model data as a mixture of multiple Gaussian distributions",
"To perform hard clustering like K-Means"
],
"correctAnswerIndex": 2,
"explanation": "GMM assumes data is generated from a mixture of multiple Gaussian distributions."
},
{
"id": 2,
"questionText": "GMM is mainly used for:",
"options": [
"Unsupervised clustering",
"Time series forecasting",
"Supervised learning",
"Regression problems"
],
"correctAnswerIndex": 0,
"explanation": "GMM is an unsupervised clustering technique."
},
{
"id": 3,
"questionText": "GMM provides which type of clustering?",
"options": [
"Soft probabilistic clustering",
"Binary classification",
"Hard clustering",
"Feature selection"
],
"correctAnswerIndex": 0,
"explanation": "GMM assigns probability of belonging to each cluster."
},
{
"id": 4,
"questionText": "Which algorithm is commonly used to train GMM?",
"options": [
"Backpropagation",
"Expectation-Maximization (EM)",
"Gradient Descent",
"Genetic Algorithm"
],
"correctAnswerIndex": 1,
"explanation": "EM algorithm is used to estimate GMM parameters."
},
{
"id": 5,
"questionText": "In GMM, each Gaussian distribution is called a:",
"options": [
"Component",
"Kernel",
"Label",
"Loss function"
],
"correctAnswerIndex": 0,
"explanation": "Each Gaussian is a component of the mixture model."
},
{
"id": 6,
"questionText": "The output of GMM for each data point is:",
"options": [
"Single cluster label only",
"Probability distribution over all clusters",
"Binary classification output",
"Feature importance scores"
],
"correctAnswerIndex": 1,
"explanation": "GMM gives probability of belonging to each Gaussian cluster."
},
{
"id": 7,
"questionText": "GMM assumes that data in each cluster follows a:",
"options": [
"Uniform distribution",
"Poisson distribution",
"Exponential distribution",
"Gaussian distribution"
],
"correctAnswerIndex": 3,
"explanation": "Each cluster is modeled as a Normal (Gaussian) distribution."
},
{
"id": 8,
"questionText": "The number of Gaussian components in GMM must be:",
"options": [
"Always equal to number of features",
"Unlimited by default",
"Automatically detected always",
"Predefined manually in most implementations"
],
"correctAnswerIndex": 3,
"explanation": "We usually define number of components (k) before training."
},
{
"id": 9,
"questionText": "GMM is a generalization of which algorithm?",
"options": [
"Random Forest",
"K-Means",
"Naive Bayes",
"SVM"
],
"correctAnswerIndex": 1,
"explanation": "GMM is probabilistic extension of K-Means with soft assignments."
},
{
"id": 10,
"questionText": "Which of the following does GMM estimate?",
"options": [
"Mean only",
"Variance only",
"Only class probabilities",
"Mean and covariance of each Gaussian"
],
"correctAnswerIndex": 3,
"explanation": "GMM learns mean, covariance, and weight of each Gaussian."
},
{
"id": 11,
"questionText": "What does 'mixture' mean in GMM?",
"options": [
"Multiple datasets combined",
"Adding noise to data",
"Combination of several Gaussian probability distributions",
"Blending of supervised and unsupervised learning"
],
"correctAnswerIndex": 2,
"explanation": "GMM models data as sum of multiple Gaussian distributions."
},
{
"id": 12,
"questionText": "GMM can model clusters with:",
"options": [
"Only spherical shapes",
"Elliptical and varying density clusters",
"Only equal-sized circles",
"Linear boundaries only"
],
"correctAnswerIndex": 1,
"explanation": "GMM supports ellipsoidal clusters due to covariance matrix."
},
{
"id": 13,
"questionText": "Which type of covariance can GMM use?",
"options": [
"Only diagonal",
"Only identity matrix",
"Only full",
"Full, Diagonal, Tied, Spherical"
],
"correctAnswerIndex": 3,
"explanation": "GMM allows flexible covariance structure options."
},
{
"id": 14,
"questionText": "GMM is best suited when clusters are:",
"options": [
"Categorical only",
"Perfectly separated",
"Non-overlapping and spherical",
"Overlapping and elliptical"
],
"correctAnswerIndex": 3,
"explanation": "GMM is ideal for overlapping soft clusters."
},
{
"id": 15,
"questionText": "Which step assigns probability to each point belonging to a cluster in EM?",
"options": [
"Initialization step",
"Maximization step",
"Regularization step",
"Expectation step"
],
"correctAnswerIndex": 3,
"explanation": "E-step calculates responsibility of each Gaussian."
},
{
"id": 16,
"questionText": "Which step updates parameters of Gaussians in EM?",
"options": [
"Prediction step",
"Maximization step",
"Normalization step",
"Expectation step"
],
"correctAnswerIndex": 1,
"explanation": "M-step updates means, covariances, and weights."
},
{
"id": 17,
"questionText": "In GMM, mixing coefficients must:",
"options": [
"Sum to one",
"Be greater than one",
"Sum to zero",
"Be negative"
],
"correctAnswerIndex": 0,
"explanation": "Mixing weights represent probabilities → must sum to 1."
},
{
"id": 18,
"questionText": "What does a high responsibility value mean in GMM?",
"options": [
"Point is outlier",
"Point strongly belongs to that Gaussian",
"Cluster is ignored",
"Model has failed"
],
"correctAnswerIndex": 1,
"explanation": "High responsibility = high probability of belonging to that cluster."
},
{
"id": 19,
"questionText": "GMM belongs to which model category?",
"options": [
"Neural network",
"Discriminative model",
"Purely geometric model",
"Generative probabilistic model"
],
"correctAnswerIndex": 3,
"explanation": "GMM models probability distribution of data (generative)."
},
{
"id": 20,
"questionText": "GMM is useful in:",
"options": [
"Anomaly detection",
"Speaker recognition",
"Image segmentation",
"All of the above"
],
"correctAnswerIndex": 3,
"explanation": "GMM widely used in real-world probabilistic applications."
},
{
"id": 21,
"questionText": "What does GMM do better than K-Means?",
"options": [
"Handle only linear separability",
"Ignore feature scale",
"Model overlapping probabilistic clusters",
"Assign hard labels only"
],
"correctAnswerIndex": 2,
"explanation": "GMM outperforms K-Means when clusters overlap."
},
{
"id": 22,
"questionText": "A drawback of GMM is:",
"options": [
"Requires predefined number of clusters",
"No probabilistic output",
"Only works with categorical data",
"Cannot handle continuous data"
],
"correctAnswerIndex": 0,
"explanation": "Number of components must be specified before fitting."
},
{
"id": 23,
"questionText": "What initialization is commonly used for GMM?",
"options": [
"K-Means centroids",
"Random labels",
"Bootstrap resampling",
"Gradient descent"
],
"correctAnswerIndex": 0,
"explanation": "K-Means is commonly used to initialize cluster means."
},
{
"id": 24,
"questionText": "GMM uses which principle to maximize likelihood?",
"options": [
"Gradient Descent",
"Expectation-Maximization",
"Dropout Regularization",
"Least Squares Minimization"
],
"correctAnswerIndex": 1,
"explanation": "EM is a likelihood-based optimization method."
},
{
"id": 25,
"questionText": "The final decision in GMM assigns a point to the cluster with:",
"options": [
"Highest variance",
"Minimum distance",
"Maximum probability (responsibility)",
"Random chance"
],
"correctAnswerIndex": 2,
"explanation": "Soft → hard label by selecting cluster with highest probability."
},
{
"id": 26,
"questionText": "What role does covariance play in GMM?",
"options": [
"Defines cluster shape and orientation",
"Sets learning rate",
"Controls number of clusters",
"Removes noise features"
],
"correctAnswerIndex": 0,
"explanation": "Covariance allows modeling elliptical and rotated clusters."
},
{
"id": 27,
"questionText": "What does GMM maximize during training?",
"options": [
"Sum of distances",
"Training accuracy",
"Entropy of clusters",
"Total log-likelihood of data"
],
"correctAnswerIndex": 3,
"explanation": "EM optimizes log-likelihood of observing the data."
},
{
"id": 28,
"questionText": "What happens if two Gaussian components overlap heavily?",
"options": [
"Clusters merge into one automatically",
"GMM switches to K-Means automatically",
"GMM handles it with soft probabilities",
"GMM fails immediately"
],
"correctAnswerIndex": 2,
"explanation": "GMM assigns probabilities to each cluster; overlap is handled naturally via soft assignments."
},
{
"id": 29,
"questionText": "Which metric can be used to choose the number of components in GMM?",
"options": [
"Accuracy",
"F1-score",
"Learning rate",
"AIC (Akaike Information Criterion)"
],
"correctAnswerIndex": 3,
"explanation": "AIC and BIC help balance model fit with complexity to select number of components."
},
{
"id": 30,
"questionText": "BIC in GMM is used to:",
"options": [
"Normalize probabilities",
"Estimate cluster assignments",
"Update mean and covariance",
"Select number of clusters considering model complexity"
],
"correctAnswerIndex": 3,
"explanation": "Bayesian Information Criterion penalizes overly complex models to avoid overfitting."
},
{
"id": 31,
"questionText": "Scenario: You have overlapping clusters in 2D data. Which approach is suitable?",
"options": [
"K-Means",
"DBSCAN with minPts=1",
"Hierarchical clustering",
"Gaussian Mixture Model"
],
"correctAnswerIndex": 3,
"explanation": "GMM handles overlapping clusters probabilistically, unlike K-Means which assigns hard labels."
},
{
"id": 32,
"questionText": "Which initialization can improve EM convergence?",
"options": [
"Using covariance as identity for all clusters",
"Setting all means to zero",
"Using K-Means centroids as initial means",
"Randomly choosing one data point"
],
"correctAnswerIndex": 2,
"explanation": "K-Means initialization provides better starting points for EM algorithm."
},
{
"id": 33,
"questionText": "Soft clustering means:",
"options": [
"Each point has a probability of belonging to multiple clusters",
"Clusters are linearly separable",
"Clusters have equal sizes",
"Each point is assigned only one cluster"
],
"correctAnswerIndex": 0,
"explanation": "Soft clustering assigns probabilities rather than hard labels."
},
{
"id": 34,
"questionText": "What does EM algorithm alternate between?",
"options": [
"Expectation (E-step) and Maximization (M-step)",
"Gradient descent and regularization",
"Probability normalization and prediction",
"Clustering and dimensionality reduction"
],
"correctAnswerIndex": 0,
"explanation": "EM alternates between computing responsibilities and updating parameters."
},
{
"id": 35,
"questionText": "Scenario: A GMM component has nearly zero weight after EM. Implication?",
"options": [
"Component is insignificant; may be removed",
"Covariance matrix is singular",
"Model is invalid",
"Training failed"
],
"correctAnswerIndex": 0,
"explanation": "Very low weight indicates component contributes little to data representation."
},
{
"id": 36,
"questionText": "Scenario: Covariance type set to 'spherical'. Effect?",
"options": [
"All clusters are circular with equal variance in all directions",
"EM cannot converge",
"Clusters can have arbitrary orientation",
"Covariance ignored"
],
"correctAnswerIndex": 0,
"explanation": "Spherical covariance assumes isotropic variance for each cluster."
},
{
"id": 37,
"questionText": "Scenario: Covariance type 'full' in GMM allows:",
"options": [
"One-dimensional data only",
"Elliptical clusters with arbitrary orientation",
"Only circular clusters",
"Clusters of equal size"
],
"correctAnswerIndex": 1,
"explanation": "Full covariance allows each cluster to have a unique covariance matrix."
},
{
"id": 38,
"questionText": "Scenario: High-dimensional data with GMM. Challenge?",
"options": [
"Covariance estimation becomes difficult",
"EM converges faster",
"Number of clusters reduces automatically",
"Probabilities become binary"
],
"correctAnswerIndex": 0,
"explanation": "Estimating full covariance in high dimensions is prone to overfitting."
},
{
"id": 39,
"questionText": "Scenario: Using diagonal covariance instead of full. Advantage?",
"options": [
"Reduces number of parameters, faster EM",
"EM fails automatically",
"Improves cluster overlap",
"Always increases accuracy"
],
"correctAnswerIndex": 0,
"explanation": "Diagonal covariance assumes feature independence, reducing parameters."
},
{
"id": 40,
"questionText": "Scenario: Two clusters have very close means. EM may:",
"options": [
"Fail to run",
"Merge clusters automatically",
"Assign probabilities reflecting overlap",
"Ignore one cluster"
],
"correctAnswerIndex": 2,
"explanation": "Soft assignments reflect uncertainty in overlapping regions."
},
{
"id": 41,
"questionText": "Scenario: EM initialized with random means may:",
"options": [
"Converge to local maxima",
"Merge clusters automatically",
"Fail to compute responsibilities",
"Always find global maximum"
],
"correctAnswerIndex": 0,
"explanation": "EM is sensitive to initialization; may converge to local optima."
},
{
"id": 42,
"questionText": "Scenario: GMM applied to anomaly detection. How?",
"options": [
"EM ignores outliers automatically",
"Points with low likelihood under model considered anomalies",
"Points assigned to smallest cluster are anomalies",
"Clusters removed, remaining points are anomalies"
],
"correctAnswerIndex": 1,
"explanation": "GMM can detect outliers by evaluating likelihood of each point."
},
{
"id": 43,
"questionText": "Scenario: Overfitting in GMM can occur when:",
"options": [
"Dataset is small but clusters well-separated",
"Covariance type is spherical",
"Initialization uses K-Means",
"Too many components relative to data size"
],
"correctAnswerIndex": 3,
"explanation": "Excessive components may fit noise rather than true structure."
},
{
"id": 44,
"questionText": "Scenario: Selecting number of components with BIC. Lower BIC means:",
"options": [
"EM failed",
"Overfitting",
"Better balance between fit and complexity",
"Worse model"
],
"correctAnswerIndex": 2,
"explanation": "Lower BIC indicates model explains data well without unnecessary complexity."
},
{
"id": 45,
"questionText": "Scenario: You normalize features before GMM. Benefit?",
"options": [
"EM converges slower",
"Number of components reduces",
"Prevents dominance by large-scale features",
"Covariance becomes singular"
],
"correctAnswerIndex": 2,
"explanation": "Feature scaling ensures all features contribute equally to Gaussian components."
},
{
"id": 46,
"questionText": "Scenario: You use too few components in GMM. Likely effect?",
"options": [
"Covariance becomes negative",
"Overfitting",
"EM fails to converge",
"Underfitting, poor representation of clusters"
],
"correctAnswerIndex": 3,
"explanation": "Too few Gaussians cannot capture underlying data structure."
},
{
"id": 47,
"questionText": "Scenario: Two clusters have different variances. Which GMM setting captures this?",
"options": [
"Tied covariance",
"Diagonal covariance only",
"Full covariance",
"Spherical covariance"
],
"correctAnswerIndex": 2,
"explanation": "Full covariance allows different shapes and orientations per cluster."
},
{
"id": 48,
"questionText": "Scenario: Real-world use of GMM in speaker recognition relies on:",
"options": [
"Only frequency features",
"Decision trees",
"Modeling probability distribution of feature vectors",
"Hard cluster labels"
],
"correctAnswerIndex": 2,
"explanation": "GMM models the distribution of features for each speaker."
},
{
"id": 49,
"questionText": "Scenario: Image segmentation with GMM. How?",
"options": [
"K-Means replaces EM",
"Pixels assigned randomly",
"Only grayscale images",
"Pixels assigned probabilistically to color clusters"
],
"correctAnswerIndex": 3,
"explanation": "GMM can segment images based on color probability distributions."
},
{
"id": 50,
"questionText": "Scenario: Overlapping Gaussian clusters, hard assignment used. Effect?",
"options": [
"Covariance becomes zero",
"EM improves accuracy",
"Soft assignment automatically applied",
"Information loss, may misclassify points"
],
"correctAnswerIndex": 3,
"explanation": "Hard labels ignore uncertainty and may misrepresent overlapping regions."
},
{
"id": 51,
"questionText": "Scenario: You have categorical features. GMM suitability?",
"options": [
"Perfectly suitable",
"Requires only diagonal covariance",
"Not ideal; GMM assumes continuous features",
"Number of components is irrelevant"
],
"correctAnswerIndex": 2,
"explanation": "GMM assumes continuous-valued features for Gaussian distributions."
},
{
"id": 52,
"questionText": "Scenario: EM converges slowly. Common solutions?",
"options": [
"Ignore convergence criteria",
"Better initialization, feature scaling, or fewer components",
"Increase number of iterations indefinitely",
"Switch to K-Means always"
],
"correctAnswerIndex": 1,
"explanation": "Good initialization and preprocessing improve EM efficiency."
},
{
"id": 53,
"questionText": "Scenario: GMM for anomaly detection in network traffic. Strategy?",
"options": [
"Ignore rare events",
"Hard assign all points to clusters",
"Flag low likelihood points as anomalies",
"Use K-Means to cluster anomalies"
],
"correctAnswerIndex": 2,
"explanation": "Points with low probability under the model are considered outliers."
},
{
"id": 54,
"questionText": "Scenario: EM oscillates between two solutions. Likely reason?",
"options": [
"Number of components too small",
"Poor initialization causing local maxima",
"Covariance matrix full",
"Using diagonal covariance"
],
"correctAnswerIndex": 1,
"explanation": "EM can get stuck in local maxima if initial parameters are suboptimal."
},
{
"id": 55,
"questionText": "Scenario: Soft clustering probability threshold used to assign points. Advantage?",
"options": [
"EM fails automatically",
"Always misclassifies clusters",
"Allows filtering uncertain points",
"Covariance ignored"
],
"correctAnswerIndex": 2,
"explanation": "Thresholding allows flexible assignment based on confidence."
},
{
"id": 56,
"questionText": "Scenario: Tied covariance for all components. Effect?",
"options": [
"All clusters share same shape/orientation",
"Covariance ignored",
"Number of components reduced automatically",
"EM cannot run"
],
"correctAnswerIndex": 0,
"explanation": "Tied covariance forces all Gaussians to share same covariance matrix."
},
{
"id": 57,
"questionText": "Scenario: GMM with diagonal covariance and correlated features. Effect?",
"options": [
"EM automatically switches to full",
"Perfect modeling",
"Covariance fails to compute",
"Model may be suboptimal due to ignored correlations"
],
"correctAnswerIndex": 3,
"explanation": "Diagonal covariance ignores feature correlations, which may reduce accuracy."
},
{
"id": 58,
"questionText": "Scenario: High-dimensional data, small sample size. Solution for GMM?",
"options": [
"Always full covariance",
"Increase number of components",
"Ignore dimension scaling",
"Use diagonal covariance or reduce dimensions with PCA"
],
"correctAnswerIndex": 3,
"explanation": "Reducing parameters via diagonal covariance or PCA helps prevent overfitting."
},
{
"id": 59,
"questionText": "Scenario: GMM applied on time-series data. Typical strategy?",
"options": [
"Switch to K-Means only",
"Model features extracted per time window",
"Use raw timestamps directly",
"Ignore temporal ordering"
],
"correctAnswerIndex": 1,
"explanation": "Features are extracted per window to apply GMM effectively."
},
{
"id": 60,
"questionText": "Scenario: You wish to compare two GMMs with different components. Metric?",
"options": [
"Silhouette score",
"Mean squared error",
"AIC/BIC",
"Accuracy"
],
"correctAnswerIndex": 2,
"explanation": "AIC/BIC compare likelihoods while penalizing complexity."
},
{
"id": 61,
"questionText": "Scenario: EM stops improving log-likelihood. Action?",
"options": [
"Reinitialize covariance",
"Converged; training can stop",
"Increase components automatically",
"Reduce number of iterations"
],
"correctAnswerIndex": 1,
"explanation": "Plateau in log-likelihood indicates convergence."
},
{
"id": 62,
"questionText": "Scenario: GMM applied on overlapping clusters. Which is true?",
"options": [
"Clusters must be separated manually",
"GMM fails completely",
"K-Means always better",
"Soft assignments handle ambiguity better than hard assignments"
],
"correctAnswerIndex": 3,
"explanation": "Soft probabilistic assignments capture uncertainty in overlapping regions."
},
{
"id": 63,
"questionText": "Scenario: EM converges to degenerate covariance. Likely cause?",
"options": [
"Full covariance required",
"Component collapsed to single data point",
"Too few iterations",
"Random initialization"
],
"correctAnswerIndex": 1,
"explanation": "A component may shrink variance toward zero, creating numerical issues."
},
{
"id": 64,
"questionText": "Scenario: Choosing between GMM and K-Means. Advantage of GMM?",
"options": [
"Always faster",
"Works only on spherical clusters",
"No parameters needed",
"Handles overlapping, probabilistic clusters"
],
"correctAnswerIndex": 3,
"explanation": "GMM can model overlap using probabilities, unlike K-Means."
},
{
"id": 65,
"questionText": "Scenario: GMM with full covariance. Drawback?",
"options": [
"EM does not converge",
"Cannot model elliptical clusters",
"Higher number of parameters; risk of overfitting",
"Soft assignment ignored"
],
"correctAnswerIndex": 2,
"explanation": "Full covariance requires estimating many parameters, sensitive to small datasets."
},
{
"id": 66,
"questionText": "Scenario: GMM for anomaly detection. Threshold selection?",
"options": [
"Use hard assignments only",
"Ignore low-probability points",
"Based on likelihood distribution of normal data",
"Random selection"
],
"correctAnswerIndex": 2,
"explanation": "Threshold is chosen based on typical likelihood values of normal data."
},
{
"id": 67,
"questionText": "Scenario: EM alternates but log-likelihood decreases. Cause?",
"options": [
"Soft assignments ignored",
"Convergence achieved",
"Number of components too low",
"Numerical instability or rounding errors"
],
"correctAnswerIndex": 3,
"explanation": "Numerical issues may cause slight decreases; regularization may help."
},
{
"id": 68,
"questionText": "Scenario: GMM on skewed data. Observation?",
"options": [
"Clusters automatically corrected",
"EM converges faster",
"Gaussian assumption may be violated",
"Covariance ignored"
],
"correctAnswerIndex": 2,
"explanation": "GMM assumes Gaussian distribution; skewed data may reduce accuracy."
},
{
"id": 69,
"questionText": "Scenario: Using GMM to compress data. How?",
"options": [
"Remove clusters randomly",
"Use only spherical covariance",
"Switch to K-Means",
"Represent each point by cluster responsibilities instead of raw features"
],
"correctAnswerIndex": 3,
"explanation": "Probabilities can serve as a compact representation of original features."
},
{
"id": 70,
"questionText": "Scenario: GMM applied to multimodal distribution. Advantage?",
"options": [
"Models multiple peaks naturally using several Gaussians",
"Covariance must be diagonal",
"Cannot handle multimodal data",
"Requires K-Means preprocessing"
],
"correctAnswerIndex": 0,
"explanation": "Multiple Gaussian components allow GMM to capture multimodal patterns."
},
{
"id": 71,
"questionText": "Scenario: You want to model customer segments with GMM. Best approach?",
"options": [
"Use K-Means only",
"Use soft clustering to capture overlapping preferences",
"Assign each customer randomly",
"Ignore continuous features"
],
"correctAnswerIndex": 1,
"explanation": "Soft clustering captures overlapping behavior patterns between segments."
},
{
"id": 72,
"questionText": "Scenario: Data has outliers. How does GMM handle them?",
"options": [
"EM fails automatically",
"Outliers dominate clusters",
"Outliers get low probabilities; may need special handling",
"Clusters merge to include outliers"
],
"correctAnswerIndex": 2,
"explanation": "Outliers have low likelihood under Gaussian components, reducing their impact."
},
{
"id": 73,
"questionText": "Scenario: Using EM, you notice very slow convergence. Possible fix?",
"options": [
"Switch to hierarchical clustering",
"Randomly assign clusters",
"Improve initialization, scale features, or reduce number of components",
"Increase iterations without changes"
],
"correctAnswerIndex": 2,
"explanation": "Good initialization and preprocessing improve EM efficiency."
},
{
"id": 74,
"questionText": "Scenario: You apply GMM to cluster text embeddings. Challenge?",
"options": [
"High-dimensionality may make full covariance unstable",
"GMM works perfectly without change",
"Soft assignments are ignored",
"Clusters must be one-dimensional"
],
"correctAnswerIndex": 0,
"explanation": "High-dimensional embeddings require dimensionality reduction or diagonal covariance."
},
{
"id": 75,
"questionText": "Scenario: You use GMM with tied covariance. Effect?",
"options": [
"EM fails automatically",
"Clusters become one",
"Each cluster has unique covariance",
"All clusters share same covariance matrix"
],
"correctAnswerIndex": 3,
"explanation": "Tied covariance enforces shared shape/orientation for all Gaussians."
},
{
"id": 76,
"questionText": "Scenario: GMM model seems overfitted. Possible reasons?",
"options": [
"Too many components or full covariance on small data",
"Diagonal covariance used",
"EM converged perfectly",
"Spherical covariance used"
],
"correctAnswerIndex": 0,
"explanation": "Overfitting occurs when model complexity exceeds data size."
},
{
"id": 77,
"questionText": "Scenario: You want interpretable clusters with GMM. Strategy?",
"options": [
"Soft assignments ignored",
"Use fewer components and diagonal covariance",
"Random initialization",
"Use full covariance and many components"
],
"correctAnswerIndex": 1,
"explanation": "Fewer components and simpler covariance improve interpretability."
},
{
"id": 78,
"questionText": "Scenario: You combine GMM with PCA. Purpose?",
"options": [
"Ignore low-variance features",
"Increase number of clusters",
"EM converges automatically",
"Reduce dimensionality to stabilize covariance estimation"
],
"correctAnswerIndex": 3,
"explanation": "PCA reduces features, improving parameter estimation in high-dimensional GMM."
},
{
"id": 79,
"questionText": "Scenario: GMM applied on customer churn probability. Approach?",
"options": [
"Use only binary labels",
"Switch to linear regression",
"Hard cluster and ignore overlap",
"Model feature distribution, assign probabilities to segments"
],
"correctAnswerIndex": 3,
"explanation": "GMM probabilistically models customer groups for better segmentation."
},
{
"id": 80,
"questionText": "Scenario: You notice EM stuck in local optimum. Solution?",
"options": [
"Use fewer components only",
"Increase iterations infinitely",
"Try multiple random initializations",
"Ignore convergence"
],
"correctAnswerIndex": 2,
"explanation": "Multiple initializations reduce chance of getting trapped in local maxima."
},
{
"id": 81,
"questionText": "Scenario: Using GMM for speaker verification. Why suitable?",
"options": [
"K-Means performs better",
"Captures probabilistic feature distributions per speaker",
"Covariance must be spherical",
"Hard assignments suffice"
],
"correctAnswerIndex": 1,
"explanation": "GMM models variability in speaker features effectively."
},
{
"id": 82,
"questionText": "Scenario: EM applied to data with small clusters. Challenge?",
"options": [
"Soft assignments fail",
"Small clusters may be ignored or collapse",
"Covariance ignored",
"EM always finds them"
],
"correctAnswerIndex": 1,
"explanation": "EM may assign negligible weight to very small clusters."
},
{
"id": 83,
"questionText": "Scenario: GMM used in anomaly detection for fraud. Key idea?",
"options": [
"Transactions are clustered randomly",
"High-probability transactions flagged",
"Transactions with low probability under model are flagged",
"EM ignores rare patterns"
],
"correctAnswerIndex": 2,
"explanation": "Low-likelihood points are potential anomalies."
},
{
"id": 84,
"questionText": "Scenario: GMM applied to multimodal sensor readings. Advantage?",
"options": [
"EM fails automatically",
"Single Gaussian suffices",
"Spherical covariance required",
"Multiple peaks captured by several Gaussian components"
],
"correctAnswerIndex": 3,
"explanation": "Multiple Gaussians allow modeling multimodal distributions."
},
{
"id": 85,
"questionText": "Scenario: You notice EM log-likelihood plateauing early. Interpretation?",
"options": [
"EM converged; model parameters stabilized",
"Covariance ignored",
"EM failed",
"Increase components immediately"
],
"correctAnswerIndex": 0,
"explanation": "Plateau indicates convergence of EM algorithm."
},
{
"id": 86,
"questionText": "Scenario: GMM with full covariance on small dataset. Risk?",
"options": [
"Overfitting due to too many parameters",
"EM fails automatically",
"Better modeling",
"Clusters ignored"
],
"correctAnswerIndex": 0,
"explanation": "Full covariance requires estimating many parameters, risky for small data."
},
{
"id": 87,
"questionText": "Scenario: GMM used for image segmentation. Key step?",
"options": [
"Only grayscale images allowed",
"K-Means replacement",
"Assign pixels probabilistically to color clusters",
"Ignore soft assignments"
],
"correctAnswerIndex": 2,
"explanation": "Soft clustering assigns pixels to Gaussian components representing colors."
},
{
"id": 88,
"questionText": "Scenario: GMM applied to text clustering with embeddings. Key step?",
"options": [
"Increase components arbitrarily",
"Soft assignments ignored",
"Use dimensionality reduction to stabilize covariance estimation",
"EM fails automatically"
],
"correctAnswerIndex": 2,
"explanation": "Reducing dimensionality prevents overfitting in high-dimensional embeddings."
},
{
"id": 89,
"questionText": "Scenario: GMM with overlapping clusters. Hard labels used. Effect?",
"options": [
"Improves EM convergence",
"Loss of probabilistic information; misclassification possible",
"EM fails",
"Covariance ignored"
],
"correctAnswerIndex": 1,
"explanation": "Hard labels ignore uncertainty in overlapping regions."
},
{
"id": 90,
"questionText": "Scenario: EM for GMM shows component collapsing. Solution?",
"options": [
"Use diagonal covariance always",
"Reduce number of components only",
"Regularize covariance to prevent singularities",
"Ignore component"
],
"correctAnswerIndex": 2,
"explanation": "Regularization prevents variances from collapsing to zero."
},
{
"id": 91,
"questionText": "Scenario: Choosing GMM vs K-Means. Advantage?",
"options": [
"K-Means always faster",
"EM not required",
"GMM ignores probabilities",
"Soft assignment, handles overlap and ellipsoidal clusters"
],
"correctAnswerIndex": 3,
"explanation": "GMM models overlapping clusters with probabilistic assignments."
},
{
"id": 92,
"questionText": "Scenario: GMM applied for speech synthesis. Benefit?",
"options": [
"Clusters speakers only",
"Soft assignments ignored",
"Models probability distribution of acoustic features",
"Only spherical clusters allowed"
],
"correctAnswerIndex": 2,
"explanation": "GMM captures feature distributions needed for realistic speech synthesis."
},
{
"id": 93,
"questionText": "Scenario: EM fails to converge. Possible reasons?",
"options": [
"Too few iterations",
"Poor initialization, singular covariance, or incompatible data",
"Full covariance always fails",
"Soft assignments ignored"
],
"correctAnswerIndex": 1,
"explanation": "Bad initialization or degenerate covariances can prevent EM convergence."
},
{
"id": 94,
"questionText": "Scenario: You need probabilistic clustering on 2D sensor data. Choice?",
"options": [
"Hierarchical clustering",
"DBSCAN only",
"GMM with appropriate covariance type",
"K-Means only"
],
"correctAnswerIndex": 2,
"explanation": "GMM provides soft probabilistic clustering for continuous data."
},
{
"id": 95,
"questionText": "Scenario: GMM used for anomaly detection in machinery. How?",
"options": [
"Use hard assignments only",
"Cluster readings randomly",
"Flag low-likelihood sensor readings as anomalies",
"Ignore rare readings"
],
"correctAnswerIndex": 2,
"explanation": "Points that do not fit any Gaussian component well can indicate anomalies."
},
{
"id": 96,
"questionText": "Scenario: High-dimensional embeddings, GMM unstable. Solution?",
"options": [
"Increase components",
"Ignore scaling",
"Reduce dimensions with PCA or use diagonal covariance",
"Use full covariance only"
],
"correctAnswerIndex": 2,
"explanation": "Reducing parameters prevents overfitting and stabilizes EM."
},
{
"id": 97,
"questionText": "Scenario: GMM for multimodal customer behavior. Advantage?",
"options": [
"K-Means better",
"Multiple components capture different behavioral modes",
"Single Gaussian suffices",
"Covariance must be spherical"
],
"correctAnswerIndex": 1,
"explanation": "Different modes of behavior can be modeled with multiple Gaussians."
},
{
"id": 98,
"questionText": "Scenario: EM converges but log-likelihood decreases occasionally. Cause?",
"options": [
"Numerical instability; can use regularization",
"Covariance ignored",
"EM failed",
"Increase components"
],
"correctAnswerIndex": 0,
"explanation": "Small decreases may occur due to rounding errors; regularization helps."
},
{
"id": 99,
"questionText": "Scenario: Soft assignment threshold applied. Benefit?",
"options": [
"Filter uncertain points or highlight ambiguous memberships",
"EM fails automatically",
"Covariance ignored",
"Always misclassifies clusters"
],
"correctAnswerIndex": 0,
"explanation": "Thresholding helps decide which points are confidently assigned."
},
{
"id": 100,
"questionText": "Scenario: GMM applied in finance for risk clustering. Advantage?",
"options": [
"Requires categorical data only",
"Models probability distribution of different risk profiles",
"Soft assignment ignored",
"Clusters randomly"
],
"correctAnswerIndex": 1,
"explanation": "GMM provides probabilistic segmentation of customers or assets by risk levels."
}
]
}