{ "title": "Gaussian Mixture Models (GMM) Mastery: 100 MCQs", "description": "A comprehensive set of 100 MCQs covering Gaussian Mixture Models (GMM) from fundamental intuition to EM algorithm, applications, soft clustering, covariance types, and real-world scenarios.", "questions": [ { "id": 1, "questionText": "What is the primary goal of a Gaussian Mixture Model (GMM)?", "options": [ "To reduce data dimensionality", "To perform supervised classification", "To model data as a mixture of multiple Gaussian distributions", "To perform hard clustering like K-Means" ], "correctAnswerIndex": 2, "explanation": "GMM assumes data is generated from a mixture of multiple Gaussian distributions." }, { "id": 2, "questionText": "GMM is mainly used for:", "options": [ "Unsupervised clustering", "Time series forecasting", "Supervised learning", "Regression problems" ], "correctAnswerIndex": 0, "explanation": "GMM is an unsupervised clustering technique." }, { "id": 3, "questionText": "GMM provides which type of clustering?", "options": [ "Soft probabilistic clustering", "Binary classification", "Hard clustering", "Feature selection" ], "correctAnswerIndex": 0, "explanation": "GMM assigns probability of belonging to each cluster." }, { "id": 4, "questionText": "Which algorithm is commonly used to train GMM?", "options": [ "Backpropagation", "Expectation-Maximization (EM)", "Gradient Descent", "Genetic Algorithm" ], "correctAnswerIndex": 1, "explanation": "EM algorithm is used to estimate GMM parameters." }, { "id": 5, "questionText": "In GMM, each Gaussian distribution is called a:", "options": [ "Component", "Kernel", "Label", "Loss function" ], "correctAnswerIndex": 0, "explanation": "Each Gaussian is a component of the mixture model." }, { "id": 6, "questionText": "The output of GMM for each data point is:", "options": [ "Single cluster label only", "Probability distribution over all clusters", "Binary classification output", "Feature importance scores" ], "correctAnswerIndex": 1, "explanation": "GMM gives probability of belonging to each Gaussian cluster." }, { "id": 7, "questionText": "GMM assumes that data in each cluster follows a:", "options": [ "Uniform distribution", "Poisson distribution", "Exponential distribution", "Gaussian distribution" ], "correctAnswerIndex": 3, "explanation": "Each cluster is modeled as a Normal (Gaussian) distribution." }, { "id": 8, "questionText": "The number of Gaussian components in GMM must be:", "options": [ "Always equal to number of features", "Unlimited by default", "Automatically detected always", "Predefined manually in most implementations" ], "correctAnswerIndex": 3, "explanation": "We usually define number of components (k) before training." }, { "id": 9, "questionText": "GMM is a generalization of which algorithm?", "options": [ "Random Forest", "K-Means", "Naive Bayes", "SVM" ], "correctAnswerIndex": 1, "explanation": "GMM is probabilistic extension of K-Means with soft assignments." }, { "id": 10, "questionText": "Which of the following does GMM estimate?", "options": [ "Mean only", "Variance only", "Only class probabilities", "Mean and covariance of each Gaussian" ], "correctAnswerIndex": 3, "explanation": "GMM learns mean, covariance, and weight of each Gaussian." 
}, { "id": 11, "questionText": "What does 'mixture' mean in GMM?", "options": [ "Multiple datasets combined", "Adding noise to data", "Combination of several Gaussian probability distributions", "Blending of supervised and unsupervised learning" ], "correctAnswerIndex": 2, "explanation": "GMM models data as sum of multiple Gaussian distributions." }, { "id": 12, "questionText": "GMM can model clusters with:", "options": [ "Only spherical shapes", "Elliptical and varying density clusters", "Only equal-sized circles", "Linear boundaries only" ], "correctAnswerIndex": 1, "explanation": "GMM supports ellipsoidal clusters due to covariance matrix." }, { "id": 13, "questionText": "Which type of covariance can GMM use?", "options": [ "Only diagonal", "Only identity matrix", "Only full", "Full, Diagonal, Tied, Spherical" ], "correctAnswerIndex": 3, "explanation": "GMM allows flexible covariance structure options." }, { "id": 14, "questionText": "GMM is best suited when clusters are:", "options": [ "Categorical only", "Perfectly separated", "Non-overlapping and spherical", "Overlapping and elliptical" ], "correctAnswerIndex": 3, "explanation": "GMM is ideal for overlapping soft clusters." }, { "id": 15, "questionText": "Which step assigns probability to each point belonging to a cluster in EM?", "options": [ "Initialization step", "Maximization step", "Regularization step", "Expectation step" ], "correctAnswerIndex": 3, "explanation": "E-step calculates responsibility of each Gaussian." }, { "id": 16, "questionText": "Which step updates parameters of Gaussians in EM?", "options": [ "Prediction step", "Maximization step", "Normalization step", "Expectation step" ], "correctAnswerIndex": 1, "explanation": "M-step updates means, covariances, and weights." }, { "id": 17, "questionText": "In GMM, mixing coefficients must:", "options": [ "Sum to one", "Be greater than one", "Sum to zero", "Be negative" ], "correctAnswerIndex": 0, "explanation": "Mixing weights represent probabilities → must sum to 1." }, { "id": 18, "questionText": "What does a high responsibility value mean in GMM?", "options": [ "Point is outlier", "Point strongly belongs to that Gaussian", "Cluster is ignored", "Model has failed" ], "correctAnswerIndex": 1, "explanation": "High responsibility = high probability of belonging to that cluster." }, { "id": 19, "questionText": "GMM belongs to which model category?", "options": [ "Neural network", "Discriminative model", "Purely geometric model", "Generative probabilistic model" ], "correctAnswerIndex": 3, "explanation": "GMM models probability distribution of data (generative)." }, { "id": 20, "questionText": "GMM is useful in:", "options": [ "Anomaly detection", "Speaker recognition", "Image segmentation", "All of the above" ], "correctAnswerIndex": 3, "explanation": "GMM widely used in real-world probabilistic applications." }, { "id": 21, "questionText": "What does GMM do better than K-Means?", "options": [ "Handle only linear separability", "Ignore feature scale", "Model overlapping probabilistic clusters", "Assign hard labels only" ], "correctAnswerIndex": 2, "explanation": "GMM outperforms K-Means when clusters overlap." }, { "id": 22, "questionText": "A drawback of GMM is:", "options": [ "Requires predefined number of clusters", "No probabilistic output", "Only works with categorical data", "Cannot handle continuous data" ], "correctAnswerIndex": 0, "explanation": "Number of components must be specified before fitting." 
}, { "id": 23, "questionText": "What initialization is commonly used for GMM?", "options": [ "K-Means centroids", "Random labels", "Bootstrap resampling", "Gradient descent" ], "correctAnswerIndex": 0, "explanation": "K-Means is commonly used to initialize cluster means." }, { "id": 24, "questionText": "GMM uses which principle to maximize likelihood?", "options": [ "Gradient Descent", "Expectation-Maximization", "Dropout Regularization", "Least Squares Minimization" ], "correctAnswerIndex": 1, "explanation": "EM is a likelihood-based optimization method." }, { "id": 25, "questionText": "The final decision in GMM assigns a point to the cluster with:", "options": [ "Highest variance", "Minimum distance", "Maximum probability (responsibility)", "Random chance" ], "correctAnswerIndex": 2, "explanation": "Soft → hard label by selecting cluster with highest probability." }, { "id": 26, "questionText": "What role does covariance play in GMM?", "options": [ "Defines cluster shape and orientation", "Sets learning rate", "Controls number of clusters", "Removes noise features" ], "correctAnswerIndex": 0, "explanation": "Covariance allows modeling elliptical and rotated clusters." }, { "id": 27, "questionText": "What does GMM maximize during training?", "options": [ "Sum of distances", "Training accuracy", "Entropy of clusters", "Total log-likelihood of data" ], "correctAnswerIndex": 3, "explanation": "EM optimizes log-likelihood of observing the data." }, { "id": 28, "questionText": "What happens if two Gaussian components overlap heavily?", "options": [ "Clusters merge into one automatically", "GMM switches to K-Means automatically", "GMM handles it with soft probabilities", "GMM fails immediately" ], "correctAnswerIndex": 2, "explanation": "GMM assigns probabilities to each cluster; overlap is handled naturally via soft assignments." }, { "id": 29, "questionText": "Which metric can be used to choose the number of components in GMM?", "options": [ "Accuracy", "F1-score", "Learning rate", "AIC (Akaike Information Criterion)" ], "correctAnswerIndex": 3, "explanation": "AIC and BIC help balance model fit with complexity to select number of components." }, { "id": 30, "questionText": "BIC in GMM is used to:", "options": [ "Normalize probabilities", "Estimate cluster assignments", "Update mean and covariance", "Select number of clusters considering model complexity" ], "correctAnswerIndex": 3, "explanation": "Bayesian Information Criterion penalizes overly complex models to avoid overfitting." }, { "id": 31, "questionText": "Scenario: You have overlapping clusters in 2D data. Which approach is suitable?", "options": [ "K-Means", "DBSCAN with minPts=1", "Hierarchical clustering", "Gaussian Mixture Model" ], "correctAnswerIndex": 3, "explanation": "GMM handles overlapping clusters probabilistically, unlike K-Means which assigns hard labels." }, { "id": 32, "questionText": "Which initialization can improve EM convergence?", "options": [ "Using covariance as identity for all clusters", "Setting all means to zero", "Using K-Means centroids as initial means", "Randomly choosing one data point" ], "correctAnswerIndex": 2, "explanation": "K-Means initialization provides better starting points for EM algorithm." 
}, { "id": 33, "questionText": "Soft clustering means:", "options": [ "Each point has a probability of belonging to multiple clusters", "Clusters are linearly separable", "Clusters have equal sizes", "Each point is assigned only one cluster" ], "correctAnswerIndex": 0, "explanation": "Soft clustering assigns probabilities rather than hard labels." }, { "id": 34, "questionText": "What does EM algorithm alternate between?", "options": [ "Expectation (E-step) and Maximization (M-step)", "Gradient descent and regularization", "Probability normalization and prediction", "Clustering and dimensionality reduction" ], "correctAnswerIndex": 0, "explanation": "EM alternates between computing responsibilities and updating parameters." }, { "id": 35, "questionText": "Scenario: A GMM component has nearly zero weight after EM. Implication?", "options": [ "Component is insignificant; may be removed", "Covariance matrix is singular", "Model is invalid", "Training failed" ], "correctAnswerIndex": 0, "explanation": "Very low weight indicates component contributes little to data representation." }, { "id": 36, "questionText": "Scenario: Covariance type set to 'spherical'. Effect?", "options": [ "All clusters are circular with equal variance in all directions", "EM cannot converge", "Clusters can have arbitrary orientation", "Covariance ignored" ], "correctAnswerIndex": 0, "explanation": "Spherical covariance assumes isotropic variance for each cluster." }, { "id": 37, "questionText": "Scenario: Covariance type 'full' in GMM allows:", "options": [ "One-dimensional data only", "Elliptical clusters with arbitrary orientation", "Only circular clusters", "Clusters of equal size" ], "correctAnswerIndex": 1, "explanation": "Full covariance allows each cluster to have a unique covariance matrix." }, { "id": 38, "questionText": "Scenario: High-dimensional data with GMM. Challenge?", "options": [ "Covariance estimation becomes difficult", "EM converges faster", "Number of clusters reduces automatically", "Probabilities become binary" ], "correctAnswerIndex": 0, "explanation": "Estimating full covariance in high dimensions is prone to overfitting." }, { "id": 39, "questionText": "Scenario: Using diagonal covariance instead of full. Advantage?", "options": [ "Reduces number of parameters, faster EM", "EM fails automatically", "Improves cluster overlap", "Always increases accuracy" ], "correctAnswerIndex": 0, "explanation": "Diagonal covariance assumes feature independence, reducing parameters." }, { "id": 40, "questionText": "Scenario: Two clusters have very close means. EM may:", "options": [ "Fail to run", "Merge clusters automatically", "Assign probabilities reflecting overlap", "Ignore one cluster" ], "correctAnswerIndex": 2, "explanation": "Soft assignments reflect uncertainty in overlapping regions." }, { "id": 41, "questionText": "Scenario: EM initialized with random means may:", "options": [ "Converge to local maxima", "Merge clusters automatically", "Fail to compute responsibilities", "Always find global maximum" ], "correctAnswerIndex": 0, "explanation": "EM is sensitive to initialization; may converge to local optima." }, { "id": 42, "questionText": "Scenario: GMM applied to anomaly detection. 
How?", "options": [ "EM ignores outliers automatically", "Points with low likelihood under model considered anomalies", "Points assigned to smallest cluster are anomalies", "Clusters removed, remaining points are anomalies" ], "correctAnswerIndex": 1, "explanation": "GMM can detect outliers by evaluating likelihood of each point." }, { "id": 43, "questionText": "Scenario: Overfitting in GMM can occur when:", "options": [ "Dataset is small but clusters well-separated", "Covariance type is spherical", "Initialization uses K-Means", "Too many components relative to data size" ], "correctAnswerIndex": 3, "explanation": "Excessive components may fit noise rather than true structure." }, { "id": 44, "questionText": "Scenario: Selecting number of components with BIC. Lower BIC means:", "options": [ "EM failed", "Overfitting", "Better balance between fit and complexity", "Worse model" ], "correctAnswerIndex": 2, "explanation": "Lower BIC indicates model explains data well without unnecessary complexity." }, { "id": 45, "questionText": "Scenario: You normalize features before GMM. Benefit?", "options": [ "EM converges slower", "Number of components reduces", "Prevents dominance by large-scale features", "Covariance becomes singular" ], "correctAnswerIndex": 2, "explanation": "Feature scaling ensures all features contribute equally to Gaussian components." }, { "id": 46, "questionText": "Scenario: You use too few components in GMM. Likely effect?", "options": [ "Covariance becomes negative", "Overfitting", "EM fails to converge", "Underfitting, poor representation of clusters" ], "correctAnswerIndex": 3, "explanation": "Too few Gaussians cannot capture underlying data structure." }, { "id": 47, "questionText": "Scenario: Two clusters have different variances. Which GMM setting captures this?", "options": [ "Tied covariance", "Diagonal covariance only", "Full covariance", "Spherical covariance" ], "correctAnswerIndex": 2, "explanation": "Full covariance allows different shapes and orientations per cluster." }, { "id": 48, "questionText": "Scenario: Real-world use of GMM in speaker recognition relies on:", "options": [ "Only frequency features", "Decision trees", "Modeling probability distribution of feature vectors", "Hard cluster labels" ], "correctAnswerIndex": 2, "explanation": "GMM models the distribution of features for each speaker." }, { "id": 49, "questionText": "Scenario: Image segmentation with GMM. How?", "options": [ "K-Means replaces EM", "Pixels assigned randomly", "Only grayscale images", "Pixels assigned probabilistically to color clusters" ], "correctAnswerIndex": 3, "explanation": "GMM can segment images based on color probability distributions." }, { "id": 50, "questionText": "Scenario: Overlapping Gaussian clusters, hard assignment used. Effect?", "options": [ "Covariance becomes zero", "EM improves accuracy", "Soft assignment automatically applied", "Information loss, may misclassify points" ], "correctAnswerIndex": 3, "explanation": "Hard labels ignore uncertainty and may misrepresent overlapping regions." }, { "id": 51, "questionText": "Scenario: You have categorical features. GMM suitability?", "options": [ "Perfectly suitable", "Requires only diagonal covariance", "Not ideal; GMM assumes continuous features", "Number of components is irrelevant" ], "correctAnswerIndex": 2, "explanation": "GMM assumes continuous-valued features for Gaussian distributions." }, { "id": 52, "questionText": "Scenario: EM converges slowly. 
"options": [ "Ignore convergence criteria", "Better initialization, feature scaling, or fewer components", "Increase number of iterations indefinitely", "Switch to K-Means always" ], "correctAnswerIndex": 1, "explanation": "Good initialization and preprocessing improve EM efficiency." }, { "id": 53, "questionText": "Scenario: GMM for anomaly detection in network traffic. Strategy?", "options": [ "Ignore rare events", "Hard assign all points to clusters", "Flag low likelihood points as anomalies", "Use K-Means to cluster anomalies" ], "correctAnswerIndex": 2, "explanation": "Points with low probability under the model are considered outliers." }, { "id": 54, "questionText": "Scenario: EM oscillates between two solutions. Likely reason?", "options": [ "Number of components too small", "Poor initialization causing local maxima", "Covariance matrix full", "Using diagonal covariance" ], "correctAnswerIndex": 1, "explanation": "EM can get stuck in local maxima if initial parameters are suboptimal." }, { "id": 55, "questionText": "Scenario: Soft clustering probability threshold used to assign points. Advantage?", "options": [ "EM fails automatically", "Always misclassifies clusters", "Allows filtering uncertain points", "Covariance ignored" ], "correctAnswerIndex": 2, "explanation": "Thresholding allows flexible assignment based on confidence." }, { "id": 56, "questionText": "Scenario: Tied covariance for all components. Effect?", "options": [ "All clusters share the same shape/orientation", "Covariance ignored", "Number of components reduced automatically", "EM cannot run" ], "correctAnswerIndex": 0, "explanation": "Tied covariance forces all Gaussians to share the same covariance matrix." }, { "id": 57, "questionText": "Scenario: GMM with diagonal covariance and correlated features. Effect?", "options": [ "EM automatically switches to full", "Perfect modeling", "Covariance fails to compute", "Model may be suboptimal due to ignored correlations" ], "correctAnswerIndex": 3, "explanation": "Diagonal covariance ignores feature correlations, which may reduce accuracy." }, { "id": 58, "questionText": "Scenario: High-dimensional data, small sample size. Solution for GMM?", "options": [ "Always full covariance", "Increase number of components", "Ignore dimension scaling", "Use diagonal covariance or reduce dimensions with PCA" ], "correctAnswerIndex": 3, "explanation": "Reducing parameters via diagonal covariance or PCA helps prevent overfitting." }, { "id": 59, "questionText": "Scenario: GMM applied to time-series data. Typical strategy?", "options": [ "Switch to K-Means only", "Model features extracted per time window", "Use raw timestamps directly", "Ignore temporal ordering" ], "correctAnswerIndex": 1, "explanation": "Features are extracted per window to apply GMM effectively." }, { "id": 60, "questionText": "Scenario: You wish to compare two GMMs with different components. Metric?", "options": [ "Silhouette score", "Mean squared error", "AIC/BIC", "Accuracy" ], "correctAnswerIndex": 2, "explanation": "AIC/BIC compare likelihoods while penalizing complexity." }, { "id": 61, "questionText": "Scenario: EM stops improving log-likelihood. Action?", "options": [ "Reinitialize covariance", "Converged; training can stop", "Increase components automatically", "Reduce number of iterations" ], "correctAnswerIndex": 1, "explanation": "A plateau in log-likelihood indicates convergence." }, { "id": 62, "questionText": "Scenario: GMM applied to overlapping clusters. Which is true?",
"options": [ "Clusters must be separated manually", "GMM fails completely", "K-Means always better", "Soft assignments handle ambiguity better than hard assignments" ], "correctAnswerIndex": 3, "explanation": "Soft probabilistic assignments capture uncertainty in overlapping regions." }, { "id": 63, "questionText": "Scenario: EM converges to degenerate covariance. Likely cause?", "options": [ "Full covariance required", "Component collapsed to single data point", "Too few iterations", "Random initialization" ], "correctAnswerIndex": 1, "explanation": "A component may shrink its variance toward zero, creating numerical issues." }, { "id": 64, "questionText": "Scenario: Choosing between GMM and K-Means. Advantage of GMM?", "options": [ "Always faster", "Works only on spherical clusters", "No parameters needed", "Handles overlapping, probabilistic clusters" ], "correctAnswerIndex": 3, "explanation": "GMM can model overlap using probabilities, unlike K-Means." }, { "id": 65, "questionText": "Scenario: GMM with full covariance. Drawback?", "options": [ "EM does not converge", "Cannot model elliptical clusters", "Higher number of parameters; risk of overfitting", "Soft assignment ignored" ], "correctAnswerIndex": 2, "explanation": "Full covariance requires estimating many parameters and is sensitive to small datasets." }, { "id": 66, "questionText": "Scenario: GMM for anomaly detection. Threshold selection?", "options": [ "Use hard assignments only", "Ignore low-probability points", "Based on likelihood distribution of normal data", "Random selection" ], "correctAnswerIndex": 2, "explanation": "The threshold is chosen based on typical likelihood values of normal data." }, { "id": 67, "questionText": "Scenario: EM alternates but log-likelihood decreases. Cause?", "options": [ "Soft assignments ignored", "Convergence achieved", "Number of components too low", "Numerical instability or rounding errors" ], "correctAnswerIndex": 3, "explanation": "Numerical issues may cause slight decreases; regularization may help." }, { "id": 68, "questionText": "Scenario: GMM on skewed data. Observation?", "options": [ "Clusters automatically corrected", "EM converges faster", "Gaussian assumption may be violated", "Covariance ignored" ], "correctAnswerIndex": 2, "explanation": "GMM assumes a Gaussian distribution per cluster; skewed data may reduce accuracy." }, { "id": 69, "questionText": "Scenario: Using GMM to compress data. How?", "options": [ "Remove clusters randomly", "Use only spherical covariance", "Switch to K-Means", "Represent each point by cluster responsibilities instead of raw features" ], "correctAnswerIndex": 3, "explanation": "Probabilities can serve as a compact representation of the original features." }, { "id": 70, "questionText": "Scenario: GMM applied to a multimodal distribution. Advantage?", "options": [ "Models multiple peaks naturally using several Gaussians", "Covariance must be diagonal", "Cannot handle multimodal data", "Requires K-Means preprocessing" ], "correctAnswerIndex": 0, "explanation": "Multiple Gaussian components allow GMM to capture multimodal patterns." }, { "id": 71, "questionText": "Scenario: You want to model customer segments with GMM. Best approach?", "options": [ "Use K-Means only", "Use soft clustering to capture overlapping preferences", "Assign each customer randomly", "Ignore continuous features" ], "correctAnswerIndex": 1, "explanation": "Soft clustering captures overlapping behavior patterns between segments." }, { "id": 72, "questionText": "Scenario: Data has outliers. How does GMM handle them?",
"options": [ "EM fails automatically", "Outliers dominate clusters", "Outliers get low probabilities; may need special handling", "Clusters merge to include outliers" ], "correctAnswerIndex": 2, "explanation": "Outliers receive low likelihood under the Gaussian components and may require special handling." }, { "id": 73, "questionText": "Scenario: Using EM, you notice very slow convergence. Possible fix?", "options": [ "Switch to hierarchical clustering", "Randomly assign clusters", "Improve initialization, scale features, or reduce number of components", "Increase iterations without changes" ], "correctAnswerIndex": 2, "explanation": "Good initialization and preprocessing improve EM efficiency." }, { "id": 74, "questionText": "Scenario: You apply GMM to cluster text embeddings. Challenge?", "options": [ "High dimensionality may make full covariance unstable", "GMM works perfectly without change", "Soft assignments are ignored", "Clusters must be one-dimensional" ], "correctAnswerIndex": 0, "explanation": "High-dimensional embeddings require dimensionality reduction or diagonal covariance." }, { "id": 75, "questionText": "Scenario: You use GMM with tied covariance. Effect?", "options": [ "EM fails automatically", "Clusters become one", "Each cluster has unique covariance", "All clusters share the same covariance matrix" ], "correctAnswerIndex": 3, "explanation": "Tied covariance enforces a shared shape/orientation for all Gaussians." }, { "id": 76, "questionText": "Scenario: GMM model seems overfitted. Possible reasons?", "options": [ "Too many components or full covariance on small data", "Diagonal covariance used", "EM converged perfectly", "Spherical covariance used" ], "correctAnswerIndex": 0, "explanation": "Overfitting occurs when model complexity exceeds data size." }, { "id": 77, "questionText": "Scenario: You want interpretable clusters with GMM. Strategy?", "options": [ "Soft assignments ignored", "Use fewer components and diagonal covariance", "Random initialization", "Use full covariance and many components" ], "correctAnswerIndex": 1, "explanation": "Fewer components and simpler covariance improve interpretability." }, { "id": 78, "questionText": "Scenario: You combine GMM with PCA. Purpose?", "options": [ "Ignore low-variance features", "Increase number of clusters", "EM converges automatically", "Reduce dimensionality to stabilize covariance estimation" ], "correctAnswerIndex": 3, "explanation": "PCA reduces features, improving parameter estimation in high-dimensional GMM." }, { "id": 79, "questionText": "Scenario: GMM applied to customer churn probability. Approach?", "options": [ "Use only binary labels", "Switch to linear regression", "Hard cluster and ignore overlap", "Model feature distribution, assign probabilities to segments" ], "correctAnswerIndex": 3, "explanation": "GMM probabilistically models customer groups for better segmentation." }, { "id": 80, "questionText": "Scenario: You notice EM stuck in local optimum. Solution?", "options": [ "Use fewer components only", "Increase iterations infinitely", "Try multiple random initializations", "Ignore convergence" ], "correctAnswerIndex": 2, "explanation": "Multiple initializations reduce the chance of getting trapped in local maxima." }, { "id": 81, "questionText": "Scenario: Using GMM for speaker verification. Why suitable?",
"options": [ "K-Means performs better", "Captures probabilistic feature distributions per speaker", "Covariance must be spherical", "Hard assignments suffice" ], "correctAnswerIndex": 1, "explanation": "GMM models variability in speaker features effectively." }, { "id": 82, "questionText": "Scenario: EM applied to data with small clusters. Challenge?", "options": [ "Soft assignments fail", "Small clusters may be ignored or collapse", "Covariance ignored", "EM always finds them" ], "correctAnswerIndex": 1, "explanation": "EM may assign negligible weight to very small clusters." }, { "id": 83, "questionText": "Scenario: GMM used in anomaly detection for fraud. Key idea?", "options": [ "Transactions are clustered randomly", "High-probability transactions flagged", "Transactions with low probability under the model are flagged", "EM ignores rare patterns" ], "correctAnswerIndex": 2, "explanation": "Low-likelihood points are potential anomalies." }, { "id": 84, "questionText": "Scenario: GMM applied to multimodal sensor readings. Advantage?", "options": [ "EM fails automatically", "Single Gaussian suffices", "Spherical covariance required", "Multiple peaks captured by several Gaussian components" ], "correctAnswerIndex": 3, "explanation": "Multiple Gaussians allow modeling multimodal distributions." }, { "id": 85, "questionText": "Scenario: You notice EM log-likelihood plateauing early. Interpretation?", "options": [ "EM converged; model parameters stabilized", "Covariance ignored", "EM failed", "Increase components immediately" ], "correctAnswerIndex": 0, "explanation": "A plateau indicates convergence of the EM algorithm." }, { "id": 86, "questionText": "Scenario: GMM with full covariance on a small dataset. Risk?", "options": [ "Overfitting due to too many parameters", "EM fails automatically", "Better modeling", "Clusters ignored" ], "correctAnswerIndex": 0, "explanation": "Full covariance requires estimating many parameters, which is risky for small data." }, { "id": 87, "questionText": "Scenario: GMM used for image segmentation. Key step?", "options": [ "Only grayscale images allowed", "K-Means replacement", "Assign pixels probabilistically to color clusters", "Ignore soft assignments" ], "correctAnswerIndex": 2, "explanation": "Soft clustering assigns pixels to Gaussian components representing colors." }, { "id": 88, "questionText": "Scenario: GMM applied to text clustering with embeddings. Key step?", "options": [ "Increase components arbitrarily", "Soft assignments ignored", "Use dimensionality reduction to stabilize covariance estimation", "EM fails automatically" ], "correctAnswerIndex": 2, "explanation": "Reducing dimensionality prevents overfitting in high-dimensional embeddings." }, { "id": 89, "questionText": "Scenario: GMM with overlapping clusters. Hard labels used. Effect?", "options": [ "Improves EM convergence", "Loss of probabilistic information; misclassification possible", "EM fails", "Covariance ignored" ], "correctAnswerIndex": 1, "explanation": "Hard labels ignore uncertainty in overlapping regions." }, { "id": 90, "questionText": "Scenario: EM for GMM shows component collapsing. Solution?", "options": [ "Use diagonal covariance always", "Reduce number of components only", "Regularize covariance to prevent singularities", "Ignore component" ], "correctAnswerIndex": 2, "explanation": "Regularization prevents variances from collapsing to zero." }, { "id": 91, "questionText": "Scenario: Choosing GMM vs K-Means. Advantage?",
Advantage?", "options": [ "K-Means always faster", "EM not required", "GMM ignores probabilities", "Soft assignment, handles overlap and ellipsoidal clusters" ], "correctAnswerIndex": 3, "explanation": "GMM models overlapping clusters with probabilistic assignments." }, { "id": 92, "questionText": "Scenario: GMM applied for speech synthesis. Benefit?", "options": [ "Clusters speakers only", "Soft assignments ignored", "Models probability distribution of acoustic features", "Only spherical clusters allowed" ], "correctAnswerIndex": 2, "explanation": "GMM captures feature distributions needed for realistic speech synthesis." }, { "id": 93, "questionText": "Scenario: EM fails to converge. Possible reasons?", "options": [ "Too few iterations", "Poor initialization, singular covariance, or incompatible data", "Full covariance always fails", "Soft assignments ignored" ], "correctAnswerIndex": 1, "explanation": "Bad initialization or degenerate covariances can prevent EM convergence." }, { "id": 94, "questionText": "Scenario: You need probabilistic clustering on 2D sensor data. Choice?", "options": [ "Hierarchical clustering", "DBSCAN only", "GMM with appropriate covariance type", "K-Means only" ], "correctAnswerIndex": 2, "explanation": "GMM provides soft probabilistic clustering for continuous data." }, { "id": 95, "questionText": "Scenario: GMM used for anomaly detection in machinery. How?", "options": [ "Use hard assignments only", "Cluster readings randomly", "Flag low-likelihood sensor readings as anomalies", "Ignore rare readings" ], "correctAnswerIndex": 2, "explanation": "Points that do not fit any Gaussian component well can indicate anomalies." }, { "id": 96, "questionText": "Scenario: High-dimensional embeddings, GMM unstable. Solution?", "options": [ "Increase components", "Ignore scaling", "Reduce dimensions with PCA or use diagonal covariance", "Use full covariance only" ], "correctAnswerIndex": 2, "explanation": "Reducing parameters prevents overfitting and stabilizes EM." }, { "id": 97, "questionText": "Scenario: GMM for multimodal customer behavior. Advantage?", "options": [ "K-Means better", "Multiple components capture different behavioral modes", "Single Gaussian suffices", "Covariance must be spherical" ], "correctAnswerIndex": 1, "explanation": "Different modes of behavior can be modeled with multiple Gaussians." }, { "id": 98, "questionText": "Scenario: EM converges but log-likelihood decreases occasionally. Cause?", "options": [ "Numerical instability; can use regularization", "Covariance ignored", "EM failed", "Increase components" ], "correctAnswerIndex": 0, "explanation": "Small decreases may occur due to rounding errors; regularization helps." }, { "id": 99, "questionText": "Scenario: Soft assignment threshold applied. Benefit?", "options": [ "Filter uncertain points or highlight ambiguous memberships", "EM fails automatically", "Covariance ignored", "Always misclassifies clusters" ], "correctAnswerIndex": 0, "explanation": "Thresholding helps decide which points are confidently assigned." }, { "id": 100, "questionText": "Scenario: GMM applied in finance for risk clustering. Advantage?", "options": [ "Requires categorical data only", "Models probability distribution of different risk profiles", "Soft assignment ignored", "Clusters randomly" ], "correctAnswerIndex": 1, "explanation": "GMM provides probabilistic segmentation of customers or assets by risk levels." } ] }