{
  "title": "Gaussian Mixture Models (GMM) Mastery: 100 MCQs",
  "description": "A comprehensive set of 100 MCQs covering Gaussian Mixture Models (GMM), from fundamental intuition to the EM algorithm, applications, soft clustering, covariance types, and real-world scenarios.",
  "questions": [
    {
      "id": 1,
      "questionText": "What is the primary goal of a Gaussian Mixture Model (GMM)?",
      "options": [
        "To reduce data dimensionality",
        "To perform supervised classification",
        "To model data as a mixture of multiple Gaussian distributions",
        "To perform hard clustering like K-Means"
      ],
      "correctAnswerIndex": 2,
      "explanation": "GMM assumes the data are generated from a mixture of multiple Gaussian distributions."
    },
    {
      "id": 2,
      "questionText": "GMM is mainly used for:",
      "options": [
        "Unsupervised clustering",
        "Time series forecasting",
        "Supervised learning",
        "Regression problems"
      ],
      "correctAnswerIndex": 0,
      "explanation": "GMM is an unsupervised clustering technique."
    },
    {
      "id": 3,
      "questionText": "GMM provides which type of clustering?",
      "options": [
        "Soft probabilistic clustering",
        "Binary classification",
        "Hard clustering",
        "Feature selection"
      ],
      "correctAnswerIndex": 0,
      "explanation": "GMM assigns each point a probability of belonging to each cluster."
    },
    {
      "id": 4,
      "questionText": "Which algorithm is commonly used to train a GMM?",
      "options": [
        "Backpropagation",
        "Expectation-Maximization (EM)",
        "Gradient Descent",
        "Genetic Algorithm"
      ],
      "correctAnswerIndex": 1,
      "explanation": "The EM algorithm is used to estimate GMM parameters.",
    },
    {
      "id": 5,
      "questionText": "In GMM, each Gaussian distribution is called a:",
      "options": [
        "Component",
        "Kernel",
        "Label",
        "Loss function"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Each Gaussian is a component of the mixture model."
    },
    {
      "id": 6,
      "questionText": "The output of GMM for each data point is:",
      "options": [
        "Single cluster label only",
        "Probability distribution over all clusters",
        "Binary classification output",
        "Feature importance scores"
      ],
      "correctAnswerIndex": 1,
      "explanation": "GMM gives the probability of each point belonging to each Gaussian cluster.",
    },
    {
      "id": 7,
      "questionText": "GMM assumes that data in each cluster follows a:",
      "options": [
        "Uniform distribution",
        "Poisson distribution",
        "Exponential distribution",
        "Gaussian distribution"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Each cluster is modeled as a Normal (Gaussian) distribution."
    },
    {
      "id": 8,
      "questionText": "The number of Gaussian components in GMM must be:",
      "options": [
        "Always equal to number of features",
        "Unlimited by default",
        "Automatically detected always",
        "Predefined manually in most implementations"
      ],
      "correctAnswerIndex": 3,
      "explanation": "We usually define the number of components (k) before training."
    },
    {
      "id": 9,
      "questionText": "GMM is a generalization of which algorithm?",
      "options": [
        "Random Forest",
        "K-Means",
        "Naive Bayes",
        "SVM"
      ],
      "correctAnswerIndex": 1,
      "explanation": "GMM is a probabilistic extension of K-Means with soft assignments."
    },
    {
      "id": 10,
      "questionText": "Which of the following does GMM estimate?",
      "options": [
        "Mean only",
        "Variance only",
        "Only class probabilities",
        "Mean and covariance of each Gaussian"
      ],
      "correctAnswerIndex": 3,
      "explanation": "GMM learns the mean, covariance, and mixing weight of each Gaussian."
    },
    {
      "id": 11,
      "questionText": "What does 'mixture' mean in GMM?",
      "options": [
        "Multiple datasets combined",
        "Adding noise to data",
        "Combination of several Gaussian probability distributions",
        "Blending of supervised and unsupervised learning"
      ],
      "correctAnswerIndex": 2,
      "explanation": "GMM models data as a weighted sum of multiple Gaussian distributions."
    },
    {
      "id": 12,
      "questionText": "GMM can model clusters with:",
      "options": [
        "Only spherical shapes",
        "Elliptical and varying density clusters",
        "Only equal-sized circles",
        "Linear boundaries only"
      ],
      "correctAnswerIndex": 1,
      "explanation": "GMM supports ellipsoidal clusters because each component has its own covariance matrix."
    },
    {
      "id": 13,
      "questionText": "Which types of covariance can GMM use?",
      "options": [
        "Only diagonal",
        "Only identity matrix",
        "Only full",
        "Full, Diagonal, Tied, Spherical"
      ],
      "correctAnswerIndex": 3,
      "explanation": "GMM allows flexible covariance structure options.",
    },
    {
      "id": 14,
      "questionText": "GMM is best suited when clusters are:",
      "options": [
        "Categorical only",
        "Perfectly separated",
        "Non-overlapping and spherical",
        "Overlapping and elliptical"
      ],
      "correctAnswerIndex": 3,
      "explanation": "GMM is ideal for overlapping, soft clusters."
    },
    {
      "id": 15,
      "questionText": "Which step of EM assigns a probability of each point belonging to a cluster?",
      "options": [
        "Initialization step",
        "Maximization step",
        "Regularization step",
        "Expectation step"
      ],
      "correctAnswerIndex": 3,
      "explanation": "The E-step calculates each Gaussian's responsibility for every point."
    },
    {
      "id": 16,
      "questionText": "Which step of EM updates the parameters of the Gaussians?",
      "options": [
        "Prediction step",
        "Maximization step",
        "Normalization step",
        "Expectation step"
      ],
      "correctAnswerIndex": 1,
      "explanation": "The M-step updates the means, covariances, and mixing weights."
    },
    {
      "id": 17,
      "questionText": "In GMM, mixing coefficients must:",
      "options": [
        "Sum to one",
        "Be greater than one",
        "Sum to zero",
        "Be negative"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Mixing weights represent probabilities, so they must sum to 1."
    },
    {
      "id": 18,
      "questionText": "What does a high responsibility value mean in GMM?",
      "options": [
        "Point is an outlier",
        "Point strongly belongs to that Gaussian",
        "Cluster is ignored",
        "Model has failed"
      ],
      "correctAnswerIndex": 1,
      "explanation": "A high responsibility means a high probability of belonging to that cluster."
    },
    {
      "id": 19,
      "questionText": "GMM belongs to which model category?",
      "options": [
        "Neural network",
        "Discriminative model",
        "Purely geometric model",
        "Generative probabilistic model"
      ],
      "correctAnswerIndex": 3,
      "explanation": "GMM models the probability distribution of the data, making it generative."
    },
    {
      "id": 20,
      "questionText": "GMM is useful in:",
      "options": [
        "Anomaly detection",
        "Speaker recognition",
        "Image segmentation",
        "All of the above"
      ],
      "correctAnswerIndex": 3,
      "explanation": "GMM is widely used in real-world probabilistic applications."
    },
    {
      "id": 21,
      "questionText": "What does GMM do better than K-Means?",
      "options": [
        "Handle only linear separability",
        "Ignore feature scale",
        "Model overlapping probabilistic clusters",
        "Assign hard labels only"
      ],
      "correctAnswerIndex": 2,
      "explanation": "GMM outperforms K-Means when clusters overlap."
    },
    {
      "id": 22,
      "questionText": "A drawback of GMM is:",
      "options": [
        "Requires predefined number of clusters",
        "No probabilistic output",
        "Only works with categorical data",
        "Cannot handle continuous data"
      ],
      "correctAnswerIndex": 0,
      "explanation": "The number of components must be specified before fitting."
    },
    {
      "id": 23,
      "questionText": "What initialization is commonly used for GMM?",
      "options": [
        "K-Means centroids",
        "Random labels",
        "Bootstrap resampling",
        "Gradient descent"
      ],
      "correctAnswerIndex": 0,
      "explanation": "K-Means is commonly used to initialize the cluster means.",
    },
    {
      "id": 24,
      "questionText": "GMM uses which principle to maximize likelihood?",
      "options": [
        "Gradient Descent",
        "Expectation-Maximization",
        "Dropout Regularization",
        "Least Squares Minimization"
      ],
      "correctAnswerIndex": 1,
      "explanation": "EM is a likelihood-based optimization method."
    },
    {
      "id": 25,
      "questionText": "The final decision in GMM assigns a point to the cluster with:",
      "options": [
        "Highest variance",
        "Minimum distance",
        "Maximum probability (responsibility)",
        "Random chance"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Soft assignments are converted to hard labels by selecting the cluster with the highest probability.",
    },
    {
      "id": 26,
      "questionText": "What role does covariance play in GMM?",
      "options": [
        "Defines cluster shape and orientation",
        "Sets learning rate",
        "Controls number of clusters",
        "Removes noise features"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Covariance allows modeling elliptical and rotated clusters."
    },
    {
      "id": 27,
      "questionText": "What does GMM maximize during training?",
      "options": [
        "Sum of distances",
        "Training accuracy",
        "Entropy of clusters",
        "Total log-likelihood of data"
      ],
      "correctAnswerIndex": 3,
      "explanation": "EM optimizes the log-likelihood of observing the data."
    },
    {
      "id": 28,
      "questionText": "What happens if two Gaussian components overlap heavily?",
      "options": [
        "Clusters merge into one automatically",
        "GMM switches to K-Means automatically",
        "GMM handles it with soft probabilities",
        "GMM fails immediately"
      ],
      "correctAnswerIndex": 2,
      "explanation": "GMM assigns probabilities to each cluster; overlap is handled naturally via soft assignments."
    },
    {
      "id": 29,
      "questionText": "Which metric can be used to choose the number of components in GMM?",
      "options": [
        "Accuracy",
        "F1-score",
        "Learning rate",
        "AIC (Akaike Information Criterion)"
      ],
      "correctAnswerIndex": 3,
      "explanation": "AIC and BIC balance model fit against complexity to select the number of components."
    },
    {
      "id": 30,
      "questionText": "BIC in GMM is used to:",
      "options": [
        "Normalize probabilities",
        "Estimate cluster assignments",
        "Update mean and covariance",
        "Select number of clusters considering model complexity"
      ],
      "correctAnswerIndex": 3,
      "explanation": "The Bayesian Information Criterion penalizes overly complex models to avoid overfitting.",
    },
    {
      "id": 31,
      "questionText": "Scenario: You have overlapping clusters in 2D data. Which approach is suitable?",
      "options": [
        "K-Means",
        "DBSCAN with minPts=1",
        "Hierarchical clustering",
        "Gaussian Mixture Model"
      ],
      "correctAnswerIndex": 3,
      "explanation": "GMM handles overlapping clusters probabilistically, unlike K-Means, which assigns hard labels."
    },
    {
      "id": 32,
      "questionText": "Which initialization can improve EM convergence?",
      "options": [
        "Using covariance as identity for all clusters",
        "Setting all means to zero",
        "Using K-Means centroids as initial means",
        "Randomly choosing one data point"
      ],
      "correctAnswerIndex": 2,
      "explanation": "K-Means initialization provides better starting points for the EM algorithm."
    },
    {
      "id": 33,
      "questionText": "Soft clustering means:",
      "options": [
        "Each point has a probability of belonging to multiple clusters",
        "Clusters are linearly separable",
        "Clusters have equal sizes",
        "Each point is assigned only one cluster"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Soft clustering assigns probabilities rather than hard labels."
    },
    {
      "id": 34,
      "questionText": "What does the EM algorithm alternate between?",
      "options": [
        "Expectation (E-step) and Maximization (M-step)",
        "Gradient descent and regularization",
        "Probability normalization and prediction",
        "Clustering and dimensionality reduction"
      ],
      "correctAnswerIndex": 0,
      "explanation": "EM alternates between computing responsibilities and updating parameters."
    },
    {
      "id": 35,
      "questionText": "Scenario: A GMM component has nearly zero weight after EM. Implication?",
      "options": [
        "Component is insignificant; may be removed",
        "Covariance matrix is singular",
        "Model is invalid",
        "Training failed"
      ],
      "correctAnswerIndex": 0,
      "explanation": "A very low weight indicates the component contributes little to the data representation."
    },
    {
      "id": 36,
      "questionText": "Scenario: Covariance type set to 'spherical'. Effect?",
      "options": [
        "All clusters are circular with equal variance in all directions",
        "EM cannot converge",
        "Clusters can have arbitrary orientation",
        "Covariance ignored"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Spherical covariance assumes isotropic variance for each cluster."
    },
    {
      "id": 37,
      "questionText": "Scenario: Covariance type 'full' in GMM allows:",
      "options": [
        "One-dimensional data only",
        "Elliptical clusters with arbitrary orientation",
        "Only circular clusters",
        "Clusters of equal size"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Full covariance allows each cluster to have a unique covariance matrix."
    },
    {
      "id": 38,
      "questionText": "Scenario: High-dimensional data with GMM. Challenge?",
      "options": [
        "Covariance estimation becomes difficult",
        "EM converges faster",
        "Number of clusters reduces automatically",
        "Probabilities become binary"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Estimating full covariance in high dimensions is prone to overfitting."
    },
    {
      "id": 39,
      "questionText": "Scenario: Using diagonal covariance instead of full. Advantage?",
      "options": [
        "Reduces number of parameters, faster EM",
        "EM fails automatically",
        "Improves cluster overlap",
        "Always increases accuracy"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Diagonal covariance assumes feature independence, reducing the number of parameters."
    },
    {
      "id": 40,
      "questionText": "Scenario: Two clusters have very close means. EM may:",
      "options": [
        "Fail to run",
        "Merge clusters automatically",
        "Assign probabilities reflecting overlap",
        "Ignore one cluster"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Soft assignments reflect uncertainty in overlapping regions."
    },
    {
      "id": 41,
      "questionText": "Scenario: EM initialized with random means may:",
      "options": [
        "Converge to local maxima",
        "Merge clusters automatically",
        "Fail to compute responsibilities",
        "Always find global maximum"
      ],
      "correctAnswerIndex": 0,
      "explanation": "EM is sensitive to initialization and may converge to local optima."
    },
    {
      "id": 42,
      "questionText": "Scenario: GMM applied to anomaly detection. How?",
      "options": [
        "EM ignores outliers automatically",
        "Points with low likelihood under the model are considered anomalies",
        "Points assigned to smallest cluster are anomalies",
        "Clusters removed, remaining points are anomalies"
      ],
      "correctAnswerIndex": 1,
      "explanation": "GMM can detect outliers by evaluating the likelihood of each point.",
    },
    {
      "id": 43,
      "questionText": "Scenario: Overfitting in GMM can occur when:",
      "options": [
        "Dataset is small but clusters well-separated",
        "Covariance type is spherical",
        "Initialization uses K-Means",
        "Too many components relative to data size"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Excessive components may fit noise rather than true structure."
    },
    {
      "id": 44,
      "questionText": "Scenario: Selecting the number of components with BIC. Lower BIC means:",
      "options": [
        "EM failed",
        "Overfitting",
        "Better balance between fit and complexity",
        "Worse model"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Lower BIC indicates the model explains the data well without unnecessary complexity."
    },
    {
      "id": 45,
      "questionText": "Scenario: You normalize features before GMM. Benefit?",
      "options": [
        "EM converges slower",
        "Number of components reduces",
        "Prevents dominance by large-scale features",
        "Covariance becomes singular"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Feature scaling ensures all features contribute equally to the Gaussian components."
    },
    {
      "id": 46,
      "questionText": "Scenario: You use too few components in GMM. Likely effect?",
      "options": [
        "Covariance becomes negative",
        "Overfitting",
        "EM fails to converge",
        "Underfitting, poor representation of clusters"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Too few Gaussians cannot capture the underlying data structure."
    },
    {
      "id": 47,
      "questionText": "Scenario: Two clusters have different variances. Which GMM setting captures this?",
      "options": [
        "Tied covariance",
        "Diagonal covariance only",
        "Full covariance",
        "Spherical covariance"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Full covariance allows different shapes and orientations per cluster."
    },
    {
      "id": 48,
      "questionText": "Scenario: Real-world use of GMM in speaker recognition relies on:",
      "options": [
        "Only frequency features",
        "Decision trees",
        "Modeling probability distribution of feature vectors",
        "Hard cluster labels"
      ],
      "correctAnswerIndex": 2,
      "explanation": "GMM models the distribution of features for each speaker."
    },
    {
      "id": 49,
      "questionText": "Scenario: Image segmentation with GMM. How?",
      "options": [
        "K-Means replaces EM",
        "Pixels assigned randomly",
        "Only grayscale images",
        "Pixels assigned probabilistically to color clusters"
      ],
      "correctAnswerIndex": 3,
      "explanation": "GMM can segment images based on color probability distributions."
    },
    {
      "id": 50,
      "questionText": "Scenario: Overlapping Gaussian clusters, hard assignment used. Effect?",
      "options": [
        "Covariance becomes zero",
        "EM improves accuracy",
        "Soft assignment automatically applied",
        "Information loss, may misclassify points"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Hard labels ignore uncertainty and may misrepresent overlapping regions."
    },
    {
      "id": 51,
      "questionText": "Scenario: You have categorical features. GMM suitability?",
      "options": [
        "Perfectly suitable",
        "Requires only diagonal covariance",
        "Not ideal; GMM assumes continuous features",
        "Number of components is irrelevant"
      ],
      "correctAnswerIndex": 2,
      "explanation": "GMM assumes continuous-valued features for Gaussian distributions."
    },
    {
      "id": 52,
      "questionText": "Scenario: EM converges slowly. Common solutions?",
      "options": [
        "Ignore convergence criteria",
        "Better initialization, feature scaling, or fewer components",
        "Increase number of iterations indefinitely",
        "Switch to K-Means always"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Good initialization and preprocessing improve EM efficiency."
    },
    {
      "id": 53,
      "questionText": "Scenario: GMM for anomaly detection in network traffic. Strategy?",
      "options": [
        "Ignore rare events",
        "Hard assign all points to clusters",
        "Flag low-likelihood points as anomalies",
        "Use K-Means to cluster anomalies"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Points with low probability under the model are considered outliers."
    },
    {
      "id": 54,
      "questionText": "Scenario: EM oscillates between two solutions. Likely reason?",
      "options": [
        "Number of components too small",
        "Poor initialization causing local maxima",
        "Covariance matrix full",
        "Using diagonal covariance"
      ],
      "correctAnswerIndex": 1,
      "explanation": "EM can get stuck in local maxima if initial parameters are suboptimal."
    },
    {
      "id": 55,
      "questionText": "Scenario: A soft-clustering probability threshold is used to assign points. Advantage?",
      "options": [
        "EM fails automatically",
        "Always misclassifies clusters",
        "Allows filtering uncertain points",
        "Covariance ignored"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Thresholding allows flexible assignment based on confidence."
    },
    {
      "id": 56,
      "questionText": "Scenario: Tied covariance for all components. Effect?",
      "options": [
        "All clusters share same shape/orientation",
        "Covariance ignored",
        "Number of components reduced automatically",
        "EM cannot run"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Tied covariance forces all Gaussians to share the same covariance matrix."
    },
    {
      "id": 57,
      "questionText": "Scenario: GMM with diagonal covariance and correlated features. Effect?",
      "options": [
        "EM automatically switches to full",
        "Perfect modeling",
        "Covariance fails to compute",
        "Model may be suboptimal due to ignored correlations"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Diagonal covariance ignores feature correlations, which may reduce accuracy."
    },
    {
      "id": 58,
      "questionText": "Scenario: High-dimensional data, small sample size. Solution for GMM?",
      "options": [
        "Always full covariance",
        "Increase number of components",
        "Ignore dimension scaling",
        "Use diagonal covariance or reduce dimensions with PCA"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Reducing parameters via diagonal covariance or PCA helps prevent overfitting.",
    },
    {
      "id": 59,
      "questionText": "Scenario: GMM applied to time-series data. Typical strategy?",
      "options": [
        "Switch to K-Means only",
        "Model features extracted per time window",
        "Use raw timestamps directly",
        "Ignore temporal ordering"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Features are extracted per window to apply GMM effectively."
    },
    {
      "id": 60,
      "questionText": "Scenario: You wish to compare two GMMs with different numbers of components. Metric?",
      "options": [
        "Silhouette score",
        "Mean squared error",
        "AIC/BIC",
        "Accuracy"
      ],
      "correctAnswerIndex": 2,
      "explanation": "AIC/BIC compare likelihoods while penalizing complexity."
    },
    {
      "id": 61,
      "questionText": "Scenario: EM stops improving log-likelihood. Action?",
      "options": [
        "Reinitialize covariance",
        "Converged; training can stop",
        "Increase components automatically",
        "Reduce number of iterations"
      ],
      "correctAnswerIndex": 1,
      "explanation": "A plateau in log-likelihood indicates convergence."
    },
    {
      "id": 62,
      "questionText": "Scenario: GMM applied to overlapping clusters. Which is true?",
      "options": [
        "Clusters must be separated manually",
        "GMM fails completely",
        "K-Means always better",
        "Soft assignments handle ambiguity better than hard assignments"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Soft probabilistic assignments capture uncertainty in overlapping regions."
    },
    {
      "id": 63,
      "questionText": "Scenario: EM converges to a degenerate covariance. Likely cause?",
      "options": [
        "Full covariance required",
        "Component collapsed to single data point",
        "Too few iterations",
        "Random initialization"
      ],
      "correctAnswerIndex": 1,
      "explanation": "A component may shrink its variance toward zero, creating numerical issues."
    },
    {
      "id": 64,
      "questionText": "Scenario: Choosing between GMM and K-Means. Advantage of GMM?",
      "options": [
        "Always faster",
        "Works only on spherical clusters",
        "No parameters needed",
        "Handles overlapping, probabilistic clusters"
      ],
      "correctAnswerIndex": 3,
      "explanation": "GMM can model overlap using probabilities, unlike K-Means."
    },
    {
      "id": 65,
      "questionText": "Scenario: GMM with full covariance. Drawback?",
      "options": [
        "EM does not converge",
        "Cannot model elliptical clusters",
        "Higher number of parameters; risk of overfitting",
        "Soft assignment ignored"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Full covariance requires estimating many parameters, making it sensitive to small datasets."
    },
    {
      "id": 66,
      "questionText": "Scenario: GMM for anomaly detection. Threshold selection?",
      "options": [
        "Use hard assignments only",
        "Ignore low-probability points",
        "Based on likelihood distribution of normal data",
        "Random selection"
      ],
      "correctAnswerIndex": 2,
      "explanation": "The threshold is chosen based on typical likelihood values of normal data."
    },
    {
      "id": 67,
      "questionText": "Scenario: EM alternates but log-likelihood decreases. Cause?",
      "options": [
        "Soft assignments ignored",
        "Convergence achieved",
        "Number of components too low",
        "Numerical instability or rounding errors"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Numerical issues may cause slight decreases; regularization may help."
    },
    {
      "id": 68,
      "questionText": "Scenario: GMM on skewed data. Observation?",
      "options": [
        "Clusters automatically corrected",
        "EM converges faster",
        "Gaussian assumption may be violated",
        "Covariance ignored"
      ],
      "correctAnswerIndex": 2,
      "explanation": "GMM assumes Gaussian-distributed clusters; skewed data may reduce accuracy."
    },
    {
      "id": 69,
      "questionText": "Scenario: Using GMM to compress data. How?",
      "options": [
        "Remove clusters randomly",
        "Use only spherical covariance",
        "Switch to K-Means",
        "Represent each point by cluster responsibilities instead of raw features"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Responsibilities can serve as a compact representation of the original features."
    },
    {
      "id": 70,
      "questionText": "Scenario: GMM applied to a multimodal distribution. Advantage?",
      "options": [
        "Models multiple peaks naturally using several Gaussians",
        "Covariance must be diagonal",
        "Cannot handle multimodal data",
        "Requires K-Means preprocessing"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Multiple Gaussian components allow GMM to capture multimodal patterns."
    },
    {
      "id": 71,
      "questionText": "Scenario: You want to model customer segments with GMM. Best approach?",
      "options": [
        "Use K-Means only",
        "Use soft clustering to capture overlapping preferences",
        "Assign each customer randomly",
        "Ignore continuous features"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Soft clustering captures overlapping behavior patterns between segments."
    },
    {
      "id": 72,
      "questionText": "Scenario: Data has outliers. How does GMM handle them?",
      "options": [
        "EM fails automatically",
        "Outliers dominate clusters",
        "Outliers get low probabilities; may need special handling",
        "Clusters merge to include outliers"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Outliers have low likelihood under the Gaussian components, reducing their impact."
    },
    {
      "id": 73,
      "questionText": "Scenario: Using EM, you notice very slow convergence. Possible fix?",
      "options": [
        "Switch to hierarchical clustering",
        "Randomly assign clusters",
        "Improve initialization, scale features, or reduce number of components",
        "Increase iterations without changes"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Good initialization and preprocessing improve EM efficiency."
    },
    {
      "id": 74,
      "questionText": "Scenario: You apply GMM to cluster text embeddings. Challenge?",
      "options": [
        "High dimensionality may make full covariance unstable",
        "GMM works perfectly without change",
        "Soft assignments are ignored",
        "Clusters must be one-dimensional"
      ],
      "correctAnswerIndex": 0,
      "explanation": "High-dimensional embeddings require dimensionality reduction or diagonal covariance."
    },
    {
      "id": 75,
      "questionText": "Scenario: You use GMM with tied covariance. Effect?",
      "options": [
        "EM fails automatically",
        "Clusters become one",
        "Each cluster has unique covariance",
        "All clusters share same covariance matrix"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Tied covariance enforces a shared shape/orientation for all Gaussians."
    },
    {
      "id": 76,
      "questionText": "Scenario: GMM model seems overfitted. Possible reasons?",
      "options": [
        "Too many components or full covariance on small data",
        "Diagonal covariance used",
        "EM converged perfectly",
        "Spherical covariance used"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Overfitting occurs when model complexity exceeds data size."
    },
    {
      "id": 77,
      "questionText": "Scenario: You want interpretable clusters with GMM. Strategy?",
      "options": [
        "Soft assignments ignored",
        "Use fewer components and diagonal covariance",
        "Random initialization",
        "Use full covariance and many components"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Fewer components and simpler covariance improve interpretability."
    },
    {
      "id": 78,
      "questionText": "Scenario: You combine GMM with PCA. Purpose?",
      "options": [
        "Ignore low-variance features",
        "Increase number of clusters",
        "EM converges automatically",
        "Reduce dimensionality to stabilize covariance estimation"
      ],
      "correctAnswerIndex": 3,
      "explanation": "PCA reduces the number of features, improving parameter estimation in high-dimensional GMMs."
    },
    {
      "id": 79,
      "questionText": "Scenario: GMM applied to customer churn probability. Approach?",
      "options": [
        "Use only binary labels",
        "Switch to linear regression",
        "Hard cluster and ignore overlap",
        "Model feature distribution, assign probabilities to segments"
      ],
      "correctAnswerIndex": 3,
      "explanation": "GMM probabilistically models customer groups for better segmentation."
    },
    {
      "id": 80,
      "questionText": "Scenario: You notice EM stuck in a local optimum. Solution?",
      "options": [
        "Use fewer components only",
        "Increase iterations infinitely",
        "Try multiple random initializations",
        "Ignore convergence"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Multiple initializations reduce the chance of getting trapped in local maxima.",
    },
    {
      "id": 81,
      "questionText": "Scenario: Using GMM for speaker verification. Why suitable?",
      "options": [
        "K-Means performs better",
        "Captures probabilistic feature distributions per speaker",
        "Covariance must be spherical",
        "Hard assignments suffice"
      ],
      "correctAnswerIndex": 1,
      "explanation": "GMM models variability in speaker features effectively."
    },
    {
      "id": 82,
      "questionText": "Scenario: EM applied to data with small clusters. Challenge?",
      "options": [
        "Soft assignments fail",
        "Small clusters may be ignored or collapse",
        "Covariance ignored",
        "EM always finds them"
      ],
      "correctAnswerIndex": 1,
      "explanation": "EM may assign negligible weight to very small clusters."
    },
    {
      "id": 83,
      "questionText": "Scenario: GMM used in anomaly detection for fraud. Key idea?",
      "options": [
        "Transactions are clustered randomly",
        "High-probability transactions flagged",
        "Transactions with low probability under the model are flagged",
        "EM ignores rare patterns"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Low-likelihood points are potential anomalies."
    },
    {
      "id": 84,
      "questionText": "Scenario: GMM applied to multimodal sensor readings. Advantage?",
      "options": [
        "EM fails automatically",
        "Single Gaussian suffices",
        "Spherical covariance required",
        "Multiple peaks captured by several Gaussian components"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Multiple Gaussians allow modeling multimodal distributions."
    },
    {
      "id": 85,
      "questionText": "Scenario: You notice the EM log-likelihood plateauing early. Interpretation?",
      "options": [
        "EM converged; model parameters stabilized",
        "Covariance ignored",
        "EM failed",
        "Increase components immediately"
      ],
      "correctAnswerIndex": 0,
      "explanation": "A plateau indicates convergence of the EM algorithm."
    },
    {
      "id": 86,
      "questionText": "Scenario: GMM with full covariance on a small dataset. Risk?",
      "options": [
        "Overfitting due to too many parameters",
        "EM fails automatically",
        "Better modeling",
        "Clusters ignored"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Full covariance requires estimating many parameters, which is risky for small data."
    },
    {
      "id": 87,
      "questionText": "Scenario: GMM used for image segmentation. Key step?",
      "options": [
        "Only grayscale images allowed",
        "K-Means replacement",
        "Assign pixels probabilistically to color clusters",
        "Ignore soft assignments"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Soft clustering assigns pixels to Gaussian components representing colors."
    },
    {
      "id": 88,
      "questionText": "Scenario: GMM applied to text clustering with embeddings. Key step?",
      "options": [
        "Increase components arbitrarily",
        "Soft assignments ignored",
        "Use dimensionality reduction to stabilize covariance estimation",
        "EM fails automatically"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Reducing dimensionality prevents overfitting in high-dimensional embeddings."
    },
    {
      "id": 89,
      "questionText": "Scenario: GMM with overlapping clusters. Hard labels used. Effect?",
      "options": [
        "Improves EM convergence",
        "Loss of probabilistic information; misclassification possible",
        "EM fails",
        "Covariance ignored"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Hard labels ignore uncertainty in overlapping regions."
    },
    {
      "id": 90,
      "questionText": "Scenario: EM for GMM shows a component collapsing. Solution?",
      "options": [
        "Use diagonal covariance always",
        "Reduce number of components only",
        "Regularize covariance to prevent singularities",
        "Ignore component"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Regularization prevents variances from collapsing to zero.",
    },
    {
      "id": 91,
      "questionText": "Scenario: Choosing GMM vs K-Means. Advantage?",
      "options": [
        "K-Means always faster",
        "EM not required",
        "GMM ignores probabilities",
        "Soft assignment, handles overlap and ellipsoidal clusters"
      ],
      "correctAnswerIndex": 3,
      "explanation": "GMM models overlapping clusters with probabilistic assignments."
    },
    {
      "id": 92,
      "questionText": "Scenario: GMM applied for speech synthesis. Benefit?",
      "options": [
        "Clusters speakers only",
        "Soft assignments ignored",
        "Models probability distribution of acoustic features",
        "Only spherical clusters allowed"
      ],
      "correctAnswerIndex": 2,
      "explanation": "GMM captures the feature distributions needed for realistic speech synthesis."
    },
    {
      "id": 93,
      "questionText": "Scenario: EM fails to converge. Possible reasons?",
      "options": [
        "Too few iterations",
        "Poor initialization, singular covariance, or incompatible data",
        "Full covariance always fails",
        "Soft assignments ignored"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Bad initialization or degenerate covariances can prevent EM convergence."
    },
    {
      "id": 94,
      "questionText": "Scenario: You need probabilistic clustering on 2D sensor data. Choice?",
      "options": [
        "Hierarchical clustering",
        "DBSCAN only",
        "GMM with appropriate covariance type",
        "K-Means only"
      ],
      "correctAnswerIndex": 2,
      "explanation": "GMM provides soft probabilistic clustering for continuous data."
    },
    {
      "id": 95,
      "questionText": "Scenario: GMM used for anomaly detection in machinery. How?",
      "options": [
        "Use hard assignments only",
        "Cluster readings randomly",
        "Flag low-likelihood sensor readings as anomalies",
        "Ignore rare readings"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Points that do not fit any Gaussian component well can indicate anomalies."
    },
    {
      "id": 96,
      "questionText": "Scenario: High-dimensional embeddings make GMM unstable. Solution?",
      "options": [
        "Increase components",
        "Ignore scaling",
        "Reduce dimensions with PCA or use diagonal covariance",
        "Use full covariance only"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Reducing parameters prevents overfitting and stabilizes EM."
    },
    {
      "id": 97,
      "questionText": "Scenario: GMM for multimodal customer behavior. Advantage?",
      "options": [
        "K-Means better",
        "Multiple components capture different behavioral modes",
        "Single Gaussian suffices",
        "Covariance must be spherical"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Different modes of behavior can be modeled with multiple Gaussians."
    },
    {
      "id": 98,
      "questionText": "Scenario: EM converges but log-likelihood decreases occasionally. Cause?",
      "options": [
        "Numerical instability; can use regularization",
        "Covariance ignored",
        "EM failed",
        "Increase components"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Small decreases may occur due to rounding errors; regularization helps."
    },
    {
      "id": 99,
      "questionText": "Scenario: A soft-assignment threshold is applied. Benefit?",
      "options": [
        "Filter uncertain points or highlight ambiguous memberships",
        "EM fails automatically",
        "Covariance ignored",
        "Always misclassifies clusters"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Thresholding helps decide which points are confidently assigned."
    },
    {
      "id": 100,
      "questionText": "Scenario: GMM applied in finance for risk clustering. Advantage?",
      "options": [
        "Requires categorical data only",
        "Models probability distribution of different risk profiles",
        "Soft assignment ignored",
        "Clusters randomly"
      ],
      "correctAnswerIndex": 1,
      "explanation": "GMM provides probabilistic segmentation of customers or assets by risk level."
    }
  ]
}