Spaces:

deedrop1140
/

MachineLearningAlgorithms

Running

App Files Files Community

MachineLearningAlgorithms / data /GMM.json

deedrop1140

Upload 41 files

0d00d62 verified 3 months ago

raw

history blame contribute delete

42.9 kB

	{
	"title": "Gaussian Mixture Models (GMM) Mastery: 100 MCQs",
	"description": "A comprehensive set of 100 MCQs covering Gaussian Mixture Models (GMM) from fundamental intuition to EM algorithm, applications, soft clustering, covariance types, and real-world scenarios.",
	"questions": [
	{
	"id": 1,
	"questionText": "What is the primary goal of a Gaussian Mixture Model (GMM)?",
	"options": [
	"To reduce data dimensionality",
	"To perform supervised classification",
	"To model data as a mixture of multiple Gaussian distributions",
	"To perform hard clustering like K-Means"
	],
	"correctAnswerIndex": 2,
	"explanation": "GMM assumes data is generated from a mixture of multiple Gaussian distributions."
	},
	{
	"id": 2,
	"questionText": "GMM is mainly used for:",
	"options": [
	"Unsupervised clustering",
	"Time series forecasting",
	"Supervised learning",
	"Regression problems"
	],
	"correctAnswerIndex": 0,
	"explanation": "GMM is an unsupervised clustering technique."
	},
	{
	"id": 3,
	"questionText": "GMM provides which type of clustering?",
	"options": [
	"Soft probabilistic clustering",
	"Binary classification",
	"Hard clustering",
	"Feature selection"
	],
	"correctAnswerIndex": 0,
	"explanation": "GMM assigns probability of belonging to each cluster."
	},
	{
	"id": 4,
	"questionText": "Which algorithm is commonly used to train GMM?",
	"options": [
	"Backpropagation",
	"Expectation-Maximization (EM)",
	"Gradient Descent",
	"Genetic Algorithm"
	],
	"correctAnswerIndex": 1,
	"explanation": "EM algorithm is used to estimate GMM parameters."
	},
	{
	"id": 5,
	"questionText": "In GMM, each Gaussian distribution is called a:",
	"options": [
	"Component",
	"Kernel",
	"Label",
	"Loss function"
	],
	"correctAnswerIndex": 0,
	"explanation": "Each Gaussian is a component of the mixture model."
	},
	{
	"id": 6,
	"questionText": "The output of GMM for each data point is:",
	"options": [
	"Single cluster label only",
	"Probability distribution over all clusters",
	"Binary classification output",
	"Feature importance scores"
	],
	"correctAnswerIndex": 1,
	"explanation": "GMM gives probability of belonging to each Gaussian cluster."
	},
	{
	"id": 7,
	"questionText": "GMM assumes that data in each cluster follows a:",
	"options": [
	"Uniform distribution",
	"Poisson distribution",
	"Exponential distribution",
	"Gaussian distribution"
	],
	"correctAnswerIndex": 3,
	"explanation": "Each cluster is modeled as a Normal (Gaussian) distribution."
	},
	{
	"id": 8,
	"questionText": "The number of Gaussian components in GMM must be:",
	"options": [
	"Always equal to number of features",
	"Unlimited by default",
	"Automatically detected always",
	"Predefined manually in most implementations"
	],
	"correctAnswerIndex": 3,
	"explanation": "We usually define number of components (k) before training."
	},
	{
	"id": 9,
	"questionText": "GMM is a generalization of which algorithm?",
	"options": [
	"Random Forest",
	"K-Means",
	"Naive Bayes",
	"SVM"
	],
	"correctAnswerIndex": 1,
	"explanation": "GMM is probabilistic extension of K-Means with soft assignments."
	},
	{
	"id": 10,
	"questionText": "Which of the following does GMM estimate?",
	"options": [
	"Mean only",
	"Variance only",
	"Only class probabilities",
	"Mean and covariance of each Gaussian"
	],
	"correctAnswerIndex": 3,
	"explanation": "GMM learns mean, covariance, and weight of each Gaussian."
	},
	{
	"id": 11,
	"questionText": "What does 'mixture' mean in GMM?",
	"options": [
	"Multiple datasets combined",
	"Adding noise to data",
	"Combination of several Gaussian probability distributions",
	"Blending of supervised and unsupervised learning"
	],
	"correctAnswerIndex": 2,
	"explanation": "GMM models data as sum of multiple Gaussian distributions."
	},
	{
	"id": 12,
	"questionText": "GMM can model clusters with:",
	"options": [
	"Only spherical shapes",
	"Elliptical and varying density clusters",
	"Only equal-sized circles",
	"Linear boundaries only"
	],
	"correctAnswerIndex": 1,
	"explanation": "GMM supports ellipsoidal clusters due to covariance matrix."
	},
	{
	"id": 13,
	"questionText": "Which type of covariance can GMM use?",
	"options": [
	"Only diagonal",
	"Only identity matrix",
	"Only full",
	"Full, Diagonal, Tied, Spherical"
	],
	"correctAnswerIndex": 3,
	"explanation": "GMM allows flexible covariance structure options."
	},
	{
	"id": 14,
	"questionText": "GMM is best suited when clusters are:",
	"options": [
	"Categorical only",
	"Perfectly separated",
	"Non-overlapping and spherical",
	"Overlapping and elliptical"
	],
	"correctAnswerIndex": 3,
	"explanation": "GMM is ideal for overlapping soft clusters."
	},
	{
	"id": 15,
	"questionText": "Which step assigns probability to each point belonging to a cluster in EM?",
	"options": [
	"Initialization step",
	"Maximization step",
	"Regularization step",
	"Expectation step"
	],
	"correctAnswerIndex": 3,
	"explanation": "E-step calculates responsibility of each Gaussian."
	},
	{
	"id": 16,
	"questionText": "Which step updates parameters of Gaussians in EM?",
	"options": [
	"Prediction step",
	"Maximization step",
	"Normalization step",
	"Expectation step"
	],
	"correctAnswerIndex": 1,
	"explanation": "M-step updates means, covariances, and weights."
	},
	{
	"id": 17,
	"questionText": "In GMM, mixing coefficients must:",
	"options": [
	"Sum to one",
	"Be greater than one",
	"Sum to zero",
	"Be negative"
	],
	"correctAnswerIndex": 0,
	"explanation": "Mixing weights represent probabilities → must sum to 1."
	},
	{
	"id": 18,
	"questionText": "What does a high responsibility value mean in GMM?",
	"options": [
	"Point is outlier",
	"Point strongly belongs to that Gaussian",
	"Cluster is ignored",
	"Model has failed"
	],
	"correctAnswerIndex": 1,
	"explanation": "High responsibility = high probability of belonging to that cluster."
	},
	{
	"id": 19,
	"questionText": "GMM belongs to which model category?",
	"options": [
	"Neural network",
	"Discriminative model",
	"Purely geometric model",
	"Generative probabilistic model"
	],
	"correctAnswerIndex": 3,
	"explanation": "GMM models probability distribution of data (generative)."
	},
	{
	"id": 20,
	"questionText": "GMM is useful in:",
	"options": [
	"Anomaly detection",
	"Speaker recognition",
	"Image segmentation",
	"All of the above"
	],
	"correctAnswerIndex": 3,
	"explanation": "GMM widely used in real-world probabilistic applications."
	},
	{
	"id": 21,
	"questionText": "What does GMM do better than K-Means?",
	"options": [
	"Handle only linear separability",
	"Ignore feature scale",
	"Model overlapping probabilistic clusters",
	"Assign hard labels only"
	],
	"correctAnswerIndex": 2,
	"explanation": "GMM outperforms K-Means when clusters overlap."
	},
	{
	"id": 22,
	"questionText": "A drawback of GMM is:",
	"options": [
	"Requires predefined number of clusters",
	"No probabilistic output",
	"Only works with categorical data",
	"Cannot handle continuous data"
	],
	"correctAnswerIndex": 0,
	"explanation": "Number of components must be specified before fitting."
	},
	{
	"id": 23,
	"questionText": "What initialization is commonly used for GMM?",
	"options": [
	"K-Means centroids",
	"Random labels",
	"Bootstrap resampling",
	"Gradient descent"
	],
	"correctAnswerIndex": 0,
	"explanation": "K-Means is commonly used to initialize cluster means."
	},
	{
	"id": 24,
	"questionText": "GMM uses which principle to maximize likelihood?",
	"options": [
	"Gradient Descent",
	"Expectation-Maximization",
	"Dropout Regularization",
	"Least Squares Minimization"
	],
	"correctAnswerIndex": 1,
	"explanation": "EM is a likelihood-based optimization method."
	},
	{
	"id": 25,
	"questionText": "The final decision in GMM assigns a point to the cluster with:",
	"options": [
	"Highest variance",
	"Minimum distance",
	"Maximum probability (responsibility)",
	"Random chance"
	],
	"correctAnswerIndex": 2,
	"explanation": "Soft → hard label by selecting cluster with highest probability."
	},
	{
	"id": 26,
	"questionText": "What role does covariance play in GMM?",
	"options": [
	"Defines cluster shape and orientation",
	"Sets learning rate",
	"Controls number of clusters",
	"Removes noise features"
	],
	"correctAnswerIndex": 0,
	"explanation": "Covariance allows modeling elliptical and rotated clusters."
	},
	{
	"id": 27,
	"questionText": "What does GMM maximize during training?",
	"options": [
	"Sum of distances",
	"Training accuracy",
	"Entropy of clusters",
	"Total log-likelihood of data"
	],
	"correctAnswerIndex": 3,
	"explanation": "EM optimizes log-likelihood of observing the data."
	},
	{
	"id": 28,
	"questionText": "What happens if two Gaussian components overlap heavily?",
	"options": [
	"Clusters merge into one automatically",
	"GMM switches to K-Means automatically",
	"GMM handles it with soft probabilities",
	"GMM fails immediately"
	],
	"correctAnswerIndex": 2,
	"explanation": "GMM assigns probabilities to each cluster; overlap is handled naturally via soft assignments."
	},
	{
	"id": 29,
	"questionText": "Which metric can be used to choose the number of components in GMM?",
	"options": [
	"Accuracy",
	"F1-score",
	"Learning rate",
	"AIC (Akaike Information Criterion)"
	],
	"correctAnswerIndex": 3,
	"explanation": "AIC and BIC help balance model fit with complexity to select number of components."
	},
	{
	"id": 30,
	"questionText": "BIC in GMM is used to:",
	"options": [
	"Normalize probabilities",
	"Estimate cluster assignments",
	"Update mean and covariance",
	"Select number of clusters considering model complexity"
	],
	"correctAnswerIndex": 3,
	"explanation": "Bayesian Information Criterion penalizes overly complex models to avoid overfitting."
	},
	{
	"id": 31,
	"questionText": "Scenario: You have overlapping clusters in 2D data. Which approach is suitable?",
	"options": [
	"K-Means",
	"DBSCAN with minPts=1",
	"Hierarchical clustering",
	"Gaussian Mixture Model"
	],
	"correctAnswerIndex": 3,
	"explanation": "GMM handles overlapping clusters probabilistically, unlike K-Means which assigns hard labels."
	},
	{
	"id": 32,
	"questionText": "Which initialization can improve EM convergence?",
	"options": [
	"Using covariance as identity for all clusters",
	"Setting all means to zero",
	"Using K-Means centroids as initial means",
	"Randomly choosing one data point"
	],
	"correctAnswerIndex": 2,
	"explanation": "K-Means initialization provides better starting points for EM algorithm."
	},
	{
	"id": 33,
	"questionText": "Soft clustering means:",
	"options": [
	"Each point has a probability of belonging to multiple clusters",
	"Clusters are linearly separable",
	"Clusters have equal sizes",
	"Each point is assigned only one cluster"
	],
	"correctAnswerIndex": 0,
	"explanation": "Soft clustering assigns probabilities rather than hard labels."
	},
	{
	"id": 34,
	"questionText": "What does EM algorithm alternate between?",
	"options": [
	"Expectation (E-step) and Maximization (M-step)",
	"Gradient descent and regularization",
	"Probability normalization and prediction",
	"Clustering and dimensionality reduction"
	],
	"correctAnswerIndex": 0,
	"explanation": "EM alternates between computing responsibilities and updating parameters."
	},
	{
	"id": 35,
	"questionText": "Scenario: A GMM component has nearly zero weight after EM. Implication?",
	"options": [
	"Component is insignificant; may be removed",
	"Covariance matrix is singular",
	"Model is invalid",
	"Training failed"
	],
	"correctAnswerIndex": 0,
	"explanation": "Very low weight indicates component contributes little to data representation."
	},
	{
	"id": 36,
	"questionText": "Scenario: Covariance type set to 'spherical'. Effect?",
	"options": [
	"All clusters are circular with equal variance in all directions",
	"EM cannot converge",
	"Clusters can have arbitrary orientation",
	"Covariance ignored"
	],
	"correctAnswerIndex": 0,
	"explanation": "Spherical covariance assumes isotropic variance for each cluster."
	},
	{
	"id": 37,
	"questionText": "Scenario: Covariance type 'full' in GMM allows:",
	"options": [
	"One-dimensional data only",
	"Elliptical clusters with arbitrary orientation",
	"Only circular clusters",
	"Clusters of equal size"
	],
	"correctAnswerIndex": 1,
	"explanation": "Full covariance allows each cluster to have a unique covariance matrix."
	},
	{
	"id": 38,
	"questionText": "Scenario: High-dimensional data with GMM. Challenge?",
	"options": [
	"Covariance estimation becomes difficult",
	"EM converges faster",
	"Number of clusters reduces automatically",
	"Probabilities become binary"
	],
	"correctAnswerIndex": 0,
	"explanation": "Estimating full covariance in high dimensions is prone to overfitting."
	},
	{
	"id": 39,
	"questionText": "Scenario: Using diagonal covariance instead of full. Advantage?",
	"options": [
	"Reduces number of parameters, faster EM",
	"EM fails automatically",
	"Improves cluster overlap",
	"Always increases accuracy"
	],
	"correctAnswerIndex": 0,
	"explanation": "Diagonal covariance assumes feature independence, reducing parameters."
	},
	{
	"id": 40,
	"questionText": "Scenario: Two clusters have very close means. EM may:",
	"options": [
	"Fail to run",
	"Merge clusters automatically",
	"Assign probabilities reflecting overlap",
	"Ignore one cluster"
	],
	"correctAnswerIndex": 2,
	"explanation": "Soft assignments reflect uncertainty in overlapping regions."
	},
	{
	"id": 41,
	"questionText": "Scenario: EM initialized with random means may:",
	"options": [
	"Converge to local maxima",
	"Merge clusters automatically",
	"Fail to compute responsibilities",
	"Always find global maximum"
	],
	"correctAnswerIndex": 0,
	"explanation": "EM is sensitive to initialization; may converge to local optima."
	},
	{
	"id": 42,
	"questionText": "Scenario: GMM applied to anomaly detection. How?",
	"options": [
	"EM ignores outliers automatically",
	"Points with low likelihood under model considered anomalies",
	"Points assigned to smallest cluster are anomalies",
	"Clusters removed, remaining points are anomalies"
	],
	"correctAnswerIndex": 1,
	"explanation": "GMM can detect outliers by evaluating likelihood of each point."
	},
	{
	"id": 43,
	"questionText": "Scenario: Overfitting in GMM can occur when:",
	"options": [
	"Dataset is small but clusters well-separated",
	"Covariance type is spherical",
	"Initialization uses K-Means",
	"Too many components relative to data size"
	],
	"correctAnswerIndex": 3,
	"explanation": "Excessive components may fit noise rather than true structure."
	},
	{
	"id": 44,
	"questionText": "Scenario: Selecting number of components with BIC. Lower BIC means:",
	"options": [
	"EM failed",
	"Overfitting",
	"Better balance between fit and complexity",
	"Worse model"
	],
	"correctAnswerIndex": 2,
	"explanation": "Lower BIC indicates model explains data well without unnecessary complexity."
	},
	{
	"id": 45,
	"questionText": "Scenario: You normalize features before GMM. Benefit?",
	"options": [
	"EM converges slower",
	"Number of components reduces",
	"Prevents dominance by large-scale features",
	"Covariance becomes singular"
	],
	"correctAnswerIndex": 2,
	"explanation": "Feature scaling ensures all features contribute equally to Gaussian components."
	},
	{
	"id": 46,
	"questionText": "Scenario: You use too few components in GMM. Likely effect?",
	"options": [
	"Covariance becomes negative",
	"Overfitting",
	"EM fails to converge",
	"Underfitting, poor representation of clusters"
	],
	"correctAnswerIndex": 3,
	"explanation": "Too few Gaussians cannot capture underlying data structure."
	},
	{
	"id": 47,
	"questionText": "Scenario: Two clusters have different variances. Which GMM setting captures this?",
	"options": [
	"Tied covariance",
	"Diagonal covariance only",
	"Full covariance",
	"Spherical covariance"
	],
	"correctAnswerIndex": 2,
	"explanation": "Full covariance allows different shapes and orientations per cluster."
	},
	{
	"id": 48,
	"questionText": "Scenario: Real-world use of GMM in speaker recognition relies on:",
	"options": [
	"Only frequency features",
	"Decision trees",
	"Modeling probability distribution of feature vectors",
	"Hard cluster labels"
	],
	"correctAnswerIndex": 2,
	"explanation": "GMM models the distribution of features for each speaker."
	},
	{
	"id": 49,
	"questionText": "Scenario: Image segmentation with GMM. How?",
	"options": [
	"K-Means replaces EM",
	"Pixels assigned randomly",
	"Only grayscale images",
	"Pixels assigned probabilistically to color clusters"
	],
	"correctAnswerIndex": 3,
	"explanation": "GMM can segment images based on color probability distributions."
	},
	{
	"id": 50,
	"questionText": "Scenario: Overlapping Gaussian clusters, hard assignment used. Effect?",
	"options": [
	"Covariance becomes zero",
	"EM improves accuracy",
	"Soft assignment automatically applied",
	"Information loss, may misclassify points"
	],
	"correctAnswerIndex": 3,
	"explanation": "Hard labels ignore uncertainty and may misrepresent overlapping regions."
	},
	{
	"id": 51,
	"questionText": "Scenario: You have categorical features. GMM suitability?",
	"options": [
	"Perfectly suitable",
	"Requires only diagonal covariance",
	"Not ideal; GMM assumes continuous features",
	"Number of components is irrelevant"
	],
	"correctAnswerIndex": 2,
	"explanation": "GMM assumes continuous-valued features for Gaussian distributions."
	},
	{
	"id": 52,
	"questionText": "Scenario: EM converges slowly. Common solutions?",
	"options": [
	"Ignore convergence criteria",
	"Better initialization, feature scaling, or fewer components",
	"Increase number of iterations indefinitely",
	"Switch to K-Means always"
	],
	"correctAnswerIndex": 1,
	"explanation": "Good initialization and preprocessing improve EM efficiency."
	},
	{
	"id": 53,
	"questionText": "Scenario: GMM for anomaly detection in network traffic. Strategy?",
	"options": [
	"Ignore rare events",
	"Hard assign all points to clusters",
	"Flag low likelihood points as anomalies",
	"Use K-Means to cluster anomalies"
	],
	"correctAnswerIndex": 2,
	"explanation": "Points with low probability under the model are considered outliers."
	},
	{
	"id": 54,
	"questionText": "Scenario: EM oscillates between two solutions. Likely reason?",
	"options": [
	"Number of components too small",
	"Poor initialization causing local maxima",
	"Covariance matrix full",
	"Using diagonal covariance"
	],
	"correctAnswerIndex": 1,
	"explanation": "EM can get stuck in local maxima if initial parameters are suboptimal."
	},
	{
	"id": 55,
	"questionText": "Scenario: Soft clustering probability threshold used to assign points. Advantage?",
	"options": [
	"EM fails automatically",
	"Always misclassifies clusters",
	"Allows filtering uncertain points",
	"Covariance ignored"
	],
	"correctAnswerIndex": 2,
	"explanation": "Thresholding allows flexible assignment based on confidence."
	},
	{
	"id": 56,
	"questionText": "Scenario: Tied covariance for all components. Effect?",
	"options": [
	"All clusters share same shape/orientation",
	"Covariance ignored",
	"Number of components reduced automatically",
	"EM cannot run"
	],
	"correctAnswerIndex": 0,
	"explanation": "Tied covariance forces all Gaussians to share same covariance matrix."
	},
	{
	"id": 57,
	"questionText": "Scenario: GMM with diagonal covariance and correlated features. Effect?",
	"options": [
	"EM automatically switches to full",
	"Perfect modeling",
	"Covariance fails to compute",
	"Model may be suboptimal due to ignored correlations"
	],
	"correctAnswerIndex": 3,
	"explanation": "Diagonal covariance ignores feature correlations, which may reduce accuracy."
	},
	{
	"id": 58,
	"questionText": "Scenario: High-dimensional data, small sample size. Solution for GMM?",
	"options": [
	"Always full covariance",
	"Increase number of components",
	"Ignore dimension scaling",
	"Use diagonal covariance or reduce dimensions with PCA"
	],
	"correctAnswerIndex": 3,
	"explanation": "Reducing parameters via diagonal covariance or PCA helps prevent overfitting."
	},
	{
	"id": 59,
	"questionText": "Scenario: GMM applied on time-series data. Typical strategy?",
	"options": [
	"Switch to K-Means only",
	"Model features extracted per time window",
	"Use raw timestamps directly",
	"Ignore temporal ordering"
	],
	"correctAnswerIndex": 1,
	"explanation": "Features are extracted per window to apply GMM effectively."
	},
	{
	"id": 60,
	"questionText": "Scenario: You wish to compare two GMMs with different components. Metric?",
	"options": [
	"Silhouette score",
	"Mean squared error",
	"AIC/BIC",
	"Accuracy"
	],
	"correctAnswerIndex": 2,
	"explanation": "AIC/BIC compare likelihoods while penalizing complexity."
	},
	{
	"id": 61,
	"questionText": "Scenario: EM stops improving log-likelihood. Action?",
	"options": [
	"Reinitialize covariance",
	"Converged; training can stop",
	"Increase components automatically",
	"Reduce number of iterations"
	],
	"correctAnswerIndex": 1,
	"explanation": "Plateau in log-likelihood indicates convergence."
	},
	{
	"id": 62,
	"questionText": "Scenario: GMM applied on overlapping clusters. Which is true?",
	"options": [
	"Clusters must be separated manually",
	"GMM fails completely",
	"K-Means always better",
	"Soft assignments handle ambiguity better than hard assignments"
	],
	"correctAnswerIndex": 3,
	"explanation": "Soft probabilistic assignments capture uncertainty in overlapping regions."
	},
	{
	"id": 63,
	"questionText": "Scenario: EM converges to degenerate covariance. Likely cause?",
	"options": [
	"Full covariance required",
	"Component collapsed to single data point",
	"Too few iterations",
	"Random initialization"
	],
	"correctAnswerIndex": 1,
	"explanation": "A component may shrink variance toward zero, creating numerical issues."
	},
	{
	"id": 64,
	"questionText": "Scenario: Choosing between GMM and K-Means. Advantage of GMM?",
	"options": [
	"Always faster",
	"Works only on spherical clusters",
	"No parameters needed",
	"Handles overlapping, probabilistic clusters"
	],
	"correctAnswerIndex": 3,
	"explanation": "GMM can model overlap using probabilities, unlike K-Means."
	},
	{
	"id": 65,
	"questionText": "Scenario: GMM with full covariance. Drawback?",
	"options": [
	"EM does not converge",
	"Cannot model elliptical clusters",
	"Higher number of parameters; risk of overfitting",
	"Soft assignment ignored"
	],
	"correctAnswerIndex": 2,
	"explanation": "Full covariance requires estimating many parameters, sensitive to small datasets."
	},
	{
	"id": 66,
	"questionText": "Scenario: GMM for anomaly detection. Threshold selection?",
	"options": [
	"Use hard assignments only",
	"Ignore low-probability points",
	"Based on likelihood distribution of normal data",
	"Random selection"
	],
	"correctAnswerIndex": 2,
	"explanation": "Threshold is chosen based on typical likelihood values of normal data."
	},
	{
	"id": 67,
	"questionText": "Scenario: EM alternates but log-likelihood decreases. Cause?",
	"options": [
	"Soft assignments ignored",
	"Convergence achieved",
	"Number of components too low",
	"Numerical instability or rounding errors"
	],
	"correctAnswerIndex": 3,
	"explanation": "Numerical issues may cause slight decreases; regularization may help."
	},
	{
	"id": 68,
	"questionText": "Scenario: GMM on skewed data. Observation?",
	"options": [
	"Clusters automatically corrected",
	"EM converges faster",
	"Gaussian assumption may be violated",
	"Covariance ignored"
	],
	"correctAnswerIndex": 2,
	"explanation": "GMM assumes Gaussian distribution; skewed data may reduce accuracy."
	},
	{
	"id": 69,
	"questionText": "Scenario: Using GMM to compress data. How?",
	"options": [
	"Remove clusters randomly",
	"Use only spherical covariance",
	"Switch to K-Means",
	"Represent each point by cluster responsibilities instead of raw features"
	],
	"correctAnswerIndex": 3,
	"explanation": "Probabilities can serve as a compact representation of original features."
	},
	{
	"id": 70,
	"questionText": "Scenario: GMM applied to multimodal distribution. Advantage?",
	"options": [
	"Models multiple peaks naturally using several Gaussians",
	"Covariance must be diagonal",
	"Cannot handle multimodal data",
	"Requires K-Means preprocessing"
	],
	"correctAnswerIndex": 0,
	"explanation": "Multiple Gaussian components allow GMM to capture multimodal patterns."
	},
	{
	"id": 71,
	"questionText": "Scenario: You want to model customer segments with GMM. Best approach?",
	"options": [
	"Use K-Means only",
	"Use soft clustering to capture overlapping preferences",
	"Assign each customer randomly",
	"Ignore continuous features"
	],
	"correctAnswerIndex": 1,
	"explanation": "Soft clustering captures overlapping behavior patterns between segments."
	},
	{
	"id": 72,
	"questionText": "Scenario: Data has outliers. How does GMM handle them?",
	"options": [
	"EM fails automatically",
	"Outliers dominate clusters",
	"Outliers get low probabilities; may need special handling",
	"Clusters merge to include outliers"
	],
	"correctAnswerIndex": 2,
	"explanation": "Outliers have low likelihood under Gaussian components, reducing their impact."
	},
	{
	"id": 73,
	"questionText": "Scenario: Using EM, you notice very slow convergence. Possible fix?",
	"options": [
	"Switch to hierarchical clustering",
	"Randomly assign clusters",
	"Improve initialization, scale features, or reduce number of components",
	"Increase iterations without changes"
	],
	"correctAnswerIndex": 2,
	"explanation": "Good initialization and preprocessing improve EM efficiency."
	},
	{
	"id": 74,
	"questionText": "Scenario: You apply GMM to cluster text embeddings. Challenge?",
	"options": [
	"High-dimensionality may make full covariance unstable",
	"GMM works perfectly without change",
	"Soft assignments are ignored",
	"Clusters must be one-dimensional"
	],
	"correctAnswerIndex": 0,
	"explanation": "High-dimensional embeddings require dimensionality reduction or diagonal covariance."
	},
	{
	"id": 75,
	"questionText": "Scenario: You use GMM with tied covariance. Effect?",
	"options": [
	"EM fails automatically",
	"Clusters become one",
	"Each cluster has unique covariance",
	"All clusters share same covariance matrix"
	],
	"correctAnswerIndex": 3,
	"explanation": "Tied covariance enforces shared shape/orientation for all Gaussians."
	},
	{
	"id": 76,
	"questionText": "Scenario: GMM model seems overfitted. Possible reasons?",
	"options": [
	"Too many components or full covariance on small data",
	"Diagonal covariance used",
	"EM converged perfectly",
	"Spherical covariance used"
	],
	"correctAnswerIndex": 0,
	"explanation": "Overfitting occurs when model complexity exceeds data size."
	},
	{
	"id": 77,
	"questionText": "Scenario: You want interpretable clusters with GMM. Strategy?",
	"options": [
	"Soft assignments ignored",
	"Use fewer components and diagonal covariance",
	"Random initialization",
	"Use full covariance and many components"
	],
	"correctAnswerIndex": 1,
	"explanation": "Fewer components and simpler covariance improve interpretability."
	},
	{
	"id": 78,
	"questionText": "Scenario: You combine GMM with PCA. Purpose?",
	"options": [
	"Ignore low-variance features",
	"Increase number of clusters",
	"EM converges automatically",
	"Reduce dimensionality to stabilize covariance estimation"
	],
	"correctAnswerIndex": 3,
	"explanation": "PCA reduces features, improving parameter estimation in high-dimensional GMM."
	},
	{
	"id": 79,
	"questionText": "Scenario: GMM applied on customer churn probability. Approach?",
	"options": [
	"Use only binary labels",
	"Switch to linear regression",
	"Hard cluster and ignore overlap",
	"Model feature distribution, assign probabilities to segments"
	],
	"correctAnswerIndex": 3,
	"explanation": "GMM probabilistically models customer groups for better segmentation."
	},
	{
	"id": 80,
	"questionText": "Scenario: You notice EM stuck in local optimum. Solution?",
	"options": [
	"Use fewer components only",
	"Increase iterations infinitely",
	"Try multiple random initializations",
	"Ignore convergence"
	],
	"correctAnswerIndex": 2,
	"explanation": "Multiple initializations reduce chance of getting trapped in local maxima."
	},
	{
	"id": 81,
	"questionText": "Scenario: Using GMM for speaker verification. Why suitable?",
	"options": [
	"K-Means performs better",
	"Captures probabilistic feature distributions per speaker",
	"Covariance must be spherical",
	"Hard assignments suffice"
	],
	"correctAnswerIndex": 1,
	"explanation": "GMM models variability in speaker features effectively."
	},
	{
	"id": 82,
	"questionText": "Scenario: EM applied to data with small clusters. Challenge?",
	"options": [
	"Soft assignments fail",
	"Small clusters may be ignored or collapse",
	"Covariance ignored",
	"EM always finds them"
	],
	"correctAnswerIndex": 1,
	"explanation": "EM may assign negligible weight to very small clusters."
	},
	{
	"id": 83,
	"questionText": "Scenario: GMM used in anomaly detection for fraud. Key idea?",
	"options": [
	"Transactions are clustered randomly",
	"High-probability transactions flagged",
	"Transactions with low probability under model are flagged",
	"EM ignores rare patterns"
	],
	"correctAnswerIndex": 2,
	"explanation": "Low-likelihood points are potential anomalies."
	},
	{
	"id": 84,
	"questionText": "Scenario: GMM applied to multimodal sensor readings. Advantage?",
	"options": [
	"EM fails automatically",
	"Single Gaussian suffices",
	"Spherical covariance required",
	"Multiple peaks captured by several Gaussian components"
	],
	"correctAnswerIndex": 3,
	"explanation": "Multiple Gaussians allow modeling multimodal distributions."
	},
	{
	"id": 85,
	"questionText": "Scenario: You notice EM log-likelihood plateauing early. Interpretation?",
	"options": [
	"EM converged; model parameters stabilized",
	"Covariance ignored",
	"EM failed",
	"Increase components immediately"
	],
	"correctAnswerIndex": 0,
	"explanation": "Plateau indicates convergence of EM algorithm."
	},
	{
	"id": 86,
	"questionText": "Scenario: GMM with full covariance on small dataset. Risk?",
	"options": [
	"Overfitting due to too many parameters",
	"EM fails automatically",
	"Better modeling",
	"Clusters ignored"
	],
	"correctAnswerIndex": 0,
	"explanation": "Full covariance requires estimating many parameters, risky for small data."
	},
	{
	"id": 87,
	"questionText": "Scenario: GMM used for image segmentation. Key step?",
	"options": [
	"Only grayscale images allowed",
	"K-Means replacement",
	"Assign pixels probabilistically to color clusters",
	"Ignore soft assignments"
	],
	"correctAnswerIndex": 2,
	"explanation": "Soft clustering assigns pixels to Gaussian components representing colors."
	},
	{
	"id": 88,
	"questionText": "Scenario: GMM applied to text clustering with embeddings. Key step?",
	"options": [
	"Increase components arbitrarily",
	"Soft assignments ignored",
	"Use dimensionality reduction to stabilize covariance estimation",
	"EM fails automatically"
	],
	"correctAnswerIndex": 2,
	"explanation": "Reducing dimensionality prevents overfitting in high-dimensional embeddings."
	},
	{
	"id": 89,
	"questionText": "Scenario: GMM with overlapping clusters. Hard labels used. Effect?",
	"options": [
	"Improves EM convergence",
	"Loss of probabilistic information; misclassification possible",
	"EM fails",
	"Covariance ignored"
	],
	"correctAnswerIndex": 1,
	"explanation": "Hard labels ignore uncertainty in overlapping regions."
	},
	{
	"id": 90,
	"questionText": "Scenario: EM for GMM shows component collapsing. Solution?",
	"options": [
	"Use diagonal covariance always",
	"Reduce number of components only",
	"Regularize covariance to prevent singularities",
	"Ignore component"
	],
	"correctAnswerIndex": 2,
	"explanation": "Regularization prevents variances from collapsing to zero."
	},
	{
	"id": 91,
	"questionText": "Scenario: Choosing GMM vs K-Means. Advantage?",
	"options": [
	"K-Means always faster",
	"EM not required",
	"GMM ignores probabilities",
	"Soft assignment, handles overlap and ellipsoidal clusters"
	],
	"correctAnswerIndex": 3,
	"explanation": "GMM models overlapping clusters with probabilistic assignments."
	},
	{
	"id": 92,
	"questionText": "Scenario: GMM applied for speech synthesis. Benefit?",
	"options": [
	"Clusters speakers only",
	"Soft assignments ignored",
	"Models probability distribution of acoustic features",
	"Only spherical clusters allowed"
	],
	"correctAnswerIndex": 2,
	"explanation": "GMM captures feature distributions needed for realistic speech synthesis."
	},
	{
	"id": 93,
	"questionText": "Scenario: EM fails to converge. Possible reasons?",
	"options": [
	"Too few iterations",
	"Poor initialization, singular covariance, or incompatible data",
	"Full covariance always fails",
	"Soft assignments ignored"
	],
	"correctAnswerIndex": 1,
	"explanation": "Bad initialization or degenerate covariances can prevent EM convergence."
	},
	{
	"id": 94,
	"questionText": "Scenario: You need probabilistic clustering on 2D sensor data. Choice?",
	"options": [
	"Hierarchical clustering",
	"DBSCAN only",
	"GMM with appropriate covariance type",
	"K-Means only"
	],
	"correctAnswerIndex": 2,
	"explanation": "GMM provides soft probabilistic clustering for continuous data."
	},
	{
	"id": 95,
	"questionText": "Scenario: GMM used for anomaly detection in machinery. How?",
	"options": [
	"Use hard assignments only",
	"Cluster readings randomly",
	"Flag low-likelihood sensor readings as anomalies",
	"Ignore rare readings"
	],
	"correctAnswerIndex": 2,
	"explanation": "Points that do not fit any Gaussian component well can indicate anomalies."
	},
	{
	"id": 96,
	"questionText": "Scenario: High-dimensional embeddings, GMM unstable. Solution?",
	"options": [
	"Increase components",
	"Ignore scaling",
	"Reduce dimensions with PCA or use diagonal covariance",
	"Use full covariance only"
	],
	"correctAnswerIndex": 2,
	"explanation": "Reducing parameters prevents overfitting and stabilizes EM."
	},
	{
	"id": 97,
	"questionText": "Scenario: GMM for multimodal customer behavior. Advantage?",
	"options": [
	"K-Means better",
	"Multiple components capture different behavioral modes",
	"Single Gaussian suffices",
	"Covariance must be spherical"
	],
	"correctAnswerIndex": 1,
	"explanation": "Different modes of behavior can be modeled with multiple Gaussians."
	},
	{
	"id": 98,
	"questionText": "Scenario: EM converges but log-likelihood decreases occasionally. Cause?",
	"options": [
	"Numerical instability; can use regularization",
	"Covariance ignored",
	"EM failed",
	"Increase components"
	],
	"correctAnswerIndex": 0,
	"explanation": "Small decreases may occur due to rounding errors; regularization helps."
	},
	{
	"id": 99,
	"questionText": "Scenario: Soft assignment threshold applied. Benefit?",
	"options": [
	"Filter uncertain points or highlight ambiguous memberships",
	"EM fails automatically",
	"Covariance ignored",
	"Always misclassifies clusters"
	],
	"correctAnswerIndex": 0,
	"explanation": "Thresholding helps decide which points are confidently assigned."
	},
	{
	"id": 100,
	"questionText": "Scenario: GMM applied in finance for risk clustering. Advantage?",
	"options": [
	"Requires categorical data only",
	"Models probability distribution of different risk profiles",
	"Soft assignment ignored",
	"Clusters randomly"
	],
	"correctAnswerIndex": 1,
	"explanation": "GMM provides probabilistic segmentation of customers or assets by risk levels."
	}
	]
	}