| { | |
| "title": "DBSCAN Mastery: 100 MCQs", | |
| "description": "A comprehensive set of 100 multiple-choice questions designed to test and deepen your understanding of DBSCAN (Density-Based Spatial Clustering of Applications with Noise), covering fundamental concepts, parameters, advantages, limitations, and practical scenarios.", | |
| "questions": [ | |
| { | |
| "id": 1, | |
| "questionText": "What is the main idea behind DBSCAN clustering?", | |
| "options": [ | |
| "Clusters are dense regions separated by sparse regions", | |
| "All points are assigned to a cluster", | |
| "Clusters are linearly separable", | |
| "Clusters are formed by equal-sized groups" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "DBSCAN identifies clusters based on density: areas with many points form clusters, and sparse regions separate them." | |
| }, | |
| { | |
| "id": 2, | |
| "questionText": "DBSCAN requires which key parameters?", | |
| "options": [ | |
| "Number of clusters (k) only", | |
| "Learning rate and iterations", | |
| "Distance metric only", | |
| "Epsilon (eps) and Minimum points (minPts)" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "DBSCAN uses eps (neighborhood radius) and minPts (minimum points to form a dense region) to define clusters." | |
| }, | |
| { | |
| "id": 3, | |
| "questionText": "In DBSCAN, what is a 'core point'?", | |
| "options": [ | |
| "Point on the boundary of clusters", | |
| "Point with no neighbors", | |
| "Point with at least minPts neighbors within eps", | |
| "Any point in the dataset" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "A core point has enough neighboring points within eps to be considered part of a dense cluster." | |
| }, | |
| { | |
| "id": 4, | |
| "questionText": "In DBSCAN, what is a 'border point'?", | |
| "options": [ | |
| "Point not in any cluster", | |
| "Point reachable from a core point but with fewer than minPts neighbors", | |
| "Point with more than minPts neighbors", | |
| "Centroid of a cluster" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Border points are density-reachable from core points but do not have enough neighbors themselves to be core points." | |
| }, | |
| { | |
| "id": 5, | |
| "questionText": "In DBSCAN, what is a 'noise point'?", | |
| "options": [ | |
| "Point with maximum density", | |
| "Point on the cluster centroid", | |
| "Point with exactly minPts neighbors", | |
| "Point not reachable from any core point" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Noise points are isolated points that do not belong to any cluster." | |
| }, | |
| { | |
| "id": 6, | |
| "questionText": "Scenario: You have clusters of varying density. Challenge for DBSCAN?", | |
| "options": [ | |
| "DBSCAN fails to run", | |
| "Always finds all clusters perfectly", | |
| "Clusters become linearly separable", | |
| "May merge dense clusters and miss sparse ones" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "DBSCAN struggles with clusters of differing densities because eps and minPts are global parameters." | |
| }, | |
| { | |
| "id": 7, | |
| "questionText": "Scenario: Choosing eps too large. Effect?", | |
| "options": [ | |
| "Algorithm fails", | |
| "Noise increases", | |
| "More clusters detected", | |
| "Clusters may merge; noise reduced" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Large eps connects distant points, possibly merging distinct clusters." | |
| }, | |
| { | |
| "id": 8, | |
| "questionText": "Scenario: Choosing eps too small. Effect?", | |
| "options": [ | |
| "Clusters merge", | |
| "Many points labeled as noise; clusters fragmented", | |
| "No effect", | |
| "EM applied instead" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Small eps results in fewer neighbors; many points cannot form clusters and are marked as noise." | |
| }, | |
| { | |
| "id": 9, | |
| "questionText": "Scenario: Setting minPts too high. Effect?", | |
| "options": [ | |
| "Clusters merge", | |
| "More points labeled as noise; small clusters ignored", | |
| "Algorithm fails", | |
| "Clusters increase" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "High minPts requires dense regions to form clusters, excluding smaller or sparse clusters." | |
| }, | |
| { | |
| "id": 10, | |
| "questionText": "Scenario: Setting minPts too low. Effect?", | |
| "options": [ | |
| "DBSCAN fails", | |
| "Clusters disappear", | |
| "Many small clusters; noise reduced", | |
| "Clusters merge automatically" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Low minPts allows small groups to form clusters, potentially splitting natural clusters." | |
| }, | |
| { | |
| "id": 11, | |
| "questionText": "Scenario: A border point is connected to multiple core points of different clusters. How is it assigned?", | |
| "options": [ | |
| "Becomes noise automatically", | |
| "Forms a new cluster", | |
| "Assigned to any one cluster arbitrarily or first reachable", | |
| "Algorithm fails" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Border points can belong to one cluster; usually assigned to the first core point that reaches it." | |
| }, | |
| { | |
| "id": 12, | |
| "questionText": "Scenario: You have 2D spatial data with noise. DBSCAN advantage?", | |
| "options": [ | |
| "Detects clusters of arbitrary shape and identifies noise", | |
| "Requires clusters to be circular", | |
| "Sensitive to number of clusters parameter", | |
| "Assigns all points to clusters" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "DBSCAN works well for arbitrary shapes and identifies noise points." | |
| }, | |
| { | |
| "id": 13, | |
| "questionText": "Scenario: Using Euclidean distance vs Manhattan distance in DBSCAN. Effect?", | |
| "options": [ | |
| "Distance metric affects cluster shapes and eps choice", | |
| "DBSCAN fails", | |
| "No effect; clusters same", | |
| "Noise ignored" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Different distance metrics affect neighborhood calculation, which can change clustering." | |
| }, | |
| { | |
| "id": 14, | |
| "questionText": "Scenario: DBSCAN applied on high-dimensional data. Challenge?", | |
| "options": [ | |
| "Distance measures become less meaningful (curse of dimensionality)", | |
| "Algorithm runs faster", | |
| "Clusters automatically reduce", | |
| "Noise decreases" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "High dimensions can make points appear equidistant, complicating density estimation." | |
| }, | |
| { | |
| "id": 15, | |
| "questionText": "Scenario: You have concentric clusters. DBSCAN challenge?", | |
| "options": [ | |
| "May fail to separate inner and outer clusters depending on eps", | |
| "Always separates perfectly", | |
| "Clusters merge automatically", | |
| "Noise increases" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Density difference between inner and outer rings may cause DBSCAN to merge or mislabel clusters." | |
| }, | |
| { | |
| "id": 16, | |
| "questionText": "Scenario: Using DBSCAN for geospatial clustering. Advantage?", | |
| "options": [ | |
| "Finds clusters of arbitrary shape like regions or neighborhoods", | |
| "Clusters must be circular", | |
| "All points assigned", | |
| "Sensitive to number of clusters" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "DBSCAN can identify irregularly shaped spatial clusters without specifying cluster count." | |
| }, | |
| { | |
| "id": 17, | |
| "questionText": "Scenario: You want clusters of varying density. DBSCAN limitation?", | |
| "options": [ | |
| "Noise removed automatically", | |
| "Single global eps may not detect all clusters", | |
| "Algorithm adapts automatically", | |
| "All clusters found" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "DBSCAN uses a fixed eps, which can miss sparse clusters or merge dense clusters." | |
| }, | |
| { | |
| "id": 18, | |
| "questionText": "Scenario: You apply DBSCAN on streaming data. Challenge?", | |
| "options": [ | |
| "Noise ignored", | |
| "Automatically updates clusters", | |
| "All points reassigned automatically", | |
| "Standard DBSCAN is static; streaming adaptation required" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "DBSCAN is not incremental; modifications are needed for dynamic/streaming data." | |
| }, | |
| { | |
| "id": 19, | |
| "questionText": "Scenario: Using DBSCAN for anomaly detection. Approach?", | |
| "options": [ | |
| "Assign random labels", | |
| "Label points not in any cluster as anomalies", | |
| "Clusters merged manually", | |
| "Use all clusters for prediction" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Noise points are naturally flagged as outliers." | |
| }, | |
| { | |
| "id": 20, | |
| "questionText": "Scenario: DBSCAN vs K-Means on arbitrary-shaped clusters. Advantage?", | |
| "options": [ | |
| "DBSCAN fails for shapes", | |
| "Both perform equally", | |
| "K-Means better for arbitrary shapes", | |
| "DBSCAN can capture non-spherical clusters; K-Means cannot" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "DBSCAN works with clusters of any shape without requiring centroids." | |
| }, | |
| { | |
| "id": 21, | |
| "questionText": "Scenario: Two clusters are close together but separated by sparse points. DBSCAN outcome?", | |
| "options": [ | |
| "Fails to converge", | |
| "Marks everything as noise", | |
| "Correctly separates clusters using density differences", | |
| "Merges clusters automatically" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Sparse points allow DBSCAN to distinguish dense clusters even if they are close." | |
| }, | |
| { | |
| "id": 22, | |
| "questionText": "Scenario: Applying DBSCAN on 3D point cloud data. Advantage?", | |
| "options": [ | |
| "Clusters must be spherical", | |
| "All points assigned to clusters", | |
| "Can find clusters of arbitrary 3D shape and ignore noise", | |
| "Requires predefining cluster centers" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "DBSCAN handles multi-dimensional data and can identify irregular clusters and noise." | |
| }, | |
| { | |
| "id": 23, | |
| "questionText": "Scenario: DBSCAN uses Manhattan distance on grid data. Effect?", | |
| "options": [ | |
| "Algorithm fails", | |
| "Noise increases automatically", | |
| "Clusters align with grid; eps choice differs", | |
| "No effect on clusters" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Distance metric changes the neighborhood definition, affecting cluster formation." | |
| }, | |
| { | |
| "id": 24, | |
| "questionText": "Scenario: You want small but dense clusters. How to set parameters?", | |
| "options": [ | |
| "Small eps and appropriate minPts", | |
| "Large eps", | |
| "Ignore parameters", | |
| "Large minPts" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Smaller eps ensures that small dense regions form separate clusters." | |
| }, | |
| { | |
| "id": 25, | |
| "questionText": "Scenario: You have noisy sensor data. DBSCAN benefit?", | |
| "options": [ | |
| "Clusters all points", | |
| "Fails with noise", | |
| "Requires K-Means preprocessing", | |
| "Automatically labels isolated points as noise" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "DBSCAN identifies low-density points as noise, avoiding misclassification." | |
| }, | |
| { | |
| "id": 26, | |
| "questionText": "Scenario: Data with hierarchical cluster structure. Limitation of DBSCAN?", | |
| "options": [ | |
| "Noise ignored", | |
| "All clusters merged", | |
| "Cannot detect hierarchy; only flat clusters", | |
| "Automatically finds hierarchy" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "DBSCAN provides flat clustering; hierarchical relationships are not captured." | |
| }, | |
| { | |
| "id": 27, | |
| "questionText": "Scenario: Using DBSCAN for image segmentation. Advantage?", | |
| "options": [ | |
| "Requires predefined number of segments", | |
| "All pixels assigned to clusters", | |
| "Identifies irregular regions and isolates noise", | |
| "Clusters must be circular" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "DBSCAN captures arbitrary-shaped regions and treats background/noisy pixels as noise." | |
| }, | |
| { | |
| "id": 28, | |
| "questionText": "Scenario: You have clusters of different densities. How to adapt DBSCAN?", | |
| "options": [ | |
| "Reduce dimensionality", | |
| "Use varying eps with methods like HDBSCAN", | |
| "Keep single global eps", | |
| "Increase minPts" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Standard DBSCAN struggles with varying densities; adaptive versions like HDBSCAN help." | |
| }, | |
| { | |
| "id": 29, | |
| "questionText": "Scenario: DBSCAN fails to detect clusters in high-dimensional text embeddings. Solution?", | |
| "options": [ | |
| "Increase minPts arbitrarily", | |
| "Use full covariance", | |
| "Reduce dimensions using PCA or t-SNE before clustering", | |
| "Ignore scaling" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Dimensionality reduction improves distance computation and density estimation." | |
| }, | |
| { | |
| "id": 30, | |
| "questionText": "Scenario: Choosing minPts in DBSCAN. Rule of thumb?", | |
| "options": [ | |
| "MinPts = 1 always", | |
| "MinPts = dataset size", | |
| "MinPts ignored", | |
| "MinPts ≥ dimensionality + 1" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "MinPts should be slightly larger than the data dimensionality for meaningful clusters." | |
| }, | |
| { | |
| "id": 31, | |
| "questionText": "Scenario: DBSCAN applied on GPS data of taxis in a city. Best use case?", | |
| "options": [ | |
| "Assign random clusters", | |
| "Identify high-density pickup/drop-off hotspots", | |
| "Detect only circular areas", | |
| "Cluster by taxi color" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "DBSCAN can detect dense regions where taxis frequently gather without assuming cluster shape." | |
| }, | |
| { | |
| "id": 32, | |
| "questionText": "Scenario: You notice DBSCAN marks too many points as noise. Likely cause?", | |
| "options": [ | |
| "Algorithm failed", | |
| "All clusters are too dense", | |
| "Distance metric wrong", | |
| "eps too small or minPts too high" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Small eps or high minPts can make points unable to form clusters, labeling them as noise." | |
| }, | |
| { | |
| "id": 33, | |
| "questionText": "Scenario: DBSCAN applied to social network graph. Challenge?", | |
| "options": [ | |
| "Clusters are always detected", | |
| "DBSCAN works directly on graph", | |
| "Noise ignored", | |
| "Graph edges may not correspond to meaningful distances; need transformation" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "DBSCAN requires distance metrics; graphs need embedding or distance conversion." | |
| }, | |
| { | |
| "id": 34, | |
| "questionText": "Scenario: Using DBSCAN for anomaly detection in network traffic. How?", | |
| "options": [ | |
| "Label low-density patterns as anomalies", | |
| "All high-traffic nodes flagged", | |
| "Randomly assign anomalies", | |
| "Clusters merged manually" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Low-density points correspond to unusual patterns, suitable for anomaly detection." | |
| }, | |
| { | |
| "id": 35, | |
| "questionText": "Scenario: High-dimensional DBSCAN performance issue. Solution?", | |
| "options": [ | |
| "Use dimensionality reduction or HDBSCAN", | |
| "Ignore distance metric", | |
| "Use K-Means instead", | |
| "Increase eps arbitrarily" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Reducing dimensions or using hierarchical density clustering helps in high-dimensional spaces." | |
| }, | |
| { | |
| "id": 36, | |
| "questionText": "Scenario: Clusters are elongated. DBSCAN vs K-Means?", | |
| "options": [ | |
| "K-Means works better", | |
| "All points assigned to noise", | |
| "DBSCAN captures arbitrary shapes better", | |
| "Both fail" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "DBSCAN does not assume spherical clusters, so elongated shapes are captured well." | |
| }, | |
| { | |
| "id": 37, | |
| "questionText": "Scenario: DBSCAN fails on variable-density clusters. Solution?", | |
| "options": [ | |
| "Reduce minPts to 1", | |
| "Use HDBSCAN for adaptive density clustering", | |
| "Increase eps globally", | |
| "Ignore problem" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "HDBSCAN handles clusters with varying density better than standard DBSCAN." | |
| }, | |
| { | |
| "id": 38, | |
| "questionText": "Scenario: You want reproducible DBSCAN results. Requirement?", | |
| "options": [ | |
| "Ignore minPts", | |
| "Deterministic neighbor search and consistent distance metric", | |
| "Random initialization", | |
| "Vary eps each run" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Reproducibility requires deterministic calculations for neighborhoods and distances." | |
| }, | |
| { | |
| "id": 39, | |
| "questionText": "Scenario: DBSCAN applied on time-series sensor readings. Approach?", | |
| "options": [ | |
| "Use sliding windows to extract features before clustering", | |
| "Clusters automatically detected", | |
| "Apply DBSCAN on raw timestamps", | |
| "Ignore feature extraction" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Time-series features are extracted to represent temporal patterns for density-based clustering." | |
| }, | |
| { | |
| "id": 40, | |
| "questionText": "Scenario: You need clusters and hierarchy. Limitation of DBSCAN?", | |
| "options": [ | |
| "Noise ignored", | |
| "DBSCAN provides only flat clustering", | |
| "Automatically generates hierarchy", | |
| "Clusters nested by default" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "DBSCAN produces flat clusters; hierarchical relationships require extensions like HDBSCAN." | |
| }, | |
| { | |
| "id": 41, | |
| "questionText": "Scenario: DBSCAN applied on customer purchase patterns. Advantage?", | |
| "options": [ | |
| "Requires predefined cluster number", | |
| "Sensitive to initial seed", | |
| "Detects dense buying behavior groups and isolates rare patterns", | |
| "All points assigned" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "DBSCAN identifies dense purchasing patterns and separates anomalies naturally." | |
| }, | |
| { | |
| "id": 42, | |
| "questionText": "Scenario: You want to tune DBSCAN eps parameter. Approach?", | |
| "options": [ | |
| "MinPts adjustment only", | |
| "Always choose maximum distance", | |
| "Use k-distance graph to identify elbow point", | |
| "Randomly guess eps" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Plotting k-distance helps find a suitable eps where distances start increasing sharply." | |
| }, | |
| { | |
| "id": 43, | |
| "questionText": "Scenario: DBSCAN with overlapping clusters. Effect?", | |
| "options": [ | |
| "Points duplicated", | |
| "Clusters fail completely", | |
| "Overlap handled by density; border points assigned to one cluster", | |
| "Noise ignored" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "DBSCAN assigns border points to a reachable cluster; soft assignment is not available." | |
| }, | |
| { | |
| "id": 44, | |
| "questionText": "Scenario: Applying DBSCAN to text embeddings. Challenge?", | |
| "options": [ | |
| "Noise ignored", | |
| "All points assigned to clusters", | |
| "DBSCAN always works", | |
| "High-dimensional distances may be less meaningful" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Distance measures in high dimensions can reduce effectiveness; dimensionality reduction helps." | |
| }, | |
| { | |
| "id": 45, | |
| "questionText": "Scenario: Noise proportion is high. DBSCAN behavior?", | |
| "options": [ | |
| "Many points labeled as noise; cluster detection limited", | |
| "Algorithm fails", | |
| "Clusters detected perfectly", | |
| "All points assigned to clusters" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "High noise density can prevent formation of dense clusters." | |
| }, | |
| { | |
| "id": 46, | |
| "questionText": "Scenario: DBSCAN on streaming data. Limitation?", | |
| "options": [ | |
| "Standard DBSCAN is static; needs incremental adaptation", | |
| "All points reassigned automatically", | |
| "Noise ignored", | |
| "Automatically updates clusters" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "DBSCAN is not inherently incremental; streaming data requires modified algorithms." | |
| }, | |
| { | |
| "id": 47, | |
| "questionText": "Scenario: DBSCAN vs K-Means for non-spherical clusters. Advantage?", | |
| "options": [ | |
| "Both fail", | |
| "K-Means better", | |
| "Noise ignored", | |
| "DBSCAN detects arbitrary shapes; K-Means cannot" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "DBSCAN does not rely on centroid or spherical assumption." | |
| }, | |
| { | |
| "id": 48, | |
| "questionText": "Scenario: You apply DBSCAN on noisy sensor readings. Outcome?", | |
| "options": [ | |
| "Isolates isolated points as noise automatically", | |
| "Clusters all points", | |
| "Noise merged into clusters", | |
| "Algorithm fails" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Low-density or isolated points are correctly treated as noise." | |
| }, | |
| { | |
| "id": 49, | |
| "questionText": "Scenario: Choosing distance metric affects DBSCAN. Why?", | |
| "options": [ | |
| "All clusters merge", | |
| "No effect", | |
| "Neighborhood depends on distance; cluster shape affected", | |
| "Noise ignored" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Different metrics change neighbor counts, affecting core points and cluster formation." | |
| }, | |
| { | |
| "id": 50, | |
| "questionText": "Scenario: DBSCAN on highly skewed 2D data. Challenge?", | |
| "options": [ | |
| "Clusters detected automatically", | |
| "Algorithm fails", | |
| "Fixed eps may not capture sparse areas", | |
| "Noise reduced" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Single eps cannot adapt to varying densities; sparse regions may be misclassified." | |
| }, | |
| { | |
| "id": 51, | |
| "questionText": "Scenario: DBSCAN applied to customer segmentation with varying buying density. Issue?", | |
| "options": [ | |
| "Noise eliminated automatically", | |
| "Clusters merged randomly", | |
| "All clusters detected perfectly", | |
| "Some smaller or sparser clusters may be missed" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "DBSCAN’s global eps struggles with clusters of different densities; adaptive methods recommended." | |
| }, | |
| { | |
| "id": 52, | |
| "questionText": "Scenario: You want DBSCAN to detect small anomalies in large dataset. How to adjust?", | |
| "options": [ | |
| "Increase eps arbitrarily", | |
| "Decrease minPts and eps appropriately", | |
| "Ignore small clusters", | |
| "Use K-Means instead" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Smaller minPts and eps allow DBSCAN to detect small dense regions representing anomalies." | |
| }, | |
| { | |
| "id": 53, | |
| "questionText": "Scenario: Using DBSCAN for clustering Wi-Fi signals in a building. Advantage?", | |
| "options": [ | |
| "Identifies dense signal regions and ignores noise", | |
| "All points assigned to clusters", | |
| "Requires number of clusters", | |
| "Clusters must be circular" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "DBSCAN can detect regions with strong signal density and label isolated weak signals as noise." | |
| }, | |
| { | |
| "id": 54, | |
| "questionText": "Scenario: DBSCAN on image pixel intensities for segmentation. Outcome?", | |
| "options": [ | |
| "Requires predefined cluster number", | |
| "Clusters must be circular", | |
| "All pixels assigned", | |
| "Arbitrary-shaped regions segmented; noise isolated" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "DBSCAN can segment regions of any shape and label scattered pixels as noise." | |
| }, | |
| { | |
| "id": 55, | |
| "questionText": "Scenario: Using DBSCAN on 3D point cloud of a city. Advantage?", | |
| "options": [ | |
| "Detects clusters like buildings, trees, and separates sparse points", | |
| "Noise merged into clusters", | |
| "All points assigned", | |
| "Clusters must be spherical" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "DBSCAN works in multi-dimensional data and identifies meaningful dense clusters." | |
| }, | |
| { | |
| "id": 56, | |
| "questionText": "Scenario: DBSCAN fails with high-dimensional word embeddings. Solution?", | |
| "options": [ | |
| "Apply dimensionality reduction before clustering", | |
| "Use K-Means", | |
| "Increase eps globally", | |
| "Ignore problem" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "High-dimensional spaces make distance less meaningful; reduction helps clustering performance." | |
| }, | |
| { | |
| "id": 57, | |
| "questionText": "Scenario: Border points connected to multiple core points. Assignment?", | |
| "options": [ | |
| "Assigned to one cluster reachable first", | |
| "Algorithm fails", | |
| "Assigned to all clusters simultaneously", | |
| "Become noise" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "DBSCAN assigns border points to a single cluster; typically the first reachable core point." | |
| }, | |
| { | |
| "id": 58, | |
| "questionText": "Scenario: DBSCAN on streaming data. Limitation?", | |
| "options": [ | |
| "Noise ignored", | |
| "Automatically updates clusters", | |
| "All points reassigned automatically", | |
| "Standard DBSCAN cannot update incrementally; adaptation needed" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Incremental or streaming adaptations of DBSCAN are required for dynamic datasets." | |
| }, | |
| { | |
| "id": 59, | |
| "questionText": "Scenario: Clusters are elongated and dense. DBSCAN vs K-Means?", | |
| "options": [ | |
| "Both fail", | |
| "K-Means better", | |
| "All points assigned to noise", | |
| "DBSCAN captures shape; K-Means fails with elongated clusters" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "DBSCAN’s density-based approach handles arbitrary shapes like elongated clusters well." | |
| }, | |
| { | |
| "id": 60, | |
| "questionText": "Scenario: Choosing minPts parameter. Rule of thumb?", | |
| "options": [ | |
| "minPts = dataset size", | |
| "minPts ≥ dimensionality + 1", | |
| "minPts = 1 always", | |
| "minPts ignored" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Choosing minPts slightly larger than data dimensionality ensures meaningful cluster formation." | |
| }, | |
| { | |
| "id": 61, | |
| "questionText": "Scenario: DBSCAN applied to weather station locations. Advantage?", | |
| "options": [ | |
| "All stations assigned", | |
| "Noise merged into clusters", | |
| "Detects dense station clusters and separates isolated stations as noise", | |
| "Clusters must be circular" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "DBSCAN can find groups of stations in dense regions while labeling isolated ones as noise." | |
| }, | |
| { | |
| "id": 62, | |
| "questionText": "Scenario: DBSCAN applied to vehicle GPS tracks. Best outcome?", | |
| "options": [ | |
| "All vehicles assigned to same cluster", | |
| "Requires predefined cluster number", | |
| "Clusters must be circular", | |
| "Detect hotspots of vehicle activity and identify sparse routes" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "DBSCAN identifies dense routes or locations and marks sparse movements as noise." | |
| }, | |
| { | |
| "id": 63, | |
| "questionText": "Scenario: DBSCAN applied to detect fraudulent transactions. Advantage?", | |
| "options": [ | |
| "All transactions clustered", | |
| "Isolates unusual low-density transactions as potential fraud", | |
| "Clusters merged arbitrarily", | |
| "Noise ignored" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Low-density points are flagged naturally, useful for anomaly detection in finance." | |
| }, | |
| { | |
| "id": 64, | |
| "questionText": "Scenario: eps too large. Effect on clusters?", | |
| "options": [ | |
| "Clusters may merge; noise reduced", | |
| "Noise increases", | |
| "More clusters detected", | |
| "Algorithm fails" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Large eps connects distant points, merging separate clusters and reducing noise." | |
| }, | |
| { | |
| "id": 65, | |
| "questionText": "Scenario: eps too small. Effect on clusters?", | |
| "options": [ | |
| "Clusters merge", | |
| "Noise decreases", | |
| "Many points labeled as noise; clusters fragmented", | |
| "Algorithm fails" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Small eps prevents points from forming dense clusters; many become noise." | |
| }, | |
| { | |
| "id": 66, | |
| "questionText": "Scenario: High-dimensional clustering. DBSCAN limitation?", | |
| "options": [ | |
| "Clusters detected perfectly", | |
| "All points assigned", | |
| "Distances lose meaning; density estimation difficult", | |
| "Noise ignored" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "High dimensions make points appear equidistant, complicating density-based clustering." | |
| }, | |
| { | |
| "id": 67, | |
| "questionText": "Scenario: Data with multiple density clusters. Solution?", | |
| "options": [ | |
| "Use HDBSCAN for adaptive density clustering", | |
| "Increase eps globally", | |
| "Reduce minPts to 1", | |
| "Ignore problem" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "HDBSCAN adapts to varying densities, unlike standard DBSCAN." | |
| }, | |
| { | |
| "id": 68, | |
| "questionText": "Scenario: Using DBSCAN on customer browsing patterns. Advantage?", | |
| "options": [ | |
| "Noise ignored", | |
| "Requires predefined cluster number", | |
| "All points assigned", | |
| "Detects dense behavioral patterns and isolates outliers" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Dense browsing behaviors form clusters; rare patterns become noise." | |
| }, | |
| { | |
| "id": 69, | |
| "questionText": "Scenario: Noise points in DBSCAN. Definition?", | |
| "options": [ | |
| "Cluster centroids", | |
| "All points in clusters", | |
| "Points not reachable from any core point", | |
| "Points with minPts neighbors" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Noise points are isolated points not part of any cluster." | |
| }, | |
| { | |
| "id": 70, | |
| "questionText": "Scenario: Choosing distance metric in DBSCAN. Effect?", | |
| "options": [ | |
| "Affects neighborhood definition and cluster shape", | |
| "Noise ignored", | |
| "No effect", | |
| "All points merged" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "The distance metric changes how neighbors are counted, affecting cluster formation." | |
| }, | |
| { | |
| "id": 71, | |
| "questionText": "Scenario: DBSCAN applied to earthquake epicenters. Advantage?", | |
| "options": [ | |
| "Clusters must be circular", | |
| "Detects clusters of seismic activity and isolates isolated events", | |
| "Noise merged into clusters", | |
| "All events assigned" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "DBSCAN identifies dense seismic regions and separates rare events as noise." | |
| }, | |
| { | |
| "id": 72, | |
| "questionText": "Scenario: Varying eps across dataset. How to achieve?", | |
| "options": [ | |
| "Ignore variation", | |
| "Random eps each run", | |
| "Use adaptive DBSCAN variants like HDBSCAN", | |
| "Standard DBSCAN suffices" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Adaptive algorithms adjust density thresholds to handle varying densities." | |
| }, | |
| { | |
| "id": 73, | |
| "questionText": "Scenario: Applying DBSCAN on medical imaging. Benefit?", | |
| "options": [ | |
| "Clusters must be spherical", | |
| "Requires fixed cluster number", | |
| "Detects regions of interest and separates background noise", | |
| "All pixels clustered" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "DBSCAN segments irregular shapes and isolates sparse/noisy regions." | |
| }, | |
| { | |
| "id": 74, | |
| "questionText": "Scenario: Using DBSCAN for anomaly detection in IoT sensors. Approach?", | |
| "options": [ | |
| "Ignore isolated readings", | |
| "Label low-density readings as anomalies", | |
| "Cluster all points", | |
| "Random assignment" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Isolated readings or sparse patterns naturally become noise, indicating anomalies." | |
| }, | |
| { | |
| "id": 75, | |
| "questionText": "Scenario: DBSCAN on financial transactions. Noise points indicate?", | |
| "options": [ | |
| "Noise merged", | |
| "All transactions are legitimate", | |
| "Potential fraudulent or unusual transactions", | |
| "Clusters merged" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Sparse points in dense transaction space are flagged as unusual or fraudulent." | |
| }, | |
| { | |
| "id": 76, | |
| "questionText": "Scenario: DBSCAN applied to traffic accident locations. Advantage?", | |
| "options": [ | |
| "Identifies accident hotspots and isolates rare events", | |
| "All accidents assigned", | |
| "Noise merged into clusters", | |
| "Clusters must be circular" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "DBSCAN finds dense accident regions and treats isolated incidents as noise." | |
| }, | |
| { | |
| "id": 77, | |
| "questionText": "Scenario: eps and minPts selection using k-distance plot. What is the elbow point?", | |
| "options": [ | |
| "Minimum distance", | |
| "Random point", | |
| "Point where distance sharply increases, suitable for eps", | |
| "Maximum distance" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "The elbow in the k-distance graph indicates the transition from dense to sparse regions, guiding eps selection." | |
| }, | |
| { | |
| "id": 78, | |
| "questionText": "Scenario: Border points connected to multiple clusters. Assignment in DBSCAN?", | |
| "options": [ | |
| "Assigned to all clusters", | |
| "Become noise", | |
| "Assigned to the first reachable cluster", | |
| "Clusters merge automatically" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Border points are assigned to one cluster, typically the first one that reaches them." | |
| }, | |
| { | |
| "id": 79, | |
| "questionText": "Scenario: DBSCAN on social media check-ins. Benefit?", | |
| "options": [ | |
| "Clusters must be predefined", | |
| "All users assigned", | |
| "Noise ignored", | |
| "Detects popular locations and flags isolated check-ins as noise" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "DBSCAN identifies dense activity areas and treats isolated check-ins as noise." | |
| }, | |
| { | |
| "id": 80, | |
| "questionText": "Scenario: Standard DBSCAN fails on variable density data. Solution?", | |
| "options": [ | |
| "Ignore the problem", | |
| "Increase eps globally", | |
| "Decrease minPts arbitrarily", | |
| "Use HDBSCAN for hierarchical density-based clustering" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "HDBSCAN adapts to varying density, unlike standard DBSCAN." | |
| }, | |
| { | |
| "id": 81, | |
| "questionText": "Scenario: DBSCAN on genomic data. Advantage?", | |
| "options": [ | |
| "Clusters must be circular", | |
| "All genes assigned", | |
| "Identifies dense gene clusters and isolates rare genes", | |
| "Noise merged" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "DBSCAN can identify dense gene expression patterns and separate sparse or rare genes as noise." | |
| }, | |
| { | |
| "id": 82, | |
| "questionText": "Scenario: Choosing minPts too high. Effect?", | |
| "options": [ | |
| "Algorithm fails", | |
| "Clusters merge", | |
| "Small clusters ignored; many points labeled noise", | |
| "More clusters detected" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "High minPts requires dense regions; sparse or small clusters are lost." | |
| }, | |
| { | |
| "id": 83, | |
| "questionText": "Scenario: Choosing minPts too low. Effect?", | |
| "options": [ | |
| "Many small clusters formed; noise reduced", | |
| "Clusters merge", | |
| "Algorithm fails", | |
| "All points noise" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Low minPts lets even tiny groups of points qualify as dense, so sparse points that would otherwise be noise form many small clusters." | |
| }, | |
| { | |
| "id": 84, | |
| "questionText": "Scenario: DBSCAN applied on customer location data. Advantage?", | |
| "options": [ | |
| "Requires predefined cluster count", | |
| "All points assigned", | |
| "Clusters must be circular", | |
| "Identifies dense shopping areas and separates isolated customers as noise" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "DBSCAN captures dense shopping locations and labels scattered customers as noise." | |
| }, | |
| { | |
| "id": 85, | |
| "questionText": "Scenario: High-dimensional text embeddings. DBSCAN limitation?", | |
| "options": [ | |
| "Distances lose meaning; clusters may be unreliable", | |
| "Algorithm faster", | |
| "Noise ignored", | |
| "Clusters always detected" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "In high dimensions, distances are less discriminative, affecting density and clustering." | |
| }, | |
| { | |
| "id": 86, | |
| "questionText": "Scenario: Using DBSCAN on image feature vectors. Benefit?", | |
| "options": [ | |
| "Requires predefined cluster count", | |
| "All features assigned", | |
| "Groups similar image features and isolates outliers", | |
| "Clusters must be circular" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "DBSCAN detects dense feature groups and treats isolated features as noise." | |
| }, | |
| { | |
| "id": 87, | |
| "questionText": "Scenario: eps too small. Effect?", | |
| "options": [ | |
| "Clusters fragmented; many points labeled noise", | |
| "Clusters merge", | |
| "All points assigned", | |
| "Algorithm fails" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Small eps prevents formation of dense clusters; isolated points become noise." | |
| }, | |
| { | |
| "id": 88, | |
| "questionText": "Scenario: eps too large. Effect?", | |
| "options": [ | |
| "More clusters detected", | |
| "Algorithm fails", | |
| "Clusters merge; fewer noise points", | |
| "Noise increases" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Large eps connects distant points, merging separate clusters and reducing noise." | |
| }, | |
| { | |
| "id": 89, | |
| "questionText": "Scenario: DBSCAN on irregularly shaped 2D clusters. Advantage?", | |
| "options": [ | |
| "Clusters must be circular", | |
| "Captures arbitrary shapes unlike K-Means", | |
| "All points assigned", | |
| "Noise ignored" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "DBSCAN does not assume cluster shape, so it captures elongated or irregular clusters." | |
| }, | |
| { | |
| "id": 90, | |
| "questionText": "Scenario: Border point connected to multiple core points. Assignment?", | |
| "options": [ | |
| "Assigned to first reachable cluster", | |
| "Assigned to all clusters", | |
| "Clusters merge", | |
| "Becomes noise" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "DBSCAN assigns a border point to one cluster, typically the cluster of the first core point that reaches it." | |
| }, | |
| { | |
| "id": 91, | |
| "questionText": "Scenario: DBSCAN on IoT sensor anomaly detection. Advantage?", | |
| "options": [ | |
| "Sparse readings flagged as anomalies automatically", | |
| "Noise ignored", | |
| "All readings clustered", | |
| "Clusters merged arbitrarily" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "DBSCAN labels low-density points as noise, which is useful for detecting anomalies." | |
| }, | |
| { | |
| "id": 92, | |
| "questionText": "Scenario: DBSCAN with streaming data. Limitation?", | |
| "options": [ | |
| "Needs adaptation; standard DBSCAN is static", | |
| "Noise ignored", | |
| "Automatically updates clusters", | |
| "All points reassigned automatically" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "DBSCAN is not incremental; streaming or dynamic data requires modified algorithms." | |
| }, | |
| { | |
| "id": 93, | |
| "questionText": "Scenario: Using DBSCAN on earthquake data. Benefit?", | |
| "options": [ | |
| "Detects dense seismic zones; isolates rare events", | |
| "All events clustered", | |
| "Noise merged", | |
| "Clusters must be circular" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "DBSCAN identifies dense clusters of earthquakes and labels isolated events as noise." | |
| }, | |
| { | |
| "id": 94, | |
| "questionText": "Scenario: Noise points in DBSCAN. Definition?", | |
| "options": [ | |
| "Cluster centroids", | |
| "Points not reachable from any core point", | |
| "Points with minPts neighbors", | |
| "All points assigned" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Noise points are neither core points nor density-reachable from any core point, so they belong to no cluster." | |
| }, | |
| { | |
| "id": 95, | |
| "questionText": "Scenario: Varying density clusters. Best DBSCAN variant?", | |
| "options": [ | |
| "Standard DBSCAN", | |
| "K-Means", | |
| "HDBSCAN", | |
| "Agglomerative clustering" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "HDBSCAN adapts to different densities and creates a hierarchy of clusters." | |
| }, | |
| { | |
| "id": 96, | |
| "questionText": "Scenario: Choosing eps using k-distance plot. How?", | |
| "options": [ | |
| "Select maximum distance", | |
| "Select value at elbow point where distances sharply rise", | |
| "Randomly select eps", | |
| "Select minimum distance" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "The elbow point indicates the transition from dense to sparse points, guiding eps choice." | |
| }, | |
| { | |
| "id": 97, | |
| "questionText": "Scenario: DBSCAN on customer behavior patterns. Benefit?", | |
| "options": [ | |
| "Groups dense behavior patterns; isolates rare customers", | |
| "Requires fixed number of clusters", | |
| "Noise ignored", | |
| "All points assigned" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "DBSCAN identifies dense behavioral clusters and labels rare behaviors as noise." | |
| }, | |
| { | |
| "id": 98, | |
| "questionText": "Scenario: DBSCAN vs K-Means for non-spherical clusters. Advantage?", | |
| "options": [ | |
| "K-Means better", | |
| "Noise ignored", | |
| "Both fail", | |
| "DBSCAN captures arbitrary shapes" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "DBSCAN does not assume cluster shape and handles irregular or elongated clusters." | |
| }, | |
| { | |
| "id": 99, | |
| "questionText": "Scenario: High-dimensional DBSCAN problem. Solution?", | |
| "options": [ | |
| "Dimensionality reduction (PCA, t-SNE) or HDBSCAN", | |
| "Increase minPts arbitrarily", | |
| "Ignore scaling", | |
| "Use raw distances" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "High-dimensional spaces make distances less meaningful; reduction or adaptive methods improve clustering." | |
| }, | |
| { | |
| "id": 100, | |
| "questionText": "Scenario: Choosing minPts in DBSCAN. Rule of thumb?", | |
| "options": [ | |
| "minPts = 1 always", | |
| "minPts = dataset size", | |
| "Ignore minPts", | |
| "minPts ≥ dimensionality + 1" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "minPts should be at least one more than the data dimensionality (2 × dimensionality is a common choice) to ensure meaningful clusters." | |
| } | |
| ] | |
| } | |