Spaces:
Sleeping
Sleeping
| import os | |
| import cv2 | |
| import numpy as np | |
| import pickle | |
| from skimage.feature import local_binary_pattern, graycomatrix, graycoprops, hog | |
# ---------------------------------------------------------------------
# Feature Extraction Functions
# ---------------------------------------------------------------------
def get_average_color(image):
    """Return the per-channel mean of the image, averaged over rows and columns.

    For an image loaded by OpenCV (rows x cols x channels, BGR order) the
    result is the average B, G and R value of the whole picture.
    """
    spatial_axes = (0, 1)  # collapse height and width, keep the channel axis
    channel_means = np.mean(image, axis=spatial_axes)
    return channel_means
def get_color_histogram(image, bins=(8, 8, 8)):
    """
    Build a flattened, normalized 3-D color histogram of the image in HSV space.

    image: BGR image (converted with cv2.COLOR_BGR2HSV).
    bins:  number of histogram bins for the H, S and V channels, in that order.
    Returns a 1-D array holding one value per (H, S, V) bin combination.
    """
    channel_indices = [0, 1, 2]
    # Value ranges handed to calcHist: hue over [0, 180), S and V over [0, 256).
    channel_ranges = [0, 180, 0, 256, 0, 256]

    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    histogram = cv2.calcHist([hsv_image], channel_indices, None, bins, channel_ranges)

    # Normalize in place, using cv2.normalize's default settings.
    cv2.normalize(histogram, histogram)
    return histogram.flatten()
def get_lbp_histogram(image, numPoints=24, radius=8, bins=59):
    """
    Describe image texture as a normalized histogram of Local Binary Pattern codes.

    image:     BGR image; it is converted to grayscale before the LBP pass.
    numPoints: number of neighbor points sampled around each pixel.
    radius:    radius of the sampling circle.
    bins:      number of histogram bins; codes are binned over [0, bins).
    Returns a float array of length `bins` whose entries sum to roughly 1.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    codes = local_binary_pattern(grayscale, numPoints, radius, method="uniform")
    # NOTE(review): "uniform" LBP presumably yields only numPoints + 2 distinct
    # codes, so with the defaults many of the 59 bins would always be empty -
    # confirm before changing, since the feature length must match consumers.
    counts, _edges = np.histogram(codes.ravel(), bins=bins, range=(0, bins))
    weights = counts.astype("float")
    # The small epsilon avoids a division by zero when the histogram is empty.
    return weights / (weights.sum() + 1e-7)
def get_glcm_features(image,
                      distances=(1, 2, 4),
                      angles=(0, np.pi/4, np.pi/2, 3*np.pi/4),
                      properties=('contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation', 'ASM')):
    """
    Compute GLCM (Gray Level Co-occurrence Matrix) based texture features
    (a.k.a. Haralick features).

    image:      BGR image; it is converted to grayscale first.
    distances:  Iterable of pixel-pair distances.
    angles:     Iterable of pixel-pair angles in radians.
    properties: GLCM properties to compute for each distance and angle.

    Returns a 1-D feature vector made of the flattened
    (distance x angle) result of every property, concatenated in the order
    given by `properties`.

    The defaults are tuples rather than lists so that no mutable object is
    shared between calls.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Hand scikit-image plain lists, exactly as before, whatever iterable the
    # caller supplied.
    glcm = graycomatrix(gray,
                        distances=list(distances),
                        angles=list(angles),
                        levels=256,
                        symmetric=True,
                        normed=True)
    # graycoprops yields one value per (distance, angle) pair; flatten each
    # property's table and join them into a single vector.
    feats = [graycoprops(glcm, prop).ravel() for prop in properties]
    return np.hstack(feats)
def get_hog_features(image,
                     orientations=9,
                     pixels_per_cell=(8, 8),
                     cells_per_block=(2, 2),
                     block_norm='L2-Hys'):
    """
    Compute the Histogram of Oriented Gradients (HOG) descriptor of the image.

    image:           BGR image; it is converted to grayscale first.
    orientations:    number of gradient orientation bins.
    pixels_per_cell: cell size in pixels.
    cells_per_block: number of cells in each normalization block.
    block_norm:      block normalization method passed through to `hog`.

    With the default 8x8 cells and 2x2-cell blocks, the input must be at
    least 16x16 pixels. Returns the HOG feature vector produced by `hog`.
    """
    hog_options = {
        'orientations': orientations,
        'pixels_per_cell': pixels_per_cell,
        'cells_per_block': cells_per_block,
        'block_norm': block_norm,
    }
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return hog(grayscale, **hog_options)
def get_combined_features(image):
    """
    Build the full descriptor of one tile as a single 1-D feature vector.

    The vector is the concatenation, in this order, of: average color,
    color histogram, LBP histogram, GLCM features, and HOG features.

    IMPORTANT: the first four parts are computed on the image at its
    original size, while HOG is computed on a copy force-resized to 16x16
    so that it matches the mosaic script, which also forces 16x16 before HOG.
    """
    # -- Descriptors taken from the image at its original size --
    full_size_extractors = (
        get_average_color,
        get_color_histogram,
        get_lbp_histogram,
        get_glcm_features,
    )
    parts = [extract(image) for extract in full_size_extractors]

    # -- HOG works on a fixed 16x16 copy to match the mosaic script --
    hog_tile = cv2.resize(image, (16, 16), interpolation=cv2.INTER_LINEAR)
    parts.append(get_hog_features(hog_tile))

    # -- Join every part into one vector --
    return np.concatenate(parts)
# ---------------------------------------------------------------------
# Main Data Preparation Function
# ---------------------------------------------------------------------
def prepare_tile_data(tiles_folder, output_file):
    """
    Process all images in 'tiles_folder' to compute their feature vectors
    and save them, together with the file paths, to a pickle file.

    tiles_folder: directory that is scanned (non-recursively) for files
                  ending in .jpg, .jpeg, .png, .bmp or .tiff (any case).
    output_file:  path of the pickle file to write. It holds a dict with
                  'features' (float32 array, one row per tile) and 'paths'
                  (list of file paths in the same row order).

    Files are processed in sorted name order so the output is reproducible.
    Files that cannot be decoded are reported and skipped. If the folder has
    no candidate files, or none of them can be read, a message is printed
    and nothing is written, so an existing output file is never replaced by
    an empty one. Returns None.
    """
    valid_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff')

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of the saved features stable between runs and machines.
    all_files = sorted(
        f for f in os.listdir(tiles_folder)
        if f.lower().endswith(valid_extensions)
    )
    total_files = len(all_files)
    if not all_files:
        print("No valid image files found in", tiles_folder)
        return

    print(f"Found {total_files} image(s) in '{tiles_folder}'. Starting feature extraction...")

    tile_features = []
    tile_paths = []
    for idx, filename in enumerate(all_files, start=1):
        filepath = os.path.join(tiles_folder, filename)
        image = cv2.imread(filepath)
        if image is None:
            # cv2.imread signals an unreadable file by returning None.
            print(f"[{idx}/{total_files}] Warning: Failed to read {filepath}")
            continue

        # Extract combined features (with forced 16x16 for HOG)
        tile_features.append(get_combined_features(image))
        tile_paths.append(filepath)

        # Log progress
        print(f"[{idx}/{total_files}] Processed: {filename}")

    if not tile_paths:
        # Every candidate failed to decode: do not write an empty dataset.
        print(f"No tiles could be read from '{tiles_folder}'; nothing written to {output_file}")
        return

    # Stack into one float32 NumPy array (the dtype the original notes as
    # intended for a KDTree).
    tile_features = np.array(tile_features, dtype=np.float32)

    # Save features and paths
    data = {'features': tile_features, 'paths': tile_paths}
    with open(output_file, 'wb') as f:
        pickle.dump(data, f)

    print(f"Saved features for {len(tile_paths)} tiles to {output_file}")
# ---------------------------------------------------------------------
# Script Entry Point
# ---------------------------------------------------------------------
if __name__ == "__main__":
    # Default locations; edit these two values to point at your own data.
    prepare_tile_data(
        tiles_folder="images_dataset",    # folder with the tile images
        output_file="tile_features.pkl",  # pickle file for the precomputed features
    )