@article {15613,
title = {An efficient k-means clustering algorithm: analysis and implementation},
journal = {Pattern Analysis and Machine Intelligence, IEEE Transactions on},
volume = {24},
year = {2002},
month = {2002/07//},
pages = {881 - 892},
abstract = {In k-means clustering, we are given a set of n data points in d-dimensional space R^{d} and an integer k and the problem is to determine a set of k points in Rd, called centers, so as to minimize the mean squared distance from each data point to its nearest center. A popular heuristic for k-means clustering is Lloyd{\textquoteright}s (1982) algorithm. We present a simple and efficient implementation of Lloyd{\textquoteright}s k-means clustering algorithm, which we call the filtering algorithm. This algorithm is easy to implement, requiring a kd-tree as the only major data structure. We establish the practical efficiency of the filtering algorithm in two ways. First, we present a data-sensitive analysis of the algorithm{\textquoteright}s running time, which shows that the algorithm runs faster as the separation between clusters increases. Second, we present a number of empirical studies both on synthetically generated data and on real data sets from applications in color quantization, data compression, and image segmentation},
keywords = {algorithm;color, algorithm;image, algorithm;kd-tree;mean, analysis;filtering, clustering, clustering;, compression;data, distance;covariance, Lloyd, matrices;filtering, quantization;data, segmentation;k-means, squared, structure;data-sensitive, theory;pattern},
isbn = {0162-8828},
doi = {10.1109/TPAMI.2002.1017616},
author = {Kanungo,T. and Mount, Dave and Netanyahu,N. S and Piatko,C. D and Silverman,R. and Wu,A. Y}
}