--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/toolkit/javascript/d3/lib/science/science.stats.js Thu Apr 10 14:20:23 2014 +0200
@@ -0,0 +1,720 @@
+(function(){science.stats = {};
+// Bandwidth selectors for Gaussian kernels.
+// Based on R's implementations in `stats.bw`.
+science.stats.bandwidth = {
+
+ // Silverman, B. W. (1986) Density Estimation. London: Chapman and Hall.
+ nrd0: function(x) {
+ var hi = Math.sqrt(science.stats.variance(x));
+ if (!(lo = Math.min(hi, science.stats.iqr(x) / 1.34)))
+ (lo = hi) || (lo = Math.abs(x[1])) || (lo = 1);
+ return .9 * lo * Math.pow(x.length, -.2);
+ },
+
+ // Scott, D. W. (1992) Multivariate Density Estimation: Theory, Practice, and
+ // Visualization. Wiley.
+ nrd: function(x) {
+ var h = science.stats.iqr(x) / 1.34;
+ return 1.06 * Math.min(Math.sqrt(science.stats.variance(x)), h)
+ * Math.pow(x.length, -1/5);
+ }
+};
+science.stats.distance = {
+ euclidean: function(a, b) {
+ var n = a.length,
+ i = -1,
+ s = 0,
+ x;
+ while (++i < n) {
+ x = a[i] - b[i];
+ s += x * x;
+ }
+ return Math.sqrt(s);
+ },
+ manhattan: function(a, b) {
+ var n = a.length,
+ i = -1,
+ s = 0;
+ while (++i < n) s += Math.abs(a[i] - b[i]);
+ return s;
+ },
+ minkowski: function(p) {
+ return function(a, b) {
+ var n = a.length,
+ i = -1,
+ s = 0;
+ while (++i < n) s += Math.pow(Math.abs(a[i] - b[i]), p);
+ return Math.pow(s, 1 / p);
+ };
+ },
+ chebyshev: function(a, b) {
+ var n = a.length,
+ i = -1,
+ max = 0,
+ x;
+ while (++i < n) {
+ x = Math.abs(a[i] - b[i]);
+ if (x > max) max = x;
+ }
+ return max;
+ },
+ hamming: function(a, b) {
+ var n = a.length,
+ i = -1,
+ d = 0;
+ while (++i < n) if (a[i] !== b[i]) d++;
+ return d;
+ },
+ jaccard: function(a, b) {
+ var n = a.length,
+ i = -1,
+ s = 0;
+ while (++i < n) if (a[i] === b[i]) s++;
+ return s / n;
+ },
+ braycurtis: function(a, b) {
+ var n = a.length,
+ i = -1,
+ s0 = 0,
+ s1 = 0,
+ ai,
+ bi;
+ while (++i < n) {
+ ai = a[i];
+ bi = b[i];
+ s0 += Math.abs(ai - bi);
+ s1 += Math.abs(ai + bi);
+ }
+ return s0 / s1;
+ }
+};
+// Based on implementation in http://picomath.org/.
+science.stats.erf = function(x) {
+ var a1 = 0.254829592,
+ a2 = -0.284496736,
+ a3 = 1.421413741,
+ a4 = -1.453152027,
+ a5 = 1.061405429,
+ p = 0.3275911;
+
+ // Save the sign of x
+ var sign = x < 0 ? -1 : 1;
+ if (x < 0) {
+ sign = -1;
+ x = -x;
+ }
+
+ // A&S formula 7.1.26
+ var t = 1 / (1 + p * x);
+ return sign * (
+ 1 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1)
+ * t * Math.exp(-x * x));
+};
+science.stats.phi = function(x) {
+ return .5 * (1 + science.stats.erf(x / Math.SQRT2));
+};
+// See <http://en.wikipedia.org/wiki/Kernel_(statistics)>.
+science.stats.kernel = {
+ uniform: function(u) {
+ if (u <= 1 && u >= -1) return .5;
+ return 0;
+ },
+ triangular: function(u) {
+ if (u <= 1 && u >= -1) return 1 - Math.abs(u);
+ return 0;
+ },
+ epanechnikov: function(u) {
+ if (u <= 1 && u >= -1) return .75 * (1 - u * u);
+ return 0;
+ },
+ quartic: function(u) {
+ if (u <= 1 && u >= -1) {
+ var tmp = 1 - u * u;
+ return (15 / 16) * tmp * tmp;
+ }
+ return 0;
+ },
+ triweight: function(u) {
+ if (u <= 1 && u >= -1) {
+ var tmp = 1 - u * u;
+ return (35 / 32) * tmp * tmp * tmp;
+ }
+ return 0;
+ },
+ gaussian: function(u) {
+ return 1 / Math.sqrt(2 * Math.PI) * Math.exp(-.5 * u * u);
+ },
+ cosine: function(u) {
+ if (u <= 1 && u >= -1) return Math.PI / 4 * Math.cos(Math.PI / 2 * u);
+ return 0;
+ }
+};
+// http://exploringdata.net/den_trac.htm
+science.stats.kde = function() {
+ var kernel = science.stats.kernel.gaussian,
+ sample = [],
+ bandwidth = science.stats.bandwidth.nrd;
+
+ function kde(points, i) {
+ var bw = bandwidth.call(this, sample);
+ return points.map(function(x) {
+ var i = -1,
+ y = 0,
+ n = sample.length;
+ while (++i < n) {
+ y += kernel((x - sample[i]) / bw);
+ }
+ return [x, y / bw / n];
+ });
+ }
+
+ kde.kernel = function(x) {
+ if (!arguments.length) return kernel;
+ kernel = x;
+ return kde;
+ };
+
+ kde.sample = function(x) {
+ if (!arguments.length) return sample;
+ sample = x;
+ return kde;
+ };
+
+ kde.bandwidth = function(x) {
+ if (!arguments.length) return bandwidth;
+ bandwidth = science.functor(x);
+ return kde;
+ };
+
+ return kde;
+};
+// Based on figue implementation by Jean-Yves Delort.
+// http://code.google.com/p/figue/
+science.stats.kmeans = function() {
+ var distance = science.stats.distance.euclidean,
+ maxIterations = 1000,
+ k = 1;
+
+ function kmeans(vectors) {
+ var n = vectors.length,
+ assignments = [],
+ clusterSizes = [],
+ repeat = 1,
+ iterations = 0,
+ centroids = science_stats_kmeansRandom(k, vectors),
+ newCentroids,
+ i,
+ j,
+ x,
+ d,
+ min,
+ best;
+
+ while (repeat && iterations < maxIterations) {
+ // Assignment step.
+ j = -1; while (++j < k) {
+ clusterSizes[j] = 0;
+ }
+
+ i = -1; while (++i < n) {
+ x = vectors[i];
+ min = Infinity;
+ j = -1; while (++j < k) {
+ d = distance.call(this, centroids[j], x);
+ if (d < min) {
+ min = d;
+ best = j;
+ }
+ }
+ clusterSizes[assignments[i] = best]++;
+ }
+
+ // Update centroids step.
+ newCentroids = [];
+ i = -1; while (++i < n) {
+ x = assignments[i];
+ d = newCentroids[x];
+ if (d == null) newCentroids[x] = vectors[i].slice();
+ else {
+ j = -1; while (++j < d.length) {
+ d[j] += vectors[i][j];
+ }
+ }
+ }
+ j = -1; while (++j < k) {
+ x = newCentroids[j];
+ d = 1 / clusterSizes[j];
+ i = -1; while (++i < x.length) x[i] *= d;
+ }
+
+ // Check convergence.
+ repeat = 0;
+ j = -1; while (++j < k) {
+ if (!science_stats_kmeansCompare(newCentroids[j], centroids[j])) {
+ repeat = 1;
+ break;
+ }
+ }
+ centroids = newCentroids;
+ iterations++;
+ }
+ return {assignments: assignments, centroids: centroids};
+ }
+
+ kmeans.k = function(x) {
+ if (!arguments.length) return k;
+ k = x;
+ return kmeans;
+ };
+
+ kmeans.distance = function(x) {
+ if (!arguments.length) return distance;
+ distance = x;
+ return kmeans;
+ };
+
+ return kmeans;
+};
+
+function science_stats_kmeansCompare(a, b) {
+ if (!a || !b || a.length !== b.length) return false;
+ var n = a.length,
+ i = -1;
+ while (++i < n) if (a[i] !== b[i]) return false;
+ return true;
+}
+
+// Returns an array of k distinct vectors randomly selected from the input
+// array of vectors. Returns null if k > n or if there are less than k distinct
+// objects in vectors.
+function science_stats_kmeansRandom(k, vectors) {
+ var n = vectors.length;
+ if (k > n) return null;
+
+ var selected_vectors = [];
+ var selected_indices = [];
+ var tested_indices = {};
+ var tested = 0;
+ var selected = 0;
+ var i,
+ vector,
+ select;
+
+ while (selected < k) {
+ if (tested === n) return null;
+
+ var random_index = Math.floor(Math.random() * n);
+ if (random_index in tested_indices) continue;
+
+ tested_indices[random_index] = 1;
+ tested++;
+ vector = vectors[random_index];
+ select = true;
+ for (i = 0; i < selected; i++) {
+ if (science_stats_kmeansCompare(vector, selected_vectors[i])) {
+ select = false;
+ break;
+ }
+ }
+ if (select) {
+ selected_vectors[selected] = vector;
+ selected_indices[selected] = random_index;
+ selected++;
+ }
+ }
+ return selected_vectors;
+}
+science.stats.hcluster = function() {
+ var distance = science.stats.distance.euclidean,
+ linkage = "simple"; // simple, complete or average
+
+ function hcluster(vectors) {
+ var n = vectors.length,
+ dMin = [],
+ cSize = [],
+ distMatrix = [],
+ clusters = [],
+ c1,
+ c2,
+ c1Cluster,
+ c2Cluster,
+ p,
+ root,
+ i,
+ j;
+
+ // Initialise distance matrix and vector of closest clusters.
+ i = -1; while (++i < n) {
+ dMin[i] = 0;
+ distMatrix[i] = [];
+ j = -1; while (++j < n) {
+ distMatrix[i][j] = i === j ? Infinity : distance(vectors[i] , vectors[j]);
+ if (distMatrix[i][dMin[i]] > distMatrix[i][j]) dMin[i] = j;
+ }
+ }
+
+ // create leaves of the tree
+ i = -1; while (++i < n) {
+ clusters[i] = [];
+ clusters[i][0] = {
+ left: null,
+ right: null,
+ dist: 0,
+ centroid: vectors[i],
+ size: 1,
+ depth: 0
+ };
+ cSize[i] = 1;
+ }
+
+ // Main loop
+ for (p = 0; p < n-1; p++) {
+ // find the closest pair of clusters
+ c1 = 0;
+ for (i = 0; i < n; i++) {
+ if (distMatrix[i][dMin[i]] < distMatrix[c1][dMin[c1]]) c1 = i;
+ }
+ c2 = dMin[c1];
+
+ // create node to store cluster info
+ c1Cluster = clusters[c1][0];
+ c2Cluster = clusters[c2][0];
+
+ newCluster = {
+ left: c1Cluster,
+ right: c2Cluster,
+ dist: distMatrix[c1][c2],
+ centroid: calculateCentroid(c1Cluster.size, c1Cluster.centroid,
+ c2Cluster.size, c2Cluster.centroid),
+ size: c1Cluster.size + c2Cluster.size,
+ depth: 1 + Math.max(c1Cluster.depth, c2Cluster.depth)
+ };
+ clusters[c1].splice(0, 0, newCluster);
+ cSize[c1] += cSize[c2];
+
+ // overwrite row c1 with respect to the linkage type
+ for (j = 0; j < n; j++) {
+ switch (linkage) {
+ case "single":
+ if (distMatrix[c1][j] > distMatrix[c2][j])
+ distMatrix[j][c1] = distMatrix[c1][j] = distMatrix[c2][j];
+ break;
+ case "complete":
+ if (distMatrix[c1][j] < distMatrix[c2][j])
+ distMatrix[j][c1] = distMatrix[c1][j] = distMatrix[c2][j];
+ break;
+ case "average":
+ distMatrix[j][c1] = distMatrix[c1][j] = (cSize[c1] * distMatrix[c1][j] + cSize[c2] * distMatrix[c2][j]) / (cSize[c1] + cSize[j]);
+ break;
+ }
+ }
+ distMatrix[c1][c1] = Infinity;
+
+ // infinity out old row c2 and column c2
+ for (i = 0; i < n; i++)
+ distMatrix[i][c2] = distMatrix[c2][i] = Infinity;
+
+ // update dmin and replace ones that previous pointed to c2 to point to c1
+ for (j = 0; j < n; j++) {
+ if (dMin[j] == c2) dMin[j] = c1;
+ if (distMatrix[c1][j] < distMatrix[c1][dMin[c1]]) dMin[c1] = j;
+ }
+
+ // keep track of the last added cluster
+ root = newCluster;
+ }
+
+ return root;
+ }
+
+ hcluster.distance = function(x) {
+ if (!arguments.length) return distance;
+ distance = x;
+ return hcluster;
+ };
+
+ return hcluster;
+};
+
+function calculateCentroid(c1Size, c1Centroid, c2Size, c2Centroid) {
+ var newCentroid = [],
+ newSize = c1Size + c2Size,
+ n = c1Centroid.length,
+ i = -1;
+ while (++i < n) {
+ newCentroid[i] = (c1Size * c1Centroid[i] + c2Size * c2Centroid[i]) / newSize;
+ }
+ return newCentroid;
+}
+science.stats.iqr = function(x) {
+ var quartiles = science.stats.quantiles(x, [.25, .75]);
+ return quartiles[1] - quartiles[0];
+};
+// Based on org.apache.commons.math.analysis.interpolation.LoessInterpolator
+// from http://commons.apache.org/math/
+science.stats.loess = function() {
+ var bandwidth = .3,
+ robustnessIters = 2,
+ accuracy = 1e-12;
+
+ function smooth(xval, yval, weights) {
+ var n = xval.length,
+ i;
+
+ if (n !== yval.length) throw {error: "Mismatched array lengths"};
+ if (n == 0) throw {error: "At least one point required."};
+
+ if (arguments.length < 3) {
+ weights = [];
+ i = -1; while (++i < n) weights[i] = 1;
+ }
+
+ science_stats_loessFiniteReal(xval);
+ science_stats_loessFiniteReal(yval);
+ science_stats_loessFiniteReal(weights);
+ science_stats_loessStrictlyIncreasing(xval);
+
+ if (n == 1) return [yval[0]];
+ if (n == 2) return [yval[0], yval[1]];
+
+ var bandwidthInPoints = Math.floor(bandwidth * n);
+
+ if (bandwidthInPoints < 2) throw {error: "Bandwidth too small."};
+
+ var res = [],
+ residuals = [],
+ robustnessWeights = [];
+
+ // Do an initial fit and 'robustnessIters' robustness iterations.
+ // This is equivalent to doing 'robustnessIters+1' robustness iterations
+ // starting with all robustness weights set to 1.
+ i = -1; while (++i < n) {
+ res[i] = 0;
+ residuals[i] = 0;
+ robustnessWeights[i] = 1;
+ }
+
+ var iter = -1;
+ while (++iter <= robustnessIters) {
+ var bandwidthInterval = [0, bandwidthInPoints - 1];
+ // At each x, compute a local weighted linear regression
+ var x;
+ i = -1; while (++i < n) {
+ x = xval[i];
+
+ // Find out the interval of source points on which
+ // a regression is to be made.
+ if (i > 0) {
+ science_stats_loessUpdateBandwidthInterval(xval, weights, i, bandwidthInterval);
+ }
+
+ var ileft = bandwidthInterval[0],
+ iright = bandwidthInterval[1];
+
+ // Compute the point of the bandwidth interval that is
+ // farthest from x
+ var edge = (xval[i] - xval[ileft]) > (xval[iright] - xval[i]) ? ileft : iright;
+
+ // Compute a least-squares linear fit weighted by
+ // the product of robustness weights and the tricube
+ // weight function.
+ // See http://en.wikipedia.org/wiki/Linear_regression
+ // (section "Univariate linear case")
+ // and http://en.wikipedia.org/wiki/Weighted_least_squares
+ // (section "Weighted least squares")
+ var sumWeights = 0,
+ sumX = 0,
+ sumXSquared = 0,
+ sumY = 0,
+ sumXY = 0,
+ denom = Math.abs(1 / (xval[edge] - x));
+
+ for (var k = ileft; k <= iright; ++k) {
+ var xk = xval[k],
+ yk = yval[k],
+ dist = k < i ? x - xk : xk - x,
+ w = science_stats_loessTricube(dist * denom) * robustnessWeights[k] * weights[k],
+ xkw = xk * w;
+ sumWeights += w;
+ sumX += xkw;
+ sumXSquared += xk * xkw;
+ sumY += yk * w;
+ sumXY += yk * xkw;
+ }
+
+ var meanX = sumX / sumWeights,
+ meanY = sumY / sumWeights,
+ meanXY = sumXY / sumWeights,
+ meanXSquared = sumXSquared / sumWeights;
+
+ var beta = (Math.sqrt(Math.abs(meanXSquared - meanX * meanX)) < accuracy)
+ ? 0 : ((meanXY - meanX * meanY) / (meanXSquared - meanX * meanX));
+
+ var alpha = meanY - beta * meanX;
+
+ res[i] = beta * x + alpha;
+ residuals[i] = Math.abs(yval[i] - res[i]);
+ }
+
+ // No need to recompute the robustness weights at the last
+ // iteration, they won't be needed anymore
+ if (iter === robustnessIters) {
+ break;
+ }
+
+ // Recompute the robustness weights.
+
+ // Find the median residual.
+ var sortedResiduals = residuals.slice();
+ sortedResiduals.sort();
+ var medianResidual = sortedResiduals[Math.floor(n / 2)];
+
+ if (Math.abs(medianResidual) < accuracy)
+ break;
+
+ var arg,
+ w;
+ i = -1; while (++i < n) {
+ arg = residuals[i] / (6 * medianResidual);
+ robustnessWeights[i] = (arg >= 1) ? 0 : ((w = 1 - arg * arg) * w);
+ }
+ }
+
+ return res;
+ }
+
+ smooth.bandwidth = function(x) {
+ if (!arguments.length) return x;
+ bandwidth = x;
+ return smooth;
+ };
+
+ smooth.robustnessIterations = function(x) {
+ if (!arguments.length) return x;
+ robustnessIters = x;
+ return smooth;
+ };
+
+ smooth.accuracy = function(x) {
+ if (!arguments.length) return x;
+ accuracy = x;
+ return smooth;
+ };
+
+ return smooth;
+};
+
+function science_stats_loessFiniteReal(values) {
+ var n = values.length,
+ i = -1;
+
+ while (++i < n) if (!isFinite(values[i])) return false;
+
+ return true;
+}
+
+function science_stats_loessStrictlyIncreasing(xval) {
+ var n = xval.length,
+ i = 0;
+
+ while (++i < n) if (xval[i - 1] >= xval[i]) return false;
+
+ return true;
+}
+
+// Compute the tricube weight function.
+// http://en.wikipedia.org/wiki/Local_regression#Weight_function
+function science_stats_loessTricube(x) {
+ return (x = 1 - x * x * x) * x * x;
+}
+
+// Given an index interval into xval that embraces a certain number of
+// points closest to xval[i-1], update the interval so that it embraces
+// the same number of points closest to xval[i], ignoring zero weights.
+function science_stats_loessUpdateBandwidthInterval(
+ xval, weights, i, bandwidthInterval) {
+
+ var left = bandwidthInterval[0],
+ right = bandwidthInterval[1];
+
+ // The right edge should be adjusted if the next point to the right
+ // is closer to xval[i] than the leftmost point of the current interval
+ var nextRight = science_stats_loessNextNonzero(weights, right);
+ if ((nextRight < xval.length) && (xval[nextRight] - xval[i]) < (xval[i] - xval[left])) {
+ var nextLeft = science_stats_loessNextNonzero(weights, left);
+ bandwidthInterval[0] = nextLeft;
+ bandwidthInterval[1] = nextRight;
+ }
+}
+
+function science_stats_loessNextNonzero(weights, i) {
+ var j = i + 1;
+ while (j < weights.length && weights[j] === 0) j++;
+ return j;
+}
+// Welford's algorithm.
+science.stats.mean = function(x) {
+ var n = x.length;
+ if (n === 0) return NaN;
+ var m = 0,
+ i = -1;
+ while (++i < n) m += (x[i] - m) / (i + 1);
+ return m;
+};
+science.stats.median = function(x) {
+ return science.stats.quantiles(x, [.5])[0];
+};
+science.stats.mode = function(x) {
+ x = x.slice().sort(science.ascending);
+ var mode,
+ n = x.length,
+ i = -1,
+ l = i,
+ last = null,
+ max = 0,
+ tmp,
+ v;
+ while (++i < n) {
+ if ((v = x[i]) !== last) {
+ if ((tmp = i - l) > max) {
+ max = tmp;
+ mode = last;
+ }
+ last = v;
+ l = i;
+ }
+ }
+ return mode;
+};
+// Uses R's quantile algorithm type=7.
+science.stats.quantiles = function(d, quantiles) {
+ d = d.slice().sort(science.ascending);
+ var n_1 = d.length - 1;
+ return quantiles.map(function(q) {
+ if (q === 0) return d[0];
+ else if (q === 1) return d[n_1];
+
+ var index = 1 + q * n_1,
+ lo = Math.floor(index),
+ h = index - lo,
+ a = d[lo - 1];
+
+ return h === 0 ? a : a + h * (d[lo] - a);
+ });
+};
+// Unbiased estimate of a sample's variance.
+// Also known as the sample variance, where the denominator is n - 1.
+science.stats.variance = function(x) {
+ var n = x.length;
+ if (n < 1) return NaN;
+ if (n === 1) return 0;
+ var mean = science.stats.mean(x),
+ i = -1,
+ s = 0;
+ while (++i < n) {
+ var v = x[i] - mean;
+ s += v * v;
+ }
+ return s / (n - 1);
+};
+})()
\ No newline at end of file