/*-
 *******************************************************************************
 * Copyright (c) 2011, 2016 Diamond Light Source Ltd.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *    Peter Chang - initial API and implementation and/or initial documentation
 *******************************************************************************/

package org.eclipse.january.dataset;

import java.lang.ref.SoftReference;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import org.apache.commons.math3.complex.Complex;
import org.apache.commons.math3.stat.descriptive.moment.Kurtosis;
import org.apache.commons.math3.stat.descriptive.moment.Skewness;
import org.eclipse.january.metadata.Dirtiable;
import org.eclipse.january.metadata.MetadataType;

/**
 * Statistics class
 *
 * TODO Where is mode? http://en.wikipedia.org/wiki/Mode_(statistics)
 */
public class Stats {

    private static class ReferencedDataset extends SoftReference<Dataset> {
        public ReferencedDataset(Dataset d) {
            super(d);
        }
    }

    private static class QStatisticsImpl<T> implements MetadataType {
        private static final long serialVersionUID = -3296671666463190388L;
        final static Double Q1 = 0.25;
        final static Double Q2 = 0.5;
        final static Double Q3 = 0.75;
        Map<Double, T> qmap = new HashMap<Double, T>();
        transient Map<Integer, Map<Double, ReferencedDataset>> aqmap = new HashMap<Integer, Map<Double, ReferencedDataset>>();
        transient ReferencedDataset s; // store 0th element
        transient Map<Integer, ReferencedDataset> smap = new HashMap<>();

        @Dirtiable
        private boolean isDirty = true;

        @Override
        public QStatisticsImpl<T> clone() {
            return new QStatisticsImpl<T>(this);
        }

        public QStatisticsImpl() {
        }

        private QStatisticsImpl(QStatisticsImpl<T> qstats) {
            if (qstats.s != null && qstats.s.get() != null) {
                s = new ReferencedDataset(qstats.s.get().getView(false));
            }
            qmap.putAll(qstats.qmap);
            for (Integer i : qstats.aqmap.keySet()) {
                aqmap.put(i, new HashMap<>(qstats.aqmap.get(i)));
            }
            smap.putAll(qstats.smap);
            isDirty = qstats.isDirty;
        }

        public void setQuantile(double q, T v) {
            qmap.put(q, v);
        }

        public T getQuantile(double q) {
            return qmap.get(q);
        }

        private Map<Double, ReferencedDataset> getMap(int axis) {
            Map<Double, ReferencedDataset> qm = aqmap.get(axis);
            if (qm == null) {
                qm = new HashMap<>();
                aqmap.put(axis, qm);
            }
            return qm;
        }

        public void setQuantile(int axis, double q, Dataset v) {
            Map<Double, ReferencedDataset> qm = getMap(axis);
            qm.put(q, new ReferencedDataset(v));
        }

        public Dataset getQuantile(int axis, double q) {
            Map<Double, ReferencedDataset> qm = getMap(axis);
            ReferencedDataset rd = qm.get(q);
            return rd == null ? null : rd.get();
        }

        Dataset getSortedDataset(int axis) {
            return smap.containsKey(axis) ? smap.get(axis).get() : null;
        }

        void setSortedDataset(int axis, Dataset v) {
            smap.put(axis, new ReferencedDataset(v));
        }
    }

    // calculates statistics and returns sorted dataset (0th element if compound)
    private static QStatisticsImpl<?> calcQuartileStats(final Dataset a) {
        Dataset s = null;
        final int is = a.getElementsPerItem();

        if (is == 1) {
            s = DatasetUtils.sort(a);

            QStatisticsImpl<Double> qstats = new QStatisticsImpl<Double>();

            qstats.setQuantile(QStatisticsImpl.Q1, pQuantile(s, QStatisticsImpl.Q1));
            qstats.setQuantile(QStatisticsImpl.Q2, pQuantile(s, QStatisticsImpl.Q2));
            qstats.setQuantile(QStatisticsImpl.Q3, pQuantile(s, QStatisticsImpl.Q3));
            qstats.s = new ReferencedDataset(s);
            return qstats;
        }

        QStatisticsImpl<double[]> qstats = new QStatisticsImpl<double[]>();

        Dataset w = DatasetFactory.zeros(1, a.getClass(), a.getShapeRef());
        double[] q1 = new double[is];
        double[] q2 = new double[is];
        double[] q3 = new double[is];
        qstats.setQuantile(QStatisticsImpl.Q1, q1);
        qstats.setQuantile(QStatisticsImpl.Q2, q2);
        qstats.setQuantile(QStatisticsImpl.Q3, q3);
        for (int j = 0; j < is; j++) {
            ((CompoundDataset) a).copyElements(w, j);
            w.sort(null);

            q1[j] = pQuantile(w, QStatisticsImpl.Q1);
            q2[j] = pQuantile(w, QStatisticsImpl.Q2);
            q3[j] = pQuantile(w, QStatisticsImpl.Q3);
            if (j == 0)
                s = w.clone();
        }
        qstats.s = new ReferencedDataset(s);

        return qstats;
    }

    static private QStatisticsImpl<?> getQStatistics(final Dataset a) {
        QStatisticsImpl<?> m = a.getFirstMetadata(QStatisticsImpl.class);
        if (m == null || m.isDirty) {
            m = calcQuartileStats(a);
            a.setMetadata(m);
        }
        return m;
    }
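
    // Note: the quartile statistics computed above are cached on the dataset as QStatisticsImpl
    // metadata and are only recomputed when that metadata is marked dirty (see the @Dirtiable
    // isDirty field), so repeated calls such as the sketch below reuse a single sort.
    // This is an illustrative sketch, not part of the API; the example data are arbitrary.
    //
    //     Dataset d = DatasetFactory.createFromObject(new double[] {3, 1, 4, 1, 5, 9, 2, 6});
    //     Object m1 = Stats.median(d); // sorts once and caches the quartiles as metadata
    //     Object m2 = Stats.median(d); // served from the cached metadata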

    static private QStatisticsImpl<?> getQStatistics(final Dataset a, int axis) {
        axis = a.checkAxis(axis);
        final int is = a.getElementsPerItem();
        QStatisticsImpl<?> qstats = a.getFirstMetadata(QStatisticsImpl.class);

        if (qstats == null || qstats.isDirty) {
            if (is == 1) {
                qstats = new QStatisticsImpl<Double>();
            } else {
                qstats = new QStatisticsImpl<double[]>();
            }
            a.setMetadata(qstats);
        }

        if (qstats.getQuantile(axis, QStatisticsImpl.Q2) == null) {
            if (is == 1) {
                Dataset s = DatasetUtils.sort(a, axis);

                qstats.setQuantile(axis, QStatisticsImpl.Q1, pQuantile(s, axis, QStatisticsImpl.Q1));
                qstats.setQuantile(axis, QStatisticsImpl.Q2, pQuantile(s, axis, QStatisticsImpl.Q2));
                qstats.setQuantile(axis, QStatisticsImpl.Q3, pQuantile(s, axis, QStatisticsImpl.Q3));
                qstats.setSortedDataset(axis, s);
            } else {
                Dataset w = DatasetFactory.zeros(1, a.getClass(), a.getShapeRef());
                CompoundDoubleDataset q1 = null, q2 = null, q3 = null;
                for (int j = 0; j < is; j++) {
                    ((CompoundDataset) a).copyElements(w, j);
                    w.sort(axis);

                    final Dataset c = pQuantile(w, axis, QStatisticsImpl.Q1);
                    if (j == 0) {
                        q1 = DatasetFactory.zeros(is, CompoundDoubleDataset.class, c.getShapeRef());
                        q2 = DatasetFactory.zeros(is, CompoundDoubleDataset.class, c.getShapeRef());
                        q3 = DatasetFactory.zeros(is, CompoundDoubleDataset.class, c.getShapeRef());
                    }
                    q1.setElements(c, j);

                    q2.setElements(pQuantile(w, axis, QStatisticsImpl.Q2), j);

                    q3.setElements(pQuantile(w, axis, QStatisticsImpl.Q3), j);
                }
                qstats.setQuantile(axis, QStatisticsImpl.Q1, q1);
                qstats.setQuantile(axis, QStatisticsImpl.Q2, q2);
                qstats.setQuantile(axis, QStatisticsImpl.Q3, q3);
            }
        }

        return qstats;
    }

    // process a sorted dataset
    private static double pQuantile(final Dataset s, final double q) {
        double f = (s.getSize() - 1) * q; // fraction of sample number
        if (f < 0)
            return Double.NaN;
        int qpt = (int) Math.floor(f); // quantile point
        f -= qpt;

        double quantile = s.getElementDoubleAbs(qpt);
        if (f > 0) {
            quantile = (1 - f) * quantile + f * s.getElementDoubleAbs(qpt + 1);
        }
        return quantile;
    }

    // process a sorted dataset and return a double or compound double dataset
    private static Dataset pQuantile(final Dataset s, final int axis, final double q) {
        final int rank = s.getRank();
        final int is = s.getElementsPerItem();

        int[] oshape = s.getShape();

        double f = (oshape[axis] - 1) * q; // fraction of sample number
        int qpt = (int) Math.floor(f); // quantile point
        f -= qpt;

        oshape[axis] = 1;
        int[] qshape = ShapeUtils.squeezeShape(oshape, false);
        Dataset qds = DatasetFactory.zeros(is, CompoundDoubleDataset.class, qshape);

        IndexIterator qiter = qds.getIterator(true);
        int[] qpos = qiter.getPos();
        int[] spos = oshape;

        while (qiter.hasNext()) {
            int i = 0;
            for (; i < axis; i++) {
                spos[i] = qpos[i];
            }
            spos[i++] = qpt;
            for (; i < rank; i++) {
                spos[i] = qpos[i - 1];
            }

            Object obj = s.getObject(spos);
            qds.set(obj, qpos);
        }

        if (f > 0) {
            qiter = qds.getIterator(true);
            qpos = qiter.getPos();
            qpt++;
            Dataset rds = DatasetFactory.zeros(is, CompoundDoubleDataset.class, qshape);

            while (qiter.hasNext()) {
                int i = 0;
                for (; i < axis; i++) {
                    spos[i] = qpos[i];
                }
                spos[i++] = qpt;
                for (; i < rank; i++) {
                    spos[i] = qpos[i - 1];
                }

                Object obj = s.getObject(spos);
                rds.set(obj, qpos);
            }
            rds.imultiply(f);
            qds.imultiply(1 - f);
            qds.iadd(rds);
        }

        return qds;
    }

    /**
     * Calculate the quantile of a dataset, defined as the inverse of its cumulative distribution function (CDF)
     * @param a dataset
     * @param q quantile value in [0, 1]
     * @return point at which CDF has value q
     */
    @SuppressWarnings("unchecked")
    public static double quantile(final Dataset a, final double q) {
        if (q < 0 || q > 1) {
            throw new IllegalArgumentException("Quantile requested is outside [0,1]");
        }
        QStatisticsImpl<Double> qs = (QStatisticsImpl<Double>) getQStatistics(a);
        Double qv = qs.getQuantile(q);
        if (qv == null) {
            qv = pQuantile(qs.s.get(), q);
            qs.setQuantile(q, qv);
        }
        return qv;
    }

    /**
     * Calculate quantiles of a dataset, defined as the inverse of its cumulative distribution function (CDF)
     * @param a dataset
     * @param values quantile values, each in [0, 1]
     * @return points at which CDF has the given values
     */
    @SuppressWarnings("unchecked")
    public static double[] quantile(final Dataset a, final double... values) {
        final double[] points = new double[values.length];
        QStatisticsImpl<Double> qs = (QStatisticsImpl<Double>) getQStatistics(a);
        for (int i = 0; i < points.length; i++) {
            final double q = values[i];
            if (q < 0 || q > 1) {
                throw new IllegalArgumentException("Quantile requested is outside [0,1]");
            }
            Double qv = qs.getQuantile(q);
            if (qv == null) {
                qv = pQuantile(qs.s.get(), q);
                qs.setQuantile(q, qv);
            }
            points[i] = qv;
        }

        return points;
    }

    /**
     * Calculate quantiles along an axis of a dataset, defined as the inverse of its cumulative distribution function (CDF)
     * @param a dataset
     * @param axis to compute along
     * @param values quantile values, each in [0, 1]
     * @return points at which CDF has the given values
     */
    @SuppressWarnings({ "unchecked" })
    public static Dataset[] quantile(final Dataset a, final int axis, final double... values) {
        final Dataset[] points = new Dataset[values.length];
        final int is = a.getElementsPerItem();

        if (is == 1) {
            QStatisticsImpl<Double> qs = (QStatisticsImpl<Double>) getQStatistics(a, axis);
            for (int i = 0; i < points.length; i++) {
                final double q = values[i];
                if (q < 0 || q > 1) {
                    throw new IllegalArgumentException("Quantile requested is outside [0,1]");
                }
                Dataset qv = qs.getQuantile(axis, q);
                if (qv == null) {
                    qv = pQuantile(qs.getSortedDataset(axis), axis, q);
                    qs.setQuantile(axis, q, qv);
                }
                points[i] = qv;
            }
        } else {
            QStatisticsImpl<double[]> qs = (QStatisticsImpl<double[]>) getQStatistics(a);
            Dataset w = DatasetFactory.zeros(1, a.getClass(), a.getShapeRef());
            for (int j = 0; j < is; j++) {
                boolean copied = false;

                for (int i = 0; i < points.length; i++) {
                    final double q = values[i];
                    if (q < 0 || q > 1) {
                        throw new IllegalArgumentException("Quantile requested is outside [0,1]");
                    }
                    Dataset qv = qs.getQuantile(axis, q);
                    if (qv == null) {
                        if (!copied) {
                            copied = true;
                            ((CompoundDataset) a).copyElements(w, j);
                            w.sort(axis);
                        }
                        qv = pQuantile(w, axis, q);
                        qs.setQuantile(axis, q, qv);
                        if (j == 0) {
                            points[i] = DatasetFactory.zeros(is, qv.getClass(), qv.getShapeRef());
                        }
                        ((CompoundDoubleDataset) points[i]).setElements(qv, j);
                    }
                }
            }
        }

        return points;
    }

    /**
     * @param a dataset
     * @param axis to compute along
     * @return median
     */
    public static Dataset median(final Dataset a, final int axis) {
        return getQStatistics(a, axis).getQuantile(axis, QStatisticsImpl.Q2);
    }

    /**
     * @param a dataset
     * @return median
     */
    public static Object median(final Dataset a) {
        return getQStatistics(a).getQuantile(QStatisticsImpl.Q2);
    }

    /**
     * Interquartile range: Q3 - Q1
     * @param a dataset
     * @return range
     */
    @SuppressWarnings("unchecked")
    public static Object iqr(final Dataset a) {
        final int is = a.getElementsPerItem();
        if (is == 1) {
            QStatisticsImpl<Double> qs = (QStatisticsImpl<Double>) getQStatistics(a);
            return qs.getQuantile(QStatisticsImpl.Q3) - qs.getQuantile(QStatisticsImpl.Q1);
        }

        QStatisticsImpl<double[]> qs = (QStatisticsImpl<double[]>) getQStatistics(a);
        double[] q1 = (double[]) qs.getQuantile(QStatisticsImpl.Q1);
        double[] q3 = ((double[]) qs.getQuantile(QStatisticsImpl.Q3)).clone();
        for (int j = 0; j < is; j++) {
            q3[j] -= q1[j];
        }
        return q3;
    }
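
    // Illustrative usage sketch (not part of the API): computing summary statistics with the
    // quantile, median and iqr methods above. The data values are arbitrary and
    // DatasetFactory.createFromObject is assumed to be available, as elsewhere in January.
    //
    //     Dataset d = DatasetFactory.createFromObject(new double[] {3, 1, 4, 1, 5, 9, 2, 6});
    //     double q1 = Stats.quantile(d, 0.25);              // lower quartile
    //     double[] qs = Stats.quantile(d, 0.25, 0.5, 0.75); // all three quartiles in one call
    //     Object med = Stats.median(d);                     // a Double for non-compound data
    //     Object range = Stats.iqr(d);                      // Q3 - Q1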

    /**
     * Interquartile range: Q3 - Q1
     * @param a dataset
     * @param axis to compute along
     * @return range
     */
    public static Dataset iqr(final Dataset a, final int axis) {
        QStatisticsImpl<?> qs = getQStatistics(a, axis);
        Dataset q3 = qs.getQuantile(axis, QStatisticsImpl.Q3);

        return Maths.subtract(q3, qs.getQuantile(axis, QStatisticsImpl.Q1));
    }

    static private HigherStatisticsImpl<?> getHigherStatistic(final Dataset a, boolean[] ignoreInvalids) {
        boolean ignoreNaNs = false;
        boolean ignoreInfs = false;
        if (a.hasFloatingPointElements()) {
            ignoreNaNs = ignoreInvalids != null && ignoreInvalids.length > 0 ? ignoreInvalids[0] : false;
            ignoreInfs = ignoreInvalids != null && ignoreInvalids.length > 1 ? ignoreInvalids[1] : ignoreNaNs;
        }

        HigherStatisticsImpl<?> stats = a.getFirstMetadata(HigherStatisticsImpl.class);
        if (stats == null || stats.isDirty) {
            stats = calculateHigherMoments(a, ignoreNaNs, ignoreInfs);
            a.setMetadata(stats);
        }

        return stats;
    }

    static private HigherStatisticsImpl<?> getHigherStatistic(final Dataset a, final boolean[] ignoreInvalids, int axis) {
        boolean ignoreNaNs = false;
        boolean ignoreInfs = false;
        if (a.hasFloatingPointElements()) {
            ignoreNaNs = ignoreInvalids != null && ignoreInvalids.length > 0 ? ignoreInvalids[0] : false;
            ignoreInfs = ignoreInvalids != null && ignoreInvalids.length > 1 ? ignoreInvalids[1] : ignoreNaNs;
        }

        axis = a.checkAxis(axis);

        HigherStatisticsImpl<?> stats = a.getFirstMetadata(HigherStatisticsImpl.class);
        if (stats == null || stats.getSkewness(axis) == null || stats.isDirty) {
            stats = calculateHigherMoments(a, ignoreNaNs, ignoreInfs, axis);
            a.setMetadata(stats);
        }

        return stats;
    }

    private static class HigherStatisticsImpl<T> implements MetadataType {
        private static final long serialVersionUID = -6587974784104116792L;
        T skewness;
        T kurtosis;
        transient Map<Integer, ReferencedDataset> smap = new HashMap<>();
        transient Map<Integer, ReferencedDataset> kmap = new HashMap<>();

        @Dirtiable
        private boolean isDirty = true;

        @Override
        public HigherStatisticsImpl<T> clone() {
            return new HigherStatisticsImpl<>(this);
        }

        public HigherStatisticsImpl() {
        }

        private HigherStatisticsImpl(HigherStatisticsImpl<T> hstats) {
            skewness = hstats.skewness;
            kurtosis = hstats.kurtosis;
            smap.putAll(hstats.smap);
            kmap.putAll(hstats.kmap);
            isDirty = hstats.isDirty;
        }

//        public void setSkewness(T skewness) {
//            this.skewness = skewness;
//        }
//
//        public void setKurtosis(T kurtosis) {
//            this.kurtosis = kurtosis;
//        }
//
//        public T getSkewness() {
//            return skewness;
//        }
//
//        public T getKurtosis() {
//            return kurtosis;
//        }

        public Dataset getSkewness(int axis) {
            ReferencedDataset rd = smap.get(axis);
            return rd == null ? null : rd.get();
        }

        public Dataset getKurtosis(int axis) {
            ReferencedDataset rd = kmap.get(axis);
            return rd == null ? null : rd.get();
        }

        public void setSkewness(int axis, Dataset s) {
            smap.put(axis, new ReferencedDataset(s));
        }

        public void setKurtosis(int axis, Dataset k) {
            kmap.put(axis, new ReferencedDataset(k));
        }
    }

    static private HigherStatisticsImpl<?> calculateHigherMoments(final Dataset a, final boolean ignoreNaNs, final boolean ignoreInfs) {
        final int is = a.getElementsPerItem();
        final IndexIterator iter = a.getIterator();

        if (is == 1) {
            Skewness s = new Skewness();
            Kurtosis k = new Kurtosis();
            if (ignoreNaNs) {
                while (iter.hasNext()) {
                    final double x = a.getElementDoubleAbs(iter.index);
                    if (Double.isNaN(x))
                        continue;
                    s.increment(x);
                    k.increment(x);
                }
            } else {
                while (iter.hasNext()) {
                    final double x = a.getElementDoubleAbs(iter.index);
                    s.increment(x);
                    k.increment(x);
                }
            }

            HigherStatisticsImpl<Double> stats = new HigherStatisticsImpl<Double>();
            stats.skewness = s.getResult();
            stats.kurtosis = k.getResult();
            return stats;
        }
        final Skewness[] s = new Skewness[is];
        final Kurtosis[] k = new Kurtosis[is];

        for (int j = 0; j < is; j++) {
            s[j] = new Skewness();
            k[j] = new Kurtosis();
        }
        if (ignoreNaNs) {
            while (iter.hasNext()) {
                boolean skip = false;
                for (int j = 0; j < is; j++) {
                    if (Double.isNaN(a.getElementDoubleAbs(iter.index + j))) {
                        skip = true;
                        break;
                    }
                }
                if (!skip)
                    for (int j = 0; j < is; j++) {
                        final double val = a.getElementDoubleAbs(iter.index + j);
                        s[j].increment(val);
                        k[j].increment(val);
                    }
            }
        } else {
            while (iter.hasNext()) {
                for (int j = 0; j < is; j++) {
                    final double val = a.getElementDoubleAbs(iter.index + j);
                    s[j].increment(val);
                    k[j].increment(val);
                }
            }
        }
        final double[] ts = new double[is];
        final double[] tk = new double[is];
        for (int j = 0; j < is; j++) {
            ts[j] = s[j].getResult();
            tk[j] = k[j].getResult();
        }

        HigherStatisticsImpl<double[]> stats = new HigherStatisticsImpl<double[]>();
        stats.skewness = ts;
        stats.kurtosis = tk;
        return stats;
    }

    static private HigherStatisticsImpl<?> calculateHigherMoments(final Dataset a, final boolean ignoreNaNs, final boolean ignoreInfs, final int axis) {
        final int rank = a.getRank();
        final int is = a.getElementsPerItem();
        final int[] oshape = a.getShape();
        final int alen = oshape[axis];
        oshape[axis] = 1;

        final int[] nshape = ShapeUtils.squeezeShape(oshape, false);
        final Dataset sk;
        final Dataset ku;
        HigherStatisticsImpl<?> stats = null;

        if (is == 1) {
            if (stats == null) {
                stats = new HigherStatisticsImpl<Double>();
                a.setMetadata(stats);
            }
            sk = DatasetFactory.zeros(DoubleDataset.class, nshape);
            ku = DatasetFactory.zeros(DoubleDataset.class, nshape);
            final IndexIterator qiter = sk.getIterator(true);
            final int[] qpos = qiter.getPos();
            final int[] spos = oshape;

            while (qiter.hasNext()) {
                int i = 0;
                for (; i < axis; i++) {
                    spos[i] = qpos[i];
                }
                spos[i++] = 0;
                for (; i < rank; i++) {
                    spos[i] = qpos[i - 1];
                }

                Skewness s = new Skewness();
                Kurtosis k = new Kurtosis();
                if (ignoreNaNs) {
                    for (int j = 0; j < alen; j++) {
                        spos[axis] = j;
                        final double val = a.getDouble(spos);
                        if (Double.isNaN(val))
                            continue;

                        s.increment(val);
                        k.increment(val);
                    }
                } else {
                    for (int j = 0; j < alen; j++) {
                        spos[axis] = j;
                        final double val = a.getDouble(spos);
                        s.increment(val);
                        k.increment(val);
                    }
                }
                sk.set(s.getResult(), qpos);
                ku.set(k.getResult(), qpos);
            }
        } else {
            if (stats == null) {
                stats = new HigherStatisticsImpl<double[]>();
                a.setMetadata(stats);
            }
            sk = DatasetFactory.zeros(is, CompoundDoubleDataset.class, nshape);
            ku = DatasetFactory.zeros(is, CompoundDoubleDataset.class, nshape);
            final IndexIterator qiter = sk.getIterator(true);
            final int[] qpos = qiter.getPos();
            final int[] spos = oshape;
            final Skewness[] s = new Skewness[is];
            final Kurtosis[] k = new Kurtosis[is];
            final double[] ts = new double[is];
            final double[] tk = new double[is];

            for (int j = 0; j < is; j++) {
                s[j] = new Skewness();
                k[j] = new Kurtosis();
            }
            while (qiter.hasNext()) {
                int i = 0;
                for (; i < axis; i++) {
                    spos[i] = qpos[i];
                }
                spos[i++] = 0;
                for (; i < rank; i++) {
                    spos[i] = qpos[i - 1];
                }

                for (int j = 0; j < is; j++) {
                    s[j].clear();
                    k[j].clear();
                }
                int index = a.get1DIndex(spos);
                if (ignoreNaNs) {
                    boolean skip = false;
                    for (int j = 0; j < is; j++) {
                        if (Double.isNaN(a.getElementDoubleAbs(index + j))) {
                            skip = true;
                            break;
                        }
                    }
                    if (!skip)
                        for (int j = 0; j < is; j++) {
                            final double val = a.getElementDoubleAbs(index + j);
                            s[j].increment(val);
                            k[j].increment(val);
                        }
                } else {
                    for (int j = 0; j < is; j++) {
                        final double val = a.getElementDoubleAbs(index + j);
                        s[j].increment(val);
                        k[j].increment(val);
                    }
                }
                for (int j = 0; j < is; j++) {
                    ts[j] = s[j].getResult();
                    tk[j] = k[j].getResult();
                }
                sk.set(ts, qpos);
                ku.set(tk, qpos);
            }
        }

        stats.setSkewness(axis, sk);
        stats.setKurtosis(axis, ku);
        return stats;
    }

    /**
     * @param a dataset
     * @param ignoreInvalids see {@link IDataset#max(boolean...)}
     * @return skewness
     * @since 2.0
     */
    public static Object skewness(final Dataset a, final boolean... ignoreInvalids) {
        return getHigherStatistic(a, ignoreInvalids).skewness;
    }

    /**
     * @param a dataset
     * @param ignoreInvalids see {@link IDataset#max(boolean...)}
     * @return kurtosis
     * @since 2.0
     */
    public static Object kurtosis(final Dataset a, final boolean... ignoreInvalids) {
        return getHigherStatistic(a, ignoreInvalids).kurtosis;
    }

    /**
     * @param a dataset
     * @param axis to compute along
     * @param ignoreInvalids see {@link Dataset#max(int, boolean...)}
     * @return skewness along axis in dataset
     * @since 2.0
     */
    public static Dataset skewness(final Dataset a, final int axis, final boolean... ignoreInvalids) {
        return getHigherStatistic(a, ignoreInvalids, axis).getSkewness(axis);
    }

    /**
     * @param a dataset
     * @param axis to compute along
     * @param ignoreInvalids see {@link Dataset#max(int, boolean...)}
     * @return kurtosis along axis in dataset
     * @since 2.0
     */
    public static Dataset kurtosis(final Dataset a, final int axis, final boolean... ignoreInvalids) {
        return getHigherStatistic(a, ignoreInvalids, axis).getKurtosis(axis);
    }
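
    // Illustrative sketch (not part of the API): the higher-moment methods above take the same
    // optional ignore-invalids flags as Dataset#max(boolean...); passing true ignores NaN values.
    // The data below are arbitrary example values.
    //
    //     Dataset d = DatasetFactory.createFromObject(new double[] {1, 2, 2, 3, 10, Double.NaN});
    //     Object sk = Stats.skewness(d, true);                  // skewness with NaNs ignored
    //     Object ku = Stats.kurtosis(d, true);                  // kurtosis with NaNs ignored
    //     Dataset skAxis = Stats.skewness(d.reshape(2, 3), 0);  // skewness down each column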

    /**
     * @param a dataset
     * @param ignoreInvalids see {@link IDataset#max(boolean...)}
     * @return sum of dataset
     * @since 2.0
     */
    public static Object sum(final Dataset a, final boolean... ignoreInvalids) {
        return a.sum(ignoreInvalids);
    }

    /**
     * @param a dataset
     * @param dtype dataset type to use for the result
     * @param ignoreInvalids see {@link IDataset#max(boolean...)}
     * @return typed sum of dataset
     * @since 2.0
     */
    public static Object typedSum(final Dataset a, int dtype, final boolean... ignoreInvalids) {
        if (a.isComplex()) {
            Complex m = (Complex) a.sum(ignoreInvalids);
            return m;
        } else if (a instanceof CompoundDataset) {
            return DTypeUtils.fromDoublesToBiggestPrimitives((double[]) a.sum(ignoreInvalids), dtype);
        }
        return DTypeUtils.fromDoubleToBiggestNumber(DTypeUtils.toReal(a.sum(ignoreInvalids)), dtype);
    }

    /**
     * @param a dataset
     * @param dtype dataset type to use for the result
     * @param axis to compute along
     * @param ignoreInvalids see {@link Dataset#max(int, boolean...)}
     * @return typed sum of items along axis in dataset
     * @since 2.0
     */
    public static Dataset typedSum(final Dataset a, int dtype, int axis, final boolean... ignoreInvalids) {
        return DatasetUtils.cast(a.sum(axis, ignoreInvalids), dtype);
    }

    /**
     * @param a dataset
     * @param ignoreInvalids see {@link IDataset#max(boolean...)}
     * @return product of all items in dataset
     * @since 2.0
     */
    public static Object product(final Dataset a, final boolean... ignoreInvalids) {
        return typedProduct(a, a.getDType(), ignoreInvalids);
    }

    /**
     * @param a dataset
     * @param axis to compute along
     * @param ignoreInvalids see {@link Dataset#max(int, boolean...)}
     * @return product of items along axis in dataset
     * @since 2.0
     */
    public static Dataset product(final Dataset a, final int axis, final boolean... ignoreInvalids) {
        return typedProduct(a, a.getDType(), axis, ignoreInvalids);
    }

    /**
     * @param a dataset
     * @param dtype dataset type to use for the result
     * @param ignoreInvalids see {@link IDataset#max(boolean...)}
     * @return typed product of all items in dataset
     * @since 2.0
     */
    public static Object typedProduct(final Dataset a, final int dtype, final boolean... ignoreInvalids) {
        final boolean ignoreNaNs;
        final boolean ignoreInfs;
        if (a.hasFloatingPointElements()) {
            ignoreNaNs = ignoreInvalids != null && ignoreInvalids.length > 0 ? ignoreInvalids[0] : false;
            ignoreInfs = ignoreInvalids != null && ignoreInvalids.length > 1 ? ignoreInvalids[1] : ignoreNaNs;
        } else {
            ignoreNaNs = false;
            ignoreInfs = false;
        }

        if (a.isComplex()) {
            IndexIterator it = a.getIterator();
            double rv = 1, iv = 0;

            while (it.hasNext()) {
                final double r1 = a.getElementDoubleAbs(it.index);
                final double i1 = a.getElementDoubleAbs(it.index + 1);
                if (ignoreNaNs && (Double.isNaN(r1) || Double.isNaN(i1))) {
                    continue;
                }
                if (ignoreInfs && (Double.isInfinite(r1) || Double.isInfinite(i1))) {
                    continue;
                }
                final double tv = r1*rv - i1*iv;
                iv = r1*iv + i1*rv;
                rv = tv;
                if (Double.isNaN(rv) && Double.isNaN(iv)) {
                    break;
                }
            }

            return new Complex(rv, iv);
        }

        IndexIterator it = a.getIterator();
        final int is;
        final long[] lresults;
        final double[] dresults;

        switch (dtype) {
        case Dataset.BOOL:
        case Dataset.INT8: case Dataset.INT16:
        case Dataset.INT32: case Dataset.INT64:
            long lresult = 1;
            while (it.hasNext()) {
                lresult *= a.getElementLongAbs(it.index);
            }
            return Long.valueOf(lresult);
        case Dataset.ARRAYINT8: case Dataset.ARRAYINT16:
        case Dataset.ARRAYINT32: case Dataset.ARRAYINT64:
            lresult = 1;
            is = a.getElementsPerItem();
            lresults = new long[is];
            for (int j = 0; j < is; j++) {
                lresults[j] = 1;
            }
            while (it.hasNext()) {
                for (int j = 0; j < is; j++)
                    lresults[j] *= a.getElementLongAbs(it.index + j);
            }
            return lresults;
        case Dataset.FLOAT32: case Dataset.FLOAT64:
            double dresult = 1.;
            while (it.hasNext()) {
                final double x = a.getElementDoubleAbs(it.index);
                if (ignoreNaNs && Double.isNaN(x)) {
                    continue;
                }
                if (ignoreInfs && Double.isInfinite(x)) {
                    continue;
                }
                dresult *= x;
                if (Double.isNaN(dresult)) {
                    break;
                }
            }
            return Double.valueOf(dresult);
        case Dataset.ARRAYFLOAT32:
        case Dataset.ARRAYFLOAT64:
            is = a.getElementsPerItem();
            double[] vals = new double[is];
            dresults = new double[is];
            for (int j = 0; j < is; j++) {
                dresults[j] = 1.;
            }
            while (it.hasNext()) {
                boolean okay = true;
                for (int j = 0; j < is; j++) {
                    final double val = a.getElementDoubleAbs(it.index + j);
                    if (ignoreNaNs && Double.isNaN(val)) {
                        okay = false;
                        break;
                    }
                    if (ignoreInfs && Double.isInfinite(val)) {
                        okay = false;
                        break;
                    }
                    vals[j] = val;
                }
                if (okay) {
                    for (int j = 0; j < is; j++) {
                        double val = vals[j];
                        dresults[j] *= val;
                    }
                }
            }
            return dresults;
        }

        return null;
    }
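
    // Illustrative sketch (not part of the API): product and the typedSum/typedProduct variants
    // let the caller pick the dataset type (dtype constant) used for the returned value.
    // The values below are arbitrary example data.
    //
    //     Dataset d = DatasetFactory.createFromObject(new int[] {1, 2, 3, 4});
    //     Object p = Stats.product(d);                    // 24, as a Long for 32-bit integer data
    //     Object s = Stats.typedSum(d, Dataset.FLOAT64);  // sum (10) converted to a Double
    //     Dataset ps = Stats.product(d.reshape(2, 2), 1); // per-row products as a dataset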

    /**
     * @param a dataset
     * @param dtype dataset type to use for the result
     * @param axis to compute along
     * @param ignoreInvalids see {@link Dataset#max(int, boolean...)}
     * @return typed product of items along axis in dataset
     * @since 2.0
     */
    public static Dataset typedProduct(final Dataset a, final int dtype, int axis, final boolean... ignoreInvalids) {
        axis = a.checkAxis(axis);
        final int[] oshape = a.getShape();
        final int is = a.getElementsPerItem();
        final int alen = oshape[axis];
        oshape[axis] = 1;

        final boolean ignoreNaNs;
        final boolean ignoreInfs;
        if (a.hasFloatingPointElements()) {
            ignoreNaNs = ignoreInvalids != null && ignoreInvalids.length > 0 ? ignoreInvalids[0] : false;
            ignoreInfs = ignoreInvalids != null && ignoreInvalids.length > 1 ? ignoreInvalids[1] : ignoreNaNs;
        } else {
            ignoreNaNs = false;
            ignoreInfs = false;
        }
        @SuppressWarnings("deprecation")
        Dataset result = DatasetFactory.zeros(is, oshape, dtype);

        IndexIterator qiter = result.getIterator(true);
        int[] qpos = qiter.getPos();
        int[] spos;

        // TODO add getLongArray(long[], int...) to CompoundDataset
        while (qiter.hasNext()) {
            spos = qpos.clone();

            if (a.isComplex()) {
                double rv = 1, iv = 0;
                switch (dtype) {
                case Dataset.COMPLEX64:
                    ComplexFloatDataset af = (ComplexFloatDataset) a;
                    for (int j = 0; j < alen; j++) {
                        spos[axis] = j;
                        final float r1 = af.getReal(spos);
                        final float i1 = af.getImag(spos);
                        if (ignoreNaNs && (Float.isNaN(r1) || Float.isNaN(i1))) {
                            continue;
                        }
                        if (ignoreInfs && (Float.isInfinite(r1) || Float.isInfinite(i1))) {
                            continue;
                        }
                        final double tv = r1*rv - i1*iv;
                        iv = r1*iv + i1*rv;
                        rv = tv;
                        if (Double.isNaN(rv) && Double.isNaN(iv)) {
                            break;
                        }
                    }
                    break;
                case Dataset.COMPLEX128:
                    ComplexDoubleDataset ad = (ComplexDoubleDataset) a;
                    for (int j = 0; j < alen; j++) {
                        spos[axis] = j;
                        final double r1 = ad.getReal(spos);
                        final double i1 = ad.getImag(spos);
                        if (ignoreNaNs && (Double.isNaN(r1) || Double.isNaN(i1))) {
                            continue;
                        }
                        if (ignoreInfs && (Double.isInfinite(r1) || Double.isInfinite(i1))) {
                            continue;
                        }
                        final double tv = r1*rv - i1*iv;
                        iv = r1*iv + i1*rv;
                        rv = tv;
                        if (Double.isNaN(rv) && Double.isNaN(iv)) {
                            break;
                        }
                    }
                    break;
                }

                result.set(new Complex(rv, iv), qpos);
            } else {
                final long[] lresults;
                final double[] dresults;

                switch (dtype) {
                case Dataset.BOOL:
                case Dataset.INT8: case Dataset.INT16:
                case Dataset.INT32: case Dataset.INT64:
                    long lresult = 1;
                    for (int j = 0; j < alen; j++) {
                        spos[axis] = j;
                        lresult *= a.getInt(spos);
                    }
                    result.set(lresult, qpos);
                    break;
                case Dataset.ARRAYINT8:
                    lresults = new long[is];
                    for (int k = 0; k < is; k++) {
                        lresults[k] = 1;
                    }
                    for (int j = 0; j < alen; j++) {
                        spos[axis] = j;
                        final byte[] va = (byte[]) a.getObject(spos);
                        for (int k = 0; k < is; k++) {
                            lresults[k] *= va[k];
                        }
                    }
                    result.set(lresults, qpos);
                    break;
                case Dataset.ARRAYINT16:
                    lresults = new long[is];
                    for (int k = 0; k < is; k++) {
                        lresults[k] = 1;
                    }
                    for (int j = 0; j < alen; j++) {
                        spos[axis] = j;
                        final short[] va = (short[]) a.getObject(spos);
                        for (int k = 0; k < is; k++) {
                            lresults[k] *= va[k];
                        }
                    }
                    result.set(lresults, qpos);
                    break;
                case Dataset.ARRAYINT32:
                    lresults = new long[is];
                    for (int k = 0; k < is; k++) {
                        lresults[k] = 1;
                    }
                    for (int j = 0; j < alen; j++) {
                        spos[axis] = j;
                        final int[] va = (int[]) a.getObject(spos);
                        for (int k = 0; k < is; k++) {
                            lresults[k] *= va[k];
                        }
                    }
                    result.set(lresults, qpos);
                    break;
                case Dataset.ARRAYINT64:
                    lresults = new long[is];
                    for (int k = 0; k < is; k++) {
                        lresults[k] = 1;
                    }
                    for (int j = 0; j < alen; j++) {
                        spos[axis] = j;
                        final long[] va = (long[]) a.getObject(spos);
                        for (int k = 0; k < is; k++) {
                            lresults[k] *= va[k];
                        }
                    }
                    result.set(lresults, qpos);
                    break;
                case Dataset.FLOAT32: case Dataset.FLOAT64:
                    double dresult = 1.;
                    for (int j = 0; j < alen; j++) {
                        spos[axis] = j;
                        final double x = a.getDouble(spos);
                        if (ignoreNaNs && Double.isNaN(x)) {
                            continue;
                        }
                        if (ignoreInfs && Double.isInfinite(x)) {
                            continue;
                        }
                        dresult *= x;
                        if (Double.isNaN(dresult)) {
                            break;
                        }
                    }
                    result.set(dresult, qpos);
                    break;
                case Dataset.ARRAYFLOAT32: case Dataset.ARRAYFLOAT64:
                    CompoundDataset da = (CompoundDataset) a;
                    double[] dvalues = new double[is];
                    dresults = new double[is];
                    for (int k = 0; k < is; k++) {
                        dresults[k] = 1.;
                    }
                    for (int j = 0; j < alen; j++) {
                        spos[axis] = j;
                        da.getDoubleArray(dvalues, spos);
                        boolean okay = true;
                        for (int k = 0; k < is; k++) {
                            final double val = dvalues[k];
                            if (ignoreNaNs && Double.isNaN(val)) {
                                okay = false;
                                break;
                            }
                            if (ignoreInfs && Double.isInfinite(val)) {
                                okay = false;
                                break;
                            }
                        }
                        if (okay) {
                            for (int k = 0; k < is; k++) {
                                dresults[k] *= dvalues[k];
                            }
                        }
                    }
                    result.set(dresults, qpos);
                    break;
                }
            }
        }

        result.setShape(ShapeUtils.squeezeShape(oshape, axis));
        return result;
    }

    /**
     * @param a dataset
     * @param ignoreInvalids see {@link IDataset#max(boolean...)}
     * @return cumulative product of items in flattened dataset
     * @since 2.0
     */
    public static Dataset cumulativeProduct(final Dataset a, final boolean... ignoreInvalids) {
        return cumulativeProduct(a.flatten(), 0, ignoreInvalids);
    }

    /**
     * @param a dataset
     * @param axis to compute along
     * @param ignoreInvalids see {@link Dataset#max(int, boolean...)}
     * @return cumulative product of items along axis in dataset
     * @since 2.0
     */
    public static Dataset cumulativeProduct(final Dataset a, int axis, final boolean... ignoreInvalids) {
        axis = a.checkAxis(axis);
        int dtype = a.getDType();
        int[] oshape = a.getShape();
        int alen = oshape[axis];
        oshape[axis] = 1;

        final boolean ignoreNaNs;
        final boolean ignoreInfs;
        if (a.hasFloatingPointElements()) {
            ignoreNaNs = ignoreInvalids != null && ignoreInvalids.length > 0 ? ignoreInvalids[0] : false;
            ignoreInfs = ignoreInvalids != null && ignoreInvalids.length > 1 ? ignoreInvalids[1] : ignoreNaNs;
        } else {
            ignoreNaNs = false;
            ignoreInfs = false;
        }
        Dataset result = DatasetFactory.zeros(a);
        PositionIterator pi = result.getPositionIterator(axis);

        int[] pos = pi.getPos();

        while (pi.hasNext()) {

            if (a.isComplex()) {
                double rv = 1, iv = 0;
                switch (dtype) {
                case Dataset.COMPLEX64:
                    ComplexFloatDataset af = (ComplexFloatDataset) a;
                    ComplexFloatDataset rf = (ComplexFloatDataset) result;
                    for (int j = 0; j < alen; j++) {
                        if (!Double.isNaN(rv) || !Double.isNaN(iv)) {
                            pos[axis] = j;
                            final float r1 = af.getReal(pos);
                            final float i1 = af.getImag(pos);
                            if (ignoreNaNs && (Float.isNaN(r1) || Float.isNaN(i1))) {
                                continue;
                            }
                            if (ignoreInfs && (Float.isInfinite(r1) || Float.isInfinite(i1))) {
                                continue;
                            }
                            final double tv = r1*rv - i1*iv;
                            iv = r1*iv + i1*rv;
                            rv = tv;
                        }
                        rf.set((float) rv, (float) iv, pos);
                    }
                    break;
                case Dataset.COMPLEX128:
                    ComplexDoubleDataset ad = (ComplexDoubleDataset) a;
                    ComplexDoubleDataset rd = (ComplexDoubleDataset) result;
                    for (int j = 0; j < alen; j++) {
                        if (!Double.isNaN(rv) || !Double.isNaN(iv)) {
                            pos[axis] = j;
                            final double r1 = ad.getReal(pos);
                            final double i1 = ad.getImag(pos);
                            if (ignoreNaNs && (Double.isNaN(r1) || Double.isNaN(i1))) {
                                continue;
                            }
                            if (ignoreInfs && (Double.isInfinite(r1) || Double.isInfinite(i1))) {
                                continue;
                            }
                            final double tv = r1*rv - i1*iv;
                            iv = r1*iv + i1*rv;
                            rv = tv;
                        }
                        rd.set(rv, iv, pos);
                    }
                    break;
                }
            } else {
                final int is;
                final long[] lresults;
                final double[] dresults;

                switch (dtype) {
                case Dataset.BOOL:
                case Dataset.INT8: case Dataset.INT16:
                case Dataset.INT32: case Dataset.INT64:
                    long lresult = 1;
                    for (int j = 0; j < alen; j++) {
                        pos[axis] = j;
                        lresult *= a.getInt(pos);
                        result.set(lresult, pos);
                    }
                    break;
                case Dataset.ARRAYINT8:
                    is = a.getElementsPerItem();
                    lresults = new long[is];
                    for (int k = 0; k < is; k++) {
                        lresults[k] = 1;
                    }
                    for (int j = 0; j < alen; j++) {
                        pos[axis] = j;
                        final byte[] va = (byte[]) a.getObject(pos);
                        for (int k = 0; k < is; k++) {
                            lresults[k] *= va[k];
                        }
                        result.set(lresults, pos);
                    }
                    break;
                case Dataset.ARRAYINT16:
                    is = a.getElementsPerItem();
                    lresults = new long[is];
                    for (int k = 0; k < is; k++) {
                        lresults[k] = 1;
                    }
                    for (int j = 0; j < alen; j++) {
                        pos[axis] = j;
                        final short[] va = (short[]) a.getObject(pos);
                        for (int k = 0; k < is; k++) {
                            lresults[k] *= va[k];
                        }
                        result.set(lresults, pos);
                    }
                    break;
                case Dataset.ARRAYINT32:
                    is = a.getElementsPerItem();
                    lresults = new long[is];
                    for (int k = 0; k < is; k++) {
                        lresults[k] = 1;
                    }
                    for (int j = 0; j < alen; j++) {
                        pos[axis] = j;
                        final int[] va = (int[]) a.getObject(pos);
                        for (int k = 0; k < is; k++) {
                            lresults[k] *= va[k];
                        }
                        result.set(lresults, pos);
                    }
                    break;
                case Dataset.ARRAYINT64:
                    is = a.getElementsPerItem();
                    lresults = new long[is];
                    for (int k = 0; k < is; k++) {
                        lresults[k] = 1;
                    }
                    for (int j = 0; j < alen; j++) {
                        pos[axis] = j;
                        final long[] va = (long[]) a.getObject(pos);
                        for (int k = 0; k < is; k++) {
                            lresults[k] *= va[k];
                        }
                        result.set(lresults, pos);
                    }
                    break;
                case Dataset.FLOAT32: case Dataset.FLOAT64:
                    double dresult = 1.;
                    for (int j = 0; j < alen; j++) {
                        if (!Double.isNaN(dresult)) {
                            pos[axis] = j;
                            final double x = a.getDouble(pos);
                            if (ignoreNaNs && Double.isNaN(x)) {
                                continue;
                            }
                            if (ignoreInfs && Double.isInfinite(x)) {
                                continue;
                            }
                            dresult *= x;
                        }
                        result.set(dresult, pos);
                    }
                    break;
                case Dataset.ARRAYFLOAT32: case Dataset.ARRAYFLOAT64:
                    is = a.getElementsPerItem();
                    CompoundDataset da = (CompoundDataset) a;
                    double[] dvalues = new double[is];
                    dresults = new double[is];
                    for (int k = 0; k < is; k++) {
                        dresults[k] = 1.;
                    }
                    for (int j = 0; j < alen; j++) {
                        pos[axis] = j;
                        da.getDoubleArray(dvalues, pos);
                        boolean okay = true;
                        for (int k = 0; k < is; k++) {
                            final double val = dvalues[k];
                            if (ignoreNaNs && Double.isNaN(val)) {
                                okay = false;
                                break;
                            }
                            if (ignoreInfs && Double.isInfinite(val)) {
                                okay = false;
                                break;
                            }
                        }
                        if (okay) {
                            for (int k = 0; k < is; k++) {
                                dresults[k] *= dvalues[k];
                            }
                        }
                        result.set(dresults, pos);
                    }
                    break;
                }
            }
        }

        return result;
    }
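
    // Illustrative sketch (not part of the API): cumulativeProduct returns a dataset of running
    // products, either over the flattened data or along one axis. Example data are arbitrary.
    //
    //     Dataset d = DatasetFactory.createFromObject(new double[] {1, 2, 3, 4});
    //     Dataset cp = Stats.cumulativeProduct(d);                    // [1, 2, 6, 24]
    //     Dataset cp0 = Stats.cumulativeProduct(d.reshape(2, 2), 0);  // running products down each column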

    /**
     * @param a dataset
     * @param ignoreInvalids see {@link IDataset#max(boolean...)}
     * @return cumulative sum of items in flattened dataset
     * @since 2.0
     */
    public static Dataset cumulativeSum(final Dataset a, final boolean... ignoreInvalids) {
        return cumulativeSum(a.flatten(), 0, ignoreInvalids);
    }

    /**
     * @param a dataset
     * @param axis to compute along
     * @param ignoreInvalids see {@link Dataset#max(int, boolean...)}
     * @return cumulative sum of items along axis in dataset
     * @since 2.0
     */
    public static Dataset cumulativeSum(final Dataset a, int axis, final boolean... ignoreInvalids) {
        axis = a.checkAxis(axis);
        int dtype = a.getDType();
        int[] oshape = a.getShape();
        int alen = oshape[axis];
        oshape[axis] = 1;

        final boolean ignoreNaNs;
        final boolean ignoreInfs;
        if (a.hasFloatingPointElements()) {
            ignoreNaNs = ignoreInvalids != null && ignoreInvalids.length > 0 ? ignoreInvalids[0] : false;
            ignoreInfs = ignoreInvalids != null && ignoreInvalids.length > 1 ? ignoreInvalids[1] : ignoreNaNs;
        } else {
            ignoreNaNs = false;
            ignoreInfs = false;
        }
        Dataset result = DatasetFactory.zeros(a);
        PositionIterator pi = result.getPositionIterator(axis);

        int[] pos = pi.getPos();

        while (pi.hasNext()) {

            if (a.isComplex()) {
                double rv = 0, iv = 0;
                switch (dtype) {
                case Dataset.COMPLEX64:
                    ComplexFloatDataset af = (ComplexFloatDataset) a;
                    ComplexFloatDataset rf = (ComplexFloatDataset) result;
                    for (int j = 0; j < alen; j++) {
                        if (!Double.isNaN(rv) || !Double.isNaN(iv)) {
                            pos[axis] = j;
                            final float r1 = af.getReal(pos);
                            final float i1 = af.getImag(pos);
                            if (ignoreNaNs && (Float.isNaN(r1) || Float.isNaN(i1))) {
                                continue;
                            }
                            if (ignoreInfs && (Float.isInfinite(r1) || Float.isInfinite(i1))) {
                                continue;
                            }
                            rv += r1;
                            iv += i1;
                        }
                        rf.set((float) rv, (float) iv, pos);
                    }
                    break;
                case Dataset.COMPLEX128:
                    ComplexDoubleDataset ad = (ComplexDoubleDataset) a;
                    ComplexDoubleDataset rd = (ComplexDoubleDataset) result;
                    for (int j = 0; j < alen; j++) {
                        if (!Double.isNaN(rv) || !Double.isNaN(iv)) {
                            pos[axis] = j;
                            final double r1 = ad.getReal(pos);
                            final double i1 = ad.getImag(pos);
                            if (ignoreNaNs && (Double.isNaN(r1) || Double.isNaN(i1))) {
                                continue;
                            }
                            if (ignoreInfs && (Double.isInfinite(r1) || Double.isInfinite(i1))) {
                                continue;
                            }
                            rv += r1;
                            iv += i1;
                        }
                        rd.set(rv, iv, pos);
                    }
                    break;
                }
            } else {
                final int is;
                final long[] lresults;
                final double[] dresults;

                switch (dtype) {
                case Dataset.BOOL:
                case Dataset.INT8: case Dataset.INT16:
                case Dataset.INT32: case Dataset.INT64:
                    long lresult = 0;
                    for (int j = 0; j < alen; j++) {
                        pos[axis] = j;
                        lresult += a.getInt(pos);
                        result.set(lresult, pos);
                    }
                    break;
                case Dataset.ARRAYINT8:
                    is = a.getElementsPerItem();
                    lresults = new long[is];
                    for (int j = 0; j < alen; j++) {
                        pos[axis] = j;
                        final byte[] va = (byte[]) a.getObject(pos);
                        for (int k = 0; k < is; k++) {
                            lresults[k] += va[k];
                        }
                        result.set(lresults, pos);
                    }
                    break;
                case Dataset.ARRAYINT16:
                    is = a.getElementsPerItem();
                    lresults = new long[is];
                    for (int j = 0; j < alen; j++) {
                        pos[axis] = j;
                        final short[] va = (short[]) a.getObject(pos);
                        for (int k = 0; k < is; k++) {
                            lresults[k] += va[k];
                        }
                        result.set(lresults, pos);
                    }
                    break;
                case Dataset.ARRAYINT32:
                    is = a.getElementsPerItem();
                    lresults = new long[is];
                    for (int j = 0; j < alen; j++) {
                        pos[axis] = j;
                        final int[] va = (int[]) a.getObject(pos);
                        for (int k = 0; k < is; k++) {
                            lresults[k] += va[k];
                        }
                        result.set(lresults, pos);
                    }
                    break;
                case Dataset.ARRAYINT64:
                    is = a.getElementsPerItem();
                    lresults = new long[is];
                    for (int j = 0; j < alen; j++) {
                        pos[axis] = j;
                        final long[] va = (long[]) a.getObject(pos);
                        for (int k = 0; k < is; k++) {
                            lresults[k] += va[k];
                        }
                        result.set(lresults, pos);
                    }
                    break;
                case Dataset.FLOAT32: case Dataset.FLOAT64:
                    double dresult = 0.;
                    for (int j = 0; j < alen; j++) {
                        if (!Double.isNaN(dresult)) {
                            pos[axis] = j;
                            final double x = a.getDouble(pos);
                            if (ignoreNaNs && Double.isNaN(x)) {
                                continue;
                            }
                            if (ignoreInfs && Double.isInfinite(x)) {
                                continue;
                            }
                            dresult += x;
                        }
                        result.set(dresult, pos);
                    }
                    break;
                case Dataset.ARRAYFLOAT32: case Dataset.ARRAYFLOAT64:
                    is = a.getElementsPerItem();
                    CompoundDataset da = (CompoundDataset) a;
                    double[] dvalues = new double[is];
                    dresults = new double[is];
                    for (int j = 0; j < alen; j++) {
                        pos[axis] = j;
                        da.getDoubleArray(dvalues, pos);
                        boolean okay = true;
                        for (int k = 0; k < is; k++) {
                            final double val = dvalues[k];
                            if (ignoreNaNs && Double.isNaN(val)) {
                                okay = false;
                                break;
                            }
                            if (ignoreInfs && Double.isInfinite(val)) {
                                okay = false;
                                break;
                            }
                        }
                        if (okay) {
                            for (int k = 0; k < is; k++) {
                                dresults[k] += dvalues[k];
                            }
                        }
                        result.set(dresults, pos);
                    }
                    break;
                }
            }
        }

        return result;
    }

    /**
     * @param a dataset
     * @return average deviation value of all items in the dataset
     */
    public static Object averageDeviation(final Dataset a) {
        final IndexIterator it = a.getIterator();
        final int is = a.getElementsPerItem();

        if (is == 1) {
            double mean = (Double) a.mean();
            double sum = 0.0;

            while (it.hasNext()) {
                sum += Math.abs(a.getElementDoubleAbs(it.index) - mean);
            }

            return sum / a.getSize();
        }

        double[] means = (double[]) a.mean();
        double[] sums = new double[is];

        while (it.hasNext()) {
            for (int j = 0; j < is; j++)
                sums[j] += Math.abs(a.getElementDoubleAbs(it.index + j) - means[j]);
        }

        double n = a.getSize();
        for (int j = 0; j < is; j++)
            sums[j] /= n;

        return sums;
    }

    /**
     * The residual is the sum of squared differences
     * @param a dataset
     * @param b dataset to compare against
     * @return residual value
     */
    public static double residual(final Dataset a, final Dataset b) {
        return a.residual(b);
    }

    /**
     * The weighted residual is the weighted sum of squared differences
     * @param a dataset
     * @param b dataset to compare against
     * @param w weights
     * @return residual value
     */
    public static double weightedResidual(final Dataset a, final Dataset b, final Dataset w) {
        return a.residual(b, w, false);
    }
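
    // Illustrative sketch (not part of the API): running sums and simple error measures using
    // the methods above; example data are arbitrary.
    //
    //     Dataset d = DatasetFactory.createFromObject(new double[] {1, 2, 3, 4});
    //     Dataset cs = Stats.cumulativeSum(d);                    // [1, 3, 6, 10]
    //     Object ad = Stats.averageDeviation(d);                  // mean absolute deviation from the mean
    //     double r = Stats.residual(d, DatasetFactory.zeros(d));  // sum of squares of d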

    /**
     * Calculate approximate outlier values. These are defined as the values in the dataset
     * that are approximately below and above the given thresholds - in terms of percentages
     * of dataset size.
     * <p>
     * It approximates by limiting the number of items (given by length) used internally by
     * data structures - the larger this is, the more accurate those outlier values become.
     * The actual thresholds used are returned in the array.
     * <p>
     * Also, the low and high values will be made distinct if possible by adjusting the thresholds.
     * @param a dataset
     * @param lo percentage threshold for lower limit
     * @param hi percentage threshold for higher limit
     * @param length maximum number of items used internally; if negative, then unlimited
     * @return double array with low and high values, and low and high percentage thresholds
     */
    public static double[] outlierValues(final Dataset a, double lo, double hi, final int length) {
        if (lo <= 0 || hi <= 0 || lo >= hi || hi >= 100 || Double.isNaN(lo) || Double.isNaN(hi)) {
            throw new IllegalArgumentException("Thresholds must be between (0,100) and in order");
        }
        final int size = a.getSize();
        int nl = Math.max((int) ((lo*size)/100), 1);
        if (length > 0 && nl > length)
            nl = length;
        int nh = Math.max((int) (((100-hi)*size)/100), 1);
        if (length > 0 && nh > length)
            nh = length;

        double[] results = Math.max(nl, nh) > 640 ? outlierValuesMap(a, nl, nh) : outlierValuesList(a, nl, nh);

        results[2] = results[2]*100./size;
        results[3] = 100. - results[3]*100./size;
        return results;
    }
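
    // Illustrative sketch (not part of the API): asking for the values that bound roughly the
    // lowest 1% and highest 1% of a dataset; the returned array also reports the percentage
    // thresholds actually achieved. DatasetFactory.createRange is assumed available; data are arbitrary.
    //
    //     Dataset d = DatasetFactory.createRange(1000);
    //     double[] ov = Stats.outlierValues(d, 1, 99, -1);
    //     // ov[0], ov[1] are the low and high outlier values;
    //     // ov[2], ov[3] are the low and high percentage thresholds actually used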

    static double[] outlierValuesMap(final Dataset a, int nl, int nh) {
        final TreeMap<Double, Integer> lMap = new TreeMap<Double, Integer>();
        final TreeMap<Double, Integer> hMap = new TreeMap<Double, Integer>();

        int ml = 0;
        int mh = 0;
        IndexIterator it = a.getIterator();
        while (it.hasNext()) {
            Double x = a.getElementDoubleAbs(it.index);
            Integer i;
            if (ml == nl) {
                Double k = lMap.lastKey();
                if (x < k) {
                    i = lMap.get(k) - 1;
                    if (i == 0) {
                        lMap.remove(k);
                    } else {
                        lMap.put(k, i);
                    }
                    i = lMap.get(x);
                    if (i == null) {
                        lMap.put(x, 1);
                    } else {
                        lMap.put(x, i + 1);
                    }
                }
            } else {
                i = lMap.get(x);
                if (i == null) {
                    lMap.put(x, 1);
                } else {
                    lMap.put(x, i + 1);
                }
                ml++;
            }

            if (mh == nh) {
                Double k = hMap.firstKey();
                if (x > k) {
                    i = hMap.get(k) - 1;
                    if (i == 0) {
                        hMap.remove(k);
                    } else {
                        hMap.put(k, i);
                    }
                    i = hMap.get(x);
                    if (i == null) {
                        hMap.put(x, 1);
                    } else {
                        hMap.put(x, i + 1);
                    }
                }
            } else {
                i = hMap.get(x);
                if (i == null) {
                    hMap.put(x, 1);
                } else {
                    hMap.put(x, i + 1);
                }
                mh++;
            }
        }

        // Attempt to make values distinct
        double lx = lMap.lastKey();
        double hx = hMap.firstKey();
        if (lx >= hx) {
            Double h = hMap.higherKey(lx);
            if (h != null) {
                hx = h;
                mh--;
            } else {
                Double l = lMap.lowerKey(hx);
                if (l != null) {
                    lx = l;
                    ml--;
                }
            }
        }
        return new double[] {lx, hx, ml, mh};
    }

    static double[] outlierValuesList(final Dataset a, int nl, int nh) {
        final List<Double> lList = new ArrayList<Double>(nl);
        final List<Double> hList = new ArrayList<Double>(nh);
//        final List<Double> lList = new LinkedList<Double>();
//        final List<Double> hList = new LinkedList<Double>();

        double lx = Double.POSITIVE_INFINITY;
        double hx = Double.NEGATIVE_INFINITY;

        IndexIterator it = a.getIterator();
        while (it.hasNext()) {
            double x = a.getElementDoubleAbs(it.index);
            if (x < lx) {
                if (lList.size() == nl) {
                    lList.remove(lx);
                }
                lList.add(x);
                lx = Collections.max(lList);
            } else if (x == lx) {
                if (lList.size() < nl) {
                    lList.add(x);
                }
            }

            if (x > hx) {
                if (hList.size() == nh) {
                    hList.remove(hx);
                }
                hList.add(x);
                hx = Collections.min(hList);
            } else if (x == hx) {
                if (hList.size() < nh) {
                    hList.add(x);
                }
            }
        }

        nl = lList.size();
        nh = hList.size();

        // Attempt to make values distinct
        if (lx >= hx) {
            Collections.sort(hList);
            for (double h : hList) {
                if (h > hx) {
                    hx = h;
                    break;
                }
                nh--;
            }
            if (lx >= hx) {
                Collections.sort(lList);
                Collections.reverse(lList);
                for (double l : lList) {
                    if (l < lx) {
                        lx = l;
                        break;
                    }
                    nl--;
                }
            }
        }
        return new double[] {lx, hx, nl, nh};
    }

    /**
     * See {@link #covariance(Dataset a, Dataset b, boolean rowvar, boolean bias, Integer ddof)} with b = null, rowvar = true, bias = false and ddof = null.
     * @param a dataset
     * @return covariance array of a
     */
    public static Dataset covariance(final Dataset a) {
        return covariance(a, true, false, null);
    }

    /**
     * See {@link #covariance(Dataset a, Dataset b, boolean rowvar, boolean bias, Integer ddof)} with b = null.
     * @param a dataset
     * @param rowvar when true, each row is a variable; when false, each column is a variable
     * @param bias if true, normalise by N rather than (N - 1)
     * @param ddof if not null, normalise by (N - ddof)
     * @return covariance array of a
     * @since 2.0
     */
    public static Dataset covariance(final Dataset a,
            boolean rowvar, boolean bias, Integer ddof) {
        return covariance(a, null, rowvar, bias, ddof);
    }

    /**
     * See {@link #covariance(Dataset a, Dataset b, boolean rowvar, boolean bias, Integer ddof)} with rowvar = true, bias = false and ddof = null.
     * @param a dataset
     * @param b an extra set of variables and observations
     * @return covariance array of a concatenated with b
     */
    public static Dataset covariance(final Dataset a, final Dataset b) {
        return covariance(a, b, true, false, null);
    }
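
    // Illustrative sketch (not part of the API): covariance of two variables held as rows of a
    // 2 x N dataset, mirroring numpy's cov; the full overload below exposes rowvar, bias and ddof.
    // Example data are arbitrary.
    //
    //     Dataset xy = DatasetFactory.createFromObject(new double[][] {{1, 2, 3, 4}, {2, 4, 6, 8}});
    //     Dataset cov = Stats.covariance(xy);  // 2 x 2 covariance matrix
    //     double cxy = cov.getDouble(0, 1);    // covariance of the two rows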

    /**
     * Calculate the covariance matrix (array) of a concatenated with b. This
     * method is directly based on the implementation in numpy (cov).
     * @param a array containing multiple variables and observations. Each row represents a variable, each column an observation.
     * @param b an extra set of variables and observations. Must be of the same type as a and have a compatible shape.
     * @param rowvar when true (default), each row is a variable; when false, each column is a variable.
     * @param bias default normalisation is by (N - 1), where N is the number of observations. If set true, normalisation is by N.
     * @param ddof default normalisation is (N - 1). If ddof is set, then normalisation is (N - ddof).
     * @return covariance array of a concatenated with b
     * @since 2.0
     */
    public static Dataset covariance(final Dataset a, final Dataset b,
            boolean rowvar, boolean bias, Integer ddof) {

        // Create a working copy of the dataset & check its rank.
        Dataset vars = a.clone();
        if (a.getRank() == 1) {
            vars.setShape(1, a.getShape()[0]);
        }

        // 1D of variables, so consider rows as variables
        if (vars.getShape()[0] == 1) {
            rowvar = true;
        }

        // nr is the number of records; axis is index
        int nr, axis;
        if (rowvar) {
            nr = vars.getShape()[1];
            axis = 0;
        } else {
            nr = vars.getShape()[0];
            axis = 1;
        }

        // Set the reduced degrees of freedom & normalisation factor
        if (ddof == null) {
            if (bias == false) {
                ddof = 1;
            } else {
                ddof = 0;
            }
        }
        double norm_fact = nr - ddof;
        if (norm_fact <= 0.) {
            // TODO Some sort of warning here?
            norm_fact = 0.;
        }

        // Concatenate additional set of variables with main set
        if (b != null) {
            // Create a working copy of the extra dataset & check its rank.
            Dataset extraVars = b.clone();
            if (b.getRank() == 1) {
                extraVars.setShape(1, b.getShape()[0]);
            }
            vars = DatasetUtils.concatenate(new Dataset[] {vars, extraVars}, axis);
        }

        // Calculate deviations & covariance matrix
        Dataset varsMean = vars.mean(1 - axis, false);
        // vars & varsMean need the same shape (this is a hack!)
        int[] meanShape = vars.getShape();
        meanShape[1 - axis] = 1;
        varsMean.setShape(meanShape);
        vars.isubtract(varsMean);
        Dataset cov;
        if (rowvar) {
            cov = Maths.divide(LinearAlgebra.dotProduct(vars, Maths.conjugate(vars.transpose())), norm_fact).squeeze();
        } else {
            cov = Maths.divide(LinearAlgebra.dotProduct(vars.transpose(), Maths.conjugate(vars)), norm_fact).squeeze();
        }
        return cov;
    }
}