You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

168 lines
5.8 KiB
C++

/*=========================================================================
Program: Visualization Toolkit
Module: vtkDescriptiveStatistics.h
Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen
All rights reserved.
See Copyright.txt or http://www.kitware.com/Copyright.htm for details.
This software is distributed WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the above copyright notice for more information.
=========================================================================*/
/*-------------------------------------------------------------------------
Copyright 2010 Sandia Corporation.
Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
the U.S. Government retains certain rights in this software.
-------------------------------------------------------------------------*/
/**
* @class vtkDescriptiveStatistics
* @brief A class for univariate descriptive statistics
*
*
* Given a selection of columns of interest in an input data table, this
* class provides the following functionalities, depending on the chosen
* execution options:
* * Learn: calculate extremal values, sample mean, and M2, M3, and M4 aggregates
* (cf. P. Pebay, Formulas for robust, one-pass parallel computation of covariances
* and Arbitrary-Order Statistical Moments, Sandia Report SAND2008-6212, Sep 2008,
* http://infoserve.sandia.gov/sand_doc/2008/086212.pdf for details)
* * Derive: calculate unbiased variance estimator, standard deviation estimator,
* two skewness estimators, and two kurtosis excess estimators.
* * Assess: given an input data set, a reference value and a non-negative deviation,
* mark each datum with corresponding relative deviation (1-dimensional Mahlanobis
* distance). If the deviation is zero, then mark each datum which are equal to the
* reference value with 0, and all others with 1. By default, the reference value
* and the deviation are, respectively, the mean and the standard deviation of the
* input model.
* * Test: calculate Jarque-Bera statistic and, if VTK to R interface is available,
* retrieve corresponding p-value for normality testing.
*
* @par Thanks:
* Thanks to Philippe Pebay and David Thompson from Sandia National Laboratories
* for implementing this class.
* Updated by Philippe Pebay, Kitware SAS 2012
*/
#ifndef vtkDescriptiveStatistics_h
#define vtkDescriptiveStatistics_h
#include "vtkFiltersStatisticsModule.h" // For export macro
#include "vtkStatisticsAlgorithm.h"
class vtkMultiBlockDataSet;
class vtkStringArray;
class vtkTable;
class vtkVariant;
class vtkDoubleArray;
class VTKFILTERSSTATISTICS_EXPORT vtkDescriptiveStatistics : public vtkStatisticsAlgorithm
{
public:
vtkTypeMacro(vtkDescriptiveStatistics, vtkStatisticsAlgorithm);
void PrintSelf(ostream& os, vtkIndent indent) override;
static vtkDescriptiveStatistics* New();
//@{
/**
* Set/get whether the unbiased estimator for the variance should be used, or if
* the population variance will be calculated.
* The default is that the unbiased estimator will be used.
*/
vtkSetMacro(UnbiasedVariance, vtkTypeBool);
vtkGetMacro(UnbiasedVariance, vtkTypeBool);
vtkBooleanMacro(UnbiasedVariance, vtkTypeBool);
//@}
//@{
/**
* Set/get whether the G1 estimator for the skewness should be used, or if
* the g1 skewness will be calculated.
* The default is that the g1 skewness estimator will be used.
*/
vtkSetMacro(G1Skewness, vtkTypeBool);
vtkGetMacro(G1Skewness, vtkTypeBool);
vtkBooleanMacro(G1Skewness, vtkTypeBool);
//@}
//@{
/**
* Set/get whether the G2 estimator for the kurtosis should be used, or if
* the g2 kurtosis will be calculated.
* The default is that the g2 kurtosis estimator will be used.
*/
vtkSetMacro(G2Kurtosis, vtkTypeBool);
vtkGetMacro(G2Kurtosis, vtkTypeBool);
vtkBooleanMacro(G2Kurtosis, vtkTypeBool);
//@}
//@{
/**
* Set/get whether the deviations returned should be signed, or should
* only have their magnitude reported.
* The default is that signed deviations will be computed.
*/
vtkSetMacro(SignedDeviations, vtkTypeBool);
vtkGetMacro(SignedDeviations, vtkTypeBool);
vtkBooleanMacro(SignedDeviations, vtkTypeBool);
//@}
/**
* Given a collection of models, calculate aggregate model
*/
void Aggregate(vtkDataObjectCollection*, vtkMultiBlockDataSet*) override;
protected:
vtkDescriptiveStatistics();
~vtkDescriptiveStatistics() override;
/**
* Execute the calculations required by the Learn option, given some input Data
* NB: input parameters are unused.
*/
void Learn(vtkTable*, vtkTable*, vtkMultiBlockDataSet*) override;
/**
* Execute the calculations required by the Derive option.
*/
void Derive(vtkMultiBlockDataSet*) override;
/**
* Execute the calculations required by the Test option.
*/
void Test(vtkTable*, vtkMultiBlockDataSet*, vtkTable*) override;
/**
* Execute the calculations required by the Assess option.
*/
void Assess(vtkTable* inData, vtkMultiBlockDataSet* inMeta, vtkTable* outData) override
{
this->Superclass::Assess(inData, inMeta, outData, 1);
}
/**
* Calculate p-value. This will be overridden using the object factory with an
* R implementation if R is present.
*/
virtual vtkDoubleArray* CalculatePValues(vtkDoubleArray*);
/**
* Provide the appropriate assessment functor.
*/
void SelectAssessFunctor(vtkTable* outData, vtkDataObject* inMeta, vtkStringArray* rowNames,
AssessFunctor*& dfunc) override;
vtkTypeBool UnbiasedVariance;
vtkTypeBool G1Skewness;
vtkTypeBool G2Kurtosis;
vtkTypeBool SignedDeviations;
private:
vtkDescriptiveStatistics(const vtkDescriptiveStatistics&) = delete;
void operator=(const vtkDescriptiveStatistics&) = delete;
};
#endif