rdbstats  2.0.9
RdbStats.cc
1 // File: RdbStats.cc
2 
3 // --8<--8<--8<--8<--
4 //
5 // Copyright (C) 2006 Smithsonian Astrophysical Observatory
6 //
7 // This file is part of rdbstats
8 //
9 // rdbstats is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU General Public License
11 // as published by the Free Software Foundation; either version 2
12 // of the License, or (at your option) any later version.
13 //
14 // rdbstats is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 // GNU General Public License for more details.
18 //
19 // You should have received a copy of the GNU General Public License
20 // along with this program; if not, write to the
21 // Free Software Foundation, Inc.
22 // 51 Franklin Street, Fifth Floor
23 // Boston, MA 02110-1301, USA
24 //
25 // -->8-->8-->8-->8--
26 
27 #include "RdbStats.h"
28 
29 RdbStats::~RdbStats( ) {
30 
31  // if ( output_stats ) {
32  // for ( int ii = 0; output_stats[ ii ] != NULL; ii++ )
33  // delete output_stats[ ii ];
34  // delete [] output_stats;
35  // output_stats = NULL;
36  // }
37 
38  // Note, input_column is not to be delete since RDB++ lib will do it.
39 
40 }
41 
42 
43 RdbStats::RdbStats( RDB& irdbtable, const std::string& name )
44  : input_column( NULL ), output_stats( ), num_n( 0 ), colname( name ) {
45  init( );
46  input_column = (RDBColumn*) irdbtable.getColumn( name );
47 }
48 
50 
51  if ( 0 == num_n )
52  return 0;
53 
54  if ( 1 == num_n )
55  the_statistics[ SD ] = 0.0;
56  else
57  the_statistics[ SD ] = sqrt( the_statistics[ SUM_T ] / ( num_n - 1 ) );
58 
59  the_statistics[ SUM2 ] = sqrt( the_statistics[ SUM2 ] );
60 
61  return num_n;
62 
63 }
64 
65 double RdbStats::get_value( ) {
66  return input_column->getDataDouble( );
67 }
68 
69 #define ARRAY_SIZE(arr)(sizeof(arr)/sizeof((arr)[0]))
70 
71 void RdbStats::init( ) {
72 
73  num_n = 0;
74 
75  for ( int ii = 0; ii < ARRAY_SIZE( the_statistics ); ii++ )
76  the_statistics[ ii ] = 0.0;
77 
78  the_statistics[ MAX ] = -DBL_MAX;
79  the_statistics[ MIN ] = DBL_MAX;
80 
81  // purposedly leave input_column alone.
82 
83 }
84 
85 #undef ARRAY_SIZE
86 
87 void RdbStats::normalize_results( const double norm ) {
88 
89  the_statistics[ SD ] /= norm;
90  the_statistics[ MIN ] /= norm;
91  the_statistics[ MAX ] /= norm;
92  the_statistics[ SUM2 ] /= norm;
93 
94 }
95 
96 void RdbStats::set_output_columns( RDB& ordbtable ) {
97 
98  static const char* suffix[] = { "_n", "_ave", "_max", "_min", "_dev",
99  "_sum", "_rss", NULL };
100 
101  const char* colname_ptr = colname.c_str( );
102 
103  size_t counter( 0 );
104  while ( suffix[ counter ] )
105  ++counter;
106  // + 1 to allocate an extra for the NULL sentinel.
107  output_stats.reserve( counter + 1 );
108 
109  char str[ 256 ];
110  sprintf( str, "%s%s", colname_ptr, suffix[ 0 ] );
111  ordbtable.setColumn( str, "N" );
112  output_stats[ 0 ] = ordbtable.getColumn( ordbtable.nColumns( ) - 1 );
113  output_stats[ 0 ]->mapData( &num_n, 1 );
114  for ( int ii = 1; ii < counter ; ii++ ) {
115 
116  int ii_1 ( ii - 1 );
117 
118  sprintf( str, "%s%s", colname_ptr, suffix[ ii ] );
119  ordbtable.setColumn( str, "N" );
120  output_stats[ ii_1 ] = ordbtable.getColumn( ordbtable.nColumns( ) - 1 ) ;
121  output_stats[ ii_1 ]->mapData( the_statistics + ii_1, 1 );
122 
123  // Make sure that the sentinel is set to NULL
124  output_stats[ ii ] = NULL;
125 
126  }
127 }
128 
130 
131  double val = get_value( );
132 
133  // only use value if it isn't NaN. This is a simple check for NaN,
134  // based upon Net Lore.
135 
136  if ( val == val ) {
137 
138  ++num_n;
139 
140  if ( 1 == num_n )
141  the_statistics[ AVG ] = val;
142  else {
143 
144  // Algorithm taken from the paper titled:
145  // Updating Mean and Variances Estimates: An Improved Method
146  // by D.H.D. West
147  // Communications of the ACM
148  // September 1979 Vol 22 Number 9
149  double tmp = ( val - the_statistics[ AVG ] );
150  the_statistics[ SUM_T ] += tmp * tmp * ( num_n - 1 ) / num_n;
151  the_statistics[ AVG ] += tmp / num_n;
152 
153  }
154 
155  the_statistics[ SUM2 ] += val * val;
156  the_statistics[ SUM ] += val;
157  the_statistics[ MAX ] = std::max( the_statistics[ MAX ], val );
158  the_statistics[ MIN ] = std::min( the_statistics[ MIN ], val );
159 
160  }
161 }
162 
163 std::string to_string( Normalize normalize ) {
164  return
165  normalize == Normalize::Average ? "average"
166  : normalize == Normalize::Median ? "median"
167  : normalize == Normalize::None ? "none"
168  : "UNKNOWN";
169 }
170 
171 std::string to_string( OverrideType override_defn ) {
172  return
173  override_defn == OverrideType::Toggle ? "toggle"
174  : override_defn == OverrideType::String ? "string"
175  : override_defn == OverrideType::Number ? "number"
176  : "UNKNOWN";
177 }
178 
179 std::string to_string( Percentile p) {
180  return std::to_string( p.percentile ) + '(' + p.colname + ')';
181 }
182 
183 std::string to_string( DefOverride d) {
184  return d.colname + '(' + to_string( d.type ) + ')';
185 }
186 
// Identity overload: a string is already its own textual form.  Lets
// generic code call to_string( ) uniformly on every supported type.
std::string to_string( std::string str ) {
  return str;
}
virtual void update_statistics()
Read the column from RDB++, update the statistics for the column.
Definition: RdbStats.cc:129
virtual int calculate_statistics()
Perform the final statistic for the set.
Definition: RdbStats.cc:49