rdbstats  2.0.9
RdbStatsPercentiles.cc
1 // File: RdbStatsPercentiles.cc
2 
3 // --8<--8<--8<--8<--
4 //
5 // Copyright (C) 2006 Smithsonian Astrophysical Observatory
6 //
7 // This file is part of rdbstats
8 //
9 // rdbstats is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU General Public License
11 // as published by the Free Software Foundation; either version 2
12 // of the License, or (at your option) any later version.
13 //
14 // rdbstats is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 // GNU General Public License for more details.
18 //
19 // You should have received a copy of the GNU General Public License
20 // along with this program; if not, write to the
21 // Free Software Foundation, Inc.
22 // 51 Franklin Street, Fifth Floor
23 // Boston, MA 02110-1301, USA
24 //
25 // -->8-->8-->8-->8--
26 
27 #include <sstream>
28 #include <algorithm>
29 
30 #include <suplibxx/str.h>
31 
32 #include "RdbStatsPercentiles.h"
33 
34 // The user specified percentiles. For example: --percentiles 12,34,45
35 RdbStatsPercentiles::RdbStatsPercentiles( RDB& irdbtable, const std::string& name,
36  const PercentileList& percentiles )
37  : RdbStats( irdbtable, name ), the_median( 0.0 ), the_percentile( percentiles )
38 {}
39 
40 // --quartiles was entered at the command line.
41 RdbStatsPercentiles::RdbStatsPercentiles( RDB& irdbtable, const std::string& name )
42 : RdbStats( irdbtable, name ), the_median( 0.0 ) {
43 
44  // the_percentile.emplace_back( "_fq", 25.0 );
45  // the_percentile.emplace_back( "_lq", 75.0 );
46 }
47 
48 double RdbStatsPercentiles::calculate_median( ) {
49 
50  std::vector< double >::iterator begin_ptr = data.begin( );
51 
52  size_t num_size = data.size( );
53  size_t num_size_div = num_size / 2;
54 
55  nth_element( begin_ptr, begin_ptr + num_size_div, data.end( ) );
56 
57  if ( num_size & 0001 ) {
58 
59  // num_size is odd:
60 
61  return data[ num_size_div ];
62 
63  } else {
64 
65  // num_size is even:
66 
67  double tmp = data[ num_size_div ];
68 
69  //
70  // element beyond num_size_div is guarranteed
71  // to be > then element at data[ num_size_div ].
72  //
73  nth_element( begin_ptr, begin_ptr + num_size_div - 1,
74  begin_ptr + num_size_div );
75  tmp += data[ num_size_div - 1 ];
76 
77  return 0.5 * tmp;
78 
79  }
80 }
81 
82 double RdbStatsPercentiles::calculate_percentile( const double percentile ) {
83 
84  std::vector< double >::iterator begin_ptr = data.begin( );
85 
86  size_t n = data.size( );
87  double f = percentile / 100.0;
88  int i = ( ( n - 1.0 ) * f );
89  double delta = ( n - 1.0 ) * f - i;
90 
91  nth_element( begin_ptr, begin_ptr + i + 1, data.end( ) );
92  double data_i_1 = data[ i + 1];
93 
94  nth_element( begin_ptr, begin_ptr + i, data.end( ) );
95  double data_i = data[ i ];
96 
97  double quantile = ( 1.0 - delta ) * data_i + delta * data_i_1;
98 
99  /*
100  cerr << "n: " << n << endl
101  << "f: " << f << endl
102  << "i: " << i << endl
103  << "delta: " << delta << endl
104  << "data_i: " << data_i << endl
105  << "data_i_1: " << data_i_1 << endl
106  << "quantile: " << quantile << endl << endl;
107  */
108  return quantile;
109 }
110 
112 
113  if ( 0 == this->RdbStats::calculate_statistics( ) )
114  return 0;
115 
116  the_median = calculate_median( );
117 
118  for ( size_t ii = 0; ii < the_percentile.size( ); ii++ )
119  the_percentile_result[ ii ] =
120  calculate_percentile( the_percentile[ ii ].percentile );
121 
122  return num_n;
123 }
124 
125 void RdbStatsPercentiles::init( ) {
126  this->RdbStats::init( );
127 
128  // Since we are, possibly, starting a new group so wipe out data.
129  data.clear( );
130 }
131 
132 void RdbStatsPercentiles::normalize_results( const double norm ) {
133 
134  this->RdbStats::normalize_results( norm );
135  for ( size_t ii = 0; ii < the_percentile.size( ); ii++ )
136  the_percentile_result[ ii ] /= norm;
137 
138 }
139 
140 void RdbStatsPercentiles::set_output_columns( RDB& ordbtable ) {
141 
142  this->RdbStats::set_output_columns( ordbtable );
143 
144  const int mysize( the_percentile.size( ) );
145  the_percentile_result.reserve( mysize );
146 
147  const char* colname_ptr = colname.c_str( );
148 
149  char str[ 256 ];
150  sprintf( str, "%s_median", colname_ptr );
151  ordbtable.setColumn( str, "N" );
152  RDBColumn* tmp = ordbtable.getColumn( str );
153  tmp->mapData( &the_median, 1 );
154 
155  for ( int ii = 0; ii < mysize; ii++ ) {
156 
157  std::ostringstream ost;
158  ost << colname_ptr << the_percentile[ ii ].colname ;
159  ordbtable.setColumn( ost.str( ), "N" );
160  RDBColumn* ptr = ordbtable.getColumn( ost.str( ) );
161  ptr->mapData( &the_percentile_result[ii], 1 );
162 
163  }
164 }
165 
168 
169  data.push_back( get_value( ) );
170 }
The base class to calculate : average, maximum, minimum, num, stddev and sum.
Definition: RdbStats.h:61
virtual void update_statistics()
Read the column from RDB++, update the statistics for the column.
virtual void update_statistics()
Read the column from RDB++, update the statistics for the column.
Definition: RdbStats.cc:129
RdbStatsPercentiles(RDB &irdbtable, const std::string &name, const PercentileList &percentiles)
--percentiles 12,34,... and --quartiles
virtual int calculate_statistics()
Perform the final statistic for the set.
virtual int calculate_statistics()
Perform the final statistic for the set.
Definition: RdbStats.cc:49