rdbstats  2.0.9
main.cc
1 // File: rdbstats.cc
2 
3 // --8<--8<--8<--8<--
4 //
5 // Copyright (C) 2006 Smithsonian Astrophysical Observatory
6 //
7 // This file is part of rdbstats
8 //
9 // rdbstats is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU General Public License
11 // as published by the Free Software Foundation; either version 2
12 // of the License, or (at your option) any later version.
13 //
14 // rdbstats is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 // GNU General Public License for more details.
18 //
19 // You should have received a copy of the GNU General Public License
20 // along with this program; if not, write to the
21 // Free Software Foundation, Inc.
22 // 51 Franklin Street, Fifth Floor
23 // Boston, MA 02110-1301, USA
24 //
25 // -->8-->8-->8-->8--
26 
27 #include <iostream>
28 
29 #include <rdbxx/RDB.h>
30 #include <Exception/Exception.h>
31 
32 #include "Options.h"
33 #include "RdbStats.h"
34 #include "RdbStatsAve.h"
35 #include "RdbStatsPercentiles.h"
36 #include "RdbStatsPercentilesAve.h"
37 #include "RdbStatsPercentilesMed.h"
38 #include "Row.h"
39 #include "SelectedCols.h"
40 #include "StatsResult.h"
41 #include "config.h"
42 
46 template<class Type>
47 void rdbstats( RDB& irdbtable, Type& rdb_stats_columns,
48  const std::vector< std::pair< long, long > >& ranges, RDB& ordbtable, bool grouped ) {
49 
50  StatsResult< Type > stats_result( rdb_stats_columns, ordbtable );
51 
52  int status = RDB::REOL;
53 
54  int line_number = 1;
55  for( const auto& range : ranges ) {
56 
57  // Skip the un-requested rows.
58  // At the end of this loop, line_number == range[ ii ].first
59  while( line_number < range.first &&
60  RDB::REOF != (status = irdbtable.read( ))
61  )
62  ++line_number;
63 
64  if ( status == RDB::REOF )
65  break;
66 
67  // loop through the requested range of rows.
68  for ( ; line_number <= range.second; ++line_number ) {
69  if ( RDB::REOF == (status = irdbtable.read( )) ) {
70  if ( ! grouped )
71  stats_result.calculate_statistics( );
72  return;
73  }
74 
75  stats_result.update_statistics( );
76 
77  if ( RDB::REOG & status )
78  stats_result.calculate_statistics( );
79  // ordbtable.autoIdx( false );
80  }
81  }
82 
83  stats_result.calculate_statistics( );
84 }
85 
89 void rdbstats( Options& options ) {
90 
91  /*
92  * get the struct of options from the parser class so that you can
93  * access the option values.
94  */
95  // The input stream shall be the standard in
96  RDB irdbtable(
97  [&](){ return
98  options.input.empty()
99  ? RDB( &std::cin )
100  : RDB( options.input );
101  }()
102  );
103  // The output stream shall be the standard out
104  // ostream* os = &cout;
105  // os->precision( 15 );
106  RDB ordbtable(
107  [&](){ return
108  options.output.empty()
109  ? RDB( &std::cout )
110  : RDB( options.output, std::ios::out );
111  }()
112  );
113  ordbtable.setComment( irdbtable );
114 
115  // if the all-columns option has been selected, clear any selected
116  // on the command line
117  if ( options.all )
118  options.include_exact.clear();
119 
120  SelectedCols selected_cols( irdbtable,
121  options.include_exact,
122  options.include_re,
123  options.exclude_exact,
124  options.exclude_re,
125  options.group, options.override_defn );
126 
127  const std::vector< std::string >& colnames = selected_cols.get_selected_cols( );
128 
129  if ( colnames.empty() )
130  throw Exception( "no columns were selected or specified" );
131 
132  // Set the group.
133  bool grouped = ! options.group.empty();
134  for ( const auto& colname : options.group ) {
135  ordbtable.setColumn( irdbtable.getColumn( colname ) );
136  irdbtable.setGroup( colname );
137  }
138 
139  Row row( options.rows );
140 
141  if ( ! options.percentiles.empty() ) {
142 
143  switch( options.normalize ) {
144  case Normalize::Average:
145  {
146  std::vector< RdbStatsPercentilesAve > rdb_stats_columns;
147 
148  for ( const auto& colname : colnames )
149  rdb_stats_columns.emplace_back( irdbtable, colname, options.percentiles );
150  rdbstats( irdbtable, rdb_stats_columns, row.get_range( ), ordbtable, grouped );
151  }
152  break;
153  case Normalize::Median:
154  {
155  std::vector< RdbStatsPercentilesMed > rdb_stats_columns;
156  for ( const auto& colname : colnames )
157  rdb_stats_columns.emplace_back( irdbtable, colname, options.percentiles );
158  rdbstats( irdbtable, rdb_stats_columns, row.get_range( ), ordbtable, grouped );
159  }
160  break;
161  case Normalize::None:
162  {
163  std::vector< RdbStatsPercentiles > rdb_stats_columns;
164  for ( const auto& colname : colnames )
165  rdb_stats_columns.emplace_back( irdbtable, colname, options.percentiles );
166  rdbstats( irdbtable, rdb_stats_columns, row.get_range( ), ordbtable, grouped );
167  }
168  break;
169  }
170 
171  } else {
172 
173  switch( options.normalize ) {
174  case Normalize::Average:
175  {
176  std::vector< RdbStatsAve > rdb_stats_columns;
177  for ( const auto& colname : colnames )
178  rdb_stats_columns.emplace_back( irdbtable, colname );
179  rdbstats( irdbtable, rdb_stats_columns, row.get_range( ), ordbtable, grouped );
180  }
181  break;
182  case Normalize::Median:
183  {
184  std::vector< RdbStatsPercentilesMed > rdb_stats_columns;
185  for ( const auto& colname : colnames )
186  rdb_stats_columns.emplace_back( irdbtable, colname );
187  rdbstats( irdbtable, rdb_stats_columns, row.get_range( ), ordbtable, grouped );
188  }
189  break;
190  case Normalize::None:
191  {
192  std::vector< RdbStats > rdb_stats_columns;
193  for ( const auto& colname : colnames )
194  rdb_stats_columns.emplace_back( irdbtable, colname );
195  rdbstats( irdbtable, rdb_stats_columns, row.get_range( ), ordbtable, grouped );
196  }
197  break;
198  }
199  }
200 }
201 
202 int main( int argc, char* argv[] ) {
203 
204 #ifdef TRACEFCT
205  TraceFct tf( argv[0], 1, -1 );
206 #endif
207 
208  try {
209 
210  Options options( argc, argv );
211 
212  if ( options.exit_requested )
213  return options.exit_value;
214 
215  rdbstats( options );
216 
217  return EXIT_SUCCESS;
218 
219  } catch( Exception& E ) {
220 
221  std::cerr << E << std::endl;
222  return EXIT_FAILURE;
223 
224  } catch( std::exception &e ) {
225 
226  std::cerr << e.what() << std::endl;
227  return EXIT_FAILURE;
228 
229  } catch ( ... ) {
230 
231  std::cerr << "Unknown exception caught" << std::endl;
232 
233  return EXIT_FAILURE;
234  }
235 
236  return 0;
237 }
A class that determines the names of the columns for which the statistics are to be calculated.
Definition: SelectedCols.h:48
A container class to hold the relevant data for the rdb data columns.
Definition: StatsResult.h:36